Diffstat (limited to 'drivers/cpufreq')
63 files changed, 1637 insertions, 1658 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index d64b07ec48e5..78702a08364f 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -217,6 +217,18 @@ config CPUFREQ_DT
 
 	  If in doubt, say N.
 
+config CPUFREQ_DT_RUST
+	tristate "Rust based Generic DT based cpufreq driver"
+	depends on HAVE_CLK && OF && RUST
+	select CPUFREQ_DT_PLATDEV
+	select PM_OPP
+	help
+	  This adds a Rust based generic DT based cpufreq driver for frequency
+	  management. It supports both uniprocessor (UP) and symmetric
+	  multiprocessor (SMP) systems.
+
+	  If in doubt, say N.
+
 config CPUFREQ_VIRT
 	tristate "Virtual cpufreq driver"
 	depends on GENERIC_ARCH_TOPOLOGY
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 4f9cb943d945..0d46402e3094 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ
 config ARM_BRCMSTB_AVS_CPUFREQ
 	tristate "Broadcom STB AVS CPUfreq driver"
 	depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || COMPILE_TEST
-	default y
+	default y if ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ
 	help
 	  Some Broadcom STB SoCs use a co-processor running proprietary firmware
 	  ("AVS") to handle voltage and frequency scaling. This driver provides
@@ -88,7 +88,7 @@ config ARM_HIGHBANK_CPUFREQ
 	tristate "Calxeda Highbank-based"
 	depends on ARCH_HIGHBANK || COMPILE_TEST
 	depends on CPUFREQ_DT && REGULATOR && PL320_MBOX
-	default m
+	default m if ARCH_HIGHBANK
 	help
 	  This adds the CPUFreq driver for Calxeda Highbank SoC
 	  based boards.
@@ -133,7 +133,7 @@ config ARM_MEDIATEK_CPUFREQ
 config ARM_MEDIATEK_CPUFREQ_HW
 	tristate "MediaTek CPUFreq HW driver"
 	depends on ARCH_MEDIATEK || COMPILE_TEST
-	default m
+	default m if ARCH_MEDIATEK
 	help
 	  Support for the CPUFreq HW driver.
 	  Some MediaTek chipsets have a HW engine to offload the steps
@@ -181,7 +181,7 @@ config ARM_RASPBERRYPI_CPUFREQ
 config ARM_S3C64XX_CPUFREQ
 	bool "Samsung S3C64XX"
 	depends on CPU_S3C6410 || COMPILE_TEST
-	default y
+	default CPU_S3C6410
 	help
 	  This adds the CPUFreq driver for Samsung S3C6410 SoC.
 
@@ -190,7 +190,7 @@ config ARM_S5PV210_CPUFREQ
 	bool "Samsung S5PV210 and S5PC110"
 	depends on CPU_S5PV210 || COMPILE_TEST
-	default y
+	default CPU_S5PV210
 	help
 	  This adds the CPUFreq driver for Samsung S5PV210 and S5PC110
 	  SoCs.
 
@@ -214,7 +214,7 @@ config ARM_SCMI_CPUFREQ
 config ARM_SPEAR_CPUFREQ
 	bool "SPEAr CPUFreq support"
 	depends on PLAT_SPEAR || COMPILE_TEST
-	default y
+	default PLAT_SPEAR
 	help
 	  This adds the CPUFreq driver support for SPEAr SOCs.
 
@@ -233,7 +233,7 @@ config ARM_TEGRA20_CPUFREQ
 	tristate "Tegra20/30 CPUFreq support"
 	depends on ARCH_TEGRA || COMPILE_TEST
 	depends on CPUFREQ_DT
-	default y
+	default ARCH_TEGRA
 	help
 	  This adds the CPUFreq driver support for Tegra20/30 SOCs.
 
@@ -241,7 +241,7 @@ config ARM_TEGRA124_CPUFREQ
 	bool "Tegra124 CPUFreq support"
 	depends on ARCH_TEGRA || COMPILE_TEST
 	depends on CPUFREQ_DT
-	default y
+	default ARCH_TEGRA
 	help
 	  This adds the CPUFreq driver support for Tegra124 SOCs.
 
@@ -256,14 +256,14 @@ config ARM_TEGRA194_CPUFREQ
 	tristate "Tegra194 CPUFreq support"
 	depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST)
 	depends on TEGRA_BPMP
-	default y
+	default ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC
 	help
 	  This adds CPU frequency driver support for Tegra194 SOCs.
 
 config ARM_TI_CPUFREQ
 	bool "Texas Instruments CPUFreq support"
 	depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST
-	default y
+	default ARCH_OMAP2PLUS || ARCH_K3
 	help
 	  This driver enables valid OPPs on the running platform based on
 	  values contained within the SoC in use. Enable this in order to
diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc
index eb678fa5260a..551e65d35a1d 100644
--- a/drivers/cpufreq/Kconfig.powerpc
+++ b/drivers/cpufreq/Kconfig.powerpc
@@ -1,22 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config CPU_FREQ_CBE
-	tristate "CBE frequency scaling"
-	depends on CBE_RAS && PPC_CELL
-	default m
-	help
-	  This adds the cpufreq driver for Cell BE processors.
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-	  If you don't have such processor, say N
-
-config CPU_FREQ_CBE_PMI
-	bool "CBE frequency scaling using PMI interface"
-	depends on CPU_FREQ_CBE
-	default n
-	help
-	  Select this, if you want to use the PMI interface to switch
-	  frequencies. Using PMI, the processor will not only be able to run at
-	  lower speed, but also at lower core voltage.
-
 config CPU_FREQ_PMAC
 	bool "Support for Apple PowerBooks"
 	depends on ADB_PMU && PPC32
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 97c2d4f15d76..2c5c228408bf 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -340,3 +340,15 @@ config X86_SPEEDSTEP_RELAXED_CAP_CHECK
 	  option lets the probing code bypass some of those checks if the
 	  parameter "relaxed_check=1" is passed to the module.
 
+config CPUFREQ_ARCH_CUR_FREQ
+	default y
+	bool "Current frequency derived from HW provided feedback"
+	help
+	  This determines whether the scaling_cur_freq sysfs attribute returns
+	  the last requested frequency or a more precise value based on hardware
+	  provided feedback (as architected counters).
+	  Given that a more precise frequency can now be provided via the
+	  cpuinfo_avg_freq attribute, by enabling this option,
+	  scaling_cur_freq maintains the provision of a counter based frequency,
+	  for compatibility reasons.
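[Editor's note: the CPUFREQ_ARCH_CUR_FREQ help text above refers to deriving the reported frequency from hardware feedback counters rather than from the last requested frequency. The snippet below is only an illustrative, self-contained sketch of the usual APERF/MPERF-style calculation (reference frequency scaled by the ratio of the two counter deltas); all names are hypothetical and it is not taken from the kernel's implementation.]

/*
 * Illustrative sketch only (hypothetical names, not kernel code): APERF
 * counts cycles at the actual clock and MPERF at a fixed reference clock,
 * so the average running frequency over a sampling window is
 *
 *     avg_freq = ref_freq * delta_aperf / delta_mperf
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t counter_freq_khz(uint64_t ref_khz,
				 uint64_t d_aperf, uint64_t d_mperf)
{
	if (!d_mperf)			/* no reference cycles elapsed */
		return ref_khz;

	return ref_khz * d_aperf / d_mperf;
}

int main(void)
{
	/* Example: 2.0 GHz reference clock, core ran 25% faster on average. */
	printf("%llu kHz\n",
	       (unsigned long long)counter_freq_khz(2000000, 1250000, 1000000));
	return 0;
}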
+ diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 890fff99f37d..681d687b5a18 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -15,11 +15,13 @@ obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o +obj-$(CONFIG_CPUFREQ_DT_RUST) += rcpufreq_dt.o obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o obj-$(CONFIG_CPUFREQ_VIRT) += virtual-cpufreq.o # Traces CFLAGS_amd-pstate-trace.o := -I$(src) +CFLAGS_powernv-cpufreq.o := -I$(src) amd_pstate-y := amd-pstate.o amd-pstate-trace.o ################################################################################## @@ -91,9 +93,6 @@ obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o ################################################################################## # PowerPC platform drivers -obj-$(CONFIG_CPU_FREQ_CBE) += ppc-cbe-cpufreq.o -ppc-cbe-cpufreq-y += ppc_cbe_cpufreq_pervasive.o ppc_cbe_cpufreq.o -obj-$(CONFIG_CPU_FREQ_CBE_PMI) += ppc_cbe_cpufreq_pmi.o obj-$(CONFIG_QORIQ_CPUFREQ) += qoriq-cpufreq.o obj-$(CONFIG_CPU_FREQ_PMAC) += pmac32-cpufreq.o obj-$(CONFIG_CPU_FREQ_PMAC64) += pmac64-cpufreq.o diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 463b69a2dff5..4f7f9201598d 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -79,11 +79,11 @@ static bool boost_state(unsigned int cpu) case X86_VENDOR_INTEL: case X86_VENDOR_CENTAUR: case X86_VENDOR_ZHAOXIN: - rdmsrl_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &msr); + rdmsrq_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &msr); return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); case X86_VENDOR_HYGON: case X86_VENDOR_AMD: - rdmsrl_on_cpu(cpu, MSR_K7_HWCR, &msr); + rdmsrq_on_cpu(cpu, MSR_K7_HWCR, &msr); return !(msr & MSR_K7_HWCR_CPB_DIS); } return false; @@ -110,14 +110,14 @@ static int boost_set_msr(bool enable) return -EINVAL; } - rdmsrl(msr_addr, val); + rdmsrq(msr_addr, val); if (enable) val &= ~msr_mask; else val |= msr_mask; - wrmsrl(msr_addr, val); + wrmsrq(msr_addr, val); return 0; } @@ -660,7 +660,7 @@ static u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq) nominal_perf = perf_caps.nominal_perf; if (nominal_freq) - *nominal_freq = perf_caps.nominal_freq; + *nominal_freq = perf_caps.nominal_freq * 1000; if (!highest_perf || !nominal_perf) { pr_debug("CPU%d: highest or nominal performance missing\n", cpu); @@ -909,6 +909,20 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); + if (acpi_cpufreq_driver.set_boost) { + if (policy->boost_supported) { + /* + * The firmware may have altered boost state while the + * CPU was offline (for example during a suspend-resume + * cycle). 
+ */ + if (policy->boost_enabled != boost_state(cpu)) + set_boost(policy, policy->boost_enabled); + } else { + policy->boost_supported = true; + } + } + return result; err_unreg: @@ -949,7 +963,6 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) } static struct freq_attr *acpi_cpufreq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, &freqdomain_cpus, #ifdef CONFIG_X86_ACPI_CPUFREQ_CPB &cpb, diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index f457d4af2c62..32e1bdc588c5 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -90,7 +90,8 @@ TRACE_EVENT(amd_pstate_epp_perf, u8 epp, u8 min_perf, u8 max_perf, - bool boost + bool boost, + bool changed ), TP_ARGS(cpu_id, @@ -98,7 +99,8 @@ TRACE_EVENT(amd_pstate_epp_perf, epp, min_perf, max_perf, - boost), + boost, + changed), TP_STRUCT__entry( __field(unsigned int, cpu_id) @@ -107,6 +109,7 @@ TRACE_EVENT(amd_pstate_epp_perf, __field(u8, min_perf) __field(u8, max_perf) __field(bool, boost) + __field(bool, changed) ), TP_fast_assign( @@ -116,15 +119,17 @@ TRACE_EVENT(amd_pstate_epp_perf, __entry->min_perf = min_perf; __entry->max_perf = max_perf; __entry->boost = boost; + __entry->changed = changed; ), - TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u", + TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u, changed=%u", (unsigned int)__entry->cpu_id, (u8)__entry->min_perf, (u8)__entry->max_perf, (u8)__entry->highest_perf, (u8)__entry->epp, - (bool)__entry->boost + (bool)__entry->boost, + (bool)__entry->changed ) ); diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 3a0a380c3590..447b9aa5ce40 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -22,39 +22,33 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/bitfield.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/fs.h> +#include <linux/cleanup.h> #include <acpi/cppc_acpi.h> +#include <asm/msr.h> + #include "amd-pstate.h" -/* - * Abbreviations: - * amd_pstate_ut: used as a shortform for AMD P-State unit test. - * It helps to keep variable names smaller, simpler - */ -enum amd_pstate_ut_result { - AMD_PSTATE_UT_RESULT_PASS, - AMD_PSTATE_UT_RESULT_FAIL, -}; struct amd_pstate_ut_struct { const char *name; - void (*func)(u32 index); - enum amd_pstate_ut_result result; + int (*func)(u32 index); }; /* * Kernel module for testing the AMD P-State unit test */ -static void amd_pstate_ut_acpi_cpc_valid(u32 index); -static void amd_pstate_ut_check_enabled(u32 index); -static void amd_pstate_ut_check_perf(u32 index); -static void amd_pstate_ut_check_freq(u32 index); -static void amd_pstate_ut_check_driver(u32 index); +static int amd_pstate_ut_acpi_cpc_valid(u32 index); +static int amd_pstate_ut_check_enabled(u32 index); +static int amd_pstate_ut_check_perf(u32 index); +static int amd_pstate_ut_check_freq(u32 index); +static int amd_pstate_ut_check_driver(u32 index); static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = { {"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid }, @@ -77,71 +71,67 @@ static bool get_shared_mem(void) /* * check the _CPC object is present in SBIOS. 
*/ -static void amd_pstate_ut_acpi_cpc_valid(u32 index) +static int amd_pstate_ut_acpi_cpc_valid(u32 index) { - if (acpi_cpc_valid()) - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + if (!acpi_cpc_valid()) { pr_err("%s the _CPC object is not present in SBIOS!\n", __func__); + return -EINVAL; } + + return 0; } -static void amd_pstate_ut_pstate_enable(u32 index) +/* + * check if amd pstate is enabled + */ +static int amd_pstate_ut_check_enabled(u32 index) { - int ret = 0; u64 cppc_enable = 0; + int ret; - ret = rdmsrl_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable); + if (get_shared_mem()) + return 0; + + ret = rdmsrq_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable); if (ret) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s rdmsrl_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret); - return; + pr_err("%s rdmsrq_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret); + return ret; } - if (cppc_enable) - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + + if (!cppc_enable) { pr_err("%s amd pstate must be enabled!\n", __func__); + return -EINVAL; } -} -/* - * check if amd pstate is enabled - */ -static void amd_pstate_ut_check_enabled(u32 index) -{ - if (get_shared_mem()) - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - else - amd_pstate_ut_pstate_enable(index); + return 0; } /* * check if performance values are reasonable. * highest_perf >= nominal_perf > lowest_nonlinear_perf > lowest_perf > 0 */ -static void amd_pstate_ut_check_perf(u32 index) +static int amd_pstate_ut_check_perf(u32 index) { int cpu = 0, ret = 0; u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0; u64 cap1 = 0; struct cppc_perf_caps cppc_perf; - struct cpufreq_policy *policy = NULL; - struct amd_cpudata *cpudata = NULL; + union perf_cached cur_perf; + + for_each_online_cpu(cpu) { + struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; + struct amd_cpudata *cpudata; - for_each_possible_cpu(cpu) { policy = cpufreq_cpu_get(cpu); if (!policy) - break; + continue; cpudata = policy->driver_data; if (get_shared_mem()) { ret = cppc_get_perf_caps(cpu, &cppc_perf); if (ret) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret); - goto skip_test; + return ret; } highest_perf = cppc_perf.highest_perf; @@ -149,52 +139,46 @@ static void amd_pstate_ut_check_perf(u32 index) lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; lowest_perf = cppc_perf.lowest_perf; } else { - ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); + ret = rdmsrq_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); if (ret) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret); - goto skip_test; + return ret; } - highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); - nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); - lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1); - lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); + highest_perf = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1); + nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1); + lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1); + lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); } - if (highest_perf != READ_ONCE(cpudata->highest_perf) && !cpudata->hw_prefcore) { + cur_perf = 
READ_ONCE(cpudata->perf); + if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) { pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n", - __func__, cpu, highest_perf, cpudata->highest_perf); - goto skip_test; + __func__, cpu, highest_perf, cur_perf.highest_perf); + return -EINVAL; } - if ((nominal_perf != READ_ONCE(cpudata->nominal_perf)) || - (lowest_nonlinear_perf != READ_ONCE(cpudata->lowest_nonlinear_perf)) || - (lowest_perf != READ_ONCE(cpudata->lowest_perf))) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + if (nominal_perf != cur_perf.nominal_perf || + (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) || + (lowest_perf != cur_perf.lowest_perf)) { pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n", - __func__, cpu, nominal_perf, cpudata->nominal_perf, - lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf, - lowest_perf, cpudata->lowest_perf); - goto skip_test; + __func__, cpu, nominal_perf, cur_perf.nominal_perf, + lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf, + lowest_perf, cur_perf.lowest_perf); + return -EINVAL; } if (!((highest_perf >= nominal_perf) && (nominal_perf > lowest_nonlinear_perf) && - (lowest_nonlinear_perf > lowest_perf) && + (lowest_nonlinear_perf >= lowest_perf) && (lowest_perf > 0))) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n", __func__, cpu, highest_perf, nominal_perf, lowest_nonlinear_perf, lowest_perf); - goto skip_test; + return -EINVAL; } - cpufreq_cpu_put(policy); } - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - return; -skip_test: - cpufreq_cpu_put(policy); + return 0; } /* @@ -202,59 +186,50 @@ skip_test: * max_freq >= nominal_freq > lowest_nonlinear_freq > min_freq > 0 * check max freq when set support boost mode. 
*/ -static void amd_pstate_ut_check_freq(u32 index) +static int amd_pstate_ut_check_freq(u32 index) { int cpu = 0; - struct cpufreq_policy *policy = NULL; - struct amd_cpudata *cpudata = NULL; - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { + struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; + struct amd_cpudata *cpudata; + policy = cpufreq_cpu_get(cpu); if (!policy) - break; + continue; cpudata = policy->driver_data; - if (!((cpudata->max_freq >= cpudata->nominal_freq) && + if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) && (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && - (cpudata->lowest_nonlinear_freq > cpudata->min_freq) && - (cpudata->min_freq > 0))) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + (cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) && + (policy->cpuinfo.min_freq > 0))) { pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", - __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, - cpudata->lowest_nonlinear_freq, cpudata->min_freq); - goto skip_test; + __func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq, + cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq); + return -EINVAL; } if (cpudata->lowest_nonlinear_freq != policy->min) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n", __func__, cpu, cpudata->lowest_nonlinear_freq, policy->min); - goto skip_test; + return -EINVAL; } if (cpudata->boost_supported) { - if ((policy->max == cpudata->max_freq) || - (policy->max == cpudata->nominal_freq)) - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + if ((policy->max != policy->cpuinfo.max_freq) && + (policy->max != cpudata->nominal_freq)) { pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", - __func__, cpu, policy->max, cpudata->max_freq, + __func__, cpu, policy->max, policy->cpuinfo.max_freq, cpudata->nominal_freq); - goto skip_test; + return -EINVAL; } } else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d must support boost!\n", __func__, cpu); - goto skip_test; + return -EINVAL; } - cpufreq_cpu_put(policy); } - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - return; -skip_test: - cpufreq_cpu_put(policy); + return 0; } static int amd_pstate_set_mode(enum amd_pstate_mode mode) @@ -266,15 +241,16 @@ static int amd_pstate_set_mode(enum amd_pstate_mode mode) return amd_pstate_update_status(mode_str, strlen(mode_str)); } -static void amd_pstate_ut_check_driver(u32 index) +static int amd_pstate_ut_check_driver(u32 index) { enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE; + enum amd_pstate_mode orig_mode = amd_pstate_get_status(); int ret; for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) { ret = amd_pstate_set_mode(mode1); if (ret) - goto out; + return ret; for (mode2 = AMD_PSTATE_DISABLE; mode2 < AMD_PSTATE_MAX; mode2++) { if (mode1 == mode2) continue; @@ -283,15 +259,15 @@ static void amd_pstate_ut_check_driver(u32 index) goto out; } } + out: if (ret) pr_warn("%s: failed to update status for %s->%s: %d\n", __func__, amd_pstate_get_mode_string(mode1), amd_pstate_get_mode_string(mode2), ret); - amd_pstate_ut_cases[index].result = ret ? 
- AMD_PSTATE_UT_RESULT_FAIL : - AMD_PSTATE_UT_RESULT_PASS; + amd_pstate_set_mode(orig_mode); + return ret; } static int __init amd_pstate_ut_init(void) @@ -299,16 +275,12 @@ static int __init amd_pstate_ut_init(void) u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases); for (i = 0; i < arr_size; i++) { - amd_pstate_ut_cases[i].func(i); - switch (amd_pstate_ut_cases[i].result) { - case AMD_PSTATE_UT_RESULT_PASS: + int ret = amd_pstate_ut_cases[i].func(i); + + if (ret) + pr_err("%-4d %-20s\t fail: %d!\n", i+1, amd_pstate_ut_cases[i].name, ret); + else pr_info("%-4d %-20s\t success!\n", i+1, amd_pstate_ut_cases[i].name); - break; - case AMD_PSTATE_UT_RESULT_FAIL: - default: - pr_info("%-4d %-20s\t fail!\n", i+1, amd_pstate_ut_cases[i].name); - break; - } } return 0; diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1b26845703f6..f3477ab37742 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -85,15 +85,9 @@ static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; -static bool cppc_enabled; static bool amd_pstate_prefcore = true; static struct quirk_entry *quirks; -#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) -#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) -#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) -#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) - /* * AMD Energy Preference Performance (EPP) * The EPP is used in the CCLK DPM controller to drive @@ -142,6 +136,19 @@ static struct quirk_entry quirk_amd_7k62 = { .lowest_freq = 550, }; +static inline u8 freq_to_perf(union perf_cached perf, u32 nominal_freq, unsigned int freq_val) +{ + u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * perf.nominal_perf, nominal_freq); + + return (u8)clamp(perf_val, perf.lowest_perf, perf.highest_perf); +} + +static inline u32 perf_to_freq(union perf_cached perf, u32 nominal_freq, u8 perf_val) +{ + return DIV_ROUND_UP_ULL((u64)nominal_freq * perf_val, + perf.nominal_perf); +} + static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) { /** @@ -183,7 +190,6 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) return -EINVAL; } -static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); static u8 msr_get_epp(struct amd_cpudata *cpudata) @@ -191,7 +197,7 @@ static u8 msr_get_epp(struct amd_cpudata *cpudata) u64 value; int ret; - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); + ret = rdmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); if (ret < 0) { pr_debug("Could not retrieve energy perf value (%d)\n", ret); return ret; @@ -221,9 +227,10 @@ static u8 shmem_get_epp(struct amd_cpudata *cpudata) return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp); } -static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf, +static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { + struct amd_cpudata *cpudata = policy->driver_data; u64 value, prev; value = prev = READ_ONCE(cpudata->cppc_req_cached); @@ -235,38 +242,50 @@ static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf, value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = READ_ONCE(cpudata->perf); + + trace_amd_pstate_epp_perf(cpudata->cpu, + perf.highest_perf, + epp, + min_perf, + 
max_perf, + policy->boost_enabled, + value != prev); + } + if (value == prev) return 0; if (fast_switch) { - wrmsrl(MSR_AMD_CPPC_REQ, value); + wrmsrq(MSR_AMD_CPPC_REQ, value); return 0; } else { - int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + int ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); if (ret) return ret; } WRITE_ONCE(cpudata->cppc_req_cached, value); - WRITE_ONCE(cpudata->epp_cached, epp); return 0; } DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); -static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, +static inline int amd_pstate_update_perf(struct cpufreq_policy *policy, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { - return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, + return static_call(amd_pstate_update_perf)(policy, min_perf, des_perf, max_perf, epp, fast_switch); } -static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) +static int msr_set_epp(struct cpufreq_policy *policy, u8 epp) { + struct amd_cpudata *cpudata = policy->driver_data; u64 value, prev; int ret; @@ -274,17 +293,29 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) value &= ~AMD_CPPC_EPP_PERF_MASK; value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = cpudata->perf; + + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, + epp, + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, + cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, + cpudata->cppc_req_cached), + policy->boost_enabled, + value != prev); + } + if (value == prev) return 0; - ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + ret = wrmsrq_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); if (ret) { pr_err("failed to set energy perf value (%d)\n", ret); return ret; } /* update both so that msr_update_perf() can effectively check */ - WRITE_ONCE(cpudata->epp_cached, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; @@ -292,17 +323,35 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); -static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u8 epp) +static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp) { - return static_call(amd_pstate_set_epp)(cpudata, epp); + return static_call(amd_pstate_set_epp)(policy, epp); } -static int shmem_set_epp(struct amd_cpudata *cpudata, u8 epp) +static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) { - int ret; + struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; + u8 epp_cached; + u64 value; + int ret; + + + epp_cached = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = cpudata->perf; + + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, + epp, + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, + cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, + cpudata->cppc_req_cached), + policy->boost_enabled, + epp != epp_cached); + } - if (epp == cpudata->epp_cached) + if (epp == epp_cached) return 0; perf_ctrls.energy_perf = epp; @@ -311,109 +360,39 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u8 epp) pr_debug("failed to set energy perf value (%d)\n", ret); return ret; } - WRITE_ONCE(cpudata->epp_cached, epp); + + value = READ_ONCE(cpudata->cppc_req_cached); + value &= ~AMD_CPPC_EPP_PERF_MASK; + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + 
WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; } -static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, - int pref_index) +static inline int msr_cppc_enable(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - u8 epp; - - if (!pref_index) - epp = cpudata->epp_default; - else - epp = epp_values[pref_index]; - - if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { - pr_debug("EPP cannot be set under performance policy\n"); - return -EBUSY; - } - - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, - epp, - FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), - FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), - policy->boost_enabled); - } - - return amd_pstate_set_epp(cpudata, epp); + return wrmsrq_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1); } -static inline int msr_cppc_enable(bool enable) +static int shmem_cppc_enable(struct cpufreq_policy *policy) { - int ret, cpu; - unsigned long logical_proc_id_mask = 0; - - /* - * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared. - */ - if (!enable) - return 0; - - if (enable == cppc_enabled) - return 0; - - for_each_present_cpu(cpu) { - unsigned long logical_id = topology_logical_package_id(cpu); - - if (test_bit(logical_id, &logical_proc_id_mask)) - continue; - - set_bit(logical_id, &logical_proc_id_mask); - - ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE, - enable); - if (ret) - return ret; - } - - cppc_enabled = enable; - return 0; -} - -static int shmem_cppc_enable(bool enable) -{ - int cpu, ret = 0; - struct cppc_perf_ctrls perf_ctrls; - - if (enable == cppc_enabled) - return 0; - - for_each_present_cpu(cpu) { - ret = cppc_set_enable(cpu, enable); - if (ret) - return ret; - - /* Enable autonomous mode for EPP */ - if (cppc_state == AMD_PSTATE_ACTIVE) { - /* Set desired perf as zero to allow EPP firmware control */ - perf_ctrls.desired_perf = 0; - ret = cppc_set_perf(cpu, &perf_ctrls); - if (ret) - return ret; - } - } - - cppc_enabled = enable; - return ret; + return cppc_set_enable(policy->cpu, 1); } DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable); -static inline int amd_pstate_cppc_enable(bool enable) +static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy) { - return static_call(amd_pstate_cppc_enable)(enable); + return static_call(amd_pstate_cppc_enable)(policy); } static int msr_init_perf(struct amd_cpudata *cpudata) { - u64 cap1, numerator; + union perf_cached perf = READ_ONCE(cpudata->perf); + u64 cap1, numerator, cppc_req; + u8 min_perf; - int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, + int ret = rdmsrq_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &cap1); if (ret) return ret; @@ -422,20 +401,40 @@ static int msr_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - WRITE_ONCE(cpudata->highest_perf, numerator); - WRITE_ONCE(cpudata->max_limit_perf, numerator); - WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); - WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); - WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); - WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); - WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req); + if (ret) + return ret; + + WRITE_ONCE(cpudata->cppc_req_cached, cppc_req); + min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req); + + /* + * Clear 
out the min_perf part to check if the rest of the MSR is 0, if yes, this is an + * indication that the min_perf value is the one specified through the BIOS option + */ + cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK); + + if (!cppc_req) + perf.bios_min_perf = min_perf; + + perf.highest_perf = numerator; + perf.max_limit_perf = numerator; + perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); + perf.nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1); + perf.lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1); + perf.lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); + WRITE_ONCE(cpudata->perf, perf); + WRITE_ONCE(cpudata->prefcore_ranking, FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1)); + return 0; } static int shmem_init_perf(struct amd_cpudata *cpudata) { struct cppc_perf_caps cppc_perf; + union perf_cached perf = READ_ONCE(cpudata->perf); u64 numerator; + bool auto_sel; int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) @@ -445,19 +444,19 @@ static int shmem_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - WRITE_ONCE(cpudata->highest_perf, numerator); - WRITE_ONCE(cpudata->max_limit_perf, numerator); - WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); - WRITE_ONCE(cpudata->lowest_nonlinear_perf, - cppc_perf.lowest_nonlinear_perf); - WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + perf.highest_perf = numerator; + perf.max_limit_perf = numerator; + perf.min_limit_perf = cppc_perf.lowest_perf; + perf.nominal_perf = cppc_perf.nominal_perf; + perf.lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; + perf.lowest_perf = cppc_perf.lowest_perf; + WRITE_ONCE(cpudata->perf, perf); WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); - WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) return 0; - ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf); + ret = cppc_get_auto_sel(cpudata->cpu, &auto_sel); if (ret) { pr_warn("failed to get auto_sel, ret: %d\n", ret); return 0; @@ -479,23 +478,56 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } -static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, +static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { + struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; + u64 value, prev; + int ret; if (cppc_state == AMD_PSTATE_ACTIVE) { - int ret = shmem_set_epp(cpudata, epp); + int ret = shmem_set_epp(policy, epp); if (ret) return ret; } + value = prev = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = READ_ONCE(cpudata->perf); + + trace_amd_pstate_epp_perf(cpudata->cpu, + perf.highest_perf, + epp, + min_perf, + max_perf, + policy->boost_enabled, + value != prev); + } + + if (value == prev) + return 0; + perf_ctrls.max_perf = max_perf; perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; - return cppc_set_perf(cpudata->cpu, &perf_ctrls); + ret = cppc_set_perf(cpudata->cpu, &perf_ctrls); + if (ret) + return ret; + + 
WRITE_ONCE(cpudata->cppc_req_cached, value); + + return 0; } static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) @@ -504,8 +536,8 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) unsigned long flags; local_irq_save(flags); - rdmsrl(MSR_IA32_APERF, aperf); - rdmsrl(MSR_IA32_MPERF, mperf); + rdmsrq(MSR_IA32_APERF, aperf); + rdmsrq(MSR_IA32_MPERF, mperf); tsc = rdtsc(); if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) { @@ -534,106 +566,103 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) { - unsigned long max_freq; - struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); - u8 nominal_perf = READ_ONCE(cpudata->nominal_perf); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); + union perf_cached perf = READ_ONCE(cpudata->perf); if (!policy) return; + /* limit the max perf when core performance boost feature is disabled */ + if (!cpudata->boost_supported) + max_perf = min_t(u8, perf.nominal_perf, max_perf); + des_perf = clamp_t(u8, des_perf, min_perf, max_perf); - max_freq = READ_ONCE(cpudata->max_limit_freq); - policy->cur = div_u64(des_perf * max_freq, max_perf); + policy->cur = perf_to_freq(perf, cpudata->nominal_freq, des_perf); if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { min_perf = des_perf; des_perf = 0; } - /* limit the max perf when core performance boost feature is disabled */ - if (!cpudata->boost_supported) - max_perf = min_t(u8, nominal_perf, max_perf); - if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, cpudata->cpu, fast_switch); } - amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch); - - cpufreq_cpu_put(policy); + amd_pstate_update_perf(policy, min_perf, des_perf, max_perf, 0, fast_switch); } static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) { /* * Initialize lower frequency limit (i.e.policy->min) with - * lowest_nonlinear_frequency which is the most energy efficient - * frequency. Override the initial value set by cpufreq core and - * amd-pstate qos_requests. + * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS, + * Override the initial value set by cpufreq core and amd-pstate qos_requests. 
*/ if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { - struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = + cpufreq_cpu_get(policy_data->cpu); struct amd_cpudata *cpudata; + union perf_cached perf; if (!policy) return -EINVAL; cpudata = policy->driver_data; - policy_data->min = cpudata->lowest_nonlinear_freq; - cpufreq_cpu_put(policy); + perf = READ_ONCE(cpudata->perf); + + if (perf.bios_min_perf) + policy_data->min = perf_to_freq(perf, cpudata->nominal_freq, + perf.bios_min_perf); + else + policy_data->min = cpudata->lowest_nonlinear_freq; } cpufreq_verify_within_cpu_limits(policy_data); - pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min); return 0; } -static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) +static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u8 max_limit_perf, min_limit_perf, max_perf; - u32 max_freq; struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); - max_perf = READ_ONCE(cpudata->highest_perf); - max_freq = READ_ONCE(cpudata->max_freq); - max_limit_perf = div_u64(policy->max * max_perf, max_freq); - min_limit_perf = div_u64(policy->min * max_perf, max_freq); - - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); - - WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); - WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); + perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max); WRITE_ONCE(cpudata->max_limit_freq, policy->max); - WRITE_ONCE(cpudata->min_limit_freq, policy->min); - return 0; + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { + perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf); + WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq)); + } else { + perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); + WRITE_ONCE(cpudata->min_limit_freq, policy->min); + } + + WRITE_ONCE(cpudata->perf, perf); } static int amd_pstate_update_freq(struct cpufreq_policy *policy, unsigned int target_freq, bool fast_switch) { struct cpufreq_freqs freqs; - struct amd_cpudata *cpudata = policy->driver_data; - u8 des_perf, cap_perf; + struct amd_cpudata *cpudata; + union perf_cached perf; + u8 des_perf; - if (!cpudata->max_freq) - return -ENODEV; + cpudata = policy->driver_data; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); - cap_perf = READ_ONCE(cpudata->highest_perf); + perf = READ_ONCE(cpudata->perf); freqs.old = policy->cur; freqs.new = target_freq; - des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf, - cpudata->max_freq); + des_perf = freq_to_perf(perf, cpudata->nominal_freq, target_freq); WARN_ON(fast_switch && !policy->fast_switch_enabled); /* @@ -644,8 +673,8 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy, if (!fast_switch) cpufreq_freq_transition_begin(policy, &freqs); - amd_pstate_update(cpudata, cpudata->min_limit_perf, des_perf, - cpudata->max_limit_perf, fast_switch, + amd_pstate_update(cpudata, perf.min_limit_perf, des_perf, + perf.max_limit_perf, fast_switch, policy->governor->flags); if (!fast_switch) @@ -674,9 +703,10 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long target_perf, unsigned long capacity) { - u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf; - struct 
cpufreq_policy *policy = cpufreq_cpu_get(cpu); + u8 max_perf, min_perf, des_perf, cap_perf; + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata; + union perf_cached perf; if (!policy) return; @@ -686,8 +716,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); - cap_perf = READ_ONCE(cpudata->highest_perf); - min_limit_perf = READ_ONCE(cpudata->min_limit_perf); + perf = READ_ONCE(cpudata->perf); + cap_perf = perf.highest_perf; des_perf = cap_perf; if (target_perf < capacity) @@ -698,28 +728,26 @@ static void amd_pstate_adjust_perf(unsigned int cpu, else min_perf = cap_perf; - if (min_perf < min_limit_perf) - min_perf = min_limit_perf; + if (min_perf < perf.min_limit_perf) + min_perf = perf.min_limit_perf; - max_perf = cpudata->max_limit_perf; + max_perf = perf.max_limit_perf; if (max_perf < min_perf) max_perf = min_perf; - des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, policy->governor->flags); - cpufreq_cpu_put(policy); } static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) { struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); u32 nominal_freq, max_freq; int ret = 0; nominal_freq = READ_ONCE(cpudata->nominal_freq); - max_freq = READ_ONCE(cpudata->max_freq); + max_freq = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf); if (on) policy->cpuinfo.max_freq = max_freq; @@ -746,7 +774,6 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) pr_err("Boost mode is not supported by this processor or SBIOS\n"); return -EOPNOTSUPP; } - guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_cpu_boost_update(policy, state); refresh_frequency_limits(policy); @@ -769,7 +796,7 @@ static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata) goto exit_err; } - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); + ret = rdmsrq_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); if (ret) { pr_err_once("failed to read initial CPU boost state!\n"); ret = -EIO; @@ -788,18 +815,8 @@ exit_err: static void amd_perf_ctl_reset(unsigned int cpu) { - wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); -} - -/* - * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks - * due to locking, so queue the work for later. - */ -static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) -{ - sched_set_itmt_support(); + wrmsrq_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); } -static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); #define CPPC_MAX_PERF U8_MAX @@ -811,54 +828,35 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) cpudata->hw_prefcore = true; - /* - * The priorities can be set regardless of whether or not - * sched_set_itmt_support(true) has been called and it is valid to - * update them at any time after it has been called. - */ + /* Priorities must be initialized before ITMT support can be toggled on. 
*/ sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu); - - schedule_work(&sched_prefcore_work); } -static void amd_pstate_update_limits(unsigned int cpu) +static void amd_pstate_update_limits(struct cpufreq_policy *policy) { - struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata; u32 prev_high = 0, cur_high = 0; - int ret; bool highest_perf_changed = false; + unsigned int cpu = policy->cpu; if (!amd_pstate_prefcore) return; - policy = cpufreq_cpu_get(cpu); - if (!policy) + if (amd_get_highest_perf(cpu, &cur_high)) return; cpudata = policy->driver_data; - guard(mutex)(&amd_pstate_driver_lock); - - ret = amd_get_highest_perf(cpu, &cur_high); - if (ret) { - cpufreq_cpu_put(policy); - return; - } - prev_high = READ_ONCE(cpudata->prefcore_ranking); highest_perf_changed = (prev_high != cur_high); if (highest_perf_changed) { WRITE_ONCE(cpudata->prefcore_ranking, cur_high); - if (cur_high < CPPC_MAX_PERF) + if (cur_high < CPPC_MAX_PERF) { sched_set_itmt_core_prio((int)cur_high, cpu); + sched_update_asym_prefer_cpu(cpu, prev_high, cur_high); + } } - cpufreq_cpu_put(policy); - - if (!highest_perf_changed) - cpufreq_update_policy(cpu); - } /* @@ -896,48 +894,45 @@ static u32 amd_pstate_get_transition_latency(unsigned int cpu) } /* - * amd_pstate_init_freq: Initialize the max_freq, min_freq, - * nominal_freq and lowest_nonlinear_freq for - * the @cpudata object. + * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq + * for the @cpudata object. * - * Requires: highest_perf, lowest_perf, nominal_perf and - * lowest_nonlinear_perf members of @cpudata to be - * initialized. + * Requires: all perf members of @cpudata to be initialized. * - * Returns 0 on success, non-zero value on failure. + * Returns 0 on success, non-zero value on failure. 
*/ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { - int ret; - u32 min_freq, max_freq; - u8 highest_perf, nominal_perf, lowest_nonlinear_perf; - u32 nominal_freq, lowest_nonlinear_freq; + u32 min_freq, max_freq, nominal_freq, lowest_nonlinear_freq; struct cppc_perf_caps cppc_perf; + union perf_cached perf; + int ret; ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; - - if (quirks && quirks->lowest_freq) - min_freq = quirks->lowest_freq; - else - min_freq = cppc_perf.lowest_freq; + perf = READ_ONCE(cpudata->perf); if (quirks && quirks->nominal_freq) nominal_freq = quirks->nominal_freq; else nominal_freq = cppc_perf.nominal_freq; + nominal_freq *= 1000; + + if (quirks && quirks->lowest_freq) { + min_freq = quirks->lowest_freq; + perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq); + WRITE_ONCE(cpudata->perf, perf); + } else + min_freq = cppc_perf.lowest_freq; - highest_perf = READ_ONCE(cpudata->highest_perf); - nominal_perf = READ_ONCE(cpudata->nominal_perf); - max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf); + min_freq *= 1000; - lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); - lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf); - WRITE_ONCE(cpudata->min_freq, min_freq * 1000); - WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000); - WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000); - WRITE_ONCE(cpudata->max_freq, max_freq * 1000); + WRITE_ONCE(cpudata->nominal_freq, nominal_freq); + + max_freq = perf_to_freq(perf, nominal_freq, perf.highest_perf); + lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf); + WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); /** * Below values need to be initialized correctly, otherwise driver will fail to load @@ -962,9 +957,10 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, ret; - struct device *dev; struct amd_cpudata *cpudata; + union perf_cached perf; + struct device *dev; + int ret; /* * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, @@ -995,19 +991,23 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) if (ret) goto free_cpudata1; - min_freq = READ_ONCE(cpudata->min_freq); - max_freq = READ_ONCE(cpudata->max_freq); - policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); - policy->min = min_freq; - policy->max = max_freq; + perf = READ_ONCE(cpudata->perf); - policy->cpuinfo.min_freq = min_freq; - policy->cpuinfo.max_freq = max_freq; + policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, + cpudata->nominal_freq, + perf.lowest_perf); + policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, + cpudata->nominal_freq, + perf.highest_perf); - policy->boost_enabled = READ_ONCE(cpudata->boost_supported); + ret = amd_pstate_cppc_enable(policy); + if (ret) + goto free_cpudata1; + + policy->boost_supported = READ_ONCE(cpudata->boost_supported); /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; @@ -1029,9 +1029,6 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) goto free_cpudata2; } - cpudata->max_limit_freq = max_freq; - cpudata->min_limit_freq = min_freq; - policy->driver_data = cpudata; if (!current_pstate_driver->adjust_perf) @@ -1042,6 +1039,7 @@ static int 
amd_pstate_cpu_init(struct cpufreq_policy *policy) free_cpudata2: freq_qos_remove_request(&cpudata->req[0]); free_cpudata1: + pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret); kfree(cpudata); return ret; } @@ -1049,6 +1047,10 @@ free_cpudata1: static void amd_pstate_cpu_exit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); + + /* Reset CPPC_REQ MSR to the BIOS value */ + amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); freq_qos_remove_request(&cpudata->req[1]); freq_qos_remove_request(&cpudata->req[0]); @@ -1056,28 +1058,6 @@ static void amd_pstate_cpu_exit(struct cpufreq_policy *policy) kfree(cpudata); } -static int amd_pstate_cpu_resume(struct cpufreq_policy *policy) -{ - int ret; - - ret = amd_pstate_cppc_enable(true); - if (ret) - pr_err("failed to enable amd-pstate during resume, return %d\n", ret); - - return ret; -} - -static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) -{ - int ret; - - ret = amd_pstate_cppc_enable(false); - if (ret) - pr_err("failed to disable amd-pstate during suspend, return %d\n", ret); - - return ret; -} - /* Sysfs attributes */ /* @@ -1088,27 +1068,27 @@ static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, char *buf) { - int max_freq; - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; + union perf_cached perf; - max_freq = READ_ONCE(cpudata->max_freq); - if (max_freq < 0) - return max_freq; + cpudata = policy->driver_data; + perf = READ_ONCE(cpudata->perf); - return sysfs_emit(buf, "%u\n", max_freq); + return sysfs_emit(buf, "%u\n", + perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf)); } static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, char *buf) { - int freq; - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; + union perf_cached perf; - freq = READ_ONCE(cpudata->lowest_nonlinear_freq); - if (freq < 0) - return freq; + cpudata = policy->driver_data; + perf = READ_ONCE(cpudata->perf); - return sysfs_emit(buf, "%u\n", freq); + return sysfs_emit(buf, "%u\n", + perf_to_freq(perf, cpudata->nominal_freq, perf.lowest_nonlinear_perf)); } /* @@ -1118,12 +1098,11 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, char *buf) { - u8 perf; - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; - perf = READ_ONCE(cpudata->highest_perf); + cpudata = policy->driver_data; - return sysfs_emit(buf, "%u\n", perf); + return sysfs_emit(buf, "%u\n", cpudata->perf.highest_perf); } static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, @@ -1170,8 +1149,10 @@ static ssize_t show_energy_performance_available_preferences( static ssize_t store_energy_performance_preference( struct cpufreq_policy *policy, const char *buf, size_t count) { + struct amd_cpudata *cpudata = policy->driver_data; char str_preference[21]; ssize_t ret; + u8 epp; ret = sscanf(buf, "%20s", str_preference); if (ret != 1) @@ -1181,9 +1162,17 @@ static ssize_t store_energy_performance_preference( if (ret < 0) return -EINVAL; - guard(mutex)(&amd_pstate_limits_lock); + if (!ret) + epp = cpudata->epp_default; + else + epp = epp_values[ret]; + + if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("EPP cannot be set 
under performance policy\n"); + return -EBUSY; + } - ret = amd_pstate_set_energy_pref_index(policy, ret); + ret = amd_pstate_set_epp(policy, epp); return ret ? ret : count; } @@ -1192,9 +1181,11 @@ static ssize_t show_energy_performance_preference( struct cpufreq_policy *policy, char *buf) { struct amd_cpudata *cpudata = policy->driver_data; - u8 preference; + u8 preference, epp; - switch (cpudata->epp_cached) { + epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); + + switch (epp) { case AMD_CPPC_EPP_PERFORMANCE: preference = EPP_INDEX_PERFORMANCE; break; @@ -1216,7 +1207,9 @@ static ssize_t show_energy_performance_preference( static void amd_pstate_driver_cleanup(void) { - amd_pstate_cppc_enable(false); + if (amd_pstate_prefcore) + sched_clear_itmt_support(); + cppc_state = AMD_PSTATE_DISABLE; current_pstate_driver = NULL; } @@ -1250,14 +1243,6 @@ static int amd_pstate_register_driver(int mode) cppc_state = mode; - ret = amd_pstate_cppc_enable(true); - if (ret) { - pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n", - ret); - amd_pstate_driver_cleanup(); - return ret; - } - /* at least one CPU supports CPB */ current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); @@ -1267,6 +1252,10 @@ static int amd_pstate_register_driver(int mode) return ret; } + /* Enable ITMT support once all CPUs have initialized their asym priorities. */ + if (amd_pstate_prefcore) + sched_set_itmt_support(); + return 0; } @@ -1343,6 +1332,12 @@ static ssize_t amd_pstate_show_status(char *buf) return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]); } +int amd_pstate_get_status(void) +{ + return cppc_state; +} +EXPORT_SYMBOL_GPL(amd_pstate_get_status); + int amd_pstate_update_status(const char *buf, size_t size) { int mode_idx; @@ -1355,8 +1350,10 @@ int amd_pstate_update_status(const char *buf, size_t size) if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX) return -EINVAL; - if (mode_state_machine[cppc_state][mode_idx]) + if (mode_state_machine[cppc_state][mode_idx]) { + guard(mutex)(&amd_pstate_driver_lock); return mode_state_machine[cppc_state][mode_idx](mode_idx); + } return 0; } @@ -1377,7 +1374,6 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, char *p = memchr(buf, '\n', count); int ret; - guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_update_status(buf, p ? p - buf : count); return ret < 0 ? 
ret : count; @@ -1453,10 +1449,10 @@ static bool amd_pstate_acpi_pm_profile_undefined(void) static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, ret; struct amd_cpudata *cpudata; + union perf_cached perf; struct device *dev; - u64 value; + int ret; /* * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, @@ -1487,20 +1483,25 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) if (ret) goto free_cpudata1; - min_freq = READ_ONCE(cpudata->min_freq); - max_freq = READ_ONCE(cpudata->max_freq); + perf = READ_ONCE(cpudata->perf); + + policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, + cpudata->nominal_freq, + perf.lowest_perf); + policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, + cpudata->nominal_freq, + perf.highest_perf); + policy->driver_data = cpudata; + + ret = amd_pstate_cppc_enable(policy); + if (ret) + goto free_cpudata1; - policy->cpuinfo.min_freq = min_freq; - policy->cpuinfo.max_freq = max_freq; /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; - policy->driver_data = cpudata; - - policy->min = policy->cpuinfo.min_freq; - policy->max = policy->cpuinfo.max_freq; - policy->boost_enabled = READ_ONCE(cpudata->boost_supported); + policy->boost_supported = READ_ONCE(cpudata->boost_supported); /* * Set the policy to provide a valid fallback value in case @@ -1515,18 +1516,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE; } - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); - if (ret) - return ret; - WRITE_ONCE(cpudata->cppc_req_cached, value); - - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value); - if (ret) - return ret; - WRITE_ONCE(cpudata->cppc_cap1_cached, value); - } - ret = amd_pstate_set_epp(cpudata, cpudata->epp_default); + ret = amd_pstate_set_epp(policy, cpudata->epp_default); if (ret) return ret; @@ -1535,6 +1525,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return 0; free_cpudata1: + pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret); kfree(cpudata); return ret; } @@ -1544,6 +1535,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; if (cpudata) { + union perf_cached perf = READ_ONCE(cpudata->perf); + + /* Reset CPPC_REQ MSR to the BIOS value */ + amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); + kfree(cpudata); policy->driver_data = NULL; } @@ -1554,24 +1550,21 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf; u8 epp; - amd_pstate_update_min_max_limit(policy); + if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) + amd_pstate_update_min_max_limit(policy); if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) epp = 0; else - epp = READ_ONCE(cpudata->epp_cached); + epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, - cpudata->min_limit_perf, - cpudata->max_limit_perf, - policy->boost_enabled); - } + perf = READ_ONCE(cpudata->perf); - return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, - cpudata->max_limit_perf, epp, false); + return 
amd_pstate_update_perf(policy, perf.min_limit_perf, 0U, + perf.max_limit_perf, epp, false); } static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) @@ -1582,9 +1575,6 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) if (!policy->cpuinfo.max_freq) return -ENODEV; - pr_debug("set_policy: cpuinfo.max %u policy->max %u\n", - policy->cpuinfo.max_freq, policy->max); - cpudata->policy = policy->policy; ret = amd_pstate_epp_update_limit(policy); @@ -1600,73 +1590,38 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) return 0; } -static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) -{ - struct amd_cpudata *cpudata = policy->driver_data; - u8 max_perf; - int ret; - - ret = amd_pstate_cppc_enable(true); - if (ret) - pr_err("failed to enable amd pstate during resume, return %d\n", ret); - - max_perf = READ_ONCE(cpudata->highest_perf); - - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, - cpudata->epp_cached, - FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), - max_perf, policy->boost_enabled); - } - - return amd_pstate_epp_update_limit(policy); -} - -static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) +static int amd_pstate_cpu_online(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - int ret; - - pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); - - ret = amd_pstate_epp_reenable(policy); - if (ret) - return ret; - cpudata->suspended = false; - - return 0; + return amd_pstate_cppc_enable(policy); } -static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) +static int amd_pstate_cpu_offline(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u8 min_perf; - - if (cpudata->suspended) - return 0; - - min_perf = READ_ONCE(cpudata->lowest_perf); - - guard(mutex)(&amd_pstate_limits_lock); - - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, - AMD_CPPC_EPP_BALANCE_POWERSAVE, - min_perf, min_perf, policy->boost_enabled); - } + union perf_cached perf = READ_ONCE(cpudata->perf); - return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, - AMD_CPPC_EPP_BALANCE_POWERSAVE, false); + /* + * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified + * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the + * limits, epp and desired perf will get reset to the cached values in cpudata struct + */ + return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); } -static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) +static int amd_pstate_suspend(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); int ret; - /* avoid suspending when EPP is not enabled */ - if (cppc_state != AMD_PSTATE_ACTIVE) - return 0; + /* + * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified + * min_perf value across kexec reboots. 
If this CPU is just resumed back without kexec, + * the limits, epp and desired perf will get reset to the cached values in cpudata struct + */ + ret = amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); + if (ret) + return ret; /* invalidate to ensure it's rewritten during resume */ cpudata->cppc_req_cached = 0; @@ -1674,23 +1629,31 @@ static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) /* set this flag to avoid setting core offline*/ cpudata->suspended = true; - /* disable CPPC in lowlevel firmware */ - ret = amd_pstate_cppc_enable(false); - if (ret) - pr_err("failed to suspend, return %d\n", ret); - return 0; } +static int amd_pstate_resume(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); + int cur_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->cur); + + /* Set CPPC_REQ to last sane value until the governor updates it */ + return amd_pstate_update_perf(policy, perf.min_limit_perf, cur_perf, perf.max_limit_perf, + 0U, false); +} + static int amd_pstate_epp_resume(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; if (cpudata->suspended) { - guard(mutex)(&amd_pstate_limits_lock); + int ret; /* enable amd pstate from suspend state*/ - amd_pstate_epp_reenable(policy); + ret = amd_pstate_epp_update_limit(policy); + if (ret) + return ret; cpudata->suspended = false; } @@ -1705,8 +1668,10 @@ static struct cpufreq_driver amd_pstate_driver = { .fast_switch = amd_pstate_fast_switch, .init = amd_pstate_cpu_init, .exit = amd_pstate_cpu_exit, - .suspend = amd_pstate_cpu_suspend, - .resume = amd_pstate_cpu_resume, + .online = amd_pstate_cpu_online, + .offline = amd_pstate_cpu_offline, + .suspend = amd_pstate_suspend, + .resume = amd_pstate_resume, .set_boost = amd_pstate_set_boost, .update_limits = amd_pstate_update_limits, .name = "amd-pstate", @@ -1719,9 +1684,9 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .setpolicy = amd_pstate_epp_set_policy, .init = amd_pstate_epp_cpu_init, .exit = amd_pstate_epp_cpu_exit, - .offline = amd_pstate_epp_cpu_offline, - .online = amd_pstate_epp_cpu_online, - .suspend = amd_pstate_epp_suspend, + .offline = amd_pstate_cpu_offline, + .online = amd_pstate_cpu_online, + .suspend = amd_pstate_suspend, .resume = amd_pstate_epp_resume, .update_limits = amd_pstate_update_limits, .set_boost = amd_pstate_set_boost, @@ -1873,7 +1838,6 @@ static int __init amd_pstate_init(void) global_attr_free: cpufreq_unregister_driver(current_pstate_driver); - amd_pstate_cppc_enable(false); return ret; } device_initcall(amd_pstate_init); diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index 19d405c6d805..cb45fdca27a6 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -13,6 +13,38 @@ /********************************************************************* * AMD P-state INTERFACE * *********************************************************************/ + +/** + * union perf_cached - A union to cache performance-related data. + * @highest_perf: the maximum performance an individual processor may reach, + * assuming ideal conditions + * For platforms that support the preferred core feature, the highest_perf value maybe + * configured to any value in the range 166-255 by the firmware (because the preferred + * core ranking is encoded in the highest_perf value). 
To maintain consistency across + * all platforms, we split the highest_perf and preferred core ranking values into + * cpudata->perf.highest_perf and cpudata->prefcore_ranking. + * @nominal_perf: the maximum sustained performance level of the processor, + * assuming ideal operating conditions + * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power + * savings are achieved + * @lowest_perf: the absolute lowest performance level of the processor + * @min_limit_perf: Cached value of the performance corresponding to policy->min + * @max_limit_perf: Cached value of the performance corresponding to policy->max + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option + */ +union perf_cached { + struct { + u8 highest_perf; + u8 nominal_perf; + u8 lowest_nonlinear_perf; + u8 lowest_perf; + u8 min_limit_perf; + u8 max_limit_perf; + u8 bios_min_perf; + }; + u64 val; +}; + /** * struct amd_aperf_mperf * @aperf: actual performance frequency clock count @@ -30,24 +62,11 @@ struct amd_aperf_mperf { * @cpu: CPU number * @req: constraint request to apply * @cppc_req_cached: cached performance request hints - * @highest_perf: the maximum performance an individual processor may reach, - * assuming ideal conditions - * For platforms that do not support the preferred core feature, the - * highest_pef may be configured with 166 or 255, to avoid max frequency - * calculated wrongly. we take the fixed value as the highest_perf. - * @nominal_perf: the maximum sustained performance level of the processor, - * assuming ideal operating conditions - * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power - * savings are achieved - * @lowest_perf: the absolute lowest performance level of the processor + * @perf: cached performance-related data * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher * priority. - * @min_limit_perf: Cached value of the performance corresponding to policy->min - * @max_limit_perf: Cached value of the performance corresponding to policy->max * @min_limit_freq: Cached value of policy->min (in khz) * @max_limit_freq: Cached value of policy->max (in khz) - * @max_freq: the frequency (in khz) that mapped to highest_perf - * @min_freq: the frequency (in khz) that mapped to lowest_perf * @nominal_freq: the frequency (in khz) that mapped to nominal_perf * @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf * @cur: Difference of Aperf/Mperf/tsc count between last and current sample @@ -59,7 +78,6 @@ struct amd_aperf_mperf { * AMD P-State driver supports preferred core featue. * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value - * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value * * The amd_cpudata is key private data for each CPU thread in AMD P-State, and * represents all the attributes and goals that AMD P-State requests at runtime. 
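(Editorial aside, not part of the patch: the new union perf_cached packs all of the cached perf levels into a single 64-bit word, so the driver can publish or snapshot every limit in one access instead of juggling seven separate u8 fields; in the hunks above the driver reads it with READ_ONCE(cpudata->perf). The userspace sketch below only mirrors the documented layout with made-up perf values to show the single-word snapshot idea; it is illustrative, not the kernel code.)

#include <stdint.h>
#include <stdio.h>

/* Same field layout as the union documented above, with userspace types. */
union perf_cached_sketch {
	struct {
		uint8_t highest_perf;
		uint8_t nominal_perf;
		uint8_t lowest_nonlinear_perf;
		uint8_t lowest_perf;
		uint8_t min_limit_perf;
		uint8_t max_limit_perf;
		uint8_t bios_min_perf;
	};
	uint64_t val;
};

int main(void)
{
	union perf_cached_sketch perf = { .val = 0 };

	/* Made-up perf levels purely for illustration. */
	perf.highest_perf = 196;
	perf.nominal_perf = 120;
	perf.lowest_perf = 20;
	perf.min_limit_perf = 20;
	perf.max_limit_perf = 196;

	/* One 64-bit load captures every cached level at once. */
	union perf_cached_sketch snap = { .val = perf.val };

	printf("snap: highest %u nominal %u min_limit %u max_limit %u\n",
	       snap.highest_perf, snap.nominal_perf,
	       snap.min_limit_perf, snap.max_limit_perf);
	return 0;
}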
@@ -70,18 +88,11 @@ struct amd_cpudata { struct freq_qos_request req[2]; u64 cppc_req_cached; - u8 highest_perf; - u8 nominal_perf; - u8 lowest_nonlinear_perf; - u8 lowest_perf; - u8 prefcore_ranking; - u8 min_limit_perf; - u8 max_limit_perf; - u32 min_limit_freq; - u32 max_limit_freq; + union perf_cached perf; - u32 max_freq; - u32 min_freq; + u8 prefcore_ranking; + u32 min_limit_freq; + u32 max_limit_freq; u32 nominal_freq; u32 lowest_nonlinear_freq; @@ -93,9 +104,7 @@ struct amd_cpudata { bool hw_prefcore; /* EPP feature related attributes*/ - u8 epp_cached; u32 policy; - u64 cppc_cap1_cached; bool suspended; u8 epp_default; }; @@ -112,6 +121,7 @@ enum amd_pstate_mode { AMD_PSTATE_MAX, }; const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode); +int amd_pstate_get_status(void); int amd_pstate_update_status(const char *buf, size_t size); #endif /* _LINUX_AMD_PSTATE_H */ diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 59b19b9975e8..13fed4b9e02b 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -129,7 +129,7 @@ static int __init amd_freq_sensitivity_init(void) pci_dev_put(pcidev); } - if (rdmsrl_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val)) + if (rdmsrq_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val)) return -ENODEV; if (!(val >> CLASS_CODE_SHIFT)) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index 269b18c62d04..b1d29b7af232 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -134,11 +134,17 @@ static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = { static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct apple_cpu_priv *priv = policy->driver_data; + struct cpufreq_policy *policy; + struct apple_cpu_priv *priv; struct cpufreq_frequency_table *p; unsigned int pstate; + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + if (priv->info->cur_pstate_mask) { u32 reg = readl_relaxed(priv->reg_base + APPLE_DVFS_STATUS); @@ -229,12 +235,6 @@ static int apple_soc_cpufreq_find_cluster(struct cpufreq_policy *policy, return 0; } -static struct freq_attr *apple_soc_cpufreq_hw_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, /* Filled in below if boost is enabled */ - NULL, -}; - static int apple_soc_cpufreq_init(struct cpufreq_policy *policy) { int ret, i; @@ -316,16 +316,6 @@ static int apple_soc_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = true; policy->suspend_freq = freq_table[0].frequency; - if (policy_has_boost_freq(policy)) { - ret = cpufreq_enable_boost_support(); - if (ret) { - dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); - } else { - apple_soc_cpufreq_hw_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; - apple_soc_cpufreq_driver.boost_enabled = true; - } - } - return 0; out_free_cpufreq_table: @@ -360,7 +350,7 @@ static struct cpufreq_driver apple_soc_cpufreq_driver = { .target_index = apple_soc_cpufreq_set_target, .fast_switch = apple_soc_cpufreq_fast_switch, .register_em = cpufreq_register_em_with_opp, - .attr = apple_soc_cpufreq_hw_attr, + .set_boost = cpufreq_boost_set_sw, .suspend = cpufreq_generic_suspend, }; diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index bea41ccabf1f..f28a4435fba7 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ 
b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -102,11 +102,7 @@ struct armada_37xx_dvfs { }; static struct armada_37xx_dvfs armada_37xx_dvfs[] = { - /* - * The cpufreq scaling for 1.2 GHz variant of the SOC is currently - * unstable because we do not know how to configure it properly. - */ - /* {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, */ + {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, {.cpu_freq_max = 1000*1000*1000, .divider = {1, 2, 4, 5} }, {.cpu_freq_max = 800*1000*1000, .divider = {1, 2, 3, 4} }, {.cpu_freq_max = 600*1000*1000, .divider = {2, 4, 5, 6} }, diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index 5a3545bd0d8d..006f4c554dd7 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -132,7 +132,7 @@ static int __init armada_8k_cpufreq_init(void) int ret = 0, opps_index = 0, cpu, nb_cpus; struct freq_table *freq_tables; struct device_node *node; - static struct cpumask cpus; + static struct cpumask cpus, shared_cpus; node = of_find_matching_node_and_match(NULL, armada_8k_cpufreq_of_match, NULL); @@ -154,7 +154,6 @@ static int __init armada_8k_cpufreq_init(void) * divisions of it). */ for_each_cpu(cpu, &cpus) { - struct cpumask shared_cpus; struct device *cpu_dev; struct clk *clk; diff --git a/drivers/cpufreq/bmips-cpufreq.c b/drivers/cpufreq/bmips-cpufreq.c index 17a4c174553d..36051880640b 100644 --- a/drivers/cpufreq/bmips-cpufreq.c +++ b/drivers/cpufreq/bmips-cpufreq.c @@ -150,7 +150,6 @@ static struct cpufreq_driver bmips_cpufreq_driver = { .get = bmips_cpufreq_get, .init = bmips_cpufreq_init, .exit = bmips_cpufreq_exit, - .attr = cpufreq_generic_attr, .name = BMIPS_CPUFREQ_PREFIX, }; diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 2fd0f6be6fa3..7b841a086acc 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -720,7 +720,6 @@ cpufreq_freq_attr_ro(brcm_avs_voltage); cpufreq_freq_attr_ro(brcm_avs_frequency); static struct freq_attr *brcm_avs_cpufreq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, &brcm_avs_pstate, &brcm_avs_mode, &brcm_avs_pmap, diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 8f512448382f..c8e63c7aa9a3 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -34,8 +34,6 @@ */ static LIST_HEAD(cpu_data_list); -static bool boost_supported; - static struct cpufreq_driver cppc_cpufreq_driver; #ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE @@ -653,7 +651,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * is supported. */ if (caps->highest_perf > caps->nominal_perf) - boost_supported = true; + policy->boost_supported = true; /* Set policy->cur to max now. The governors will adjust later. 
*/ policy->cur = cppc_perf_to_khz(caps, caps->highest_perf); @@ -749,7 +747,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) int ret; if (!policy) - return -ENODEV; + return 0; cpu_data = policy->driver_data; @@ -791,11 +789,6 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) struct cppc_perf_caps *caps = &cpu_data->perf_caps; int ret; - if (!boost_supported) { - pr_err("BOOST not supported by CPU or firmware\n"); - return -EINVAL; - } - if (state) policy->max = cppc_perf_to_khz(caps, caps->highest_perf); else @@ -815,15 +808,124 @@ static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf) return cpufreq_show_cpus(cpu_data->shared_cpu_map, buf); } + +static ssize_t show_auto_select(struct cpufreq_policy *policy, char *buf) +{ + bool val; + int ret; + + ret = cppc_get_auto_sel(policy->cpu, &val); + + /* show "<unsupported>" when this register is not supported by cpc */ + if (ret == -EOPNOTSUPP) + return sysfs_emit(buf, "<unsupported>\n"); + + if (ret) + return ret; + + return sysfs_emit(buf, "%d\n", val); +} + +static ssize_t store_auto_select(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + bool val; + int ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + ret = cppc_set_auto_sel(policy->cpu, val); + if (ret) + return ret; + + return count; +} + +static ssize_t show_auto_act_window(struct cpufreq_policy *policy, char *buf) +{ + u64 val; + int ret; + + ret = cppc_get_auto_act_window(policy->cpu, &val); + + /* show "<unsupported>" when this register is not supported by cpc */ + if (ret == -EOPNOTSUPP) + return sysfs_emit(buf, "<unsupported>\n"); + + if (ret) + return ret; + + return sysfs_emit(buf, "%llu\n", val); +} + +static ssize_t store_auto_act_window(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + u64 usec; + int ret; + + ret = kstrtou64(buf, 0, &usec); + if (ret) + return ret; + + ret = cppc_set_auto_act_window(policy->cpu, usec); + if (ret) + return ret; + + return count; +} + +static ssize_t show_energy_performance_preference_val(struct cpufreq_policy *policy, char *buf) +{ + u64 val; + int ret; + + ret = cppc_get_epp_perf(policy->cpu, &val); + + /* show "<unsupported>" when this register is not supported by cpc */ + if (ret == -EOPNOTSUPP) + return sysfs_emit(buf, "<unsupported>\n"); + + if (ret) + return ret; + + return sysfs_emit(buf, "%llu\n", val); +} + +static ssize_t store_energy_performance_preference_val(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + u64 val; + int ret; + + ret = kstrtou64(buf, 0, &val); + if (ret) + return ret; + + ret = cppc_set_epp(policy->cpu, val); + if (ret) + return ret; + + return count; +} + cpufreq_freq_attr_ro(freqdomain_cpus); +cpufreq_freq_attr_rw(auto_select); +cpufreq_freq_attr_rw(auto_act_window); +cpufreq_freq_attr_rw(energy_performance_preference_val); static struct freq_attr *cppc_cpufreq_attr[] = { &freqdomain_cpus, + &auto_select, + &auto_act_window, + &energy_performance_preference_val, NULL, }; static struct cpufreq_driver cppc_cpufreq_driver = { - .flags = CPUFREQ_CONST_LOOPS, + .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, .verify = cppc_verify_policy, .target = cppc_cpufreq_set_target, .get = cppc_cpufreq_get_rate, diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 2aa00769cf09..a010da0f6337 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -175,6 +175,7 @@ static const struct of_device_id 
blocklist[] __initconst = { { .compatible = "qcom,sm8350", }, { .compatible = "qcom,sm8450", }, { .compatible = "qcom,sm8550", }, + { .compatible = "qcom,sm8650", }, { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index f3913eea5e55..e80dd982a3e2 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -36,12 +36,6 @@ struct private_data { static LIST_HEAD(priv_list); -static struct freq_attr *cpufreq_dt_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, /* Extra space for boost-attr if required */ - NULL, -}; - static struct private_data *cpufreq_dt_find_data(int cpu) { struct private_data *priv; @@ -120,21 +114,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = transition_latency; policy->dvfs_possible_from_any_cpu = true; - /* Support turbo/boost mode */ - if (policy_has_boost_freq(policy)) { - /* This gets disabled by core on driver unregister */ - ret = cpufreq_enable_boost_support(); - if (ret) - goto out_clk_put; - cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; - } - return 0; - -out_clk_put: - clk_put(cpu_clk); - - return ret; } static int cpufreq_online(struct cpufreq_policy *policy) @@ -169,7 +149,7 @@ static struct cpufreq_driver dt_cpufreq_driver = { .offline = cpufreq_offline, .register_em = cpufreq_register_em_with_opp, .name = "cpufreq-dt", - .attr = cpufreq_dt_attr, + .set_boost = cpufreq_boost_set_sw, .suspend = cpufreq_generic_suspend, }; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 934e0e19824c..628f5b633b61 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -88,6 +88,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_governor *new_gov, unsigned int new_pol); static bool cpufreq_boost_supported(void); +static int cpufreq_boost_trigger_state(int state); /* * Two notifier lists: the "policy" list is involved in the @@ -254,51 +255,6 @@ void cpufreq_cpu_put(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); -/** - * cpufreq_cpu_release - Unlock a policy and decrement its usage counter. - * @policy: cpufreq policy returned by cpufreq_cpu_acquire(). - */ -void cpufreq_cpu_release(struct cpufreq_policy *policy) -{ - if (WARN_ON(!policy)) - return; - - lockdep_assert_held(&policy->rwsem); - - up_write(&policy->rwsem); - - cpufreq_cpu_put(policy); -} - -/** - * cpufreq_cpu_acquire - Find policy for a CPU, mark it as busy and lock it. - * @cpu: CPU to find the policy for. - * - * Call cpufreq_cpu_get() to get a reference on the cpufreq policy for @cpu and - * if the policy returned by it is not NULL, acquire its rwsem for writing. - * Return the policy if it is active or release it and return NULL otherwise. - * - * The policy returned by this function has to be released with the help of - * cpufreq_cpu_release() in order to release its rwsem and balance its usage - * counter properly. 
- */ -struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) -{ - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - - if (!policy) - return NULL; - - down_write(&policy->rwsem); - - if (policy_is_inactive(policy)) { - cpufreq_cpu_release(policy); - return NULL; - } - - return policy; -} - /********************************************************************* * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ @@ -535,16 +491,18 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); static unsigned int __resolve_freq(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) + unsigned int target_freq, + unsigned int min, unsigned int max, + unsigned int relation) { unsigned int idx; - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (!policy->freq_table) return target_freq; - idx = cpufreq_frequency_table_target(policy, target_freq, relation); + idx = cpufreq_frequency_table_target(policy, target_freq, min, max, relation); policy->cached_resolved_idx = idx; policy->cached_target_freq = target_freq; return policy->freq_table[idx].frequency; @@ -564,7 +522,21 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq) { - return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE); + unsigned int min = READ_ONCE(policy->min); + unsigned int max = READ_ONCE(policy->max); + + /* + * If this function runs in parallel with cpufreq_set_policy(), it may + * read policy->min before the update and policy->max after the update + * or the other way around, so there is no ordering guarantee. + * + * Resolve this by always honoring the max (in case it comes from + * thermal throttling or similar). 
+ */ + if (unlikely(min > max)) + min = max; + + return __resolve_freq(policy, target_freq, min, max, CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -619,6 +591,22 @@ static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf) return sysfs_emit(buf, "%d\n", policy->boost_enabled); } +static int policy_set_boost(struct cpufreq_policy *policy, bool enable) +{ + int ret; + + if (policy->boost_enabled == enable) + return 0; + + policy->boost_enabled = enable; + + ret = cpufreq_driver->set_boost(policy, enable); + if (ret) + policy->boost_enabled = !policy->boost_enabled; + + return ret; +} + static ssize_t store_local_boost(struct cpufreq_policy *policy, const char *buf, size_t count) { @@ -631,21 +619,14 @@ static ssize_t store_local_boost(struct cpufreq_policy *policy, if (!cpufreq_driver->boost_enabled) return -EINVAL; - if (policy->boost_enabled == enable) - return count; - - policy->boost_enabled = enable; - - cpus_read_lock(); - ret = cpufreq_driver->set_boost(policy, enable); - cpus_read_unlock(); + if (!policy->boost_supported) + return -EINVAL; - if (ret) { - policy->boost_enabled = !policy->boost_enabled; - return ret; - } + ret = policy_set_boost(policy, enable); + if (!ret) + return count; - return count; + return ret; } static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost); @@ -729,18 +710,26 @@ show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); -__weak unsigned int arch_freq_get_on_cpu(int cpu) +__weak int arch_freq_get_on_cpu(int cpu) { - return 0; + return -EOPNOTSUPP; +} + +static inline bool cpufreq_avg_freq_supported(struct cpufreq_policy *policy) +{ + return arch_freq_get_on_cpu(policy->cpu) != -EOPNOTSUPP; } static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) { ssize_t ret; - unsigned int freq; + int freq; + + freq = IS_ENABLED(CONFIG_CPUFREQ_ARCH_CUR_FREQ) + ? arch_freq_get_on_cpu(policy->cpu) + : 0; - freq = arch_freq_get_on_cpu(policy->cpu); - if (freq) + if (freq > 0) ret = sysfs_emit(buf, "%u\n", freq); else if (cpufreq_driver->setpolicy && cpufreq_driver->get) ret = sysfs_emit(buf, "%u\n", cpufreq_driver->get(policy->cpu)); @@ -785,6 +774,19 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, } /* + * show_cpuinfo_avg_freq - average CPU frequency as detected by hardware + */ +static ssize_t show_cpuinfo_avg_freq(struct cpufreq_policy *policy, + char *buf) +{ + int avg_freq = arch_freq_get_on_cpu(policy->cpu); + + if (avg_freq > 0) + return sysfs_emit(buf, "%u\n", avg_freq); + return avg_freq != 0 ? 
avg_freq : -EINVAL; +} + +/* * show_scaling_governor - show the current policy for the specified CPU */ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) @@ -804,7 +806,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) static ssize_t store_scaling_governor(struct cpufreq_policy *policy, const char *buf, size_t count) { - char str_governor[16]; + char str_governor[CPUFREQ_NAME_LEN]; int ret; ret = sscanf(buf, "%15s", str_governor); @@ -915,9 +917,9 @@ static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, if (!policy->governor || !policy->governor->store_setspeed) return -EINVAL; - ret = sscanf(buf, "%u", &freq); - if (ret != 1) - return -EINVAL; + ret = kstrtouint(buf, 0, &freq); + if (ret) + return ret; policy->governor->store_setspeed(policy, freq); @@ -946,6 +948,7 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) } cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400); +cpufreq_freq_attr_ro(cpuinfo_avg_freq); cpufreq_freq_attr_ro(cpuinfo_min_freq); cpufreq_freq_attr_ro(cpuinfo_max_freq); cpufreq_freq_attr_ro(cpuinfo_transition_latency); @@ -983,17 +986,16 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) { struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); - ssize_t ret = -EBUSY; if (!fattr->show) return -EIO; - down_read(&policy->rwsem); + guard(cpufreq_policy_read)(policy); + if (likely(!policy_is_inactive(policy))) - ret = fattr->show(policy, buf); - up_read(&policy->rwsem); + return fattr->show(policy, buf); - return ret; + return -EBUSY; } static ssize_t store(struct kobject *kobj, struct attribute *attr, @@ -1001,17 +1003,16 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, { struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); - ssize_t ret = -EBUSY; if (!fattr->store) return -EIO; - down_write(&policy->rwsem); + guard(cpufreq_policy_write)(policy); + if (likely(!policy_is_inactive(policy))) - ret = fattr->store(policy, buf, count); - up_write(&policy->rwsem); + return fattr->store(policy, buf, count); - return ret; + return -EBUSY; } static void cpufreq_sysfs_release(struct kobject *kobj) @@ -1059,6 +1060,21 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) struct freq_attr **drv_attr; int ret = 0; + /* Attributes that need freq_table */ + if (policy->freq_table) { + ret = sysfs_create_file(&policy->kobj, + &cpufreq_freq_attr_scaling_available_freqs.attr); + if (ret) + return ret; + + if (cpufreq_boost_supported()) { + ret = sysfs_create_file(&policy->kobj, + &cpufreq_freq_attr_scaling_boost_freqs.attr); + if (ret) + return ret; + } + } + /* set up files for this cpu device */ drv_attr = cpufreq_driver->attr; while (drv_attr && *drv_attr) { @@ -1073,6 +1089,12 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return ret; } + if (cpufreq_avg_freq_supported(policy)) { + ret = sysfs_create_file(&policy->kobj, &cpuinfo_avg_freq.attr); + if (ret) + return ret; + } + ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); if (ret) return ret; @@ -1148,7 +1170,8 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp if (cpumask_test_cpu(cpu, policy->cpus)) return 0; - down_write(&policy->rwsem); + guard(cpufreq_policy_write)(policy); + if (has_target()) cpufreq_stop_governor(policy); @@ -1159,7 +1182,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp 
if (ret) pr_err("%s: Failed to start governor\n", __func__); } - up_write(&policy->rwsem); + return ret; } @@ -1179,9 +1202,10 @@ static void handle_update(struct work_struct *work) container_of(work, struct cpufreq_policy, update); pr_debug("handle_update for cpu %u called\n", policy->cpu); - down_write(&policy->rwsem); + + guard(cpufreq_policy_write)(policy); + refresh_frequency_limits(policy); - up_write(&policy->rwsem); } static int cpufreq_notifier_min(struct notifier_block *nb, unsigned long freq, @@ -1207,11 +1231,11 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy) struct kobject *kobj; struct completion *cmp; - down_write(&policy->rwsem); - cpufreq_stats_free_table(policy); - kobj = &policy->kobj; - cmp = &policy->kobj_unregister; - up_write(&policy->rwsem); + scoped_guard(cpufreq_policy_write, policy) { + cpufreq_stats_free_table(policy); + kobj = &policy->kobj; + cmp = &policy->kobj_unregister; + } kobject_put(kobj); /* @@ -1260,6 +1284,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) goto err_free_real_cpus; } + init_rwsem(&policy->rwsem); + freq_constraints_init(&policy->constraints); policy->nb_min.notifier_call = cpufreq_notifier_min; @@ -1282,12 +1308,10 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) } INIT_LIST_HEAD(&policy->policy_list); - init_rwsem(&policy->rwsem); spin_lock_init(&policy->transition_lock); init_waitqueue_head(&policy->transition_wait); INIT_WORK(&policy->update, handle_update); - policy->cpu = cpu; return policy; err_min_qos_notifier: @@ -1356,35 +1380,17 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) kfree(policy); } -static int cpufreq_online(unsigned int cpu) +static int cpufreq_policy_online(struct cpufreq_policy *policy, + unsigned int cpu, bool new_policy) { - struct cpufreq_policy *policy; - bool new_policy; unsigned long flags; unsigned int j; int ret; - pr_debug("%s: bringing CPU%u online\n", __func__, cpu); - - /* Check if this CPU already has a policy to manage it */ - policy = per_cpu(cpufreq_cpu_data, cpu); - if (policy) { - WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus)); - if (!policy_is_inactive(policy)) - return cpufreq_add_policy_cpu(policy, cpu); + guard(cpufreq_policy_write)(policy); - /* This is the only online CPU for the policy. Start over. 
*/ - new_policy = false; - down_write(&policy->rwsem); - policy->cpu = cpu; - policy->governor = NULL; - } else { - new_policy = true; - policy = cpufreq_policy_alloc(cpu); - if (!policy) - return -ENOMEM; - down_write(&policy->rwsem); - } + policy->cpu = cpu; + policy->governor = NULL; if (!new_policy && cpufreq_driver->online) { /* Recover policy->cpus using related_cpus */ @@ -1407,7 +1413,7 @@ static int cpufreq_online(unsigned int cpu) if (ret) { pr_debug("%s: %d: initialization failed\n", __func__, __LINE__); - goto out_free_policy; + goto out_clear_policy; } /* @@ -1558,7 +1564,55 @@ static int cpufreq_online(unsigned int cpu) goto out_destroy_policy; } - up_write(&policy->rwsem); + return 0; + +out_destroy_policy: + for_each_cpu(j, policy->real_cpus) + remove_cpu_dev_symlink(policy, j, get_cpu_device(j)); + +out_offline_policy: + if (cpufreq_driver->offline) + cpufreq_driver->offline(policy); + +out_exit_policy: + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + +out_clear_policy: + cpumask_clear(policy->cpus); + + return ret; +} + +static int cpufreq_online(unsigned int cpu) +{ + struct cpufreq_policy *policy; + bool new_policy; + int ret; + + pr_debug("%s: bringing CPU%u online\n", __func__, cpu); + + /* Check if this CPU already has a policy to manage it */ + policy = per_cpu(cpufreq_cpu_data, cpu); + if (policy) { + WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus)); + if (!policy_is_inactive(policy)) + return cpufreq_add_policy_cpu(policy, cpu); + + /* This is the only online CPU for the policy. Start over. */ + new_policy = false; + } else { + new_policy = true; + policy = cpufreq_policy_alloc(cpu); + if (!policy) + return -ENOMEM; + } + + ret = cpufreq_policy_online(policy, cpu, new_policy); + if (ret) { + cpufreq_policy_free(policy); + return ret; + } kobject_uevent(&policy->kobj, KOBJ_ADD); @@ -1570,41 +1624,24 @@ static int cpufreq_online(unsigned int cpu) if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); - /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - if (cpufreq_driver->set_boost && - policy->boost_enabled != cpufreq_boost_enabled()) { - policy->boost_enabled = cpufreq_boost_enabled(); - ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); + /* + * Let the per-policy boost flag mirror the cpufreq_driver boost during + * initialization for a new policy. For an existing policy, maintain the + * previous boost value unless global boost is disabled. + */ + if (cpufreq_driver->set_boost && policy->boost_supported && + (new_policy || !cpufreq_boost_enabled())) { + ret = policy_set_boost(policy, cpufreq_boost_enabled()); if (ret) { /* If the set_boost fails, the online operation is not affected */ pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu, - policy->boost_enabled ? 
"enable" : "disable"); - policy->boost_enabled = !policy->boost_enabled; + str_enable_disable(cpufreq_boost_enabled())); } } pr_debug("initialization complete\n"); return 0; - -out_destroy_policy: - for_each_cpu(j, policy->real_cpus) - remove_cpu_dev_symlink(policy, j, get_cpu_device(j)); - -out_offline_policy: - if (cpufreq_driver->offline) - cpufreq_driver->offline(policy); - -out_exit_policy: - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - -out_free_policy: - cpumask_clear(policy->cpus); - up_write(&policy->rwsem); - - cpufreq_policy_free(policy); - return ret; } /** @@ -1694,11 +1731,10 @@ static int cpufreq_offline(unsigned int cpu) return 0; } - down_write(&policy->rwsem); + guard(cpufreq_policy_write)(policy); __cpufreq_offline(cpu, policy); - up_write(&policy->rwsem); return 0; } @@ -1715,33 +1751,29 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (!policy) return; - down_write(&policy->rwsem); + scoped_guard(cpufreq_policy_write, policy) { + if (cpu_online(cpu)) + __cpufreq_offline(cpu, policy); - if (cpu_online(cpu)) - __cpufreq_offline(cpu, policy); + remove_cpu_dev_symlink(policy, cpu, dev); - remove_cpu_dev_symlink(policy, cpu, dev); + if (!cpumask_empty(policy->real_cpus)) + return; - if (!cpumask_empty(policy->real_cpus)) { - up_write(&policy->rwsem); - return; - } + /* + * Unregister cpufreq cooling once all the CPUs of the policy + * are removed. + */ + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { + cpufreq_cooling_unregister(policy->cdev); + policy->cdev = NULL; + } - /* - * Unregister cpufreq cooling once all the CPUs of the policy are - * removed. - */ - if (cpufreq_thermal_control_enabled(cpufreq_driver)) { - cpufreq_cooling_unregister(policy->cdev); - policy->cdev = NULL; + /* We did light-weight exit earlier, do full tear down now */ + if (cpufreq_driver->offline && cpufreq_driver->exit) + cpufreq_driver->exit(policy); } - /* We did light-weight exit earlier, do full tear down now */ - if (cpufreq_driver->offline && cpufreq_driver->exit) - cpufreq_driver->exit(policy); - - up_write(&policy->rwsem); - cpufreq_policy_free(policy); } @@ -1811,27 +1843,26 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b */ unsigned int cpufreq_quick_get(unsigned int cpu) { - struct cpufreq_policy *policy; - unsigned int ret_freq = 0; + struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; unsigned long flags; read_lock_irqsave(&cpufreq_driver_lock, flags); if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) { - ret_freq = cpufreq_driver->get(cpu); + unsigned int ret_freq = cpufreq_driver->get(cpu); + read_unlock_irqrestore(&cpufreq_driver_lock, flags); + return ret_freq; } read_unlock_irqrestore(&cpufreq_driver_lock, flags); policy = cpufreq_cpu_get(cpu); - if (policy) { - ret_freq = policy->cur; - cpufreq_cpu_put(policy); - } + if (policy) + return policy->cur; - return ret_freq; + return 0; } EXPORT_SYMBOL(cpufreq_quick_get); @@ -1843,15 +1874,13 @@ EXPORT_SYMBOL(cpufreq_quick_get); */ unsigned int cpufreq_quick_get_max(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - unsigned int ret_freq = 0; + struct cpufreq_policy *policy __free(put_cpufreq_policy); - if (policy) { - ret_freq = policy->max; - cpufreq_cpu_put(policy); - } + policy = cpufreq_cpu_get(cpu); + if (policy) + return policy->max; - return ret_freq; + return 0; } EXPORT_SYMBOL(cpufreq_quick_get_max); @@ -1863,15 +1892,13 @@ EXPORT_SYMBOL(cpufreq_quick_get_max); */ 
__weak unsigned int cpufreq_get_hw_max_freq(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - unsigned int ret_freq = 0; + struct cpufreq_policy *policy __free(put_cpufreq_policy); - if (policy) { - ret_freq = policy->cpuinfo.max_freq; - cpufreq_cpu_put(policy); - } + policy = cpufreq_cpu_get(cpu); + if (policy) + return policy->cpuinfo.max_freq; - return ret_freq; + return 0; } EXPORT_SYMBOL(cpufreq_get_hw_max_freq); @@ -1891,19 +1918,18 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy) */ unsigned int cpufreq_get(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - unsigned int ret_freq = 0; + struct cpufreq_policy *policy __free(put_cpufreq_policy); - if (policy) { - down_read(&policy->rwsem); - if (cpufreq_driver->get) - ret_freq = __cpufreq_get(policy); - up_read(&policy->rwsem); + policy = cpufreq_cpu_get(cpu); + if (!policy) + return 0; - cpufreq_cpu_put(policy); - } + guard(cpufreq_policy_read)(policy); - return ret_freq; + if (cpufreq_driver->get) + return __cpufreq_get(policy); + + return 0; } EXPORT_SYMBOL(cpufreq_get); @@ -1962,9 +1988,9 @@ void cpufreq_suspend(void) for_each_active_policy(policy) { if (has_target()) { - down_write(&policy->rwsem); - cpufreq_stop_governor(policy); - up_write(&policy->rwsem); + scoped_guard(cpufreq_policy_write, policy) { + cpufreq_stop_governor(policy); + } } if (cpufreq_driver->suspend && cpufreq_driver->suspend(policy)) @@ -2005,9 +2031,9 @@ void cpufreq_resume(void) pr_err("%s: Failed to resume driver: %s\n", __func__, cpufreq_driver->name); } else if (has_target()) { - down_write(&policy->rwsem); - ret = cpufreq_start_governor(policy); - up_write(&policy->rwsem); + scoped_guard(cpufreq_policy_write, policy) { + ret = cpufreq_start_governor(policy); + } if (ret) pr_err("%s: Failed to start governor for CPU%u's policy\n", @@ -2337,7 +2363,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - target_freq = __resolve_freq(policy, target_freq, relation); + target_freq = __resolve_freq(policy, target_freq, policy->min, + policy->max, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -2374,15 +2401,9 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - int ret; - - down_write(&policy->rwsem); - - ret = __cpufreq_driver_target(policy, target_freq, relation); - - up_write(&policy->rwsem); + guard(cpufreq_policy_write)(policy); - return ret; + return __cpufreq_driver_target(policy, target_freq, relation); } EXPORT_SYMBOL_GPL(cpufreq_driver_target); @@ -2554,31 +2575,6 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); * POLICY INTERFACE * *********************************************************************/ -/** - * cpufreq_get_policy - get the current cpufreq_policy - * @policy: struct cpufreq_policy into which the current cpufreq_policy - * is written - * @cpu: CPU to find the policy for - * - * Reads the current cpufreq policy. 
- */ -int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu) -{ - struct cpufreq_policy *cpu_policy; - if (!policy) - return -EINVAL; - - cpu_policy = cpufreq_cpu_get(cpu); - if (!cpu_policy) - return -EINVAL; - - memcpy(policy, cpu_policy, sizeof(*policy)); - - cpufreq_cpu_put(cpu_policy); - return 0; -} -EXPORT_SYMBOL(cpufreq_get_policy); - DEFINE_PER_CPU(unsigned long, cpufreq_pressure); /** @@ -2661,11 +2657,18 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * Resolve policy min/max to available frequencies. It ensures * no frequency resolution will neither overshoot the requested maximum * nor undershoot the requested minimum. + * + * Avoid storing intermediate values in policy->max or policy->min and + * compiler optimizations around them because they may be accessed + * concurrently by cpufreq_driver_resolve_freq() during the update. */ - policy->min = new_data.min; - policy->max = new_data.max; - policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L); - policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, + new_data.min, new_data.max, + CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, new_data.min, + new_data.max, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min); + trace_cpu_frequency_limits(policy); cpufreq_update_pressure(policy); @@ -2713,15 +2716,32 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - if (cpufreq_init_governor(policy)) + if (cpufreq_init_governor(policy)) { + policy->governor = NULL; + } else if (cpufreq_start_governor(policy)) { + cpufreq_exit_governor(policy); policy->governor = NULL; - else - cpufreq_start_governor(policy); + } } return ret; } +static void cpufreq_policy_refresh(struct cpufreq_policy *policy) +{ + guard(cpufreq_policy_write)(policy); + + /* + * BIOS might change freq behind our back + * -> ask driver for current freq and notify governors about a change + */ + if (cpufreq_driver->get && has_target() && + (cpufreq_suspended || WARN_ON(!cpufreq_verify_current_freq(policy, false)))) + return; + + refresh_frequency_limits(policy); +} + /** * cpufreq_update_policy - Re-evaluate an existing cpufreq policy. * @cpu: CPU to re-evaluate the policy for. @@ -2733,23 +2753,13 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, */ void cpufreq_update_policy(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy); + policy = cpufreq_cpu_get(cpu); if (!policy) return; - /* - * BIOS might change freq behind our back - * -> ask driver for current freq and notify governors about a change - */ - if (cpufreq_driver->get && has_target() && - (cpufreq_suspended || WARN_ON(!cpufreq_verify_current_freq(policy, false)))) - goto unlock; - - refresh_frequency_limits(policy); - -unlock: - cpufreq_cpu_release(policy); + cpufreq_policy_refresh(policy); } EXPORT_SYMBOL(cpufreq_update_policy); @@ -2758,29 +2768,27 @@ EXPORT_SYMBOL(cpufreq_update_policy); * @cpu: CPU to update the policy limits for. * * Invoke the driver's ->update_limits callback if present or call - * cpufreq_update_policy() for @cpu. + * cpufreq_policy_refresh() for @cpu. 
*/ void cpufreq_update_limits(unsigned int cpu) { - struct cpufreq_policy *policy; + struct cpufreq_policy *policy __free(put_cpufreq_policy); policy = cpufreq_cpu_get(cpu); if (!policy) return; if (cpufreq_driver->update_limits) - cpufreq_driver->update_limits(cpu); + cpufreq_driver->update_limits(policy); else - cpufreq_update_policy(cpu); - - cpufreq_cpu_put(policy); + cpufreq_policy_refresh(policy); } EXPORT_SYMBOL_GPL(cpufreq_update_limits); /********************************************************************* * BOOST * *********************************************************************/ -static int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state) +int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state) { int ret; @@ -2799,15 +2807,18 @@ static int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state) return 0; } +EXPORT_SYMBOL_GPL(cpufreq_boost_set_sw); -int cpufreq_boost_trigger_state(int state) +static int cpufreq_boost_trigger_state(int state) { struct cpufreq_policy *policy; unsigned long flags; int ret = 0; - if (cpufreq_driver->boost_enabled == state) - return 0; + /* + * Don't compare 'cpufreq_driver->boost_enabled' with 'state' here to + * make sure all policies are in sync with global boost flag. + */ write_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver->boost_enabled = state; @@ -2815,12 +2826,12 @@ int cpufreq_boost_trigger_state(int state) cpus_read_lock(); for_each_active_policy(policy) { - policy->boost_enabled = state; - ret = cpufreq_driver->set_boost(policy, state); - if (ret) { - policy->boost_enabled = !policy->boost_enabled; + if (!policy->boost_supported) + continue; + + ret = policy_set_boost(policy, state); + if (ret) goto err_reset_state; - } } cpus_read_unlock(); @@ -2862,21 +2873,6 @@ static void remove_boost_sysfs_file(void) sysfs_remove_file(cpufreq_global_kobject, &boost.attr); } -int cpufreq_enable_boost_support(void) -{ - if (!cpufreq_driver) - return -EINVAL; - - if (cpufreq_boost_supported()) - return 0; - - cpufreq_driver->set_boost = cpufreq_boost_set_sw; - - /* This will get removed on driver unregister */ - return create_boost_sysfs_file(); -} -EXPORT_SYMBOL_GPL(cpufreq_enable_boost_support); - bool cpufreq_boost_enabled(void) { return cpufreq_driver->boost_enabled; @@ -2951,15 +2947,6 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) cpufreq_driver = driver_data; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - /* - * Mark support for the scheduler's frequency invariance engine for - * drivers that implement target(), target_index() or fast_switch(). - */ - if (!cpufreq_driver->setpolicy) { - static_branch_enable_cpuslocked(&cpufreq_freq_invariance); - pr_debug("supports frequency invariance"); - } - if (driver_data->setpolicy) driver_data->flags |= CPUFREQ_CONST_LOOPS; @@ -2990,6 +2977,15 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) hp_online = ret; ret = 0; + /* + * Mark support for the scheduler's frequency invariance engine for + * drivers that implement target(), target_index() or fast_switch(). 
+ */ + if (!cpufreq_driver->setpolicy) { + static_branch_enable_cpuslocked(&cpufreq_freq_invariance); + pr_debug("supports frequency invariance"); + } + pr_debug("driver %s up and running\n", driver_data->name); goto out; @@ -3060,6 +3056,36 @@ static int __init cpufreq_core_init(void) return 0; } + +static bool cpufreq_policy_is_good_for_eas(unsigned int cpu) +{ + struct cpufreq_policy *policy __free(put_cpufreq_policy); + + policy = cpufreq_cpu_get(cpu); + if (!policy) { + pr_debug("cpufreq policy not set for CPU: %d\n", cpu); + return false; + } + + return sugov_is_governor(policy); +} + +bool cpufreq_ready_for_eas(const struct cpumask *cpu_mask) +{ + unsigned int cpu; + + /* Do not attempt EAS if schedutil is not being used. */ + for_each_cpu(cpu, cpu_mask) { + if (!cpufreq_policy_is_good_for_eas(cpu)) { + pr_debug("rd %*pbl: schedutil is mandatory for EAS\n", + cpumask_pr_args(cpu_mask)); + return false; + } + } + + return true; +} + module_param(off, int, 0444); module_param_string(default_governor, default_governor, CPUFREQ_NAME_LEN, 0444); core_initcall(cpufreq_core_init); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a7c38b8b3e78..0e65d37c9231 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -76,7 +76,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_next; } - index = cpufreq_frequency_table_target(policy, freq_next, relation); + index = cpufreq_frequency_table_target(policy, freq_next, policy->min, + policy->max, relation); freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index 8736be3a06ce..2c277eb3795a 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -101,7 +101,6 @@ static struct cpufreq_driver davinci_driver = { .get = cpufreq_generic_get, .init = davinci_cpu_init, .name = "davinci", - .attr = cpufreq_generic_attr, }; static int __init davinci_cpufreq_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c index 6e958b09e1b5..320a0af2266a 100644 --- a/drivers/cpufreq/e_powersaver.c +++ b/drivers/cpufreq/e_powersaver.c @@ -225,12 +225,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy) return -ENODEV; } /* Enable Enhanced PowerSaver */ - rdmsrl(MSR_IA32_MISC_ENABLE, val); + rdmsrq(MSR_IA32_MISC_ENABLE, val); if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; - wrmsrl(MSR_IA32_MISC_ENABLE, val); + wrmsrq(MSR_IA32_MISC_ENABLE, val); /* Can be locked at 0 */ - rdmsrl(MSR_IA32_MISC_ENABLE, val); + rdmsrq(MSR_IA32_MISC_ENABLE, val); if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { pr_info("Can't enable Enhanced PowerSaver\n"); return -ENODEV; @@ -376,7 +376,6 @@ static struct cpufreq_driver eps_driver = { .exit = eps_cpu_exit, .get = eps_get, .name = "e_powersaver", - .attr = cpufreq_generic_attr, }; diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c index 4ce5eb35dc46..fc5a58088b35 100644 --- a/drivers/cpufreq/elanfreq.c +++ b/drivers/cpufreq/elanfreq.c @@ -21,7 +21,6 @@ #include <linux/cpufreq.h> #include <asm/cpu_device_id.h> -#include <asm/msr.h> #include <linux/timex.h> #include <linux/io.h> @@ -194,7 +193,6 @@ static struct cpufreq_driver elanfreq_driver = { .target_index = elanfreq_target, .init = 
elanfreq_cpu_init, .name = "elanfreq", - .attr = cpufreq_generic_attr, }; static const struct x86_cpu_id elan_id[] = { diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 10e80d912b8d..35de513af6c9 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -14,7 +14,7 @@ * FREQUENCY TABLE HELPERS * *********************************************************************/ -bool policy_has_boost_freq(struct cpufreq_policy *policy) +static bool policy_has_boost_freq(struct cpufreq_policy *policy) { struct cpufreq_frequency_table *pos, *table = policy->freq_table; @@ -27,7 +27,6 @@ bool policy_has_boost_freq(struct cpufreq_policy *policy) return false; } -EXPORT_SYMBOL_GPL(policy_has_boost_freq); int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table) @@ -116,8 +115,8 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -148,7 +147,7 @@ int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, cpufreq_for_each_valid_entry_idx(pos, table, i) { freq = pos->frequency; - if ((freq < policy->min) || (freq > policy->max)) + if (freq < min || freq > max) continue; if (freq == target_freq) { optimal.driver_data = i; @@ -276,7 +275,6 @@ static ssize_t scaling_available_frequencies_show(struct cpufreq_policy *policy, return show_available_freqs(policy, buf, false); } cpufreq_attr_available_freq(scaling_available); -EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs); /* * scaling_boost_frequencies_show - show available boost frequencies for @@ -288,13 +286,6 @@ static ssize_t scaling_boost_frequencies_show(struct cpufreq_policy *policy, return show_available_freqs(policy, buf, true); } cpufreq_attr_available_freq(scaling_boost); -EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_boost_freqs); - -struct freq_attr *cpufreq_generic_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; -EXPORT_SYMBOL_GPL(cpufreq_generic_attr); static int set_freq_table_sorted(struct cpufreq_policy *policy) { @@ -367,6 +358,10 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) if (ret) return ret; + /* Driver's may have set this field already */ + if (policy_has_boost_freq(policy)) + policy->boost_supported = true; + return set_freq_table_sorted(policy); } diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index f3c99f378ad6..db1c88e9d3f9 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -207,7 +207,6 @@ static struct cpufreq_driver imx6q_cpufreq_driver = { .init = imx6q_cpufreq_init, .register_em = cpufreq_register_em_with_opp, .name = "imx6q-cpufreq", - .attr = cpufreq_generic_attr, .suspend = cpufreq_generic_suspend, }; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9c4cc01fd51a..06a1c7dd081f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -221,6 +221,7 @@ struct global_params { * @sched_flags: Store scheduler flags for possible cross CPU update * @hwp_boost_min: Last HWP boosted min performance * @suspended: Whether or not the driver has been suspended. 
+ * @pd_registered: Set when a perf domain is registered for this CPU. * @hwp_notify_work: workqueue for HWP notifications. * * This structure stores per CPU instance data for all CPUs. @@ -260,6 +261,9 @@ struct cpudata { unsigned int sched_flags; u32 hwp_boost_min; bool suspended; +#ifdef CONFIG_ENERGY_MODEL + bool pd_registered; +#endif struct delayed_work hwp_notify_work; }; @@ -303,6 +307,7 @@ static bool hwp_is_hybrid; static struct cpufreq_driver *intel_pstate_driver __read_mostly; +#define INTEL_PSTATE_CORE_SCALING 100000 #define HYBRID_SCALING_FACTOR_ADL 78741 #define HYBRID_SCALING_FACTOR_MTL 80000 #define HYBRID_SCALING_FACTOR_LNL 86957 @@ -311,7 +316,7 @@ static int hybrid_scaling_factor; static inline int core_get_scaling(void) { - return 100000; + return INTEL_PSTATE_CORE_SCALING; } #ifdef CONFIG_ACPI @@ -598,7 +603,10 @@ static bool turbo_is_disabled(void) { u64 misc_en; - rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); + if (!cpu_feature_enabled(X86_FEATURE_IDA)) + return true; + + rdmsrq(MSR_IA32_MISC_ENABLE, misc_en); return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); } @@ -620,7 +628,7 @@ static s16 intel_pstate_get_epb(struct cpudata *cpu_data) if (!boot_cpu_has(X86_FEATURE_EPB)) return -ENXIO; - ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); + ret = rdmsrq_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); if (ret) return (s16)ret; @@ -637,7 +645,7 @@ static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data) * MSR_HWP_REQUEST, so need to read and get EPP. */ if (!hwp_req_data) { - epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, + epp = rdmsrq_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &hwp_req_data); if (epp) return epp; @@ -659,12 +667,12 @@ static int intel_pstate_set_epb(int cpu, s16 pref) if (!boot_cpu_has(X86_FEATURE_EPB)) return -ENXIO; - ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); + ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); if (ret) return ret; epb = (epb & ~0x0f) | pref; - wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb); + wrmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb); return 0; } @@ -762,7 +770,7 @@ static int intel_pstate_set_epp(struct cpudata *cpu, u32 epp) * function, so it cannot run in parallel with the update below. 
*/ WRITE_ONCE(cpu->hwp_req_cached, value); - ret = wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); + ret = wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); if (!ret) cpu->epp_cached = epp; @@ -916,7 +924,7 @@ static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf) if (ratio <= 0) { u64 cap; - rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap); + rdmsrq_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap); ratio = HWP_GUARANTEED_PERF(cap); } @@ -936,6 +944,8 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; +static bool no_cas __ro_after_init; + static struct cpudata *hybrid_max_perf_cpu __read_mostly; /* * Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata, @@ -943,12 +953,124 @@ static struct cpudata *hybrid_max_perf_cpu __read_mostly; */ static DEFINE_MUTEX(hybrid_capacity_lock); +#ifdef CONFIG_ENERGY_MODEL +#define HYBRID_EM_STATE_COUNT 4 + +static int hybrid_active_power(struct device *dev, unsigned long *power, + unsigned long *freq) +{ + /* + * Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100% + * of the maximum capacity such that two CPUs of the same type will be + * regarded as equally attractive if the utilization of each of them + * falls into the same bin, which should prevent tasks from being + * migrated between them too often. + * + * For this purpose, return the "frequency" of 2 for the first + * performance level and otherwise leave the value set by the caller. + */ + if (!*freq) + *freq = 2; + + /* No power information. */ + *power = EM_MAX_POWER; + + return 0; +} + +static int hybrid_get_cost(struct device *dev, unsigned long freq, + unsigned long *cost) +{ + struct pstate_data *pstate = &all_cpu_data[dev->id]->pstate; + struct cpu_cacheinfo *cacheinfo = get_cpu_cacheinfo(dev->id); + + /* + * The smaller the perf-to-frequency scaling factor, the larger the IPC + * ratio between the given CPU and the least capable CPU in the system. + * Regard that IPC ratio as the primary cost component and assume that + * the scaling factors for different CPU types will differ by at least + * 5% and they will not be above INTEL_PSTATE_CORE_SCALING. + * + * Add the freq value to the cost, so that the cost of running on CPUs + * of the same type in different "utilization bins" is different. + */ + *cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq; + /* + * Increase the cost slightly for CPUs able to access L3 to avoid + * touching it in case some other CPUs of the same type can do the work + * without it. + */ + if (cacheinfo) { + unsigned int i; + + /* Check if L3 cache is there. */ + for (i = 0; i < cacheinfo->num_leaves; i++) { + if (cacheinfo->info_list[i].level == 3) { + *cost += 2; + break; + } + } + } + + return 0; +} + +static bool hybrid_register_perf_domain(unsigned int cpu) +{ + static const struct em_data_callback cb + = EM_ADV_DATA_CB(hybrid_active_power, hybrid_get_cost); + struct cpudata *cpudata = all_cpu_data[cpu]; + struct device *cpu_dev; + + /* + * Registering EM perf domains without enabling asymmetric CPU capacity + * support is not really useful and one domain should not be registered + * more than once. 
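For reference, a rough worked example of the cost heuristic in hybrid_get_cost() above. This is an illustrative sketch only, not code from the patch: it plugs the INTEL_PSTATE_CORE_SCALING and HYBRID_SCALING_FACTOR_ADL constants defined earlier in this file into the same formula.

/*
 * Illustrative sketch only: base cost values from the hybrid_get_cost()
 * formula.  The per-bin "frequency" term and the +2 L3 adjustment are then
 * added on top of these base values.
 */
static void hybrid_cost_example(void)
{
	/* P-core with the ADL-style hybrid scaling: 100 * 100000 / 78741 = 126. */
	u64 pcore_cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING,
				 HYBRID_SCALING_FACTOR_ADL);

	/* E-core at the reference core scaling: 100 * 100000 / 100000 = 100. */
	u64 ecore_cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING,
				 INTEL_PSTATE_CORE_SCALING);

	pr_debug("base costs: P-core %llu, E-core %llu\n", pcore_cost, ecore_cost);
}

So, all else being equal, the energy model sees a P-core as roughly a quarter more expensive than an E-core, which is the IPC-ratio bias the comment above describes.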
+ */ + if (!hybrid_max_perf_cpu || cpudata->pd_registered) + return false; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return false; + + if (em_dev_register_perf_domain(cpu_dev, HYBRID_EM_STATE_COUNT, &cb, + cpumask_of(cpu), false)) + return false; + + cpudata->pd_registered = true; + + return true; +} + +static void hybrid_register_all_perf_domains(void) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) + hybrid_register_perf_domain(cpu); +} + +static void hybrid_update_perf_domain(struct cpudata *cpu) +{ + if (cpu->pd_registered) + em_adjust_cpu_capacity(cpu->cpu); +} +#else /* !CONFIG_ENERGY_MODEL */ +static inline bool hybrid_register_perf_domain(unsigned int cpu) { return false; } +static inline void hybrid_register_all_perf_domains(void) {} +static inline void hybrid_update_perf_domain(struct cpudata *cpu) {} +#endif /* CONFIG_ENERGY_MODEL */ + static void hybrid_set_cpu_capacity(struct cpudata *cpu) { arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf, cpu->capacity_perf, cpu->pstate.max_pstate_physical); + hybrid_update_perf_domain(cpu); + + topology_set_cpu_scale(cpu->cpu, arch_scale_cpu_capacity(cpu->cpu)); pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu, cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf, @@ -1037,10 +1159,19 @@ static void hybrid_refresh_cpu_capacity_scaling(void) guard(mutex)(&hybrid_capacity_lock); __hybrid_refresh_cpu_capacity_scaling(); + /* + * Perf domains are not registered before setting hybrid_max_perf_cpu, + * so register them all after setting up CPU capacity scaling. + */ + hybrid_register_all_perf_domains(); } static void hybrid_init_cpu_capacity_scaling(bool refresh) { + /* Bail out if enabling capacity-aware scheduling is prohibited. */ + if (no_cas) + return; + /* * If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity * scaling has been enabled already and the driver is just changing the @@ -1060,7 +1191,7 @@ static void hybrid_init_cpu_capacity_scaling(bool refresh) hybrid_refresh_cpu_capacity_scaling(); /* * Disabling ITMT causes sched domains to be rebuilt to disable asym - * packing and enable asym capacity. + * packing and enable asym capacity and EAS. */ sched_clear_itmt_support(); } @@ -1082,7 +1213,7 @@ static void __intel_pstate_get_hwp_cap(struct cpudata *cpu) { u64 cap; - rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap); + rdmsrq_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap); WRITE_ONCE(cpu->hwp_cap_cached, cap); cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap); cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap); @@ -1138,6 +1269,14 @@ static void hybrid_update_capacity(struct cpudata *cpu) } hybrid_set_cpu_capacity(cpu); + /* + * If the CPU was offline to start with and it is going online for the + * first time, a perf domain needs to be registered for it if hybrid + * capacity scaling has been enabled already. In that case, sched + * domains need to be rebuilt to take the new perf domain into account. 
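The no_cas bail-out above is driven by a new option parsed further down in intel_pstate_setup(). Assuming the usual intel_pstate= early-parameter syntax used for the existing options (no_hwp, force, hwp_only), capacity-aware scheduling can be kept off with a boot parameter along the lines of:

	intel_pstate=no_cas

in which case hybrid_init_cpu_capacity_scaling() returns early and neither the asymmetric CPU capacities nor the EM perf domains are set up.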
+ */ + if (hybrid_register_perf_domain(cpu->cpu)) + em_rebuild_sched_domains(); unlock: mutex_unlock(&hybrid_capacity_lock); @@ -1156,7 +1295,7 @@ static void intel_pstate_hwp_set(unsigned int cpu) if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) min = max; - rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); + rdmsrq_on_cpu(cpu, MSR_HWP_REQUEST, &value); value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); @@ -1203,7 +1342,7 @@ static void intel_pstate_hwp_set(unsigned int cpu) } skip_epp: WRITE_ONCE(cpu_data->hwp_req_cached, value); - wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); + wrmsrq_on_cpu(cpu, MSR_HWP_REQUEST, value); } static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata); @@ -1250,7 +1389,7 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu) if (boot_cpu_has(X86_FEATURE_HWP_EPP)) value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE); - wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); + wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); mutex_lock(&hybrid_capacity_lock); @@ -1279,7 +1418,7 @@ static void set_power_ctl_ee_state(bool input) u64 power_ctl; mutex_lock(&intel_pstate_driver_lock); - rdmsrl(MSR_IA32_POWER_CTL, power_ctl); + rdmsrq(MSR_IA32_POWER_CTL, power_ctl); if (input) { power_ctl &= ~BIT(MSR_IA32_POWER_CTL_BIT_EE); power_ctl_ee_state = POWER_CTL_EE_ENABLE; @@ -1287,7 +1426,7 @@ static void set_power_ctl_ee_state(bool input) power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE); power_ctl_ee_state = POWER_CTL_EE_DISABLE; } - wrmsrl(MSR_IA32_POWER_CTL, power_ctl); + wrmsrq(MSR_IA32_POWER_CTL, power_ctl); mutex_unlock(&intel_pstate_driver_lock); } @@ -1296,7 +1435,7 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata); static void intel_pstate_hwp_reenable(struct cpudata *cpu) { intel_pstate_hwp_enable(cpu); - wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, READ_ONCE(cpu->hwp_req_cached)); + wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, READ_ONCE(cpu->hwp_req_cached)); } static int intel_pstate_suspend(struct cpufreq_policy *policy) @@ -1347,9 +1486,11 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } -static void __intel_pstate_update_max_freq(struct cpudata *cpudata, - struct cpufreq_policy *policy) +static void __intel_pstate_update_max_freq(struct cpufreq_policy *policy, + struct cpudata *cpudata) { + guard(cpufreq_policy_write)(policy); + if (hwp_active) intel_pstate_get_hwp_cap(cpudata); @@ -1359,42 +1500,34 @@ static void __intel_pstate_update_max_freq(struct cpudata *cpudata, refresh_frequency_limits(policy); } -static void intel_pstate_update_limits(unsigned int cpu) +static bool intel_pstate_update_max_freq(struct cpudata *cpudata) { - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - struct cpudata *cpudata; + struct cpufreq_policy *policy __free(put_cpufreq_policy); + policy = cpufreq_cpu_get(cpudata->cpu); if (!policy) - return; + return false; - cpudata = all_cpu_data[cpu]; + __intel_pstate_update_max_freq(policy, cpudata); - __intel_pstate_update_max_freq(cpudata, policy); + return true; +} - /* Prevent the driver from being unregistered now. 
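The __free(put_cpufreq_policy) and guard(cpufreq_policy_write) annotations used in the refactored helpers above come from the scope-based cleanup machinery in <linux/cleanup.h>: the named cleanup action runs automatically when the variable or scope is left, which is why the explicit cpufreq_cpu_release()/unlock calls disappear. A minimal sketch of the pattern, using the generic kfree cleanup class as a stand-in (illustrative only, not code from this patch):

#include <linux/cleanup.h>
#include <linux/slab.h>

static int cleanup_example(void)
{
	/* Freed automatically on every return path once 'buf' leaves scope. */
	char *buf __free(kfree) = kzalloc(16, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;

	/* ... use buf; no explicit kfree() needed before returning ... */
	return 0;
}

guard() works the same way for locks and similar resources: it acquires at the point of declaration and releases when the enclosing scope ends.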
*/ - mutex_lock(&intel_pstate_driver_lock); +static void intel_pstate_update_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpudata = all_cpu_data[policy->cpu]; - cpufreq_cpu_release(policy); + __intel_pstate_update_max_freq(policy, cpudata); hybrid_update_capacity(cpudata); - - mutex_unlock(&intel_pstate_driver_lock); } static void intel_pstate_update_limits_for_all(void) { int cpu; - for_each_possible_cpu(cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - - if (!policy) - continue; - - __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); - - cpufreq_cpu_release(policy); - } + for_each_possible_cpu(cpu) + intel_pstate_update_max_freq(all_cpu_data[cpu]); mutex_lock(&hybrid_capacity_lock); @@ -1697,7 +1830,7 @@ static ssize_t show_energy_efficiency(struct kobject *kobj, struct kobj_attribut u64 power_ctl; int enable; - rdmsrl(MSR_IA32_POWER_CTL, power_ctl); + rdmsrq(MSR_IA32_POWER_CTL, power_ctl); enable = !!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE)); return sprintf(buf, "%d\n", !enable); } @@ -1834,13 +1967,8 @@ static void intel_pstate_notify_work(struct work_struct *work) { struct cpudata *cpudata = container_of(to_delayed_work(work), struct cpudata, hwp_notify_work); - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu); - - if (policy) { - __intel_pstate_update_max_freq(cpudata, policy); - - cpufreq_cpu_release(policy); + if (intel_pstate_update_max_freq(cpudata)) { /* * The driver will not be unregistered while this function is * running, so update the capacity without acquiring the driver @@ -1849,7 +1977,7 @@ static void intel_pstate_notify_work(struct work_struct *work) hybrid_update_capacity(cpudata); } - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); + wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); } static DEFINE_RAW_SPINLOCK(hwp_notify_lock); @@ -1871,7 +1999,7 @@ void notify_hwp_interrupt(void) if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE)) status_mask |= HWP_HIGHEST_PERF_CHANGE_STATUS; - rdmsrl_safe(MSR_HWP_STATUS, &value); + rdmsrq_safe(MSR_HWP_STATUS, &value); if (!(value & status_mask)) return; @@ -1888,7 +2016,7 @@ void notify_hwp_interrupt(void) return; ack_intr: - wrmsrl_safe(MSR_HWP_STATUS, 0); + wrmsrq_safe(MSR_HWP_STATUS, 0); raw_spin_unlock_irqrestore(&hwp_notify_lock, flags); } @@ -1899,8 +2027,8 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) if (!cpu_feature_enabled(X86_FEATURE_HWP_NOTIFY)) return; - /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */ - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); + /* wrmsrq_on_cpu has to be outside spinlock as this can result in IPC */ + wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); raw_spin_lock_irq(&hwp_notify_lock); cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask); @@ -1927,9 +2055,9 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE)) interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ; - /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */ - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask); - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); + /* wrmsrq_on_cpu has to be outside spinlock as this can result in IPC */ + wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask); + wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); } } @@ -1968,9 +2096,9 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) { /* First disable HWP notification 
interrupt till we activate again */ if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); + wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); - wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); + wrmsrq_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); intel_pstate_enable_hwp_interrupt(cpudata); @@ -1984,7 +2112,7 @@ static int atom_get_min_pstate(int not_used) { u64 value; - rdmsrl(MSR_ATOM_CORE_RATIOS, value); + rdmsrq(MSR_ATOM_CORE_RATIOS, value); return (value >> 8) & 0x7F; } @@ -1992,7 +2120,7 @@ static int atom_get_max_pstate(int not_used) { u64 value; - rdmsrl(MSR_ATOM_CORE_RATIOS, value); + rdmsrq(MSR_ATOM_CORE_RATIOS, value); return (value >> 16) & 0x7F; } @@ -2000,7 +2128,7 @@ static int atom_get_turbo_pstate(int not_used) { u64 value; - rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value); + rdmsrq(MSR_ATOM_CORE_TURBO_RATIOS, value); return value & 0x7F; } @@ -2035,7 +2163,7 @@ static int silvermont_get_scaling(void) static int silvermont_freq_table[] = { 83300, 100000, 133300, 116700, 80000}; - rdmsrl(MSR_FSB_FREQ, value); + rdmsrq(MSR_FSB_FREQ, value); i = value & 0x7; WARN_ON(i > 4); @@ -2051,7 +2179,7 @@ static int airmont_get_scaling(void) 83300, 100000, 133300, 116700, 80000, 93300, 90000, 88900, 87500}; - rdmsrl(MSR_FSB_FREQ, value); + rdmsrq(MSR_FSB_FREQ, value); i = value & 0xF; WARN_ON(i > 8); @@ -2062,7 +2190,7 @@ static void atom_get_vid(struct cpudata *cpudata) { u64 value; - rdmsrl(MSR_ATOM_CORE_VIDS, value); + rdmsrq(MSR_ATOM_CORE_VIDS, value); cpudata->vid.min = int_tofp((value >> 8) & 0x7f); cpudata->vid.max = int_tofp((value >> 16) & 0x7f); cpudata->vid.ratio = div_fp( @@ -2070,7 +2198,7 @@ static void atom_get_vid(struct cpudata *cpudata) int_tofp(cpudata->pstate.max_pstate - cpudata->pstate.min_pstate)); - rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value); + rdmsrq(MSR_ATOM_CORE_TURBO_VIDS, value); cpudata->vid.turbo = value & 0x7f; } @@ -2078,7 +2206,7 @@ static int core_get_min_pstate(int cpu) { u64 value; - rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value); + rdmsrq_on_cpu(cpu, MSR_PLATFORM_INFO, &value); return (value >> 40) & 0xFF; } @@ -2086,7 +2214,7 @@ static int core_get_max_pstate_physical(int cpu) { u64 value; - rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value); + rdmsrq_on_cpu(cpu, MSR_PLATFORM_INFO, &value); return (value >> 8) & 0xFF; } @@ -2100,13 +2228,13 @@ static int core_get_tdp_ratio(int cpu, u64 plat_info) int err; /* Get the TDP level (0, 1, 2) to get ratios */ - err = rdmsrl_safe_on_cpu(cpu, MSR_CONFIG_TDP_CONTROL, &tdp_ctrl); + err = rdmsrq_safe_on_cpu(cpu, MSR_CONFIG_TDP_CONTROL, &tdp_ctrl); if (err) return err; /* TDP MSR are continuous starting at 0x648 */ tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03); - err = rdmsrl_safe_on_cpu(cpu, tdp_msr, &tdp_ratio); + err = rdmsrq_safe_on_cpu(cpu, tdp_msr, &tdp_ratio); if (err) return err; @@ -2131,7 +2259,7 @@ static int core_get_max_pstate(int cpu) int tdp_ratio; int err; - rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info); + rdmsrq_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info); max_pstate = (plat_info >> 8) & 0xFF; tdp_ratio = core_get_tdp_ratio(cpu, plat_info); @@ -2143,7 +2271,7 @@ static int core_get_max_pstate(int cpu) return tdp_ratio; } - err = rdmsrl_safe_on_cpu(cpu, MSR_TURBO_ACTIVATION_RATIO, &tar); + err = rdmsrq_safe_on_cpu(cpu, MSR_TURBO_ACTIVATION_RATIO, &tar); if (!err) { int tar_levels; @@ -2163,7 +2291,7 @@ static int core_get_turbo_pstate(int cpu) u64 value; int nont, ret; - rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value); + rdmsrq_on_cpu(cpu, 
MSR_TURBO_RATIO_LIMIT, &value); nont = core_get_max_pstate(cpu); ret = (value) & 255; if (ret <= nont) @@ -2192,7 +2320,7 @@ static int knl_get_turbo_pstate(int cpu) u64 value; int nont, ret; - rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value); + rdmsrq_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value); nont = core_get_max_pstate(cpu); ret = (((value) >> 8) & 0xFF); if (ret <= nont) @@ -2200,28 +2328,20 @@ static int knl_get_turbo_pstate(int cpu) return ret; } -static void hybrid_get_type(void *data) -{ - u8 *cpu_type = data; - - *cpu_type = get_this_hybrid_cpu_type(); -} - static int hwp_get_cpu_scaling(int cpu) { if (hybrid_scaling_factor) { - u8 cpu_type = 0; - - smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1); + struct cpuinfo_x86 *c = &cpu_data(cpu); + u8 cpu_type = c->topo.intel_type; /* * Return the hybrid scaling factor for P-cores and use the * default core scaling for E-cores. */ - if (cpu_type == 0x40) + if (cpu_type == INTEL_CPU_TYPE_CORE) return hybrid_scaling_factor; - if (cpu_type == 0x20) + if (cpu_type == INTEL_CPU_TYPE_ATOM) return core_get_scaling(); } @@ -2246,7 +2366,7 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) * the CPU being updated, so force the register update to run on the * right CPU. */ - wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL, + wrmsrq_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); } @@ -2353,7 +2473,7 @@ static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu) return; hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min; - wrmsrl(MSR_HWP_REQUEST, hwp_req); + wrmsrq(MSR_HWP_REQUEST, hwp_req); cpu->last_update = cpu->sample.time; } @@ -2366,7 +2486,7 @@ static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu) expired = time_after64(cpu->sample.time, cpu->last_update + hwp_boost_hold_time_ns); if (expired) { - wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached); + wrmsrq(MSR_HWP_REQUEST, cpu->hwp_req_cached); cpu->hwp_boost_min = 0; } } @@ -2427,8 +2547,8 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) u64 tsc; local_irq_save(flags); - rdmsrl(MSR_IA32_APERF, aperf); - rdmsrl(MSR_IA32_MPERF, mperf); + rdmsrq(MSR_IA32_APERF, aperf); + rdmsrq(MSR_IA32_MPERF, mperf); tsc = rdtsc(); if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) { local_irq_restore(flags); @@ -2522,7 +2642,7 @@ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) return; cpu->pstate.current_pstate = pstate; - wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); + wrmsrq(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); } static void intel_pstate_adjust_pstate(struct cpudata *cpu) @@ -2655,6 +2775,8 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(INTEL_TIGERLAKE, core_funcs), X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs), X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs), + X86_MATCH(INTEL_GRANITERAPIDS_D, core_funcs), + X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); @@ -3102,19 +3224,19 @@ static void intel_cpufreq_hwp_update(struct cpudata *cpu, u32 min, u32 max, WRITE_ONCE(cpu->hwp_req_cached, value); if (fast_switch) - wrmsrl(MSR_HWP_REQUEST, value); + wrmsrq(MSR_HWP_REQUEST, value); else - wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); + wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); } static void intel_cpufreq_perf_ctl_update(struct cpudata *cpu, u32 target_pstate, bool fast_switch) { if (fast_switch) - wrmsrl(MSR_IA32_PERF_CTL, + wrmsrq(MSR_IA32_PERF_CTL, 
pstate_funcs.get_val(cpu, target_pstate)); else - wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL, + wrmsrq_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, target_pstate)); } @@ -3129,8 +3251,8 @@ static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy, int max_pstate = policy->strict_target ? target_pstate : cpu->max_perf_ratio; - intel_cpufreq_hwp_update(cpu, target_pstate, max_pstate, 0, - fast_switch); + intel_cpufreq_hwp_update(cpu, target_pstate, max_pstate, + target_pstate, fast_switch); } else if (target_pstate != old_pstate) { intel_cpufreq_perf_ctl_update(cpu, target_pstate, fast_switch); } @@ -3258,7 +3380,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) intel_pstate_get_hwp_cap(cpu); - rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value); + rdmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value); WRITE_ONCE(cpu->hwp_req_cached, value); cpu->epp_cached = intel_pstate_get_epp(cpu, value); @@ -3325,7 +3447,7 @@ static int intel_cpufreq_suspend(struct cpufreq_policy *policy) * written by it may not be suitable. */ value &= ~HWP_DESIRED_PERF(~0L); - wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); + wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); WRITE_ONCE(cpu->hwp_req_cached, value); } @@ -3575,7 +3697,7 @@ static bool __init intel_pstate_platform_pwr_mgmt_exists(void) id = x86_match_cpu(intel_pstate_cpu_oob_ids); if (id) { - rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr); + rdmsrq(MSR_MISC_PWR_MGMT, misc_pwr); if (misc_pwr & BITMASK_OOB) { pr_debug("Bit 8 or 18 in the MISC_PWR_MGMT MSR set\n"); pr_debug("P states are controlled in Out of Band mode by the firmware/hardware\n"); @@ -3631,7 +3753,7 @@ static bool intel_pstate_hwp_is_enabled(void) { u64 value; - rdmsrl(MSR_PM_ENABLE, value); + rdmsrq(MSR_PM_ENABLE, value); return !!(value & 0x1); } @@ -3688,6 +3810,15 @@ static int __init intel_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; + /* + * The Intel pstate driver will be ignored if the platform + * firmware has its own power management modes. + */ + if (intel_pstate_platform_pwr_mgmt_exists()) { + pr_info("P-states controlled by the platform\n"); + return -ENODEV; + } + id = x86_match_cpu(hwp_support_ids); if (id) { hwp_forced = intel_pstate_hwp_is_enabled(); @@ -3743,15 +3874,6 @@ static int __init intel_pstate_init(void) default_driver = &intel_cpufreq; hwp_cpu_matched: - /* - * The Intel pstate driver will be ignored if the platform - * firmware has its own power management modes. 
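One functional change worth calling out in this hunk: in passive mode, intel_cpufreq_update_pstate() now passes target_pstate as the desired-performance argument to intel_cpufreq_hwp_update() instead of 0. Roughly, those arguments end up packed into MSR_HWP_REQUEST along these lines (a simplified sketch using the HWP_* field helpers seen elsewhere in this file, not the patch's exact code):

/* Simplified illustration of how min/max/desired reach MSR_HWP_REQUEST. */
static void hwp_update_sketch(struct cpudata *cpu, u32 min, u32 max, u32 desired)
{
	u64 value = READ_ONCE(cpu->hwp_req_cached);

	value &= ~(HWP_MIN_PERF(~0L) | HWP_MAX_PERF(~0L) | HWP_DESIRED_PERF(~0L));
	value |= HWP_MIN_PERF(min) | HWP_MAX_PERF(max) | HWP_DESIRED_PERF(desired);

	WRITE_ONCE(cpu->hwp_req_cached, value);
	wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
}

With the change, "desired" carries the requested P-state rather than leaving the hint at 0, so HWP gets an explicit target instead of only a min/max window.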
- */ - if (intel_pstate_platform_pwr_mgmt_exists()) { - pr_info("P-states controlled by the platform\n"); - return -ENODEV; - } - if (!hwp_active && hwp_only) return -ENOTSUPP; @@ -3835,6 +3957,9 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "no_hwp")) no_hwp = 1; + if (!strcmp(str, "no_cas")) + no_cas = true; + if (!strcmp(str, "force")) force_load = 1; if (!strcmp(str, "hwp_only")) diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index 312f2654d1d5..24b285cbeb8d 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -96,7 +96,6 @@ static struct cpufreq_driver kirkwood_cpufreq_driver = { .target_index = kirkwood_cpufreq_target, .init = kirkwood_cpufreq_cpu_init, .name = "kirkwood-cpufreq", - .attr = cpufreq_generic_attr, }; static int kirkwood_cpufreq_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index bd6fe8638d39..ba0e08c8486a 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -136,7 +136,7 @@ static void do_longhaul1(unsigned int mults_index) { union msr_bcr2 bcr2; - rdmsrl(MSR_VIA_BCR2, bcr2.val); + rdmsrq(MSR_VIA_BCR2, bcr2.val); /* Enable software clock multiplier */ bcr2.bits.ESOFTBF = 1; bcr2.bits.CLOCKMUL = mults_index & 0xff; @@ -144,16 +144,16 @@ static void do_longhaul1(unsigned int mults_index) /* Sync to timer tick */ safe_halt(); /* Change frequency on next halt or sleep */ - wrmsrl(MSR_VIA_BCR2, bcr2.val); + wrmsrq(MSR_VIA_BCR2, bcr2.val); /* Invoke transition */ ACPI_FLUSH_CPU_CACHE(); halt(); /* Disable software clock multiplier */ local_irq_disable(); - rdmsrl(MSR_VIA_BCR2, bcr2.val); + rdmsrq(MSR_VIA_BCR2, bcr2.val); bcr2.bits.ESOFTBF = 0; - wrmsrl(MSR_VIA_BCR2, bcr2.val); + wrmsrq(MSR_VIA_BCR2, bcr2.val); } /* For processor with Longhaul MSR */ @@ -164,7 +164,7 @@ static void do_powersaver(int cx_address, unsigned int mults_index, union msr_longhaul longhaul; u32 t; - rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + rdmsrq(MSR_VIA_LONGHAUL, longhaul.val); /* Setup new frequency */ if (!revid_errata) longhaul.bits.RevisionKey = longhaul.bits.RevisionID; @@ -180,7 +180,7 @@ static void do_powersaver(int cx_address, unsigned int mults_index, /* Raise voltage if necessary */ if (can_scale_voltage && dir) { longhaul.bits.EnableSoftVID = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + wrmsrq(MSR_VIA_LONGHAUL, longhaul.val); /* Change voltage */ if (!cx_address) { ACPI_FLUSH_CPU_CACHE(); @@ -194,12 +194,12 @@ static void do_powersaver(int cx_address, unsigned int mults_index, t = inl(acpi_gbl_FADT.xpm_timer_block.address); } longhaul.bits.EnableSoftVID = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + wrmsrq(MSR_VIA_LONGHAUL, longhaul.val); } /* Change frequency on next halt or sleep */ longhaul.bits.EnableSoftBusRatio = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + wrmsrq(MSR_VIA_LONGHAUL, longhaul.val); if (!cx_address) { ACPI_FLUSH_CPU_CACHE(); halt(); @@ -212,12 +212,12 @@ static void do_powersaver(int cx_address, unsigned int mults_index, } /* Disable bus ratio bit */ longhaul.bits.EnableSoftBusRatio = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + wrmsrq(MSR_VIA_LONGHAUL, longhaul.val); /* Reduce voltage if necessary */ if (can_scale_voltage && !dir) { longhaul.bits.EnableSoftVID = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + wrmsrq(MSR_VIA_LONGHAUL, longhaul.val); /* Change voltage */ if (!cx_address) { ACPI_FLUSH_CPU_CACHE(); @@ -231,7 +231,7 @@ static void do_powersaver(int cx_address, unsigned 
int mults_index, t = inl(acpi_gbl_FADT.xpm_timer_block.address); } longhaul.bits.EnableSoftVID = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + wrmsrq(MSR_VIA_LONGHAUL, longhaul.val); } } @@ -534,7 +534,7 @@ static void longhaul_setup_voltagescaling(void) unsigned int j, speed, pos, kHz_step, numvscales; int min_vid_speed; - rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + rdmsrq(MSR_VIA_LONGHAUL, longhaul.val); if (!(longhaul.bits.RevisionID & 1)) { pr_info("Voltage scaling not supported by CPU\n"); return; @@ -906,7 +906,6 @@ static struct cpufreq_driver longhaul_driver = { .get = longhaul_get, .init = longhaul_cpu_init, .name = "longhaul", - .attr = cpufreq_generic_attr, }; static const struct x86_cpu_id longhaul_id[] = { diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index ed1a6dbad638..39a6c4315a60 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -91,7 +91,6 @@ static struct cpufreq_driver loongson2_cpufreq_driver = { .verify = cpufreq_generic_frequency_table_verify, .target_index = loongson2_cpufreq_target, .get = cpufreq_generic_get, - .attr = cpufreq_generic_attr, }; static const struct platform_device_id platform_device_ids[] = { diff --git a/drivers/cpufreq/loongson3_cpufreq.c b/drivers/cpufreq/loongson3_cpufreq.c index bd34bf0fafa5..1e8715ea1b77 100644 --- a/drivers/cpufreq/loongson3_cpufreq.c +++ b/drivers/cpufreq/loongson3_cpufreq.c @@ -299,15 +299,6 @@ static int loongson3_cpufreq_cpu_init(struct cpufreq_policy *policy) per_cpu(freq_data, i) = per_cpu(freq_data, cpu); } - if (policy_has_boost_freq(policy)) { - ret = cpufreq_enable_boost_support(); - if (ret < 0) { - pr_warn("cpufreq: Failed to enable boost: %d\n", ret); - return ret; - } - loongson3_cpufreq_driver.boost_enabled = true; - } - return 0; } @@ -337,8 +328,8 @@ static struct cpufreq_driver loongson3_cpufreq_driver = { .offline = loongson3_cpufreq_cpu_offline, .get = loongson3_cpufreq_get, .target_index = loongson3_cpufreq_target, - .attr = cpufreq_generic_attr, .verify = cpufreq_generic_frequency_table_verify, + .set_boost = cpufreq_boost_set_sw, .suspend = cpufreq_generic_suspend, }; diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 478257523cc3..74f1b4c796e4 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -293,7 +293,6 @@ static struct cpufreq_driver cpufreq_mtk_hw_driver = { .register_em = mtk_cpufreq_register_em, .fast_switch = mtk_cpufreq_hw_fast_switch, .name = "mtk-cpufreq-hw", - .attr = cpufreq_generic_attr, }; static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 2e4f9ca0af35..f3f02c4b6888 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -618,7 +618,6 @@ static struct cpufreq_driver mtk_cpufreq_driver = { .exit = mtk_cpufreq_exit, .register_em = cpufreq_register_em_with_opp, .name = "mtk-cpufreq", - .attr = cpufreq_generic_attr, }; static int mtk_cpufreq_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 106220c0fd11..bbb01d93b54b 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -147,7 +147,6 @@ static struct cpufreq_driver omap_driver = { .exit = omap_cpu_exit, .register_em = cpufreq_register_em_with_opp, .name = "omap", - .attr = cpufreq_generic_attr, }; static int 
omap_cpufreq_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c index ef0a3216a386..69c19233fcd4 100644 --- a/drivers/cpufreq/p4-clockmod.c +++ b/drivers/cpufreq/p4-clockmod.c @@ -227,7 +227,6 @@ static struct cpufreq_driver p4clockmod_driver = { .init = cpufreq_p4_cpu_init, .get = cpufreq_p4_get, .name = "p4-clockmod", - .attr = cpufreq_generic_attr, }; static const struct x86_cpu_id cpufreq_p4_id[] = { diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 5fc9cb480516..a3931349360f 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -245,7 +245,6 @@ static struct cpufreq_driver pas_cpufreq_driver = { .exit = pas_cpufreq_cpu_exit, .verify = cpufreq_generic_frequency_table_verify, .target_index = pas_cpufreq_target, - .attr = cpufreq_generic_attr, }; /* diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 6c9f0888a2a7..a22c22bd693a 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -439,7 +439,6 @@ static struct cpufreq_driver pmac_cpufreq_driver = { .suspend = pmac_cpufreq_suspend, .resume = pmac_cpufreq_resume, .flags = CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, - .attr = cpufreq_generic_attr, .name = "powermac", }; diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c index 74ff6c47df29..80897ec8f00e 100644 --- a/drivers/cpufreq/pmac64-cpufreq.c +++ b/drivers/cpufreq/pmac64-cpufreq.c @@ -332,7 +332,6 @@ static struct cpufreq_driver g5_cpufreq_driver = { .verify = cpufreq_generic_frequency_table_verify, .target_index = g5_cpufreq_target, .get = g5_cpufreq_get_speed, - .attr = cpufreq_generic_attr, }; diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c index f0a4a6c31204..99d2244e03b0 100644 --- a/drivers/cpufreq/powernow-k6.c +++ b/drivers/cpufreq/powernow-k6.c @@ -253,7 +253,6 @@ static struct cpufreq_driver powernow_k6_driver = { .exit = powernow_k6_cpu_exit, .get = powernow_k6_get, .name = "powernow-k6", - .attr = cpufreq_generic_attr, }; static const struct x86_cpu_id powernow_k6_ids[] = { diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index 4271446c8725..31039330a3ba 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -219,13 +219,13 @@ static void change_FID(int fid) { union msr_fidvidctl fidvidctl; - rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); + rdmsrq(MSR_K7_FID_VID_CTL, fidvidctl.val); if (fidvidctl.bits.FID != fid) { fidvidctl.bits.SGTC = latency; fidvidctl.bits.FID = fid; fidvidctl.bits.VIDC = 0; fidvidctl.bits.FIDC = 1; - wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); + wrmsrq(MSR_K7_FID_VID_CTL, fidvidctl.val); } } @@ -234,13 +234,13 @@ static void change_VID(int vid) { union msr_fidvidctl fidvidctl; - rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); + rdmsrq(MSR_K7_FID_VID_CTL, fidvidctl.val); if (fidvidctl.bits.VID != vid) { fidvidctl.bits.SGTC = latency; fidvidctl.bits.VID = vid; fidvidctl.bits.FIDC = 0; fidvidctl.bits.VIDC = 1; - wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); + wrmsrq(MSR_K7_FID_VID_CTL, fidvidctl.val); } } @@ -260,7 +260,7 @@ static int powernow_target(struct cpufreq_policy *policy, unsigned int index) fid = powernow_table[index].driver_data & 0xFF; vid = (powernow_table[index].driver_data & 0xFF00) >> 8; - rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); + rdmsrq(MSR_K7_FID_VID_STATUS, fidvidstatus.val); cfid = fidvidstatus.bits.CFID; freqs.old = fsb * fid_codes[cfid] / 
10; @@ -557,7 +557,7 @@ static unsigned int powernow_get(unsigned int cpu) if (cpu) return 0; - rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); + rdmsrq(MSR_K7_FID_VID_STATUS, fidvidstatus.val); cfid = fidvidstatus.bits.CFID; return fsb * fid_codes[cfid] / 10; @@ -598,7 +598,7 @@ static int powernow_cpu_init(struct cpufreq_policy *policy) if (policy->cpu != 0) return -ENODEV; - rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); + rdmsrq(MSR_K7_FID_VID_STATUS, fidvidstatus.val); recalibrate_cpu_khz(); @@ -667,7 +667,6 @@ static struct cpufreq_driver powernow_driver = { .init = powernow_cpu_init, .exit = powernow_cpu_exit, .name = "powernow-k7", - .attr = cpufreq_generic_attr, }; static int __init powernow_init(void) diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index a01170f7d01c..f7512b4e923e 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -482,7 +482,7 @@ static void check_supported_cpu(void *_rc) cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { - pr_info("Power state transitions not supported\n"); + pr_info_once("Power state transitions not supported\n"); return; } *rc = 0; @@ -1143,7 +1143,6 @@ static struct cpufreq_driver cpufreq_amd64_driver = { .exit = powernowk8_cpu_exit, .get = powernowk8_get, .name = "powernow-k8", - .attr = cpufreq_generic_attr, }; static void __request_acpi_cpufreq(void) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index ae79d909943b..7d9a5f656de8 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -21,7 +21,6 @@ #include <linux/string_choices.h> #include <linux/cpu.h> #include <linux/hashtable.h> -#include <trace/events/power.h> #include <asm/cputhreads.h> #include <asm/firmware.h> @@ -30,6 +29,9 @@ #include <asm/opal.h> #include <linux/timer.h> +#define CREATE_TRACE_POINTS +#include "powernv-trace.h" + #define POWERNV_MAX_PSTATES_ORDER 8 #define POWERNV_MAX_PSTATES (1UL << (POWERNV_MAX_PSTATES_ORDER)) #define PMSR_PSAFE_ENABLE (1UL << 30) @@ -386,12 +388,8 @@ static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy, static struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = __ATTR_RO(cpuinfo_nominal_freq); -#define SCALING_BOOST_FREQS_ATTR_INDEX 2 - static struct freq_attr *powernv_cpu_freq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, &cpufreq_freq_attr_cpuinfo_nominal_freq, - &cpufreq_freq_attr_scaling_boost_freqs, NULL, }; @@ -671,7 +669,8 @@ static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) */ static void gpstate_timer_handler(struct timer_list *t) { - struct global_pstate_info *gpstates = from_timer(gpstates, t, timer); + struct global_pstate_info *gpstates = timer_container_of(gpstates, t, + timer); struct cpufreq_policy *policy = gpstates->policy; int gpstate_idx, lpstate_idx; unsigned long val; @@ -806,7 +805,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, if (gpstate_idx != new_index) queue_gpstate_timer(gpstates); else - del_timer_sync(&gpstates->timer); + timer_delete_sync(&gpstates->timer); gpstates_done: freq_data.gpstate_id = idx_to_pstate(gpstate_idx); @@ -884,7 +883,7 @@ static void powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy) freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); if (gpstates) - del_timer_sync(&gpstates->timer); + 
timer_delete_sync(&gpstates->timer); kfree(policy->driver_data); } @@ -1128,9 +1127,7 @@ static int __init powernv_cpufreq_init(void) goto out; if (powernv_pstate_info.wof_enabled) - powernv_cpufreq_driver.boost_enabled = true; - else - powernv_cpu_freq_attr[SCALING_BOOST_FREQS_ATTR_INDEX] = NULL; + powernv_cpufreq_driver.set_boost = cpufreq_boost_set_sw; rc = cpufreq_register_driver(&powernv_cpufreq_driver); if (rc) { @@ -1138,9 +1135,6 @@ static int __init powernv_cpufreq_init(void) goto cleanup; } - if (powernv_pstate_info.wof_enabled) - cpufreq_enable_boost_support(); - register_reboot_notifier(&powernv_cpufreq_reboot_nb); opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); diff --git a/drivers/cpufreq/powernv-trace.h b/drivers/cpufreq/powernv-trace.h new file mode 100644 index 000000000000..8cadb7c9427b --- /dev/null +++ b/drivers/cpufreq/powernv-trace.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#if !defined(_POWERNV_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _POWERNV_TRACE_H + +#include <linux/cpufreq.h> +#include <linux/tracepoint.h> +#include <linux/trace_events.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power + +TRACE_EVENT(powernv_throttle, + + TP_PROTO(int chip_id, const char *reason, int pmax), + + TP_ARGS(chip_id, reason, pmax), + + TP_STRUCT__entry( + __field(int, chip_id) + __string(reason, reason) + __field(int, pmax) + ), + + TP_fast_assign( + __entry->chip_id = chip_id; + __assign_str(reason); + __entry->pmax = pmax; + ), + + TP_printk("Chip %d Pmax %d %s", __entry->chip_id, + __entry->pmax, __get_str(reason)) +); + +#endif /* _POWERNV_TRACE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE powernv-trace + +#include <trace/define_trace.h> diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c deleted file mode 100644 index 98595b3ea13f..000000000000 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ /dev/null @@ -1,173 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * cpufreq driver for the cell processor - * - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 - * - * Author: Christian Krafft <krafft@de.ibm.com> - */ - -#include <linux/cpufreq.h> -#include <linux/module.h> -#include <linux/of.h> - -#include <asm/machdep.h> -#include <asm/cell-regs.h> - -#include "ppc_cbe_cpufreq.h" - -/* the CBE supports an 8 step frequency scaling */ -static struct cpufreq_frequency_table cbe_freqs[] = { - {0, 1, 0}, - {0, 2, 0}, - {0, 3, 0}, - {0, 4, 0}, - {0, 5, 0}, - {0, 6, 0}, - {0, 8, 0}, - {0, 10, 0}, - {0, 0, CPUFREQ_TABLE_END}, -}; - -/* - * hardware specific functions - */ - -static int set_pmode(unsigned int cpu, unsigned int slow_mode) -{ - int rc; - - if (cbe_cpufreq_has_pmi) - rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode); - else - rc = cbe_cpufreq_set_pmode(cpu, slow_mode); - - pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu)); - - return rc; -} - -/* - * cpufreq functions - */ - -static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - struct cpufreq_frequency_table *pos; - const u32 *max_freqp; - u32 max_freq; - int cur_pmode; - struct device_node *cpu; - - cpu = of_get_cpu_node(policy->cpu, NULL); - - if (!cpu) - return -ENODEV; - - pr_debug("init cpufreq on CPU %d\n", policy->cpu); - - /* - * Let's check we can actually get to the CELL regs - */ - if (!cbe_get_cpu_pmd_regs(policy->cpu) || - !cbe_get_cpu_mic_tm_regs(policy->cpu)) 
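A recurring theme in this series, visible in the powernv hunk above and in the loongson3, qcom-cpufreq-hw and scmi-cpufreq hunks elsewhere in the diff: drivers stop enabling boost by hand (calling cpufreq_enable_boost_support(), exporting scaling_boost_freqs, setting boost_enabled), because the core now sets policy->boost_supported in cpufreq_table_validate_and_sort() whenever the frequency table carries boost entries, and the driver only supplies a .set_boost callback. A hedged before/after sketch for a hypothetical driver (names are illustrative, not taken from the patch):

/* Before: boost detected and enabled per driver in ->init(). */
static int old_style_init(struct cpufreq_policy *policy)
{
	if (policy_has_boost_freq(policy))
		cpufreq_enable_boost_support();	/* pattern removed by this series */
	return 0;
}

/* After: the core flags boost support from the frequency table; the driver
 * just wires up the generic software toggle. */
static struct cpufreq_driver example_driver = {
	.name		= "example",
	.verify		= cpufreq_generic_frequency_table_verify,
	.set_boost	= cpufreq_boost_set_sw,
	/* .init, .target_index, etc. omitted for brevity */
};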
{ - pr_info("invalid CBE regs pointers for cpufreq\n"); - of_node_put(cpu); - return -EINVAL; - } - - max_freqp = of_get_property(cpu, "clock-frequency", NULL); - - of_node_put(cpu); - - if (!max_freqp) - return -EINVAL; - - /* we need the freq in kHz */ - max_freq = *max_freqp / 1000; - - pr_debug("max clock-frequency is at %u kHz\n", max_freq); - pr_debug("initializing frequency table\n"); - - /* initialize frequency table */ - cpufreq_for_each_entry(pos, cbe_freqs) { - pos->frequency = max_freq / pos->driver_data; - pr_debug("%d: %d\n", (int)(pos - cbe_freqs), pos->frequency); - } - - /* if DEBUG is enabled set_pmode() measures the latency - * of a transition */ - policy->cpuinfo.transition_latency = 25000; - - cur_pmode = cbe_cpufreq_get_pmode(policy->cpu); - pr_debug("current pmode is at %d\n",cur_pmode); - - policy->cur = cbe_freqs[cur_pmode].frequency; - -#ifdef CONFIG_SMP - cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); -#endif - - policy->freq_table = cbe_freqs; - cbe_cpufreq_pmi_policy_init(policy); - return 0; -} - -static void cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy) -{ - cbe_cpufreq_pmi_policy_exit(policy); -} - -static int cbe_cpufreq_target(struct cpufreq_policy *policy, - unsigned int cbe_pmode_new) -{ - pr_debug("setting frequency for cpu %d to %d kHz, " \ - "1/%d of max frequency\n", - policy->cpu, - cbe_freqs[cbe_pmode_new].frequency, - cbe_freqs[cbe_pmode_new].driver_data); - - return set_pmode(policy->cpu, cbe_pmode_new); -} - -static struct cpufreq_driver cbe_cpufreq_driver = { - .verify = cpufreq_generic_frequency_table_verify, - .target_index = cbe_cpufreq_target, - .init = cbe_cpufreq_cpu_init, - .exit = cbe_cpufreq_cpu_exit, - .name = "cbe-cpufreq", - .flags = CPUFREQ_CONST_LOOPS, -}; - -/* - * module init and destoy - */ - -static int __init cbe_cpufreq_init(void) -{ - int ret; - - if (!machine_is(cell)) - return -ENODEV; - - cbe_cpufreq_pmi_init(); - - ret = cpufreq_register_driver(&cbe_cpufreq_driver); - if (ret) - cbe_cpufreq_pmi_exit(); - - return ret; -} - -static void __exit cbe_cpufreq_exit(void) -{ - cpufreq_unregister_driver(&cbe_cpufreq_driver); - cbe_cpufreq_pmi_exit(); -} - -module_init(cbe_cpufreq_init); -module_exit(cbe_cpufreq_exit); - -MODULE_DESCRIPTION("cpufreq driver for Cell BE processors"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.h b/drivers/cpufreq/ppc_cbe_cpufreq.h deleted file mode 100644 index 00cd8633b0d9..000000000000 --- a/drivers/cpufreq/ppc_cbe_cpufreq.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ppc_cbe_cpufreq.h - * - * This file contains the definitions used by the cbe_cpufreq driver. 
- * - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 - * - * Author: Christian Krafft <krafft@de.ibm.com> - * - */ - -#include <linux/cpufreq.h> -#include <linux/types.h> - -int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode); -int cbe_cpufreq_get_pmode(int cpu); - -int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode); - -#if IS_ENABLED(CONFIG_CPU_FREQ_CBE_PMI) -extern bool cbe_cpufreq_has_pmi; -void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy); -void cbe_cpufreq_pmi_policy_exit(struct cpufreq_policy *policy); -void cbe_cpufreq_pmi_init(void); -void cbe_cpufreq_pmi_exit(void); -#else -#define cbe_cpufreq_has_pmi (0) -static inline void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy) {} -static inline void cbe_cpufreq_pmi_policy_exit(struct cpufreq_policy *policy) {} -static inline void cbe_cpufreq_pmi_init(void) {} -static inline void cbe_cpufreq_pmi_exit(void) {} -#endif diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c b/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c deleted file mode 100644 index 04830cd95333..000000000000 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * pervasive backend for the cbe_cpufreq driver - * - * This driver makes use of the pervasive unit to - * engage the desired frequency. - * - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 - * - * Author: Christian Krafft <krafft@de.ibm.com> - */ - -#include <linux/io.h> -#include <linux/kernel.h> -#include <linux/time.h> -#include <asm/machdep.h> -#include <asm/hw_irq.h> -#include <asm/cell-regs.h> - -#include "ppc_cbe_cpufreq.h" - -/* to write to MIC register */ -static u64 MIC_Slow_Fast_Timer_table[] = { - [0 ... 7] = 0x007fc00000000000ull, -}; - -/* more values for the MIC */ -static u64 MIC_Slow_Next_Timer_table[] = { - 0x0000240000000000ull, - 0x0000268000000000ull, - 0x000029C000000000ull, - 0x00002D0000000000ull, - 0x0000300000000000ull, - 0x0000334000000000ull, - 0x000039C000000000ull, - 0x00003FC000000000ull, -}; - - -int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode) -{ - struct cbe_pmd_regs __iomem *pmd_regs; - struct cbe_mic_tm_regs __iomem *mic_tm_regs; - unsigned long flags; - u64 value; -#ifdef DEBUG - long time; -#endif - - local_irq_save(flags); - - mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); - pmd_regs = cbe_get_cpu_pmd_regs(cpu); - -#ifdef DEBUG - time = jiffies; -#endif - - out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); - out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); - - out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); - out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); - - value = in_be64(&pmd_regs->pmcr); - /* set bits to zero */ - value &= 0xFFFFFFFFFFFFFFF8ull; - /* set bits to next pmode */ - value |= pmode; - - out_be64(&pmd_regs->pmcr, value); - -#ifdef DEBUG - /* wait until new pmode appears in status register */ - value = in_be64(&pmd_regs->pmsr) & 0x07; - while (value != pmode) { - cpu_relax(); - value = in_be64(&pmd_regs->pmsr) & 0x07; - } - - time = jiffies - time; - time = jiffies_to_msecs(time); - pr_debug("had to wait %lu ms for a transition using " \ - "pervasive unit\n", time); -#endif - local_irq_restore(flags); - - return 0; -} - - -int cbe_cpufreq_get_pmode(int cpu) -{ - int ret; - struct cbe_pmd_regs __iomem *pmd_regs; - - pmd_regs = cbe_get_cpu_pmd_regs(cpu); - ret = in_be64(&pmd_regs->pmsr) & 0x07; - - return ret; 
-} - diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c deleted file mode 100644 index 6f0c32592416..000000000000 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ /dev/null @@ -1,150 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * pmi backend for the cbe_cpufreq driver - * - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 - * - * Author: Christian Krafft <krafft@de.ibm.com> - */ - -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/timer.h> -#include <linux/init.h> -#include <linux/pm_qos.h> -#include <linux/slab.h> - -#include <asm/processor.h> -#include <asm/pmi.h> -#include <asm/cell-regs.h> - -#ifdef DEBUG -#include <asm/time.h> -#endif - -#include "ppc_cbe_cpufreq.h" - -bool cbe_cpufreq_has_pmi = false; -EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi); - -/* - * hardware specific functions - */ - -int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode) -{ - int ret; - pmi_message_t pmi_msg; -#ifdef DEBUG - long time; -#endif - pmi_msg.type = PMI_TYPE_FREQ_CHANGE; - pmi_msg.data1 = cbe_cpu_to_node(cpu); - pmi_msg.data2 = pmode; - -#ifdef DEBUG - time = jiffies; -#endif - pmi_send_message(pmi_msg); - -#ifdef DEBUG - time = jiffies - time; - time = jiffies_to_msecs(time); - pr_debug("had to wait %lu ms for a transition using " \ - "PMI\n", time); -#endif - ret = pmi_msg.data2; - pr_debug("PMI returned slow mode %d\n", ret); - - return ret; -} -EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi); - - -static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg) -{ - struct cpufreq_policy *policy; - struct freq_qos_request *req; - u8 node, slow_mode; - int cpu, ret; - - BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); - - node = pmi_msg.data1; - slow_mode = pmi_msg.data2; - - cpu = cbe_node_to_cpu(node); - - pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode); - - policy = cpufreq_cpu_get(cpu); - if (!policy) { - pr_warn("cpufreq policy not found cpu%d\n", cpu); - return; - } - - req = policy->driver_data; - - ret = freq_qos_update_request(req, - policy->freq_table[slow_mode].frequency); - if (ret < 0) - pr_warn("Failed to update freq constraint: %d\n", ret); - else - pr_debug("limiting node %d to slow mode %d\n", node, slow_mode); - - cpufreq_cpu_put(policy); -} - -static struct pmi_handler cbe_pmi_handler = { - .type = PMI_TYPE_FREQ_CHANGE, - .handle_pmi_message = cbe_cpufreq_handle_pmi, -}; - -void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy) -{ - struct freq_qos_request *req; - int ret; - - if (!cbe_cpufreq_has_pmi) - return; - - req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) - return; - - ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MAX, - policy->freq_table[0].frequency); - if (ret < 0) { - pr_err("Failed to add freq constraint (%d)\n", ret); - kfree(req); - return; - } - - policy->driver_data = req; -} -EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_policy_init); - -void cbe_cpufreq_pmi_policy_exit(struct cpufreq_policy *policy) -{ - struct freq_qos_request *req = policy->driver_data; - - if (cbe_cpufreq_has_pmi) { - freq_qos_remove_request(req); - kfree(req); - } -} -EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_policy_exit); - -void cbe_cpufreq_pmi_init(void) -{ - if (!pmi_register_handler(&cbe_pmi_handler)) - cbe_cpufreq_has_pmi = true; -} -EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_init); - -void cbe_cpufreq_pmi_exit(void) -{ - pmi_unregister_handler(&cbe_pmi_handler); - cbe_cpufreq_has_pmi = false; -} -EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_exit); diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c 
b/drivers/cpufreq/qcom-cpufreq-hw.c index dce7cad1813f..8422704a3b10 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -566,12 +566,6 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) return -ENODEV; } - if (policy_has_boost_freq(policy)) { - ret = cpufreq_enable_boost_support(); - if (ret) - dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); - } - return qcom_cpufreq_hw_lmh_init(policy, index); } @@ -595,12 +589,6 @@ static void qcom_cpufreq_ready(struct cpufreq_policy *policy) enable_irq(data->throttle_irq); } -static struct freq_attr *qcom_cpufreq_hw_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - &cpufreq_freq_attr_scaling_boost_freqs, - NULL -}; - static struct cpufreq_driver cpufreq_qcom_hw_driver = { .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_HAVE_GOVERNOR_PER_POLICY | @@ -615,8 +603,8 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = { .register_em = cpufreq_register_em_with_opp, .fast_switch = qcom_cpufreq_hw_fast_switch, .name = "qcom-cpufreq-hw", - .attr = qcom_cpufreq_hw_attr, .ready = qcom_cpufreq_ready, + .set_boost = cpufreq_boost_set_sw, }; static unsigned long qcom_cpufreq_hw_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index a37ce051236c..8d1f5ac59132 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -254,7 +254,6 @@ static struct cpufreq_driver qoriq_cpufreq_driver = { .verify = cpufreq_generic_frequency_table_verify, .target_index = qoriq_cpufreq_target, .get = cpufreq_generic_get, - .attr = cpufreq_generic_attr, }; static const struct of_device_id qoriq_cpufreq_blacklist[] = { diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs new file mode 100644 index 000000000000..43c87d0259b6 --- /dev/null +++ b/drivers/cpufreq/rcpufreq_dt.rs @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust based implementation of the cpufreq-dt driver. + +use kernel::{ + c_str, + clk::Clk, + cpu, cpufreq, + cpumask::CpumaskVar, + device::{Core, Device}, + error::code::*, + fmt, + macros::vtable, + module_platform_driver, of, opp, platform, + prelude::*, + str::CString, + sync::Arc, +}; + +/// Finds exact supply name from the OF node. +fn find_supply_name_exact(dev: &Device, name: &str) -> Option<CString> { + let prop_name = CString::try_from_fmt(fmt!("{}-supply", name)).ok()?; + dev.property_present(&prop_name) + .then(|| CString::try_from_fmt(fmt!("{name}")).ok()) + .flatten() +} + +/// Finds supply name for the CPU from DT. +fn find_supply_names(dev: &Device, cpu: cpu::CpuId) -> Option<KVec<CString>> { + // Try "cpu0" for older DTs, fallback to "cpu". + let name = (cpu.as_u32() == 0) + .then(|| find_supply_name_exact(dev, "cpu0")) + .flatten() + .or_else(|| find_supply_name_exact(dev, "cpu"))?; + + let mut list = KVec::with_capacity(1, GFP_KERNEL).ok()?; + list.push(name, GFP_KERNEL).ok()?; + + Some(list) +} + +/// Represents the cpufreq dt device. 
+struct CPUFreqDTDevice { + opp_table: opp::Table, + freq_table: opp::FreqTable, + _mask: CpumaskVar, + _token: Option<opp::ConfigToken>, + _clk: Clk, +} + +#[derive(Default)] +struct CPUFreqDTDriver; + +#[vtable] +impl opp::ConfigOps for CPUFreqDTDriver {} + +#[vtable] +impl cpufreq::Driver for CPUFreqDTDriver { + const NAME: &'static CStr = c_str!("cpufreq-dt"); + const FLAGS: u16 = cpufreq::flags::NEED_INITIAL_FREQ_CHECK | cpufreq::flags::IS_COOLING_DEV; + const BOOST_ENABLED: bool = true; + + type PData = Arc<CPUFreqDTDevice>; + + fn init(policy: &mut cpufreq::Policy) -> Result<Self::PData> { + let cpu = policy.cpu(); + // SAFETY: The CPU device is only used during init; it won't get hot-unplugged. The cpufreq + // core registers with CPU notifiers and the cpufreq core/driver won't use the CPU device, + // once the CPU is hot-unplugged. + let dev = unsafe { cpu::from_cpu(cpu)? }; + let mut mask = CpumaskVar::new_zero(GFP_KERNEL)?; + + mask.set(cpu); + + let token = find_supply_names(dev, cpu) + .map(|names| { + opp::Config::<Self>::new() + .set_regulator_names(names)? + .set(dev) + }) + .transpose()?; + + // Get OPP-sharing information from "operating-points-v2" bindings. + let fallback = match opp::Table::of_sharing_cpus(dev, &mut mask) { + Ok(()) => false, + Err(e) if e == ENOENT => { + // "operating-points-v2" not supported. If the platform hasn't + // set sharing CPUs, fallback to all CPUs share the `Policy` + // for backward compatibility. + opp::Table::sharing_cpus(dev, &mut mask).is_err() + } + Err(e) => return Err(e), + }; + + // Initialize OPP tables for all policy cpus. + // + // For platforms not using "operating-points-v2" bindings, we do this + // before updating policy cpus. Otherwise, we will end up creating + // duplicate OPPs for the CPUs. + // + // OPPs might be populated at runtime, don't fail for error here unless + // it is -EPROBE_DEFER. + let mut opp_table = match opp::Table::from_of_cpumask(dev, &mut mask) { + Ok(table) => table, + Err(e) => { + if e == EPROBE_DEFER { + return Err(e); + } + + // The table is added dynamically ? + opp::Table::from_dev(dev)? + } + }; + + // The OPP table must be initialized, statically or dynamically, by this point. + opp_table.opp_count()?; + + // Set sharing cpus for fallback scenario. + if fallback { + mask.setall(); + opp_table.set_sharing_cpus(&mut mask)?; + } + + let mut transition_latency = opp_table.max_transition_latency_ns() as u32; + if transition_latency == 0 { + transition_latency = cpufreq::ETERNAL_LATENCY_NS; + } + + policy + .set_dvfs_possible_from_any_cpu(true) + .set_suspend_freq(opp_table.suspend_freq()) + .set_transition_latency_ns(transition_latency); + + let freq_table = opp_table.cpufreq_table()?; + // SAFETY: The `freq_table` is not dropped while it is getting used by the C code. + unsafe { policy.set_freq_table(&freq_table) }; + + // SAFETY: The returned `clk` is not dropped while it is getting used by the C code. + let clk = unsafe { policy.set_clk(dev, None)? }; + + mask.copy(policy.cpus()); + + Ok(Arc::new( + CPUFreqDTDevice { + opp_table, + freq_table, + _mask: mask, + _token: token, + _clk: clk, + }, + GFP_KERNEL, + )?) + } + + fn exit(_policy: &mut cpufreq::Policy, _data: Option<Self::PData>) -> Result { + Ok(()) + } + + fn online(_policy: &mut cpufreq::Policy) -> Result { + // We did light-weight tear down earlier, nothing to do here. + Ok(()) + } + + fn offline(_policy: &mut cpufreq::Policy) -> Result { + // Preserve policy->data and don't free resources on light-weight + // tear down. 
+ Ok(()) + } + + fn suspend(policy: &mut cpufreq::Policy) -> Result { + policy.generic_suspend() + } + + fn verify(data: &mut cpufreq::PolicyData) -> Result { + data.generic_verify() + } + + fn target_index(policy: &mut cpufreq::Policy, index: cpufreq::TableIndex) -> Result { + let Some(data) = policy.data::<Self::PData>() else { + return Err(ENOENT); + }; + + let freq = data.freq_table.freq(index)?; + data.opp_table.set_rate(freq) + } + + fn get(policy: &mut cpufreq::Policy) -> Result<u32> { + policy.generic_get() + } + + fn set_boost(_policy: &mut cpufreq::Policy, _state: i32) -> Result { + Ok(()) + } + + fn register_em(policy: &mut cpufreq::Policy) { + policy.register_em_opp() + } +} + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + <CPUFreqDTDriver as platform::Driver>::IdInfo, + [(of::DeviceId::new(c_str!("operating-points-v2")), ())] +); + +impl platform::Driver for CPUFreqDTDriver { + type IdInfo = (); + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + + fn probe( + pdev: &platform::Device<Core>, + _id_info: Option<&Self::IdInfo>, + ) -> Result<Pin<KBox<Self>>> { + cpufreq::Registration::<CPUFreqDTDriver>::new_foreign_owned(pdev.as_ref())?; + Ok(KBox::new(Self {}, GFP_KERNEL)?.into()) + } +} + +module_platform_driver! { + type: CPUFreqDTDriver, + name: "cpufreq-dt", + author: "Viresh Kumar <viresh.kumar@linaro.org>", + description: "Generic CPUFreq DT driver", + license: "GPL v2", +} diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c index 330c8d6cf93c..b360f03a116f 100644 --- a/drivers/cpufreq/sc520_freq.c +++ b/drivers/cpufreq/sc520_freq.c @@ -21,7 +21,6 @@ #include <linux/io.h> #include <asm/cpu_device_id.h> -#include <asm/msr.h> #define MMCR_BASE 0xfffef000 /* The default base address */ #define OFFS_CPUCTL 0x2 /* CPU Control Register */ @@ -92,7 +91,6 @@ static struct cpufreq_driver sc520_freq_driver = { .target_index = sc520_freq_target, .init = sc520_freq_cpu_init, .name = "sc520_freq", - .attr = cpufreq_generic_attr, }; static const struct x86_cpu_id sc520_ids[] = { diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 914bf2c940a0..ef078426bfd5 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -37,11 +37,17 @@ static struct cpufreq_driver scmi_cpufreq_driver; static unsigned int scmi_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct scmi_data *priv = policy->driver_data; + struct cpufreq_policy *policy; + struct scmi_data *priv; unsigned long rate; int ret; + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + ret = perf_ops->freq_get(ph, priv->domain_id, &rate, false); if (ret) return 0; @@ -171,12 +177,6 @@ scmi_get_rate_limit(u32 domain, bool has_fast_switch) return rate_limit; } -static struct freq_attr *scmi_cpufreq_hw_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, - NULL, -}; - static int scmi_limit_notify_cb(struct notifier_block *nb, unsigned long event, void *data) { struct scmi_data *priv = container_of(nb, struct scmi_data, limit_notify_nb); @@ -303,17 +303,6 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->transition_delay_us = scmi_get_rate_limit(domain, policy->fast_switch_possible); - if (policy_has_boost_freq(policy)) { - ret = cpufreq_enable_boost_support(); - if (ret) { - dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); - goto out_free_table; - } else { - scmi_cpufreq_hw_attr[1] = 
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index 1f97b949763f..dcbb0ae7dd47 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -29,9 +29,16 @@ static struct scpi_ops *scpi_ops;

 static unsigned int scpi_cpufreq_get_rate(unsigned int cpu)
 {
-	struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu);
-	struct scpi_data *priv = policy->driver_data;
-	unsigned long rate = clk_get_rate(priv->clk);
+	struct cpufreq_policy *policy;
+	struct scpi_data *priv;
+	unsigned long rate;
+
+	policy = cpufreq_cpu_get_raw(cpu);
+	if (unlikely(!policy))
+		return 0;
+
+	priv = policy->driver_data;
+	rate = clk_get_rate(priv->clk);

 	return rate / 1000;
 }
@@ -184,7 +191,6 @@ static struct cpufreq_driver scpi_cpufreq_driver = {
 		 CPUFREQ_NEED_INITIAL_FREQ_CHECK |
 		 CPUFREQ_IS_COOLING_DEV,
 	.verify = cpufreq_generic_frequency_table_verify,
-	.attr = cpufreq_generic_attr,
 	.get = scpi_cpufreq_get_rate,
 	.init = scpi_cpufreq_init,
 	.exit = scpi_cpufreq_exit,
diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c
index aa74036d0420..9c0b01e00508 100644
--- a/drivers/cpufreq/sh-cpufreq.c
+++ b/drivers/cpufreq/sh-cpufreq.c
@@ -151,7 +151,6 @@ static struct cpufreq_driver sh_cpufreq_driver = {
 	.verify = sh_cpufreq_verify,
 	.init = sh_cpufreq_cpu_init,
 	.exit = sh_cpufreq_cpu_exit,
-	.attr = cpufreq_generic_attr,
 };

 static int __init sh_cpufreq_module_init(void)
diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c
index d8ab5b01d46d..707c71090cc3 100644
--- a/drivers/cpufreq/spear-cpufreq.c
+++ b/drivers/cpufreq/spear-cpufreq.c
@@ -165,7 +165,6 @@ static struct cpufreq_driver spear_cpufreq_driver = {
 	.target_index = spear_cpufreq_target,
 	.get = cpufreq_generic_get,
 	.init = spear_cpufreq_init,
-	.attr = cpufreq_generic_attr,
 };

 static int spear_cpufreq_probe(struct platform_device *pdev)
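Every `.attr = cpufreq_generic_attr` line dropped in these hunks pointed at the same shared array: a NULL-terminated list holding only the scaling_available_frequencies attribute, which the cpufreq core appears to expose on its own after this series, making the per-driver pointer redundant. Its definition, from drivers/cpufreq/freq_table.c, shown lightly reformatted for reference:

/* drivers/cpufreq/freq_table.c */
struct freq_attr *cpufreq_generic_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	NULL,
};
EXPORT_SYMBOL_GPL(cpufreq_generic_attr);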
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
index 3fafedb983b5..3e6e85a92212 100644
--- a/drivers/cpufreq/speedstep-centrino.c
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -507,7 +507,6 @@ static struct cpufreq_driver centrino_driver = {
 	.verify = cpufreq_generic_frequency_table_verify,
 	.target_index = centrino_target,
 	.get = get_cur_freq,
-	.attr = cpufreq_generic_attr,
 };

 /*
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
index f2076d72bf39..262cfbde9ca7 100644
--- a/drivers/cpufreq/speedstep-ich.c
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -315,7 +315,6 @@ static struct cpufreq_driver speedstep_driver = {
 	.target_index = speedstep_target,
 	.init = speedstep_cpu_init,
 	.get = speedstep_get,
-	.attr = cpufreq_generic_attr,
 };

 static const struct x86_cpu_id ss_smi_ids[] = {
diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
index 0ce9d4b6dfcc..39265884c3f1 100644
--- a/drivers/cpufreq/speedstep-smi.c
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -295,7 +295,6 @@ static struct cpufreq_driver speedstep_driver = {
 	.init = speedstep_cpu_init,
 	.get = speedstep_get,
 	.resume = speedstep_resume,
-	.attr = cpufreq_generic_attr,
 };

 static const struct x86_cpu_id ss_smi_ids[] = {
diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
index 47d6840b3489..744312a44279 100644
--- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c
+++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
@@ -194,7 +194,9 @@ static int sun50i_cpufreq_get_efuse(void)
 	struct nvmem_cell *speedbin_nvmem;
 	const struct of_device_id *match;
 	struct device *cpu_dev;
-	u32 *speedbin;
+	void *speedbin_ptr;
+	u32 speedbin = 0;
+	size_t len;
 	int ret;

 	cpu_dev = get_cpu_device(0);
@@ -217,14 +219,18 @@ static int sun50i_cpufreq_get_efuse(void)
 		return dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem),
 				     "Could not get nvmem cell\n");

-	speedbin = nvmem_cell_read(speedbin_nvmem, NULL);
+	speedbin_ptr = nvmem_cell_read(speedbin_nvmem, &len);
 	nvmem_cell_put(speedbin_nvmem);
-	if (IS_ERR(speedbin))
-		return PTR_ERR(speedbin);
+	if (IS_ERR(speedbin_ptr))
+		return PTR_ERR(speedbin_ptr);

-	ret = opp_data->efuse_xlate(*speedbin);
+	if (len <= 4)
+		memcpy(&speedbin, speedbin_ptr, len);
+	speedbin = le32_to_cpu(speedbin);

-	kfree(speedbin);
+	ret = opp_data->efuse_xlate(speedbin);
+
+	kfree(speedbin_ptr);

 	return ret;
 };
diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c
index c7761eb99f3c..cbabb726c664 100644
--- a/drivers/cpufreq/tegra186-cpufreq.c
+++ b/drivers/cpufreq/tegra186-cpufreq.c
@@ -73,11 +73,18 @@ static int tegra186_cpufreq_init(struct cpufreq_policy *policy)
 {
 	struct tegra186_cpufreq_data *data = cpufreq_get_driver_data();
 	unsigned int cluster = data->cpus[policy->cpu].bpmp_cluster_id;
+	u32 cpu;

 	policy->freq_table = data->clusters[cluster].table;
 	policy->cpuinfo.transition_latency = 300 * 1000;
 	policy->driver_data = NULL;

+	/* set same policy for all cpus in a cluster */
+	for (cpu = 0; cpu < ARRAY_SIZE(tegra186_cpus); cpu++) {
+		if (data->cpus[cpu].bpmp_cluster_id == cluster)
+			cpumask_set_cpu(cpu, policy->cpus);
+	}
+
 	return 0;
 }

@@ -123,7 +130,6 @@ static struct cpufreq_driver tegra186_cpufreq_driver = {
 	.verify = cpufreq_generic_frequency_table_verify,
 	.target_index = tegra186_cpufreq_set_target,
 	.init = tegra186_cpufreq_init,
-	.attr = cpufreq_generic_attr,
 };

 static struct cpufreq_frequency_table *init_vhint_table(
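The sun50i-cpufreq-nvmem hunk above stops dereferencing the nvmem buffer as a bare u32 and instead does a length-checked copy followed by an explicit little-endian conversion, so short cells and big-endian kernels decode the speed bin consistently. A stand-alone sketch of that decode step (decode_speedbin is an illustrative name; memcpy and le32_to_cpu are the real kernel helpers used in the hunk):

#include <linux/string.h>
#include <linux/types.h>
#include <asm/byteorder.h>

/* Decode an nvmem/efuse cell of up to four bytes into a host-order value. */
static u32 decode_speedbin(const void *buf, size_t len)
{
	__le32 raw = 0;

	if (len > sizeof(raw))
		len = sizeof(raw);	/* sketch only; the driver skips the copy instead */

	memcpy(&raw, buf, len);

	return le32_to_cpu(raw);
}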
diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
index 9055dd398e7f..9b4f516f313e 100644
--- a/drivers/cpufreq/tegra194-cpufreq.c
+++ b/drivers/cpufreq/tegra194-cpufreq.c
@@ -589,7 +589,6 @@ static struct cpufreq_driver tegra194_cpufreq_driver = {
 	.exit = tegra194_cpufreq_exit,
 	.online = tegra194_cpufreq_online,
 	.offline = tegra194_cpufreq_offline,
-	.attr = cpufreq_generic_attr,
 };

 static struct tegra_cpufreq_ops tegra194_cpufreq_ops = {
diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index 0f86cdb7ec8a..65fea47b82e6 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -471,7 +471,6 @@ static struct cpufreq_driver ve_spc_cpufreq_driver = {
 	.init = ve_spc_cpufreq_init,
 	.exit = ve_spc_cpufreq_exit,
 	.register_em = cpufreq_register_em_with_opp,
-	.attr = cpufreq_generic_attr,
 };

 #ifdef CONFIG_BL_SWITCHER
diff --git a/drivers/cpufreq/virtual-cpufreq.c b/drivers/cpufreq/virtual-cpufreq.c
index 272dc3c85106..7dd1b0c263c7 100644
--- a/drivers/cpufreq/virtual-cpufreq.c
+++ b/drivers/cpufreq/virtual-cpufreq.c
@@ -265,7 +265,6 @@ static struct cpufreq_driver cpufreq_virt_driver = {
 	.verify = virt_cpufreq_verify_policy,
 	.target = virt_cpufreq_target,
 	.fast_switch = virt_cpufreq_fast_switch,
-	.attr = cpufreq_generic_attr,
 };

 static int virt_cpufreq_driver_probe(struct platform_device *pdev)