diff options
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r-- | drivers/cpufreq/Kconfig.arm | 17 | ||||
-rw-r--r-- | drivers/cpufreq/Kconfig.x86 | 1 | ||||
-rw-r--r-- | drivers/cpufreq/Makefile | 3 | ||||
-rw-r--r-- | drivers/cpufreq/arm_big_little.h | 2 | ||||
-rw-r--r-- | drivers/cpufreq/arm_big_little_dt.c | 4 | ||||
-rw-r--r-- | drivers/cpufreq/cppc_cpufreq.c | 176 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq-dt.c | 10 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 112 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 31 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.c | 32 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.h | 1 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 10 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_opp.c | 114 | ||||
-rw-r--r-- | drivers/cpufreq/exynos5440-cpufreq.c | 6 | ||||
-rw-r--r-- | drivers/cpufreq/imx6q-cpufreq.c | 56 | ||||
-rw-r--r-- | drivers/cpufreq/integrator-cpufreq.c | 2 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 390 | ||||
-rw-r--r-- | drivers/cpufreq/mt8173-cpufreq.c | 6 | ||||
-rw-r--r-- | drivers/cpufreq/powernv-cpufreq.c | 10 | ||||
-rw-r--r-- | drivers/cpufreq/tegra20-cpufreq.c | 2 |
20 files changed, 650 insertions, 335 deletions
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index cd0391e46c6d..642fd49793b0 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -227,3 +227,20 @@ config ARM_PXA2xx_CPUFREQ This add the CPUFreq driver support for Intel PXA2xx SOCs. If in doubt, say N. + +config ACPI_CPPC_CPUFREQ + tristate "CPUFreq driver based on the ACPI CPPC spec" + depends on ACPI + select ACPI_CPPC_LIB + default n + help + This adds a CPUFreq driver which uses CPPC methods + as described in the ACPIv5.1 spec. CPPC stands for + Collaborative Processor Performance Controls. It + is based on an abstract continuous scale of CPU + performance values which allows the remote power + processor to flexibly optimize for power and + performance. CPPC relies on power management firmware + support for its operation. + + If in doubt, say N. diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index c59bdcb83217..adbd1de1cea5 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -5,6 +5,7 @@ config X86_INTEL_PSTATE bool "Intel P state control" depends on X86 + select ACPI_PROCESSOR if ACPI help This driver provides a P state for Intel core processors. The driver implements an internal governor and will become diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 41340384f11f..d11309c487d0 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -1,6 +1,5 @@ # CPUfreq core obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o -obj-$(CONFIG_PM_OPP) += cpufreq_opp.o # CPUfreq stats obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o @@ -76,6 +75,8 @@ obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o obj-$(CONFIG_ARM_TEGRA20_CPUFREQ) += tegra20-cpufreq.o obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o +obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o + ################################################################################## # PowerPC platform drivers diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h index a211f7db9d32..b88889d9387e 100644 --- a/drivers/cpufreq/arm_big_little.h +++ b/drivers/cpufreq/arm_big_little.h @@ -28,7 +28,7 @@ struct cpufreq_arm_bL_ops { /* * This must set opp table for cpu_dev in a similar way as done by - * of_init_opp_table(). + * dev_pm_opp_of_add_table(). */ int (*init_opp_table)(struct device *cpu_dev); diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c index 36d91dba2965..16ddeefe9443 100644 --- a/drivers/cpufreq/arm_big_little_dt.c +++ b/drivers/cpufreq/arm_big_little_dt.c @@ -54,7 +54,7 @@ static int dt_init_opp_table(struct device *cpu_dev) return -ENOENT; } - ret = of_init_opp_table(cpu_dev); + ret = dev_pm_opp_of_add_table(cpu_dev); of_node_put(np); return ret; @@ -82,7 +82,7 @@ static struct cpufreq_arm_bL_ops dt_bL_ops = { .name = "dt-bl", .get_transition_latency = dt_get_transition_latency, .init_opp_table = dt_init_opp_table, - .free_opp_table = of_free_opp_table, + .free_opp_table = dev_pm_opp_of_remove_table, }; static int generic_bL_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c new file mode 100644 index 000000000000..93c219fab850 --- /dev/null +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -0,0 +1,176 @@ +/* + * CPPC (Collaborative Processor Performance Control) driver for + * interfacing with the CPUfreq layer and governors. See + * cppc_acpi.c for CPPC specific methods. + * + * (C) Copyright 2014, 2015 Linaro Ltd. + * Author: Ashwin Chaugule <ashwin.chaugule@linaro.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#define pr_fmt(fmt) "CPPC Cpufreq:" fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/cpu.h> +#include <linux/cpufreq.h> +#include <linux/vmalloc.h> + +#include <acpi/cppc_acpi.h> + +/* + * These structs contain information parsed from per CPU + * ACPI _CPC structures. + * e.g. For each CPU the highest, lowest supported + * performance capabilities, desired performance level + * requested etc. + */ +static struct cpudata **all_cpu_data; + +static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + struct cpudata *cpu; + struct cpufreq_freqs freqs; + int ret = 0; + + cpu = all_cpu_data[policy->cpu]; + + cpu->perf_ctrls.desired_perf = target_freq; + freqs.old = policy->cur; + freqs.new = target_freq; + + cpufreq_freq_transition_begin(policy, &freqs); + ret = cppc_set_perf(cpu->cpu, &cpu->perf_ctrls); + cpufreq_freq_transition_end(policy, &freqs, ret != 0); + + if (ret) + pr_debug("Failed to set target on CPU:%d. ret:%d\n", + cpu->cpu, ret); + + return ret; +} + +static int cppc_verify_policy(struct cpufreq_policy *policy) +{ + cpufreq_verify_within_cpu_limits(policy); + return 0; +} + +static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) +{ + int cpu_num = policy->cpu; + struct cpudata *cpu = all_cpu_data[cpu_num]; + int ret; + + cpu->perf_ctrls.desired_perf = cpu->perf_caps.lowest_perf; + + ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls); + if (ret) + pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", + cpu->perf_caps.lowest_perf, cpu_num, ret); +} + +static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + unsigned int cpu_num = policy->cpu; + int ret = 0; + + cpu = all_cpu_data[policy->cpu]; + + cpu->cpu = cpu_num; + ret = cppc_get_perf_caps(policy->cpu, &cpu->perf_caps); + + if (ret) { + pr_debug("Err reading CPU%d perf capabilities. ret:%d\n", + cpu_num, ret); + return ret; + } + + policy->min = cpu->perf_caps.lowest_perf; + policy->max = cpu->perf_caps.highest_perf; + policy->cpuinfo.min_freq = policy->min; + policy->cpuinfo.max_freq = policy->max; + + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + cpumask_copy(policy->cpus, cpu->shared_cpu_map); + else { + /* Support only SW_ANY for now. */ + pr_debug("Unsupported CPU co-ord type\n"); + return -EFAULT; + } + + cpumask_set_cpu(policy->cpu, policy->cpus); + cpu->cur_policy = policy; + + /* Set policy->cur to max now. The governors will adjust later. */ + policy->cur = cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf; + + ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls); + if (ret) + pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", + cpu->perf_caps.highest_perf, cpu_num, ret); + + return ret; +} + +static struct cpufreq_driver cppc_cpufreq_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = cppc_verify_policy, + .target = cppc_cpufreq_set_target, + .init = cppc_cpufreq_cpu_init, + .stop_cpu = cppc_cpufreq_stop_cpu, + .name = "cppc_cpufreq", +}; + +static int __init cppc_cpufreq_init(void) +{ + int i, ret = 0; + struct cpudata *cpu; + + if (acpi_disabled) + return -ENODEV; + + all_cpu_data = kzalloc(sizeof(void *) * num_possible_cpus(), GFP_KERNEL); + if (!all_cpu_data) + return -ENOMEM; + + for_each_possible_cpu(i) { + all_cpu_data[i] = kzalloc(sizeof(struct cpudata), GFP_KERNEL); + if (!all_cpu_data[i]) + goto out; + + cpu = all_cpu_data[i]; + if (!zalloc_cpumask_var(&cpu->shared_cpu_map, GFP_KERNEL)) + goto out; + } + + ret = acpi_get_psd_map(all_cpu_data); + if (ret) { + pr_debug("Error parsing PSD data. Aborting cpufreq registration.\n"); + goto out; + } + + ret = cpufreq_register_driver(&cppc_cpufreq_driver); + if (ret) + goto out; + + return ret; + +out: + for_each_possible_cpu(i) + if (all_cpu_data[i]) + kfree(all_cpu_data[i]); + + kfree(all_cpu_data); + return -ENODEV; +} + +late_initcall(cppc_cpufreq_init); diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 7c0d70e2a861..90d64081ddb3 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -216,7 +216,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) } /* Get OPP-sharing information from "operating-points-v2" bindings */ - ret = of_get_cpus_sharing_opps(cpu_dev, policy->cpus); + ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus); if (ret) { /* * operating-points-v2 not supported, fallback to old method of @@ -238,7 +238,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) * * OPPs might be populated at runtime, don't check for error here */ - of_cpumask_init_opp_table(policy->cpus); + dev_pm_opp_of_cpumask_add_table(policy->cpus); /* * But we need OPP table to function so if it is not there let's @@ -261,7 +261,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) * OPP tables are initialized only for policy->cpu, do it for * others as well. */ - ret = set_cpus_sharing_opps(cpu_dev, policy->cpus); + ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); if (ret) dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); @@ -368,7 +368,7 @@ out_free_cpufreq_table: out_free_priv: kfree(priv); out_free_opp: - of_cpumask_free_opp_table(policy->cpus); + dev_pm_opp_of_cpumask_remove_table(policy->cpus); out_node_put: of_node_put(np); out_put_reg_clk: @@ -385,7 +385,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); - of_cpumask_free_opp_table(policy->related_cpus); + dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); clk_put(policy->clk); if (!IS_ERR(priv->cpu_reg)) regulator_put(priv->cpu_reg); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 25c4c15103a0..7c48e7316d91 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -843,18 +843,11 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, down_write(&policy->rwsem); - /* Updating inactive policies is invalid, so avoid doing that. */ - if (unlikely(policy_is_inactive(policy))) { - ret = -EBUSY; - goto unlock_policy_rwsem; - } - if (fattr->store) ret = fattr->store(policy, buf, count); else ret = -EIO; -unlock_policy_rwsem: up_write(&policy->rwsem); unlock: put_online_cpus(); @@ -880,49 +873,6 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -struct kobject *cpufreq_global_kobject; -EXPORT_SYMBOL(cpufreq_global_kobject); - -static int cpufreq_global_kobject_usage; - -int cpufreq_get_global_kobject(void) -{ - if (!cpufreq_global_kobject_usage++) - return kobject_add(cpufreq_global_kobject, - &cpu_subsys.dev_root->kobj, "%s", "cpufreq"); - - return 0; -} -EXPORT_SYMBOL(cpufreq_get_global_kobject); - -void cpufreq_put_global_kobject(void) -{ - if (!--cpufreq_global_kobject_usage) - kobject_del(cpufreq_global_kobject); -} -EXPORT_SYMBOL(cpufreq_put_global_kobject); - -int cpufreq_sysfs_create_file(const struct attribute *attr) -{ - int ret = cpufreq_get_global_kobject(); - - if (!ret) { - ret = sysfs_create_file(cpufreq_global_kobject, attr); - if (ret) - cpufreq_put_global_kobject(); - } - - return ret; -} -EXPORT_SYMBOL(cpufreq_sysfs_create_file); - -void cpufreq_sysfs_remove_file(const struct attribute *attr) -{ - sysfs_remove_file(cpufreq_global_kobject, attr); - cpufreq_put_global_kobject(); -} -EXPORT_SYMBOL(cpufreq_sysfs_remove_file); - static int add_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu) { struct device *cpu_dev; @@ -960,9 +910,6 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) /* Some related CPUs might not be present (physically hotplugged) */ for_each_cpu(j, policy->real_cpus) { - if (j == policy->kobj_cpu) - continue; - ret = add_cpu_dev_symlink(policy, j); if (ret) break; @@ -976,12 +923,8 @@ static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) unsigned int j; /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu(j, policy->real_cpus) { - if (j == policy->kobj_cpu) - continue; - + for_each_cpu(j, policy->real_cpus) remove_cpu_dev_symlink(policy, j); - } } static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) @@ -1079,7 +1022,6 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); struct cpufreq_policy *policy; - int ret; if (WARN_ON(!dev)) return NULL; @@ -1097,13 +1039,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) goto err_free_rcpumask; - ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &dev->kobj, - "cpufreq"); - if (ret) { - pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); - goto err_free_real_cpus; - } - + kobject_init(&policy->kobj, &ktype_cpufreq); INIT_LIST_HEAD(&policy->policy_list); init_rwsem(&policy->rwsem); spin_lock_init(&policy->transition_lock); @@ -1112,14 +1048,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) INIT_WORK(&policy->update, handle_update); policy->cpu = cpu; - - /* Set this once on allocation */ - policy->kobj_cpu = cpu; - return policy; -err_free_real_cpus: - free_cpumask_var(policy->real_cpus); err_free_rcpumask: free_cpumask_var(policy->related_cpus); err_free_cpumask: @@ -1221,9 +1151,19 @@ static int cpufreq_online(unsigned int cpu) if (new_policy) { /* related_cpus should at least include policy->cpus. */ - cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + cpumask_copy(policy->related_cpus, policy->cpus); /* Remember CPUs present at the policy creation time. */ cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); + + /* Name and add the kobject */ + ret = kobject_add(&policy->kobj, cpufreq_global_kobject, + "policy%u", + cpumask_first(policy->related_cpus)); + if (ret) { + pr_err("%s: failed to add policy->kobj: %d\n", __func__, + ret); + goto out_exit_policy; + } } /* @@ -1467,22 +1407,7 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return; } - if (cpu != policy->kobj_cpu) { - remove_cpu_dev_symlink(policy, cpu); - } else { - /* - * The CPU owning the policy object is going away. Move it to - * another suitable CPU. - */ - unsigned int new_cpu = cpumask_first(policy->real_cpus); - struct device *new_dev = get_cpu_device(new_cpu); - - dev_dbg(dev, "%s: Moving policy object to CPU%u\n", __func__, new_cpu); - - sysfs_remove_link(&new_dev->kobj, "cpufreq"); - policy->kobj_cpu = new_cpu; - WARN_ON(kobject_move(&policy->kobj, &new_dev->kobj)); - } + remove_cpu_dev_symlink(policy, cpu); } static void handle_update(struct work_struct *work) @@ -2425,7 +2350,7 @@ static int create_boost_sysfs_file(void) if (!cpufreq_driver->set_boost) cpufreq_driver->set_boost = cpufreq_boost_set_sw; - ret = cpufreq_sysfs_create_file(&boost.attr); + ret = sysfs_create_file(cpufreq_global_kobject, &boost.attr); if (ret) pr_err("%s: cannot register global BOOST sysfs file\n", __func__); @@ -2436,7 +2361,7 @@ static int create_boost_sysfs_file(void) static void remove_boost_sysfs_file(void) { if (cpufreq_boost_supported()) - cpufreq_sysfs_remove_file(&boost.attr); + sysfs_remove_file(cpufreq_global_kobject, &boost.attr); } int cpufreq_enable_boost_support(void) @@ -2584,12 +2509,15 @@ static struct syscore_ops cpufreq_syscore_ops = { .shutdown = cpufreq_suspend, }; +struct kobject *cpufreq_global_kobject; +EXPORT_SYMBOL(cpufreq_global_kobject); + static int __init cpufreq_core_init(void) { if (cpufreq_disabled()) return -ENODEV; - cpufreq_global_kobject = kobject_create(); + cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj); BUG_ON(!cpufreq_global_kobject); register_syscore_ops(&cpufreq_syscore_ops); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 84a1506950a7..1fa1deb6e91f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -23,6 +23,19 @@ static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); +static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +static +#endif +struct cpufreq_governor cpufreq_gov_conservative = { + .name = "conservative", + .governor = cs_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, struct cpufreq_policy *policy) { @@ -119,12 +132,14 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, struct cpufreq_freqs *freq = data; struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, freq->cpu); - struct cpufreq_policy *policy; + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); - if (!dbs_info->enable) + if (!policy) return 0; - policy = dbs_info->cdbs.shared->policy; + /* policy isn't governed by conservative governor */ + if (policy->governor != &cpufreq_gov_conservative) + return 0; /* * we only care if our internally tracked freq moves outside the 'valid' @@ -367,16 +382,6 @@ static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE -static -#endif -struct cpufreq_governor cpufreq_gov_conservative = { - .name = "conservative", - .governor = cs_cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static int __init cpufreq_gov_dbs_init(void) { return cpufreq_register_governor(&cpufreq_gov_conservative); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 939197ffa4ac..11258c4c1b17 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -348,29 +348,21 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, latency * LATENCY_MULTIPLIER)); - if (!have_governor_per_policy()) { - if (WARN_ON(cpufreq_get_global_kobject())) { - ret = -EINVAL; - goto cdata_exit; - } + if (!have_governor_per_policy()) cdata->gdbs_data = dbs_data; - } ret = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr(dbs_data)); if (ret) - goto put_kobj; + goto reset_gdbs_data; policy->governor_data = dbs_data; return 0; -put_kobj: - if (!have_governor_per_policy()) { +reset_gdbs_data: + if (!have_governor_per_policy()) cdata->gdbs_data = NULL; - cpufreq_put_global_kobject(); - } -cdata_exit: cdata->exit(dbs_data, !policy->governor->initialized); free_common_dbs_info: free_common_dbs_info(policy, cdata); @@ -394,10 +386,8 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy, sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr(dbs_data)); - if (!have_governor_per_policy()) { + if (!have_governor_per_policy()) cdata->gdbs_data = NULL; - cpufreq_put_global_kobject(); - } cdata->exit(dbs_data, policy->governor->initialized == 1); kfree(dbs_data); @@ -463,7 +453,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, cdata->get_cpu_dbs_info_s(cpu); cs_dbs_info->down_skip = 0; - cs_dbs_info->enable = 1; cs_dbs_info->requested_freq = policy->cur; } else { struct od_ops *od_ops = cdata->gov_ops; @@ -482,9 +471,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, static int cpufreq_governor_stop(struct cpufreq_policy *policy, struct dbs_data *dbs_data) { - struct common_dbs_data *cdata = dbs_data->cdata; - unsigned int cpu = policy->cpu; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); struct cpu_common_dbs_info *shared = cdbs->shared; /* State should be equivalent to START */ @@ -493,13 +480,6 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy, gov_cancel_work(dbs_data, policy); - if (cdata->governor == GOV_CONSERVATIVE) { - struct cs_cpu_dbs_info_s *cs_dbs_info = - cdata->get_cpu_dbs_info_s(cpu); - - cs_dbs_info->enable = 0; - } - shared->policy = NULL; mutex_destroy(&shared->timer_mutex); return 0; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 50f171796632..5621bb03e874 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -170,7 +170,6 @@ struct cs_cpu_dbs_info_s { struct cpu_dbs_info cdbs; unsigned int down_skip; unsigned int requested_freq; - unsigned int enable:1; }; /* Per policy Governors sysfs tunables */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 1fa9088c84a8..03ac6ce54042 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -267,27 +267,19 @@ static void update_sampling_rate(struct dbs_data *dbs_data, dbs_info = &per_cpu(od_cpu_dbs_info, cpu); cpufreq_cpu_put(policy); - mutex_lock(&dbs_info->cdbs.shared->timer_mutex); - - if (!delayed_work_pending(&dbs_info->cdbs.dwork)) { - mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); + if (!delayed_work_pending(&dbs_info->cdbs.dwork)) continue; - } next_sampling = jiffies + usecs_to_jiffies(new_rate); appointed_at = dbs_info->cdbs.dwork.timer.expires; if (time_before(next_sampling, appointed_at)) { - - mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); cancel_delayed_work_sync(&dbs_info->cdbs.dwork); - mutex_lock(&dbs_info->cdbs.shared->timer_mutex); gov_queue_work(dbs_data, policy, usecs_to_jiffies(new_rate), true); } - mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); } } diff --git a/drivers/cpufreq/cpufreq_opp.c b/drivers/cpufreq/cpufreq_opp.c deleted file mode 100644 index 0f5e6d5f6da0..000000000000 --- a/drivers/cpufreq/cpufreq_opp.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Generic OPP helper interface for CPUFreq drivers - * - * Copyright (C) 2009-2014 Texas Instruments Incorporated. - * Nishanth Menon - * Romit Dasgupta - * Kevin Hilman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/cpufreq.h> -#include <linux/device.h> -#include <linux/err.h> -#include <linux/errno.h> -#include <linux/export.h> -#include <linux/kernel.h> -#include <linux/pm_opp.h> -#include <linux/rcupdate.h> -#include <linux/slab.h> - -/** - * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device - * @dev: device for which we do this operation - * @table: Cpufreq table returned back to caller - * - * Generate a cpufreq table for a provided device- this assumes that the - * opp list is already initialized and ready for usage. - * - * This function allocates required memory for the cpufreq table. It is - * expected that the caller does the required maintenance such as freeing - * the table as required. - * - * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM - * if no memory available for the operation (table is not populated), returns 0 - * if successful and table is populated. - * - * WARNING: It is important for the callers to ensure refreshing their copy of - * the table if any of the mentioned functions have been invoked in the interim. - * - * Locking: The internal device_opp and opp structures are RCU protected. - * Since we just use the regular accessor functions to access the internal data - * structures, we use RCU read lock inside this function. As a result, users of - * this function DONOT need to use explicit locks for invoking. - */ -int dev_pm_opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table) -{ - struct dev_pm_opp *opp; - struct cpufreq_frequency_table *freq_table = NULL; - int i, max_opps, ret = 0; - unsigned long rate; - - rcu_read_lock(); - - max_opps = dev_pm_opp_get_opp_count(dev); - if (max_opps <= 0) { - ret = max_opps ? max_opps : -ENODATA; - goto out; - } - - freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_ATOMIC); - if (!freq_table) { - ret = -ENOMEM; - goto out; - } - - for (i = 0, rate = 0; i < max_opps; i++, rate++) { - /* find next rate */ - opp = dev_pm_opp_find_freq_ceil(dev, &rate); - if (IS_ERR(opp)) { - ret = PTR_ERR(opp); - goto out; - } - freq_table[i].driver_data = i; - freq_table[i].frequency = rate / 1000; - - /* Is Boost/turbo opp ? */ - if (dev_pm_opp_is_turbo(opp)) - freq_table[i].flags = CPUFREQ_BOOST_FREQ; - } - - freq_table[i].driver_data = i; - freq_table[i].frequency = CPUFREQ_TABLE_END; - - *table = &freq_table[0]; - -out: - rcu_read_unlock(); - if (ret) - kfree(freq_table); - - return ret; -} -EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table); - -/** - * dev_pm_opp_free_cpufreq_table() - free the cpufreq table - * @dev: device for which we do this operation - * @table: table to free - * - * Free up the table allocated by dev_pm_opp_init_cpufreq_table - */ -void dev_pm_opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table) -{ - if (!table) - return; - - kfree(*table); - *table = NULL; -} -EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table); diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c index 21a90ed7f3d8..c0f3373706f4 100644 --- a/drivers/cpufreq/exynos5440-cpufreq.c +++ b/drivers/cpufreq/exynos5440-cpufreq.c @@ -360,7 +360,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) goto err_put_node; } - ret = of_init_opp_table(dvfs_info->dev); + ret = dev_pm_opp_of_add_table(dvfs_info->dev); if (ret) { dev_err(dvfs_info->dev, "failed to init OPP table: %d\n", ret); goto err_put_node; @@ -424,7 +424,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) err_free_table: dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table); err_free_opp: - of_free_opp_table(dvfs_info->dev); + dev_pm_opp_of_remove_table(dvfs_info->dev); err_put_node: of_node_put(np); dev_err(&pdev->dev, "%s: failed initialization\n", __func__); @@ -435,7 +435,7 @@ static int exynos_cpufreq_remove(struct platform_device *pdev) { cpufreq_unregister_driver(&exynos_driver); dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table); - of_free_opp_table(dvfs_info->dev); + dev_pm_opp_of_remove_table(dvfs_info->dev); return 0; } diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 380a90d3c57e..ef1fa8145419 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -30,6 +30,10 @@ static struct clk *pll1_sw_clk; static struct clk *step_clk; static struct clk *pll2_pfd2_396m_clk; +/* clk used by i.MX6UL */ +static struct clk *pll2_bus_clk; +static struct clk *secondary_sel_clk; + static struct device *cpu_dev; static bool free_opp; static struct cpufreq_frequency_table *freq_table; @@ -91,16 +95,36 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) * The setpoints are selected per PLL/PDF frequencies, so we need to * reprogram PLL for frequency scaling. The procedure of reprogramming * PLL1 is as below. - * + * For i.MX6UL, it has a secondary clk mux, the cpu frequency change + * flow is slightly different from other i.MX6 OSC. + * The cpu frequeny change flow for i.MX6(except i.MX6UL) is as below: * - Enable pll2_pfd2_396m_clk and reparent pll1_sw_clk to it * - Reprogram pll1_sys_clk and reparent pll1_sw_clk back to it * - Disable pll2_pfd2_396m_clk */ - clk_set_parent(step_clk, pll2_pfd2_396m_clk); - clk_set_parent(pll1_sw_clk, step_clk); - if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) { - clk_set_rate(pll1_sys_clk, new_freq * 1000); + if (of_machine_is_compatible("fsl,imx6ul")) { + /* + * When changing pll1_sw_clk's parent to pll1_sys_clk, + * CPU may run at higher than 528MHz, this will lead to + * the system unstable if the voltage is lower than the + * voltage of 528MHz, so lower the CPU frequency to one + * half before changing CPU frequency. + */ + clk_set_rate(arm_clk, (old_freq >> 1) * 1000); clk_set_parent(pll1_sw_clk, pll1_sys_clk); + if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) + clk_set_parent(secondary_sel_clk, pll2_bus_clk); + else + clk_set_parent(secondary_sel_clk, pll2_pfd2_396m_clk); + clk_set_parent(step_clk, secondary_sel_clk); + clk_set_parent(pll1_sw_clk, step_clk); + } else { + clk_set_parent(step_clk, pll2_pfd2_396m_clk); + clk_set_parent(pll1_sw_clk, step_clk); + if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) { + clk_set_rate(pll1_sys_clk, new_freq * 1000); + clk_set_parent(pll1_sw_clk, pll1_sys_clk); + } } /* Ensure the arm clock divider is what we expect */ @@ -186,6 +210,16 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) goto put_clk; } + if (of_machine_is_compatible("fsl,imx6ul")) { + pll2_bus_clk = clk_get(cpu_dev, "pll2_bus"); + secondary_sel_clk = clk_get(cpu_dev, "secondary_sel"); + if (IS_ERR(pll2_bus_clk) || IS_ERR(secondary_sel_clk)) { + dev_err(cpu_dev, "failed to get clocks specific to imx6ul\n"); + ret = -ENOENT; + goto put_clk; + } + } + arm_reg = regulator_get(cpu_dev, "arm"); pu_reg = regulator_get_optional(cpu_dev, "pu"); soc_reg = regulator_get(cpu_dev, "soc"); @@ -202,7 +236,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) */ num = dev_pm_opp_get_opp_count(cpu_dev); if (num < 0) { - ret = of_init_opp_table(cpu_dev); + ret = dev_pm_opp_of_add_table(cpu_dev); if (ret < 0) { dev_err(cpu_dev, "failed to init OPP table: %d\n", ret); goto put_reg; @@ -312,7 +346,7 @@ free_freq_table: dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); out_free_opp: if (free_opp) - of_free_opp_table(cpu_dev); + dev_pm_opp_of_remove_table(cpu_dev); put_reg: if (!IS_ERR(arm_reg)) regulator_put(arm_reg); @@ -331,6 +365,10 @@ put_clk: clk_put(step_clk); if (!IS_ERR(pll2_pfd2_396m_clk)) clk_put(pll2_pfd2_396m_clk); + if (!IS_ERR(pll2_bus_clk)) + clk_put(pll2_bus_clk); + if (!IS_ERR(secondary_sel_clk)) + clk_put(secondary_sel_clk); of_node_put(np); return ret; } @@ -340,7 +378,7 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev) cpufreq_unregister_driver(&imx6q_cpufreq_driver); dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); if (free_opp) - of_free_opp_table(cpu_dev); + dev_pm_opp_of_remove_table(cpu_dev); regulator_put(arm_reg); if (!IS_ERR(pu_reg)) regulator_put(pu_reg); @@ -350,6 +388,8 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev) clk_put(pll1_sw_clk); clk_put(step_clk); clk_put(pll2_pfd2_396m_clk); + clk_put(pll2_bus_clk); + clk_put(secondary_sel_clk); return 0; } diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c index 2faa4216bf2a..79e3ff2771a6 100644 --- a/drivers/cpufreq/integrator-cpufreq.c +++ b/drivers/cpufreq/integrator-cpufreq.c @@ -221,6 +221,8 @@ static const struct of_device_id integrator_cpufreq_match[] = { { }, }; +MODULE_DEVICE_TABLE(of, integrator_cpufreq_match); + static struct platform_driver integrator_cpufreq_driver = { .driver = { .name = "integrator-cpufreq", diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index aa33b92b3e3e..93a3c635ea27 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -34,6 +34,10 @@ #include <asm/cpu_device_id.h> #include <asm/cpufeature.h> +#if IS_ENABLED(CONFIG_ACPI) +#include <acpi/processor.h> +#endif + #define BYT_RATIOS 0x66a #define BYT_VIDS 0x66b #define BYT_TURBO_RATIOS 0x66c @@ -43,7 +47,6 @@ #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) #define fp_toint(X) ((X) >> FRAC_BITS) - static inline int32_t mul_fp(int32_t x, int32_t y) { return ((int64_t)x * (int64_t)y) >> FRAC_BITS; @@ -78,6 +81,7 @@ struct pstate_data { int current_pstate; int min_pstate; int max_pstate; + int max_pstate_physical; int scaling; int turbo_pstate; }; @@ -113,6 +117,9 @@ struct cpudata { u64 prev_mperf; u64 prev_tsc; struct sample sample; +#if IS_ENABLED(CONFIG_ACPI) + struct acpi_processor_performance acpi_perf_data; +#endif }; static struct cpudata **all_cpu_data; @@ -127,6 +134,7 @@ struct pstate_adjust_policy { struct pstate_funcs { int (*get_max)(void); + int (*get_max_physical)(void); int (*get_min)(void); int (*get_turbo)(void); int (*get_scaling)(void); @@ -142,6 +150,7 @@ struct cpu_defaults { static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; static int hwp_active; +static int no_acpi_perf; struct perf_limits { int no_turbo; @@ -154,9 +163,24 @@ struct perf_limits { int max_sysfs_pct; int min_policy_pct; int min_sysfs_pct; + int max_perf_ctl; + int min_perf_ctl; +}; + +static struct perf_limits performance_limits = { + .no_turbo = 0, + .turbo_disabled = 0, + .max_perf_pct = 100, + .max_perf = int_tofp(1), + .min_perf_pct = 100, + .min_perf = int_tofp(1), + .max_policy_pct = 100, + .max_sysfs_pct = 100, + .min_policy_pct = 0, + .min_sysfs_pct = 0, }; -static struct perf_limits limits = { +static struct perf_limits powersave_limits = { .no_turbo = 0, .turbo_disabled = 0, .max_perf_pct = 100, @@ -167,8 +191,163 @@ static struct perf_limits limits = { .max_sysfs_pct = 100, .min_policy_pct = 0, .min_sysfs_pct = 0, + .max_perf_ctl = 0, + .min_perf_ctl = 0, }; +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE +static struct perf_limits *limits = &performance_limits; +#else +static struct perf_limits *limits = &powersave_limits; +#endif + +#if IS_ENABLED(CONFIG_ACPI) +/* + * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and + * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and + * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state + * ratio, out of it only high 8 bits are used. For example 0x1700 is setting + * target ratio 0x17. The _PSS control value stores in a format which can be + * directly written to PERF_CTL MSR. But in intel_pstate driver this shift + * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()). + * This function converts the _PSS control value to intel pstate driver format + * for comparison and assignment. + */ +static int convert_to_native_pstate_format(struct cpudata *cpu, int index) +{ + return cpu->acpi_perf_data.states[index].control >> 8; +} + +static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + int ret; + bool turbo_absent = false; + int max_pstate_index; + int min_pss_ctl, max_pss_ctl, turbo_pss_ctl; + int i; + + cpu = all_cpu_data[policy->cpu]; + + pr_debug("intel_pstate: default limits 0x%x 0x%x 0x%x\n", + cpu->pstate.min_pstate, cpu->pstate.max_pstate, + cpu->pstate.turbo_pstate); + + if (!cpu->acpi_perf_data.shared_cpu_map && + zalloc_cpumask_var_node(&cpu->acpi_perf_data.shared_cpu_map, + GFP_KERNEL, cpu_to_node(policy->cpu))) { + return -ENOMEM; + } + + ret = acpi_processor_register_performance(&cpu->acpi_perf_data, + policy->cpu); + if (ret) + return ret; + + /* + * Check if the control value in _PSS is for PERF_CTL MSR, which should + * guarantee that the states returned by it map to the states in our + * list directly. + */ + if (cpu->acpi_perf_data.control_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) + return -EIO; + + pr_debug("intel_pstate: CPU%u - ACPI _PSS perf data\n", policy->cpu); + for (i = 0; i < cpu->acpi_perf_data.state_count; i++) + pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n", + (i == cpu->acpi_perf_data.state ? '*' : ' '), i, + (u32) cpu->acpi_perf_data.states[i].core_frequency, + (u32) cpu->acpi_perf_data.states[i].power, + (u32) cpu->acpi_perf_data.states[i].control); + + /* + * If there is only one entry _PSS, simply ignore _PSS and continue as + * usual without taking _PSS into account + */ + if (cpu->acpi_perf_data.state_count < 2) + return 0; + + turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0); + min_pss_ctl = convert_to_native_pstate_format(cpu, + cpu->acpi_perf_data.state_count - 1); + /* Check if there is a turbo freq in _PSS */ + if (turbo_pss_ctl <= cpu->pstate.max_pstate && + turbo_pss_ctl > cpu->pstate.min_pstate) { + pr_debug("intel_pstate: no turbo range exists in _PSS\n"); + limits->no_turbo = limits->turbo_disabled = 1; + cpu->pstate.turbo_pstate = cpu->pstate.max_pstate; + turbo_absent = true; + } + + /* Check if the max non turbo p state < Intel P state max */ + max_pstate_index = turbo_absent ? 0 : 1; + max_pss_ctl = convert_to_native_pstate_format(cpu, max_pstate_index); + if (max_pss_ctl < cpu->pstate.max_pstate && + max_pss_ctl > cpu->pstate.min_pstate) + cpu->pstate.max_pstate = max_pss_ctl; + + /* check If min perf > Intel P State min */ + if (min_pss_ctl > cpu->pstate.min_pstate && + min_pss_ctl < cpu->pstate.max_pstate) { + cpu->pstate.min_pstate = min_pss_ctl; + policy->cpuinfo.min_freq = min_pss_ctl * cpu->pstate.scaling; + } + + if (turbo_absent) + policy->cpuinfo.max_freq = cpu->pstate.max_pstate * + cpu->pstate.scaling; + else { + policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * + cpu->pstate.scaling; + /* + * The _PSS table doesn't contain whole turbo frequency range. + * This just contains +1 MHZ above the max non turbo frequency, + * with control value corresponding to max turbo ratio. But + * when cpufreq set policy is called, it will call with this + * max frequency, which will cause a reduced performance as + * this driver uses real max turbo frequency as the max + * frequeny. So correct this frequency in _PSS table to + * correct max turbo frequency based on the turbo ratio. + * Also need to convert to MHz as _PSS freq is in MHz. + */ + cpu->acpi_perf_data.states[0].core_frequency = + turbo_pss_ctl * 100; + } + + pr_debug("intel_pstate: Updated limits using _PSS 0x%x 0x%x 0x%x\n", + cpu->pstate.min_pstate, cpu->pstate.max_pstate, + cpu->pstate.turbo_pstate); + pr_debug("intel_pstate: policy max_freq=%d Khz min_freq = %d KHz\n", + policy->cpuinfo.max_freq, policy->cpuinfo.min_freq); + + return 0; +} + +static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + + if (!no_acpi_perf) + return 0; + + cpu = all_cpu_data[policy->cpu]; + acpi_processor_unregister_performance(policy->cpu); + return 0; +} + +#else +static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy) +{ + return 0; +} + +static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) +{ + return 0; +} +#endif + static inline void pid_reset(struct _pid *pid, int setpoint, int busy, int deadband, int integral) { pid->setpoint = setpoint; @@ -255,7 +434,7 @@ static inline void update_turbo_state(void) cpu = all_cpu_data[0]; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - limits.turbo_disabled = + limits->turbo_disabled = (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } @@ -274,14 +453,14 @@ static void intel_pstate_hwp_set(void) for_each_online_cpu(cpu) { rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); - adj_range = limits.min_perf_pct * range / 100; + adj_range = limits->min_perf_pct * range / 100; min = hw_min + adj_range; value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - adj_range = limits.max_perf_pct * range / 100; + adj_range = limits->max_perf_pct * range / 100; max = hw_min + adj_range; - if (limits.no_turbo) { + if (limits->no_turbo) { hw_max = HWP_GUARANTEED_PERF(cap); if (hw_max < max) max = hw_max; @@ -350,7 +529,7 @@ static void __init intel_pstate_debug_expose_params(void) static ssize_t show_##file_name \ (struct kobject *kobj, struct attribute *attr, char *buf) \ { \ - return sprintf(buf, "%u\n", limits.object); \ + return sprintf(buf, "%u\n", limits->object); \ } static ssize_t show_turbo_pct(struct kobject *kobj, @@ -386,10 +565,10 @@ static ssize_t show_no_turbo(struct kobject *kobj, ssize_t ret; update_turbo_state(); - if (limits.turbo_disabled) - ret = sprintf(buf, "%u\n", limits.turbo_disabled); + if (limits->turbo_disabled) + ret = sprintf(buf, "%u\n", limits->turbo_disabled); else - ret = sprintf(buf, "%u\n", limits.no_turbo); + ret = sprintf(buf, "%u\n", limits->no_turbo); return ret; } @@ -405,12 +584,12 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, return -EINVAL; update_turbo_state(); - if (limits.turbo_disabled) { + if (limits->turbo_disabled) { pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n"); return -EPERM; } - limits.no_turbo = clamp_t(int, input, 0, 1); + limits->no_turbo = clamp_t(int, input, 0, 1); if (hwp_active) intel_pstate_hwp_set(); @@ -428,11 +607,15 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; - limits.max_sysfs_pct = clamp_t(int, input, 0 , 100); - limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); - limits.max_perf_pct = max(limits.min_policy_pct, limits.max_perf_pct); - limits.max_perf_pct = max(limits.min_perf_pct, limits.max_perf_pct); - limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + limits->max_sysfs_pct = clamp_t(int, input, 0 , 100); + limits->max_perf_pct = min(limits->max_policy_pct, + limits->max_sysfs_pct); + limits->max_perf_pct = max(limits->min_policy_pct, + limits->max_perf_pct); + limits->max_perf_pct = max(limits->min_perf_pct, + limits->max_perf_pct); + limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), + int_tofp(100)); if (hwp_active) intel_pstate_hwp_set(); @@ -449,11 +632,15 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; - limits.min_sysfs_pct = clamp_t(int, input, 0 , 100); - limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct); - limits.min_perf_pct = min(limits.max_policy_pct, limits.min_perf_pct); - limits.min_perf_pct = min(limits.max_perf_pct, limits.min_perf_pct); - limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); + limits->min_sysfs_pct = clamp_t(int, input, 0 , 100); + limits->min_perf_pct = max(limits->min_policy_pct, + limits->min_sysfs_pct); + limits->min_perf_pct = min(limits->max_policy_pct, + limits->min_perf_pct); + limits->min_perf_pct = min(limits->max_perf_pct, + limits->min_perf_pct); + limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), + int_tofp(100)); if (hwp_active) intel_pstate_hwp_set(); @@ -533,7 +720,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (limits.no_turbo && !limits.turbo_disabled) + if (limits->no_turbo && !limits->turbo_disabled) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -591,7 +778,7 @@ static int core_get_min_pstate(void) return (value >> 40) & 0xFF; } -static int core_get_max_pstate(void) +static int core_get_max_pstate_physical(void) { u64 value; @@ -599,6 +786,46 @@ static int core_get_max_pstate(void) return (value >> 8) & 0xFF; } +static int core_get_max_pstate(void) +{ + u64 tar; + u64 plat_info; + int max_pstate; + int err; + + rdmsrl(MSR_PLATFORM_INFO, plat_info); + max_pstate = (plat_info >> 8) & 0xFF; + + err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar); + if (!err) { + /* Do some sanity checking for safety */ + if (plat_info & 0x600000000) { + u64 tdp_ctrl; + u64 tdp_ratio; + int tdp_msr; + + err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl); + if (err) + goto skip_tar; + + tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl; + err = rdmsrl_safe(tdp_msr, &tdp_ratio); + if (err) + goto skip_tar; + + if (tdp_ratio - 1 == tar) { + max_pstate = tar; + pr_debug("max_pstate=TAC %x\n", max_pstate); + } else { + goto skip_tar; + } + } + } + +skip_tar: + return max_pstate; +} + static int core_get_turbo_pstate(void) { u64 value; @@ -622,7 +849,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (limits.no_turbo && !limits.turbo_disabled) + if (limits->no_turbo && !limits->turbo_disabled) val |= (u64)1 << 32; wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); @@ -652,6 +879,7 @@ static struct cpu_defaults core_params = { }, .funcs = { .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, .get_min = core_get_min_pstate, .get_turbo = core_get_turbo_pstate, .get_scaling = core_get_scaling, @@ -670,6 +898,7 @@ static struct cpu_defaults byt_params = { }, .funcs = { .get_max = byt_get_max_pstate, + .get_max_physical = byt_get_max_pstate, .get_min = byt_get_min_pstate, .get_turbo = byt_get_turbo_pstate, .set = byt_set_pstate, @@ -689,6 +918,7 @@ static struct cpu_defaults knl_params = { }, .funcs = { .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, .get_min = core_get_min_pstate, .get_turbo = knl_get_turbo_pstate, .get_scaling = core_get_scaling, @@ -702,7 +932,7 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) int max_perf_adj; int min_perf; - if (limits.no_turbo || limits.turbo_disabled) + if (limits->no_turbo || limits->turbo_disabled) max_perf = cpu->pstate.max_pstate; /* @@ -710,12 +940,23 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) * policy, or by cpu specific default values determined through * experimentation. */ - max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); - *max = clamp_t(int, max_perf_adj, - cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); + if (limits->max_perf_ctl && limits->max_sysfs_pct >= + limits->max_policy_pct) { + *max = limits->max_perf_ctl; + } else { + max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), + limits->max_perf)); + *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, + cpu->pstate.turbo_pstate); + } - min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf)); - *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); + if (limits->min_perf_ctl) { + *min = limits->min_perf_ctl; + } else { + min_perf = fp_toint(mul_fp(int_tofp(max_perf), + limits->min_perf)); + *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); + } } static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force) @@ -743,6 +984,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { cpu->pstate.min_pstate = pstate_funcs.get_min(); cpu->pstate.max_pstate = pstate_funcs.get_max(); + cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(); cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); cpu->pstate.scaling = pstate_funcs.get_scaling(); @@ -761,7 +1003,8 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) sample->freq = fp_toint( mul_fp(int_tofp( - cpu->pstate.max_pstate * cpu->pstate.scaling / 100), + cpu->pstate.max_pstate_physical * + cpu->pstate.scaling / 100), core_pct)); sample->core_pct_busy = (int32_t)core_pct; @@ -834,7 +1077,7 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) * specified pstate. */ core_busy = cpu->sample.core_pct_busy; - max_pstate = int_tofp(cpu->pstate.max_pstate); + max_pstate = int_tofp(cpu->pstate.max_pstate_physical); current_pstate = int_tofp(cpu->pstate.current_pstate); core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); @@ -988,37 +1231,63 @@ static unsigned int intel_pstate_get(unsigned int cpu_num) static int intel_pstate_set_policy(struct cpufreq_policy *policy) { +#if IS_ENABLED(CONFIG_ACPI) + struct cpudata *cpu; + int i; +#endif + pr_debug("intel_pstate: %s max %u policy->max %u\n", __func__, + policy->cpuinfo.max_freq, policy->max); if (!policy->cpuinfo.max_freq) return -ENODEV; if (policy->policy == CPUFREQ_POLICY_PERFORMANCE && policy->max >= policy->cpuinfo.max_freq) { - limits.min_policy_pct = 100; - limits.min_perf_pct = 100; - limits.min_perf = int_tofp(1); - limits.max_policy_pct = 100; - limits.max_perf_pct = 100; - limits.max_perf = int_tofp(1); - limits.no_turbo = 0; + pr_debug("intel_pstate: set performance\n"); + limits = &performance_limits; return 0; } - limits.min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; - limits.min_policy_pct = clamp_t(int, limits.min_policy_pct, 0 , 100); - limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq; - limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100); + pr_debug("intel_pstate: set powersave\n"); + limits = &powersave_limits; + limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; + limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100); + limits->max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq; + limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100); /* Normalize user input to [min_policy_pct, max_policy_pct] */ - limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct); - limits.min_perf_pct = min(limits.max_policy_pct, limits.min_perf_pct); - limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); - limits.max_perf_pct = max(limits.min_policy_pct, limits.max_perf_pct); + limits->min_perf_pct = max(limits->min_policy_pct, + limits->min_sysfs_pct); + limits->min_perf_pct = min(limits->max_policy_pct, + limits->min_perf_pct); + limits->max_perf_pct = min(limits->max_policy_pct, + limits->max_sysfs_pct); + limits->max_perf_pct = max(limits->min_policy_pct, + limits->max_perf_pct); /* Make sure min_perf_pct <= max_perf_pct */ - limits.min_perf_pct = min(limits.max_perf_pct, limits.min_perf_pct); + limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); - limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); - limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), + int_tofp(100)); + limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), + int_tofp(100)); + +#if IS_ENABLED(CONFIG_ACPI) + cpu = all_cpu_data[policy->cpu]; + for (i = 0; i < cpu->acpi_perf_data.state_count; i++) { + int control; + + control = convert_to_native_pstate_format(cpu, i); + if (control * cpu->pstate.scaling == policy->max) + limits->max_perf_ctl = control; + if (control * cpu->pstate.scaling == policy->min) + limits->min_perf_ctl = control; + } + + pr_debug("intel_pstate: max %u policy_max %u perf_ctl [0x%x-0x%x]\n", + policy->cpuinfo.max_freq, policy->max, limits->min_perf_ctl, + limits->max_perf_ctl); +#endif if (hwp_active) intel_pstate_hwp_set(); @@ -1062,7 +1331,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100) + if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100) policy->policy = CPUFREQ_POLICY_PERFORMANCE; else policy->policy = CPUFREQ_POLICY_POWERSAVE; @@ -1074,18 +1343,30 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; + if (!no_acpi_perf) + intel_pstate_init_perf_limits(policy); + /* + * If there is no acpi perf data or error, we ignore and use Intel P + * state calculated limits, So this is not fatal error. + */ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; cpumask_set_cpu(policy->cpu, policy->cpus); return 0; } +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) +{ + return intel_pstate_exit_perf_limits(policy); +} + static struct cpufreq_driver intel_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = intel_pstate_verify_policy, .setpolicy = intel_pstate_set_policy, .get = intel_pstate_get, .init = intel_pstate_cpu_init, + .exit = intel_pstate_cpu_exit, .stop_cpu = intel_pstate_stop_cpu, .name = "intel_pstate", }; @@ -1118,6 +1399,7 @@ static void copy_pid_params(struct pstate_adjust_policy *policy) static void copy_cpu_funcs(struct pstate_funcs *funcs) { pstate_funcs.get_max = funcs->get_max; + pstate_funcs.get_max_physical = funcs->get_max_physical; pstate_funcs.get_min = funcs->get_min; pstate_funcs.get_turbo = funcs->get_turbo; pstate_funcs.get_scaling = funcs->get_scaling; @@ -1126,7 +1408,6 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) } #if IS_ENABLED(CONFIG_ACPI) -#include <acpi/processor.h> static bool intel_pstate_no_acpi_pss(void) { @@ -1318,6 +1599,9 @@ static int __init intel_pstate_setup(char *str) force_load = 1; if (!strcmp(str, "hwp_only")) hwp_only = 1; + if (!strcmp(str, "no_acpi")) + no_acpi_perf = 1; + return 0; } early_param("intel_pstate", intel_pstate_setup); diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c index 49caed293a3b..83001dc5b646 100644 --- a/drivers/cpufreq/mt8173-cpufreq.c +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -344,7 +344,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) /* Both presence and absence of sram regulator are valid cases. */ sram_reg = regulator_get_exclusive(cpu_dev, "sram"); - ret = of_init_opp_table(cpu_dev); + ret = dev_pm_opp_of_add_table(cpu_dev); if (ret) { pr_warn("no OPP table for cpu%d\n", cpu); goto out_free_resources; @@ -378,7 +378,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) return 0; out_free_opp_table: - of_free_opp_table(cpu_dev); + dev_pm_opp_of_remove_table(cpu_dev); out_free_resources: if (!IS_ERR(proc_reg)) @@ -404,7 +404,7 @@ static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) if (!IS_ERR(info->inter_clk)) clk_put(info->inter_clk); - of_free_opp_table(info->cpu_dev); + dev_pm_opp_of_remove_table(info->cpu_dev); } static int mtk_cpufreq_init(struct cpufreq_policy *policy) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 64994e10638e..cb501386eb6e 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -327,8 +327,14 @@ static void powernv_cpufreq_throttle_check(void *data) if (chips[i].throttled) goto next; chips[i].throttled = true; - pr_info("CPU %d on Chip %u has Pmax reduced to %d\n", cpu, - chips[i].id, pmsr_pmax); + if (pmsr_pmax < powernv_pstate_info.nominal) + pr_crit("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", + cpu, chips[i].id, pmsr_pmax, + powernv_pstate_info.nominal); + else + pr_info("CPU %d on Chip %u has Pmax reduced below turbo frequency (%d < %d)\n", + cpu, chips[i].id, pmsr_pmax, + powernv_pstate_info.max); } else if (chips[i].throttled) { chips[i].throttled = false; pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, diff --git a/drivers/cpufreq/tegra20-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c index 8084c7f7e206..2bd62845e9d5 100644 --- a/drivers/cpufreq/tegra20-cpufreq.c +++ b/drivers/cpufreq/tegra20-cpufreq.c @@ -175,9 +175,7 @@ static struct cpufreq_driver tegra_cpufreq_driver = { .exit = tegra_cpu_exit, .name = "tegra", .attr = cpufreq_generic_attr, -#ifdef CONFIG_PM .suspend = cpufreq_generic_suspend, -#endif }; static int __init tegra_cpufreq_init(void) |