diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2015-09-01 16:52:35 +0300 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2015-09-01 16:52:35 +0300 |
commit | 4ffe18c2556f2848c4e57457915b08a63dc00fd5 (patch) | |
tree | 0e5045b2137095c9ad88cb7c5062e5801e377936 | |
parent | 498012511a060575a56551d28a10bb392aa361b5 (diff) | |
parent | 72e624de6e6f0d5a638fbc23842aa76ae048e9e7 (diff) | |
download | linux-4ffe18c2556f2848c4e57457915b08a63dc00fd5.tar.xz |
Merge branch 'pm-cpufreq'
* pm-cpufreq: (53 commits)
cpufreq: speedstep-lib: Use monotonic clock
cpufreq: powernv: Increase the verbosity of OCC console messages
cpufreq: sfi: use kmemdup rather than duplicating its implementation
cpufreq: drop !cpufreq_driver check from cpufreq_parse_governor()
cpufreq: rename cpufreq_real_policy as cpufreq_user_policy
cpufreq: remove redundant 'policy' field from user_policy
cpufreq: remove redundant 'governor' field from user_policy
cpufreq: update user_policy.* on success
cpufreq: use memcpy() to copy policy
cpufreq: remove redundant CPUFREQ_INCOMPATIBLE notifier event
cpufreq: mediatek: Add MT8173 cpufreq driver
dt-bindings: mediatek: Add MT8173 CPU DVFS clock bindings
intel_pstate: append more Oracle OEM table id to vendor bypass list
intel_pstate: Add SKY-S support
intel_pstate: Fix possible overflow complained by Coverity
cpufreq: Correct a freq check in cpufreq_set_policy()
cpufreq: Lock CPU online/offline in cpufreq_register_driver()
cpufreq: Replace recover_policy with new_policy in cpufreq_online()
cpufreq: Separate CPU device registration from CPU online
cpufreq: powernv: Restore cpu frequency to policy->cur on unthrottling
...
28 files changed, 1280 insertions, 458 deletions
diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt index 70933eadc308..ba78e7c2a069 100644 --- a/Documentation/cpu-freq/core.txt +++ b/Documentation/cpu-freq/core.txt @@ -55,16 +55,13 @@ transition notifiers. ---------------------------- These are notified when a new policy is intended to be set. Each -CPUFreq policy notifier is called three times for a policy transition: +CPUFreq policy notifier is called twice for a policy transition: 1.) During CPUFREQ_ADJUST all CPUFreq notifiers may change the limit if they see a need for this - may it be thermal considerations or hardware limitations. -2.) During CPUFREQ_INCOMPATIBLE only changes may be done in order to avoid - hardware failure. - -3.) And during CPUFREQ_NOTIFY all notifiers are informed of the new policy +2.) And during CPUFREQ_NOTIFY all notifiers are informed of the new policy - if two hardware drivers failed to agree on a new policy before this stage, the incompatible hardware shall be shut down, and the user informed of this. diff --git a/Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt b/Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt new file mode 100644 index 000000000000..52b457c23eed --- /dev/null +++ b/Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt @@ -0,0 +1,83 @@ +Device Tree Clock bindings for CPU DVFS of Mediatek MT8173 SoC + +Required properties: +- clocks: A list of phandle + clock-specifier pairs for the clocks listed in clock names. +- clock-names: Should contain the following: + "cpu" - The multiplexer for clock input of CPU cluster. + "intermediate" - A parent of "cpu" clock which is used as "intermediate" clock + source (usually MAINPLL) when the original CPU PLL is under + transition and not stable yet. + Please refer to Documentation/devicetree/bindings/clock/clock-bindings.txt for + generic clock consumer properties. +- proc-supply: Regulator for Vproc of CPU cluster. 
+ +Optional properties: +- sram-supply: Regulator for Vsram of CPU cluster. When present, the cpufreq driver + needs to do "voltage tracking" to step by step scale up/down Vproc and + Vsram to fit SoC specific needs. When absent, the voltage scaling + flow is handled by hardware, hence no software "voltage tracking" is + needed. + +Example: +-------- + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a53"; + reg = <0x000>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0>; + clocks = <&infracfg CLK_INFRA_CA53SEL>, + <&apmixedsys CLK_APMIXED_MAINPLL>; + clock-names = "cpu", "intermediate"; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a53"; + reg = <0x001>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0>; + clocks = <&infracfg CLK_INFRA_CA53SEL>, + <&apmixedsys CLK_APMIXED_MAINPLL>; + clock-names = "cpu", "intermediate"; + }; + + cpu2: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x100>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0>; + clocks = <&infracfg CLK_INFRA_CA57SEL>, + <&apmixedsys CLK_APMIXED_MAINPLL>; + clock-names = "cpu", "intermediate"; + }; + + cpu3: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x101>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0>; + clocks = <&infracfg CLK_INFRA_CA57SEL>, + <&apmixedsys CLK_APMIXED_MAINPLL>; + clock-names = "cpu", "intermediate"; + }; + + &cpu0 { + proc-supply = <&mt6397_vpca15_reg>; + }; + + &cpu1 { + proc-supply = <&mt6397_vpca15_reg>; + }; + + &cpu2 { + proc-supply = <&da9211_vcpu_reg>; + sram-supply = <&mt6397_vsramca7_reg>; + }; + + &cpu3 { + proc-supply = <&da9211_vcpu_reg>; + sram-supply = <&mt6397_vsramca7_reg>; + }; diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index e9e4c52f3685..64dc9f547fb6 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -361,6 +361,7 @@ enum 
opal_msg_type { OPAL_MSG_HMI_EVT, OPAL_MSG_DPO, OPAL_MSG_PRD, + OPAL_MSG_OCC, OPAL_MSG_TYPE_MAX, }; @@ -700,6 +701,17 @@ struct opal_prd_msg_header { struct opal_prd_msg; +#define OCC_RESET 0 +#define OCC_LOAD 1 +#define OCC_THROTTLE 2 +#define OCC_MAX_THROTTLE_STATUS 5 + +struct opal_occ_msg { + __be64 type; + __be64 chip; + __be64 throttle_status; +}; + /* * SG entries * diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 53cfe8ba9799..bb01dea39fdc 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -83,7 +83,7 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb, if (ignore_ppc) return 0; - if (event != CPUFREQ_INCOMPATIBLE) + if (event != CPUFREQ_ADJUST) return 0; mutex_lock(&performance_mutex); @@ -780,9 +780,7 @@ acpi_processor_register_performance(struct acpi_processor_performance EXPORT_SYMBOL(acpi_processor_register_performance); -void -acpi_processor_unregister_performance(struct acpi_processor_performance - *performance, unsigned int cpu) +void acpi_processor_unregister_performance(unsigned int cpu) { struct acpi_processor *pr; diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index cc8a71c267b8..2bacf24a19a9 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -130,6 +130,13 @@ config ARM_KIRKWOOD_CPUFREQ This adds the CPUFreq driver for Marvell Kirkwood SoCs. +config ARM_MT8173_CPUFREQ + bool "Mediatek MT8173 CPUFreq support" + depends on ARCH_MEDIATEK && REGULATOR + select PM_OPP + help + This adds the CPUFreq driver support for Mediatek MT8173 SoC. 
+ config ARM_OMAP2PLUS_CPUFREQ bool "TI OMAP2+" depends on ARCH_OMAP2PLUS diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 2169bf792db7..9c75fafd2901 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_ARM_HISI_ACPU_CPUFREQ) += hisi-acpu-cpufreq.o obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o obj-$(CONFIG_ARM_INTEGRATOR) += integrator-cpufreq.o obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o +obj-$(CONFIG_ARM_MT8173_CPUFREQ) += mt8173-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_PXA2xx_CPUFREQ) += pxa2xx-cpufreq.o obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 0136dfcdabf0..15b921a9248c 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -65,18 +65,21 @@ enum { #define MSR_K7_HWCR_CPB_DIS (1ULL << 25) struct acpi_cpufreq_data { - struct acpi_processor_performance *acpi_data; struct cpufreq_frequency_table *freq_table; unsigned int resume; unsigned int cpu_feature; + unsigned int acpi_perf_cpu; cpumask_var_t freqdomain_cpus; }; -static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); - /* acpi_perf_data is a pointer to percpu data. 
*/ static struct acpi_processor_performance __percpu *acpi_perf_data; +static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data) +{ + return per_cpu_ptr(acpi_perf_data, data->acpi_perf_cpu); +} + static struct cpufreq_driver acpi_cpufreq_driver; static unsigned int acpi_pstate_strict; @@ -144,7 +147,7 @@ static int _store_boost(int val) static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; return cpufreq_show_cpus(data->freqdomain_cpus, buf); } @@ -202,7 +205,7 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) struct acpi_processor_performance *perf; int i; - perf = data->acpi_data; + perf = to_perf_data(data); for (i = 0; i < perf->state_count; i++) { if (value == perf->states[i].status) @@ -221,7 +224,7 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) else msr &= INTEL_MSR_RANGE; - perf = data->acpi_data; + perf = to_perf_data(data); cpufreq_for_each_entry(pos, data->freq_table) if (msr == perf->states[pos->driver_data].status) @@ -327,7 +330,8 @@ static void drv_write(struct drv_cmd *cmd) put_cpu(); } -static u32 get_cur_val(const struct cpumask *mask) +static u32 +get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) { struct acpi_processor_performance *perf; struct drv_cmd cmd; @@ -335,7 +339,7 @@ static u32 get_cur_val(const struct cpumask *mask) if (unlikely(cpumask_empty(mask))) return 0; - switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) { + switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_CTL; @@ -346,7 +350,7 @@ static u32 get_cur_val(const struct cpumask *mask) break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; + perf = to_perf_data(data); 
cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -364,19 +368,24 @@ static u32 get_cur_val(const struct cpumask *mask) static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); + struct acpi_cpufreq_data *data; + struct cpufreq_policy *policy; unsigned int freq; unsigned int cached_freq; pr_debug("get_cur_freq_on_cpu (%d)\n", cpu); - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { + policy = cpufreq_cpu_get(cpu); + if (unlikely(!policy)) return 0; - } - cached_freq = data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(cpumask_of(cpu)), data); + data = policy->driver_data; + cpufreq_cpu_put(policy); + if (unlikely(!data || !data->freq_table)) + return 0; + + cached_freq = data->freq_table[to_perf_data(data)->state].frequency; + freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. 
@@ -397,7 +406,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, unsigned int i; for (i = 0; i < 100; i++) { - cur_freq = extract_freq(get_cur_val(mask), data); + cur_freq = extract_freq(get_cur_val(mask, data), data); if (cur_freq == freq) return 1; udelay(10); @@ -408,18 +417,17 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int index) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; struct drv_cmd cmd; unsigned int next_perf_state = 0; /* Index into perf table */ int result = 0; - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { + if (unlikely(data == NULL || data->freq_table == NULL)) { return -ENODEV; } - perf = data->acpi_data; + perf = to_perf_data(data); next_perf_state = data->freq_table[index].driver_data; if (perf->state == next_perf_state) { if (unlikely(data->resume)) { @@ -482,8 +490,9 @@ out: static unsigned long acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) { - struct acpi_processor_performance *perf = data->acpi_data; + struct acpi_processor_performance *perf; + perf = to_perf_data(data); if (cpu_khz) { /* search the closest match to cpu_khz */ unsigned int i; @@ -672,17 +681,17 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) goto err_free; } - data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); - per_cpu(acfreq_data, cpu) = data; + perf = per_cpu_ptr(acpi_perf_data, cpu); + data->acpi_perf_cpu = cpu; + policy->driver_data = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; - result = acpi_processor_register_performance(data->acpi_data, cpu); + result = acpi_processor_register_performance(perf, cpu); if (result) goto err_free_mask; - perf = data->acpi_data; 
policy->shared_type = perf->shared_type; /* @@ -838,26 +847,25 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) err_freqfree: kfree(data->freq_table); err_unreg: - acpi_processor_unregister_performance(perf, cpu); + acpi_processor_unregister_performance(cpu); err_free_mask: free_cpumask_var(data->freqdomain_cpus); err_free: kfree(data); - per_cpu(acfreq_data, cpu) = NULL; + policy->driver_data = NULL; return result; } static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; pr_debug("acpi_cpufreq_cpu_exit\n"); if (data) { - per_cpu(acfreq_data, policy->cpu) = NULL; - acpi_processor_unregister_performance(data->acpi_data, - policy->cpu); + policy->driver_data = NULL; + acpi_processor_unregister_performance(data->acpi_perf_cpu); free_cpumask_var(data->freqdomain_cpus); kfree(data->freq_table); kfree(data); @@ -868,7 +876,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; pr_debug("acpi_cpufreq_resume\n"); @@ -880,7 +888,9 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) static struct freq_attr *acpi_cpufreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, &freqdomain_cpus, - NULL, /* this is a placeholder for cpb, do not remove */ +#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB + &cpb, +#endif NULL, }; @@ -953,17 +963,16 @@ static int __init acpi_cpufreq_init(void) * only if configured. This is considered legacy code, which * will probably be removed at some point in the future. 
*/ - if (check_amd_hwpstate_cpu(0)) { - struct freq_attr **iter; - - pr_debug("adding sysfs entry for cpb\n"); + if (!check_amd_hwpstate_cpu(0)) { + struct freq_attr **attr; - for (iter = acpi_cpufreq_attr; *iter != NULL; iter++) - ; + pr_debug("CPB unsupported, do not expose it\n"); - /* make sure there is a terminator behind it */ - if (iter[1] == NULL) - *iter = &cpb; + for (attr = acpi_cpufreq_attr; *attr; attr++) + if (*attr == &cpb) { + *attr = NULL; + break; + } } #endif acpi_cpufreq_boost_init(); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7a3c30c4336f..a05cc75cc45d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -112,12 +112,6 @@ static inline bool has_target(void) return cpufreq_driver->target_index || cpufreq_driver->target; } -/* - * rwsem to guarantee that cpufreq driver module doesn't unload during critical - * sections - */ -static DECLARE_RWSEM(cpufreq_rwsem); - /* internal prototypes */ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); @@ -277,10 +271,6 @@ EXPORT_SYMBOL_GPL(cpufreq_generic_get); * If corresponding call cpufreq_cpu_put() isn't made, the policy wouldn't be * freed as that depends on the kobj count. * - * It also takes a read-lock of 'cpufreq_rwsem' and doesn't put it back if a - * valid policy is found. This is done to make sure the driver doesn't get - * unregistered while the policy is being used. - * * Return: A valid policy on success, otherwise NULL on failure. 
*/ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) @@ -291,9 +281,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) if (WARN_ON(cpu >= nr_cpu_ids)) return NULL; - if (!down_read_trylock(&cpufreq_rwsem)) - return NULL; - /* get the cpufreq driver */ read_lock_irqsave(&cpufreq_driver_lock, flags); @@ -306,9 +293,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) read_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (!policy) - up_read(&cpufreq_rwsem); - return policy; } EXPORT_SYMBOL_GPL(cpufreq_cpu_get); @@ -320,13 +304,10 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_get); * * This decrements the kobject reference count incremented earlier by calling * cpufreq_cpu_get(). - * - * It also drops the read-lock of 'cpufreq_rwsem' taken at cpufreq_cpu_get(). */ void cpufreq_cpu_put(struct cpufreq_policy *policy) { kobject_put(&policy->kobj); - up_read(&cpufreq_rwsem); } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); @@ -539,9 +520,6 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, { int err = -EINVAL; - if (!cpufreq_driver) - goto out; - if (cpufreq_driver->setpolicy) { if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { *policy = CPUFREQ_POLICY_PERFORMANCE; @@ -576,7 +554,6 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, mutex_unlock(&cpufreq_governor_mutex); } -out: return err; } @@ -625,9 +602,7 @@ static ssize_t store_##file_name \ int ret, temp; \ struct cpufreq_policy new_policy; \ \ - ret = cpufreq_get_policy(&new_policy, policy->cpu); \ - if (ret) \ - return -EINVAL; \ + memcpy(&new_policy, policy, sizeof(*policy)); \ \ ret = sscanf(buf, "%u", &new_policy.object); \ if (ret != 1) \ @@ -681,9 +656,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, char str_governor[16]; struct cpufreq_policy new_policy; - ret = cpufreq_get_policy(&new_policy, policy->cpu); - if (ret) - return ret; + memcpy(&new_policy, policy, sizeof(*policy)); ret = sscanf(buf, "%15s", 
str_governor); if (ret != 1) @@ -694,14 +667,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, return -EINVAL; ret = cpufreq_set_policy(policy, &new_policy); - - policy->user_policy.policy = policy->policy; - policy->user_policy.governor = policy->governor; - - if (ret) - return ret; - else - return count; + return ret ? ret : count; } /** @@ -851,9 +817,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) struct freq_attr *fattr = to_attr(attr); ssize_t ret; - if (!down_read_trylock(&cpufreq_rwsem)) - return -EINVAL; - down_read(&policy->rwsem); if (fattr->show) @@ -862,7 +825,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) ret = -EIO; up_read(&policy->rwsem); - up_read(&cpufreq_rwsem); return ret; } @@ -879,9 +841,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, if (!cpu_online(policy->cpu)) goto unlock; - if (!down_read_trylock(&cpufreq_rwsem)) - goto unlock; - down_write(&policy->rwsem); /* Updating inactive policies is invalid, so avoid doing that. 
*/ @@ -897,8 +856,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, unlock_policy_rwsem: up_write(&policy->rwsem); - - up_read(&cpufreq_rwsem); unlock: put_online_cpus(); @@ -1027,8 +984,7 @@ static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) } } -static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, - struct device *dev) +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) { struct freq_attr **drv_attr; int ret = 0; @@ -1060,11 +1016,10 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, return cpufreq_add_dev_symlink(policy); } -static void cpufreq_init_policy(struct cpufreq_policy *policy) +static int cpufreq_init_policy(struct cpufreq_policy *policy) { struct cpufreq_governor *gov = NULL; struct cpufreq_policy new_policy; - int ret = 0; memcpy(&new_policy, policy, sizeof(*policy)); @@ -1083,16 +1038,10 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy) cpufreq_parse_governor(gov->name, &new_policy.policy, NULL); /* set default policy */ - ret = cpufreq_set_policy(policy, &new_policy); - if (ret) { - pr_debug("setting policy failed\n"); - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - } + return cpufreq_set_policy(policy, &new_policy); } -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, - unsigned int cpu, struct device *dev) +static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) { int ret = 0; @@ -1126,33 +1075,15 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, return 0; } -static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) -{ - struct cpufreq_policy *policy; - unsigned long flags; - - read_lock_irqsave(&cpufreq_driver_lock, flags); - policy = per_cpu(cpufreq_cpu_data, cpu); - read_unlock_irqrestore(&cpufreq_driver_lock, flags); - - if (likely(policy)) { - /* Policy should be inactive here */ - WARN_ON(!policy_is_inactive(policy)); - - 
down_write(&policy->rwsem); - policy->cpu = cpu; - policy->governor = NULL; - up_write(&policy->rwsem); - } - - return policy; -} - -static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) +static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) { + struct device *dev = get_cpu_device(cpu); struct cpufreq_policy *policy; int ret; + if (WARN_ON(!dev)) + return NULL; + policy = kzalloc(sizeof(*policy), GFP_KERNEL); if (!policy) return NULL; @@ -1180,10 +1111,10 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); - policy->cpu = dev->id; + policy->cpu = cpu; /* Set this once on allocation */ - policy->kobj_cpu = dev->id; + policy->kobj_cpu = cpu; return policy; @@ -1245,59 +1176,34 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) kfree(policy); } -/** - * cpufreq_add_dev - add a CPU device - * - * Adds the cpufreq interface for a CPU device. - * - * The Oracle says: try running cpufreq registration/unregistration concurrently - * with with cpu hotplugging and all hell will break loose. Tried to clean this - * mess up, but more thorough testing is needed. - Mathieu - */ -static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) +static int cpufreq_online(unsigned int cpu) { - unsigned int j, cpu = dev->id; - int ret = -ENOMEM; struct cpufreq_policy *policy; + bool new_policy; unsigned long flags; - bool recover_policy = !sif; - - pr_debug("adding CPU %u\n", cpu); - - if (cpu_is_offline(cpu)) { - /* - * Only possible if we are here from the subsys_interface add - * callback. A hotplug notifier will follow and we will handle - * it as CPU online then. For now, just create the sysfs link, - * unless there is no policy or the link is already present. - */ - policy = per_cpu(cpufreq_cpu_data, cpu); - return policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) - ? 
add_cpu_dev_symlink(policy, cpu) : 0; - } + unsigned int j; + int ret; - if (!down_read_trylock(&cpufreq_rwsem)) - return 0; + pr_debug("%s: bringing CPU%u online\n", __func__, cpu); /* Check if this CPU already has a policy to manage it */ policy = per_cpu(cpufreq_cpu_data, cpu); - if (policy && !policy_is_inactive(policy)) { + if (policy) { WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus)); - ret = cpufreq_add_policy_cpu(policy, cpu, dev); - up_read(&cpufreq_rwsem); - return ret; - } + if (!policy_is_inactive(policy)) + return cpufreq_add_policy_cpu(policy, cpu); - /* - * Restore the saved policy when doing light-weight init and fall back - * to the full init if that fails. - */ - policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL; - if (!policy) { - recover_policy = false; - policy = cpufreq_policy_alloc(dev); + /* This is the only online CPU for the policy. Start over. */ + new_policy = false; + down_write(&policy->rwsem); + policy->cpu = cpu; + policy->governor = NULL; + up_write(&policy->rwsem); + } else { + new_policy = true; + policy = cpufreq_policy_alloc(cpu); if (!policy) - goto nomem_out; + return -ENOMEM; } cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1308,17 +1214,17 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) ret = cpufreq_driver->init(policy); if (ret) { pr_debug("initialization failed\n"); - goto err_set_policy_cpu; + goto out_free_policy; } down_write(&policy->rwsem); - /* related cpus should atleast have policy->cpus */ - cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); - - /* Remember which CPUs have been present at the policy creation time. */ - if (!recover_policy) + if (new_policy) { + /* related_cpus should at least include policy->cpus. */ + cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + /* Remember CPUs present at the policy creation time. 
*/ cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); + } /* * affected cpus must always be the one, which are online. We aren't @@ -1326,7 +1232,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) */ cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); - if (!recover_policy) { + if (new_policy) { policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; @@ -1340,7 +1246,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) policy->cur = cpufreq_driver->get(policy->cpu); if (!policy->cur) { pr_err("%s: ->get() failed\n", __func__); - goto err_get_freq; + goto out_exit_policy; } } @@ -1387,10 +1293,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); - if (!recover_policy) { - ret = cpufreq_add_dev_interface(policy, dev); + if (new_policy) { + ret = cpufreq_add_dev_interface(policy); if (ret) - goto err_out_unregister; + goto out_exit_policy; blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); @@ -1399,18 +1305,19 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) write_unlock_irqrestore(&cpufreq_driver_lock, flags); } - cpufreq_init_policy(policy); - - if (!recover_policy) { - policy->user_policy.policy = policy->policy; - policy->user_policy.governor = policy->governor; + ret = cpufreq_init_policy(policy); + if (ret) { + pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n", + __func__, cpu, ret); + /* cpufreq_policy_free() will notify based on this */ + new_policy = false; + goto out_exit_policy; } + up_write(&policy->rwsem); kobject_uevent(&policy->kobj, KOBJ_ADD); - up_read(&cpufreq_rwsem); - /* Callback for handling stuff after policy is ready */ if (cpufreq_driver->ready) cpufreq_driver->ready(policy); @@ -1419,24 +1326,47 @@ static int cpufreq_add_dev(struct device *dev, struct 
subsys_interface *sif) return 0; -err_out_unregister: -err_get_freq: +out_exit_policy: up_write(&policy->rwsem); if (cpufreq_driver->exit) cpufreq_driver->exit(policy); -err_set_policy_cpu: - cpufreq_policy_free(policy, recover_policy); -nomem_out: - up_read(&cpufreq_rwsem); +out_free_policy: + cpufreq_policy_free(policy, !new_policy); + return ret; +} + +/** + * cpufreq_add_dev - the cpufreq interface for a CPU device. + * @dev: CPU device. + * @sif: Subsystem interface structure pointer (not used) + */ +static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) +{ + unsigned cpu = dev->id; + int ret; + + dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu); + + if (cpu_online(cpu)) { + ret = cpufreq_online(cpu); + } else { + /* + * A hotplug notifier will follow and we will handle it as CPU + * online then. For now, just create the sysfs link, unless + * there is no policy or the link is already present. + */ + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + + ret = policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) + ? 
add_cpu_dev_symlink(policy, cpu) : 0; + } return ret; } -static int __cpufreq_remove_dev_prepare(struct device *dev) +static void cpufreq_offline_prepare(unsigned int cpu) { - unsigned int cpu = dev->id; - int ret = 0; struct cpufreq_policy *policy; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); @@ -1444,11 +1374,11 @@ static int __cpufreq_remove_dev_prepare(struct device *dev) policy = cpufreq_cpu_get_raw(cpu); if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); - return -EINVAL; + return; } if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) pr_err("%s: Failed to stop governor\n", __func__); } @@ -1469,7 +1399,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev) /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); @@ -1479,28 +1409,24 @@ static int __cpufreq_remove_dev_prepare(struct device *dev) } else if (cpufreq_driver->stop_cpu) { cpufreq_driver->stop_cpu(policy); } - - return ret; } -static int __cpufreq_remove_dev_finish(struct device *dev) +static void cpufreq_offline_finish(unsigned int cpu) { - unsigned int cpu = dev->id; - int ret; struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); - return -EINVAL; + return; } /* Only proceed for inactive policies */ if (!policy_is_inactive(policy)) - return 0; + return; /* If cpu is last user of policy, free policy */ if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) pr_err("%s: Failed to exit governor\n", __func__); } @@ -1512,8 +1438,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev) */ if 
(cpufreq_driver->exit) cpufreq_driver->exit(policy); - - return 0; } /** @@ -1530,8 +1454,8 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return 0; if (cpu_online(cpu)) { - __cpufreq_remove_dev_prepare(dev); - __cpufreq_remove_dev_finish(dev); + cpufreq_offline_prepare(cpu); + cpufreq_offline_finish(cpu); } cpumask_clear_cpu(cpu, policy->real_cpus); @@ -2247,7 +2171,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); - if (new_policy->min > policy->max || new_policy->max < policy->min) + /* + * This check works well when we store new min/max freq attributes, + * because new_policy is a copy of policy with one field updated. + */ + if (new_policy->min > new_policy->max) return -EINVAL; /* verify the cpu speed can be set within this limit */ @@ -2259,10 +2187,6 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_ADJUST, new_policy); - /* adjust if necessary - hardware incompatibility*/ - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_INCOMPATIBLE, new_policy); - /* * verify the cpu speed can be set within this limit, which might be * different to the first one @@ -2296,16 +2220,31 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, old_gov = policy->governor; /* end old governor */ if (old_gov) { - __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + if (ret) { + /* This can happen due to race with other operations */ + pr_debug("%s: Failed to Stop Governor: %s (%d)\n", + __func__, old_gov->name, ret); + return ret; + } + up_write(&policy->rwsem); - __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); down_write(&policy->rwsem); + + if (ret) { + pr_err("%s: Failed to Exit Governor: %s (%d)\n", + __func__, old_gov->name, 
ret); + return ret; + } } /* start new governor */ policy->governor = new_policy->governor; - if (!__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) { - if (!__cpufreq_governor(policy, CPUFREQ_GOV_START)) + ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); + if (!ret) { + ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + if (!ret) goto out; up_write(&policy->rwsem); @@ -2317,11 +2256,13 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); - __cpufreq_governor(policy, CPUFREQ_GOV_START); + if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) + policy->governor = NULL; + else + __cpufreq_governor(policy, CPUFREQ_GOV_START); } - return -EINVAL; + return ret; out: pr_debug("governor: change or update limits\n"); @@ -2350,8 +2291,6 @@ int cpufreq_update_policy(unsigned int cpu) memcpy(&new_policy, policy, sizeof(*policy)); new_policy.min = policy->user_policy.min; new_policy.max = policy->user_policy.max; - new_policy.policy = policy->user_policy.policy; - new_policy.governor = policy->user_policy.governor; /* * BIOS might change freq behind our back @@ -2387,27 +2326,23 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct device *dev; - dev = get_cpu_device(cpu); - if (dev) { - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - cpufreq_add_dev(dev, NULL); - break; + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + cpufreq_online(cpu); + break; - case CPU_DOWN_PREPARE: - __cpufreq_remove_dev_prepare(dev); - break; + case CPU_DOWN_PREPARE: + cpufreq_offline_prepare(cpu); + break; - case CPU_POST_DEAD: - __cpufreq_remove_dev_finish(dev); - break; + case CPU_POST_DEAD: + cpufreq_offline_finish(cpu); + break; - case CPU_DOWN_FAILED: - cpufreq_add_dev(dev, NULL); - 
break; - } + case CPU_DOWN_FAILED: + cpufreq_online(cpu); + break; } return NOTIFY_OK; } @@ -2515,10 +2450,14 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) pr_debug("trying to register driver %s\n", driver_data->name); + /* Protect against concurrent CPU online/offline. */ + get_online_cpus(); + write_lock_irqsave(&cpufreq_driver_lock, flags); if (cpufreq_driver) { write_unlock_irqrestore(&cpufreq_driver_lock, flags); - return -EEXIST; + ret = -EEXIST; + goto out; } cpufreq_driver = driver_data; write_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -2557,7 +2496,10 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) register_hotcpu_notifier(&cpufreq_cpu_notifier); pr_debug("driver %s up and running\n", driver_data->name); - return 0; +out: + put_online_cpus(); + return ret; + err_if_unreg: subsys_interface_unregister(&cpufreq_interface); err_boost_unreg: @@ -2567,7 +2509,7 @@ err_null_driver: write_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - return ret; + goto out; } EXPORT_SYMBOL_GPL(cpufreq_register_driver); @@ -2588,19 +2530,20 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) pr_debug("unregistering driver %s\n", driver->name); + /* Protect against concurrent cpu hotplug */ + get_online_cpus(); subsys_interface_unregister(&cpufreq_interface); if (cpufreq_boost_supported()) cpufreq_sysfs_remove_file(&boost.attr); unregister_hotcpu_notifier(&cpufreq_cpu_notifier); - down_write(&cpufreq_rwsem); write_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - up_write(&cpufreq_rwsem); + put_online_cpus(); return 0; } diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index c86a10c30912..84a1506950a7 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -47,7 +47,7 @@ static inline 
unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, static void cs_check_cpu(int cpu, unsigned int load) { struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -102,26 +102,15 @@ static void cs_check_cpu(int cpu, unsigned int load) } } -static void cs_dbs_timer(struct work_struct *work) +static unsigned int cs_dbs_timer(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, bool modify_all) { - struct cs_cpu_dbs_info_s *dbs_info = container_of(work, - struct cs_cpu_dbs_info_s, cdbs.work.work); - unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; - struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info, - cpu); - struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - int delay = delay_for_sampling_rate(cs_tuners->sampling_rate); - bool modify_all = true; - mutex_lock(&core_dbs_info->cdbs.timer_mutex); - if (!need_load_eval(&core_dbs_info->cdbs, cs_tuners->sampling_rate)) - modify_all = false; - else - dbs_check_cpu(dbs_data, cpu); + if (modify_all) + dbs_check_cpu(dbs_data, cdbs->shared->policy->cpu); - gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all); - mutex_unlock(&core_dbs_info->cdbs.timer_mutex); + return delay_for_sampling_rate(cs_tuners->sampling_rate); } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, @@ -135,7 +124,7 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (!dbs_info->enable) return 0; - policy = dbs_info->cdbs.cur_policy; + policy = dbs_info->cdbs.shared->policy; /* * we only care if our internally tracked freq moves outside the 'valid' diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 
57a39f8a92b7..939197ffa4ac 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -32,10 +32,10 @@ static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) { - struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - struct cpufreq_policy *policy; + struct cpufreq_policy *policy = cdbs->shared->policy; unsigned int sampling_rate; unsigned int max_load = 0; unsigned int ignore_nice; @@ -60,11 +60,9 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) ignore_nice = cs_tuners->ignore_nice_load; } - policy = cdbs->cur_policy; - /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs; + struct cpu_dbs_info *j_cdbs; u64 cur_wall_time, cur_idle_time; unsigned int idle_time, wall_time; unsigned int load; @@ -163,9 +161,9 @@ EXPORT_SYMBOL_GPL(dbs_check_cpu); static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, unsigned int delay) { - struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); - mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay); + mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay); } void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, @@ -199,33 +197,63 @@ EXPORT_SYMBOL_GPL(gov_queue_work); static inline void gov_cancel_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy) { - struct cpu_dbs_common_info *cdbs; + struct cpu_dbs_info *cdbs; int i; for_each_cpu(i, policy->cpus) { cdbs = dbs_data->cdata->get_cpu_cdbs(i); - cancel_delayed_work_sync(&cdbs->work); + cancel_delayed_work_sync(&cdbs->dwork); } } /* Will return if we need to evaluate cpu load again or not */ -bool 
need_load_eval(struct cpu_dbs_common_info *cdbs, - unsigned int sampling_rate) +static bool need_load_eval(struct cpu_common_dbs_info *shared, + unsigned int sampling_rate) { - if (policy_is_shared(cdbs->cur_policy)) { + if (policy_is_shared(shared->policy)) { ktime_t time_now = ktime_get(); - s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp); + s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); /* Do nothing if we recently have sampled */ if (delta_us < (s64)(sampling_rate / 2)) return false; else - cdbs->time_stamp = time_now; + shared->time_stamp = time_now; } return true; } -EXPORT_SYMBOL_GPL(need_load_eval); + +static void dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info, + dwork.work); + struct cpu_common_dbs_info *shared = cdbs->shared; + struct cpufreq_policy *policy = shared->policy; + struct dbs_data *dbs_data = policy->governor_data; + unsigned int sampling_rate, delay; + bool modify_all = true; + + mutex_lock(&shared->timer_mutex); + + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + + sampling_rate = cs_tuners->sampling_rate; + } else { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + sampling_rate = od_tuners->sampling_rate; + } + + if (!need_load_eval(cdbs->shared, sampling_rate)) + modify_all = false; + + delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all); + gov_queue_work(dbs_data, policy, delay, modify_all); + + mutex_unlock(&shared->timer_mutex); +} static void set_sampling_rate(struct dbs_data *dbs_data, unsigned int sampling_rate) @@ -239,6 +267,37 @@ static void set_sampling_rate(struct dbs_data *dbs_data, } } +static int alloc_common_dbs_info(struct cpufreq_policy *policy, + struct common_dbs_data *cdata) +{ + struct cpu_common_dbs_info *shared; + int j; + + /* Allocate memory for the common information for policy->cpus */ + shared = kzalloc(sizeof(*shared), GFP_KERNEL); + if 
(!shared) + return -ENOMEM; + + /* Set shared for all CPUs, online+offline */ + for_each_cpu(j, policy->related_cpus) + cdata->get_cpu_cdbs(j)->shared = shared; + + return 0; +} + +static void free_common_dbs_info(struct cpufreq_policy *policy, + struct common_dbs_data *cdata) +{ + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + struct cpu_common_dbs_info *shared = cdbs->shared; + int j; + + for_each_cpu(j, policy->cpus) + cdata->get_cpu_cdbs(j)->shared = NULL; + + kfree(shared); +} + static int cpufreq_governor_init(struct cpufreq_policy *policy, struct dbs_data *dbs_data, struct common_dbs_data *cdata) @@ -246,9 +305,18 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, unsigned int latency; int ret; + /* State should be equivalent to EXIT */ + if (policy->governor_data) + return -EBUSY; + if (dbs_data) { if (WARN_ON(have_governor_per_policy())) return -EINVAL; + + ret = alloc_common_dbs_info(policy, cdata); + if (ret) + return ret; + dbs_data->usage_count++; policy->governor_data = dbs_data; return 0; @@ -258,12 +326,16 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, if (!dbs_data) return -ENOMEM; + ret = alloc_common_dbs_info(policy, cdata); + if (ret) + goto free_dbs_data; + dbs_data->cdata = cdata; dbs_data->usage_count = 1; ret = cdata->init(dbs_data, !policy->governor->initialized); if (ret) - goto free_dbs_data; + goto free_common_dbs_info; /* policy latency is in ns. 
Convert it to us first */ latency = policy->cpuinfo.transition_latency / 1000; @@ -300,15 +372,22 @@ put_kobj: } cdata_exit: cdata->exit(dbs_data, !policy->governor->initialized); +free_common_dbs_info: + free_common_dbs_info(policy, cdata); free_dbs_data: kfree(dbs_data); return ret; } -static void cpufreq_governor_exit(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_exit(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) { struct common_dbs_data *cdata = dbs_data->cdata; + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + + /* State should be equivalent to INIT */ + if (!cdbs->shared || cdbs->shared->policy) + return -EBUSY; policy->governor_data = NULL; if (!--dbs_data->usage_count) { @@ -323,6 +402,9 @@ static void cpufreq_governor_exit(struct cpufreq_policy *policy, cdata->exit(dbs_data, policy->governor->initialized == 1); kfree(dbs_data); } + + free_common_dbs_info(policy, cdata); + return 0; } static int cpufreq_governor_start(struct cpufreq_policy *policy, @@ -330,12 +412,17 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_common_dbs_info *shared = cdbs->shared; int io_busy = 0; if (!policy->cur) return -EINVAL; + /* State should be equivalent to INIT */ + if (!shared || shared->policy) + return -EBUSY; + if (cdata->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -349,12 +436,14 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, io_busy = od_tuners->io_is_busy; } + shared->policy = policy; + shared->time_stamp = ktime_get(); + mutex_init(&shared->timer_mutex); + for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs = cdata->get_cpu_cdbs(j); + struct 
cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); unsigned int prev_load; - j_cdbs->cpu = j; - j_cdbs->cur_policy = policy; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); @@ -366,8 +455,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - mutex_init(&j_cdbs->timer_mutex); - INIT_DEFERRABLE_WORK(&j_cdbs->work, cdata->gov_dbs_timer); + INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer); } if (cdata->governor == GOV_CONSERVATIVE) { @@ -386,20 +474,24 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, od_ops->powersave_bias_init_cpu(cpu); } - /* Initiate timer time stamp */ - cpu_cdbs->time_stamp = ktime_get(); - gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate), true); return 0; } -static void cpufreq_governor_stop(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_stop(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; - struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_common_dbs_info *shared = cdbs->shared; + + /* State should be equivalent to START */ + if (!shared || !shared->policy) + return -EBUSY; + + gov_cancel_work(dbs_data, policy); if (cdata->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -408,38 +500,40 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy, cs_dbs_info->enable = 0; } - gov_cancel_work(dbs_data, policy); - - mutex_destroy(&cpu_cdbs->timer_mutex); - cpu_cdbs->cur_policy = NULL; + shared->policy = NULL; + mutex_destroy(&shared->timer_mutex); + return 0; } -static void cpufreq_governor_limits(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_limits(struct cpufreq_policy *policy, + struct 
dbs_data *dbs_data) { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; - struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); - if (!cpu_cdbs->cur_policy) - return; + /* State should be equivalent to START */ + if (!cdbs->shared || !cdbs->shared->policy) + return -EBUSY; - mutex_lock(&cpu_cdbs->timer_mutex); - if (policy->max < cpu_cdbs->cur_policy->cur) - __cpufreq_driver_target(cpu_cdbs->cur_policy, policy->max, + mutex_lock(&cdbs->shared->timer_mutex); + if (policy->max < cdbs->shared->policy->cur) + __cpufreq_driver_target(cdbs->shared->policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > cpu_cdbs->cur_policy->cur) - __cpufreq_driver_target(cpu_cdbs->cur_policy, policy->min, + else if (policy->min > cdbs->shared->policy->cur) + __cpufreq_driver_target(cdbs->shared->policy, policy->min, CPUFREQ_RELATION_L); dbs_check_cpu(dbs_data, cpu); - mutex_unlock(&cpu_cdbs->timer_mutex); + mutex_unlock(&cdbs->shared->timer_mutex); + + return 0; } int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event) { struct dbs_data *dbs_data; - int ret = 0; + int ret; /* Lock governor to block concurrent initialization of governor */ mutex_lock(&cdata->mutex); @@ -449,7 +543,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, else dbs_data = cdata->gdbs_data; - if (WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT))) { + if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { ret = -EINVAL; goto unlock; } @@ -459,17 +553,19 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, ret = cpufreq_governor_init(policy, dbs_data, cdata); break; case CPUFREQ_GOV_POLICY_EXIT: - cpufreq_governor_exit(policy, dbs_data); + ret = cpufreq_governor_exit(policy, dbs_data); break; case CPUFREQ_GOV_START: ret = cpufreq_governor_start(policy, dbs_data); break; case CPUFREQ_GOV_STOP: - cpufreq_governor_stop(policy, dbs_data); + 
ret = cpufreq_governor_stop(policy, dbs_data); break; case CPUFREQ_GOV_LIMITS: - cpufreq_governor_limits(policy, dbs_data); + ret = cpufreq_governor_limits(policy, dbs_data); break; + default: + ret = -EINVAL; } unlock: diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 34736f5e869d..50f171796632 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -109,7 +109,7 @@ store_one(_gov, file_name) /* create helper routines */ #define define_get_cpu_dbs_routines(_dbs_info) \ -static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu) \ +static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ { \ return &per_cpu(_dbs_info, cpu).cdbs; \ } \ @@ -128,9 +128,20 @@ static void *get_cpu_dbs_info_s(int cpu) \ * cs_*: Conservative governor */ +/* Common to all CPUs of a policy */ +struct cpu_common_dbs_info { + struct cpufreq_policy *policy; + /* + * percpu mutex that serializes governor limit change with dbs_timer + * invocation. We do not want dbs_timer to run when user is changing + * the governor or limits. + */ + struct mutex timer_mutex; + ktime_t time_stamp; +}; + /* Per cpu structures */ -struct cpu_dbs_common_info { - int cpu; +struct cpu_dbs_info { u64 prev_cpu_idle; u64 prev_cpu_wall; u64 prev_cpu_nice; @@ -141,19 +152,12 @@ struct cpu_dbs_common_info { * wake-up from idle. */ unsigned int prev_load; - struct cpufreq_policy *cur_policy; - struct delayed_work work; - /* - * percpu mutex that serializes governor limit change with gov_dbs_timer - * invocation. We do not want gov_dbs_timer to run when user is changing - * the governor or limits. 
- */ - struct mutex timer_mutex; - ktime_t time_stamp; + struct delayed_work dwork; + struct cpu_common_dbs_info *shared; }; struct od_cpu_dbs_info_s { - struct cpu_dbs_common_info cdbs; + struct cpu_dbs_info cdbs; struct cpufreq_frequency_table *freq_table; unsigned int freq_lo; unsigned int freq_lo_jiffies; @@ -163,7 +167,7 @@ struct od_cpu_dbs_info_s { }; struct cs_cpu_dbs_info_s { - struct cpu_dbs_common_info cdbs; + struct cpu_dbs_info cdbs; unsigned int down_skip; unsigned int requested_freq; unsigned int enable:1; @@ -204,9 +208,11 @@ struct common_dbs_data { */ struct dbs_data *gdbs_data; - struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu); + struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); void *(*get_cpu_dbs_info_s)(int cpu); - void (*gov_dbs_timer)(struct work_struct *work); + unsigned int (*gov_dbs_timer)(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, + bool modify_all); void (*gov_check_cpu)(int cpu, unsigned int load); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); @@ -265,8 +271,6 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); -bool need_load_eval(struct cpu_dbs_common_info *cdbs, - unsigned int sampling_rate); int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event); void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 3c1e10f2304c..1fa9088c84a8 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -155,7 +155,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) static void od_check_cpu(int cpu, unsigned int load) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + 
struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; struct dbs_data *dbs_data = policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; @@ -191,46 +191,40 @@ static void od_check_cpu(int cpu, unsigned int load) } } -static void od_dbs_timer(struct work_struct *work) +static unsigned int od_dbs_timer(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, bool modify_all) { - struct od_cpu_dbs_info_s *dbs_info = - container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work); - unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; - struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info, + struct cpufreq_policy *policy = cdbs->shared->policy; + unsigned int cpu = policy->cpu; + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; - int delay = 0, sample_type = core_dbs_info->sample_type; - bool modify_all = true; + int delay = 0, sample_type = dbs_info->sample_type; - mutex_lock(&core_dbs_info->cdbs.timer_mutex); - if (!need_load_eval(&core_dbs_info->cdbs, od_tuners->sampling_rate)) { - modify_all = false; + if (!modify_all) goto max_delay; - } /* Common NORMAL_SAMPLE setup */ - core_dbs_info->sample_type = OD_NORMAL_SAMPLE; + dbs_info->sample_type = OD_NORMAL_SAMPLE; if (sample_type == OD_SUB_SAMPLE) { - delay = core_dbs_info->freq_lo_jiffies; - __cpufreq_driver_target(core_dbs_info->cdbs.cur_policy, - core_dbs_info->freq_lo, CPUFREQ_RELATION_H); + delay = dbs_info->freq_lo_jiffies; + __cpufreq_driver_target(policy, dbs_info->freq_lo, + CPUFREQ_RELATION_H); } else { dbs_check_cpu(dbs_data, cpu); - if (core_dbs_info->freq_lo) { + if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ - core_dbs_info->sample_type = OD_SUB_SAMPLE; - delay = core_dbs_info->freq_hi_jiffies; + dbs_info->sample_type = OD_SUB_SAMPLE; + delay = dbs_info->freq_hi_jiffies; } } max_delay: if (!delay) delay = 
delay_for_sampling_rate(od_tuners->sampling_rate - * core_dbs_info->rate_mult); + * dbs_info->rate_mult); - gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all); - mutex_unlock(&core_dbs_info->cdbs.timer_mutex); + return delay; } /************************** sysfs interface ************************/ @@ -273,27 +267,27 @@ static void update_sampling_rate(struct dbs_data *dbs_data, dbs_info = &per_cpu(od_cpu_dbs_info, cpu); cpufreq_cpu_put(policy); - mutex_lock(&dbs_info->cdbs.timer_mutex); + mutex_lock(&dbs_info->cdbs.shared->timer_mutex); - if (!delayed_work_pending(&dbs_info->cdbs.work)) { - mutex_unlock(&dbs_info->cdbs.timer_mutex); + if (!delayed_work_pending(&dbs_info->cdbs.dwork)) { + mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); continue; } next_sampling = jiffies + usecs_to_jiffies(new_rate); - appointed_at = dbs_info->cdbs.work.timer.expires; + appointed_at = dbs_info->cdbs.dwork.timer.expires; if (time_before(next_sampling, appointed_at)) { - mutex_unlock(&dbs_info->cdbs.timer_mutex); - cancel_delayed_work_sync(&dbs_info->cdbs.work); - mutex_lock(&dbs_info->cdbs.timer_mutex); + mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); + cancel_delayed_work_sync(&dbs_info->cdbs.dwork); + mutex_lock(&dbs_info->cdbs.shared->timer_mutex); - gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, - usecs_to_jiffies(new_rate), true); + gov_queue_work(dbs_data, policy, + usecs_to_jiffies(new_rate), true); } - mutex_unlock(&dbs_info->cdbs.timer_mutex); + mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); } } @@ -556,13 +550,16 @@ static void od_set_powersave_bias(unsigned int powersave_bias) get_online_cpus(); for_each_online_cpu(cpu) { + struct cpu_common_dbs_info *shared; + if (cpumask_test_cpu(cpu, &done)) continue; - policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy; - if (!policy) + shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared; + if (!shared) continue; + policy = shared->policy; cpumask_or(&done, &done, policy->cpus); if 
(policy->governor != &cpufreq_gov_ondemand) diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c index a0d2a423cea9..4085244c8a67 100644 --- a/drivers/cpufreq/e_powersaver.c +++ b/drivers/cpufreq/e_powersaver.c @@ -78,7 +78,7 @@ static int eps_acpi_init(void) static int eps_acpi_exit(struct cpufreq_policy *policy) { if (eps_acpi_cpu_perf) { - acpi_processor_unregister_performance(eps_acpi_cpu_perf, 0); + acpi_processor_unregister_performance(0); free_cpumask_var(eps_acpi_cpu_perf->shared_cpu_map); kfree(eps_acpi_cpu_perf); eps_acpi_cpu_perf = NULL; diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c index c30aaa6a54e8..0202429f1c5b 100644 --- a/drivers/cpufreq/ia64-acpi-cpufreq.c +++ b/drivers/cpufreq/ia64-acpi-cpufreq.c @@ -29,7 +29,6 @@ MODULE_LICENSE("GPL"); struct cpufreq_acpi_io { struct acpi_processor_performance acpi_data; - struct cpufreq_frequency_table *freq_table; unsigned int resume; }; @@ -221,6 +220,7 @@ acpi_cpufreq_cpu_init ( unsigned int cpu = policy->cpu; struct cpufreq_acpi_io *data; unsigned int result = 0; + struct cpufreq_frequency_table *freq_table; pr_debug("acpi_cpufreq_cpu_init\n"); @@ -254,10 +254,10 @@ acpi_cpufreq_cpu_init ( } /* alloc freq_table */ - data->freq_table = kzalloc(sizeof(*data->freq_table) * + freq_table = kzalloc(sizeof(*freq_table) * (data->acpi_data.state_count + 1), GFP_KERNEL); - if (!data->freq_table) { + if (!freq_table) { result = -ENOMEM; goto err_unreg; } @@ -276,14 +276,14 @@ acpi_cpufreq_cpu_init ( for (i = 0; i <= data->acpi_data.state_count; i++) { if (i < data->acpi_data.state_count) { - data->freq_table[i].frequency = + freq_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; } else { - data->freq_table[i].frequency = CPUFREQ_TABLE_END; + freq_table[i].frequency = CPUFREQ_TABLE_END; } } - result = cpufreq_table_validate_and_show(policy, data->freq_table); + result = cpufreq_table_validate_and_show(policy, freq_table); if (result) { 
goto err_freqfree; } @@ -311,9 +311,9 @@ acpi_cpufreq_cpu_init ( return (result); err_freqfree: - kfree(data->freq_table); + kfree(freq_table); err_unreg: - acpi_processor_unregister_performance(&data->acpi_data, cpu); + acpi_processor_unregister_performance(cpu); err_free: kfree(data); acpi_io_data[cpu] = NULL; @@ -332,8 +332,8 @@ acpi_cpufreq_cpu_exit ( if (data) { acpi_io_data[policy->cpu] = NULL; - acpi_processor_unregister_performance(&data->acpi_data, - policy->cpu); + acpi_processor_unregister_performance(policy->cpu); + kfree(policy->freq_table); kfree(data); } diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c index 129e266f7621..2faa4216bf2a 100644 --- a/drivers/cpufreq/integrator-cpufreq.c +++ b/drivers/cpufreq/integrator-cpufreq.c @@ -98,11 +98,10 @@ static int integrator_set_target(struct cpufreq_policy *policy, /* get current setting */ cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET); - if (machine_is_integrator()) { + if (machine_is_integrator()) vco.s = (cm_osc >> 8) & 7; - } else if (machine_is_cintegrator()) { + else if (machine_is_cintegrator()) vco.s = 1; - } vco.v = cm_osc & 255; vco.r = 22; freqs.old = icst_hz(&cclk_params, vco) / 1000; @@ -163,11 +162,10 @@ static unsigned int integrator_get(unsigned int cpu) /* detect memory etc. 
*/ cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET); - if (machine_is_integrator()) { + if (machine_is_integrator()) vco.s = (cm_osc >> 8) & 7; - } else { + else vco.s = 1; - } vco.v = cm_osc & 255; vco.r = 22; @@ -203,7 +201,7 @@ static int __init integrator_cpufreq_probe(struct platform_device *pdev) struct resource *res; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) + if (!res) return -ENODEV; cm_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); @@ -234,6 +232,6 @@ static struct platform_driver integrator_cpufreq_driver = { module_platform_driver_probe(integrator_cpufreq_driver, integrator_cpufreq_probe); -MODULE_AUTHOR ("Russell M. King"); -MODULE_DESCRIPTION ("cpufreq driver for ARM Integrator CPUs"); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Russell M. King"); +MODULE_DESCRIPTION("cpufreq driver for ARM Integrator CPUs"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fcb929ec5304..31d0548638e8 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -484,12 +484,11 @@ static void __init intel_pstate_sysfs_expose_params(void) } /************************** sysfs end ************************/ -static void intel_pstate_hwp_enable(void) +static void intel_pstate_hwp_enable(struct cpudata *cpudata) { - hwp_active++; pr_info("intel_pstate: HWP enabled\n"); - wrmsrl( MSR_PM_ENABLE, 0x1); + wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } static int byt_get_min_pstate(void) @@ -522,7 +521,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) int32_t vid_fp; u32 vid; - val = pstate << 8; + val = (u64)pstate << 8; if (limits.no_turbo && !limits.turbo_disabled) val |= (u64)1 << 32; @@ -611,7 +610,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) { u64 val; - val = pstate << 8; + val = (u64)pstate << 8; if (limits.no_turbo && !limits.turbo_disabled) val |= (u64)1 << 32; @@ -909,6 +908,7 @@ static const 
struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x4c, byt_params), ICPU(0x4e, core_params), ICPU(0x4f, core_params), + ICPU(0x5e, core_params), ICPU(0x56, core_params), ICPU(0x57, knl_params), {} @@ -933,6 +933,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu = all_cpu_data[cpunum]; cpu->cpu = cpunum; + + if (hwp_active) + intel_pstate_hwp_enable(cpu); + intel_pstate_get_cpu_pstates(cpu); init_timer_deferrable(&cpu->timer); @@ -1170,6 +1174,10 @@ static struct hw_vendor_info vendor_info[] = { {1, "ORACLE", "X4270M3 ", PPC}, {1, "ORACLE", "X4270M2 ", PPC}, {1, "ORACLE", "X4170M2 ", PPC}, + {1, "ORACLE", "X4170 M3", PPC}, + {1, "ORACLE", "X4275 M3", PPC}, + {1, "ORACLE", "X6-2 ", PPC}, + {1, "ORACLE", "Sudbury ", PPC}, {0, "", ""}, }; @@ -1246,7 +1254,7 @@ static int __init intel_pstate_init(void) return -ENOMEM; if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) - intel_pstate_hwp_enable(); + hwp_active++; if (!hwp_active && hwp_only) goto out; diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c new file mode 100644 index 000000000000..49caed293a3b --- /dev/null +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -0,0 +1,527 @@ +/* + * Copyright (c) 2015 Linaro Ltd. + * Author: Pi-Cheng Chen <pi-cheng.chen@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/clk.h> +#include <linux/cpu.h> +#include <linux/cpu_cooling.h> +#include <linux/cpufreq.h> +#include <linux/cpumask.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pm_opp.h> +#include <linux/regulator/consumer.h> +#include <linux/slab.h> +#include <linux/thermal.h> + +#define MIN_VOLT_SHIFT (100000) +#define MAX_VOLT_SHIFT (200000) +#define MAX_VOLT_LIMIT (1150000) +#define VOLT_TOL (10000) + +/* + * The struct mtk_cpu_dvfs_info holds necessary information for doing CPU DVFS + * on each CPU power/clock domain of Mediatek SoCs. Each CPU cluster in + * Mediatek SoCs has two voltage inputs, Vproc and Vsram. In some cases the two + * voltage inputs need to be controlled under a hardware limitation: + * 100mV < Vsram - Vproc < 200mV + * + * When scaling the clock frequency of a CPU clock domain, the clock source + * needs to be switched to another stable PLL clock temporarily until + * the original PLL becomes stable at target frequency. + */ +struct mtk_cpu_dvfs_info { + struct device *cpu_dev; + struct regulator *proc_reg; + struct regulator *sram_reg; + struct clk *cpu_clk; + struct clk *inter_clk; + struct thermal_cooling_device *cdev; + int intermediate_voltage; + bool need_voltage_tracking; +}; + +static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info, + int new_vproc) +{ + struct regulator *proc_reg = info->proc_reg; + struct regulator *sram_reg = info->sram_reg; + int old_vproc, old_vsram, new_vsram, vsram, vproc, ret; + + old_vproc = regulator_get_voltage(proc_reg); + old_vsram = regulator_get_voltage(sram_reg); + /* Vsram should not exceed the maximum allowed voltage of SoC. */ + new_vsram = min(new_vproc + MIN_VOLT_SHIFT, MAX_VOLT_LIMIT); + + if (old_vproc < new_vproc) { + /* + * When scaling up voltages, Vsram and Vproc scale up step + * by step. At each step, set Vsram to (Vproc + 200mV) first, + * then set Vproc to (Vsram - 100mV). 
+ * Keep doing it until Vsram and Vproc hit target voltages. + */ + do { + old_vsram = regulator_get_voltage(sram_reg); + old_vproc = regulator_get_voltage(proc_reg); + + vsram = min(new_vsram, old_vproc + MAX_VOLT_SHIFT); + + if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) { + vsram = MAX_VOLT_LIMIT; + + /* + * If the target Vsram hits the maximum voltage, + * try to set the exact voltage value first. + */ + ret = regulator_set_voltage(sram_reg, vsram, + vsram); + if (ret) + ret = regulator_set_voltage(sram_reg, + vsram - VOLT_TOL, + vsram); + + vproc = new_vproc; + } else { + ret = regulator_set_voltage(sram_reg, vsram, + vsram + VOLT_TOL); + + vproc = vsram - MIN_VOLT_SHIFT; + } + if (ret) + return ret; + + ret = regulator_set_voltage(proc_reg, vproc, + vproc + VOLT_TOL); + if (ret) { + regulator_set_voltage(sram_reg, old_vsram, + old_vsram); + return ret; + } + } while (vproc < new_vproc || vsram < new_vsram); + } else if (old_vproc > new_vproc) { + /* + * When scaling down voltages, Vsram and Vproc scale down step + * by step. At each step, set Vproc to (Vsram - 200mV) first, + * then set Vsram to (Vproc + 100mV). + * Keep doing it until Vsram and Vproc hit target voltages. + */ + do { + old_vproc = regulator_get_voltage(proc_reg); + old_vsram = regulator_get_voltage(sram_reg); + + vproc = max(new_vproc, old_vsram - MAX_VOLT_SHIFT); + ret = regulator_set_voltage(proc_reg, vproc, + vproc + VOLT_TOL); + if (ret) + return ret; + + if (vproc == new_vproc) + vsram = new_vsram; + else + vsram = max(new_vsram, vproc + MIN_VOLT_SHIFT); + + if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) { + vsram = MAX_VOLT_LIMIT; + + /* + * If the target Vsram hits the maximum voltage, + * try to set the exact voltage value first. 
+ */ + ret = regulator_set_voltage(sram_reg, vsram, + vsram); + if (ret) + ret = regulator_set_voltage(sram_reg, + vsram - VOLT_TOL, + vsram); + } else { + ret = regulator_set_voltage(sram_reg, vsram, + vsram + VOLT_TOL); + } + + if (ret) { + regulator_set_voltage(proc_reg, old_vproc, + old_vproc); + return ret; + } + } while (vproc > new_vproc + VOLT_TOL || + vsram > new_vsram + VOLT_TOL); + } + + return 0; +} + +static int mtk_cpufreq_set_voltage(struct mtk_cpu_dvfs_info *info, int vproc) +{ + if (info->need_voltage_tracking) + return mtk_cpufreq_voltage_tracking(info, vproc); + else + return regulator_set_voltage(info->proc_reg, vproc, + vproc + VOLT_TOL); +} + +static int mtk_cpufreq_set_target(struct cpufreq_policy *policy, + unsigned int index) +{ + struct cpufreq_frequency_table *freq_table = policy->freq_table; + struct clk *cpu_clk = policy->clk; + struct clk *armpll = clk_get_parent(cpu_clk); + struct mtk_cpu_dvfs_info *info = policy->driver_data; + struct device *cpu_dev = info->cpu_dev; + struct dev_pm_opp *opp; + long freq_hz, old_freq_hz; + int vproc, old_vproc, inter_vproc, target_vproc, ret; + + inter_vproc = info->intermediate_voltage; + + old_freq_hz = clk_get_rate(cpu_clk); + old_vproc = regulator_get_voltage(info->proc_reg); + + freq_hz = freq_table[index].frequency * 1000; + + rcu_read_lock(); + opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz); + if (IS_ERR(opp)) { + rcu_read_unlock(); + pr_err("cpu%d: failed to find OPP for %ld\n", + policy->cpu, freq_hz); + return PTR_ERR(opp); + } + vproc = dev_pm_opp_get_voltage(opp); + rcu_read_unlock(); + + /* + * If the new voltage or the intermediate voltage is higher than the + * current voltage, scale up voltage first. + */ + target_vproc = (inter_vproc > vproc) ? 
inter_vproc : vproc; + if (old_vproc < target_vproc) { + ret = mtk_cpufreq_set_voltage(info, target_vproc); + if (ret) { + pr_err("cpu%d: failed to scale up voltage!\n", + policy->cpu); + mtk_cpufreq_set_voltage(info, old_vproc); + return ret; + } + } + + /* Reparent the CPU clock to intermediate clock. */ + ret = clk_set_parent(cpu_clk, info->inter_clk); + if (ret) { + pr_err("cpu%d: failed to re-parent cpu clock!\n", + policy->cpu); + mtk_cpufreq_set_voltage(info, old_vproc); + WARN_ON(1); + return ret; + } + + /* Set the original PLL to target rate. */ + ret = clk_set_rate(armpll, freq_hz); + if (ret) { + pr_err("cpu%d: failed to scale cpu clock rate!\n", + policy->cpu); + clk_set_parent(cpu_clk, armpll); + mtk_cpufreq_set_voltage(info, old_vproc); + return ret; + } + + /* Set parent of CPU clock back to the original PLL. */ + ret = clk_set_parent(cpu_clk, armpll); + if (ret) { + pr_err("cpu%d: failed to re-parent cpu clock!\n", + policy->cpu); + mtk_cpufreq_set_voltage(info, inter_vproc); + WARN_ON(1); + return ret; + } + + /* + * If the new voltage is lower than the intermediate voltage or the + * original voltage, scale down to the new voltage. 
+ */ + if (vproc < inter_vproc || vproc < old_vproc) { + ret = mtk_cpufreq_set_voltage(info, vproc); + if (ret) { + pr_err("cpu%d: failed to scale down voltage!\n", + policy->cpu); + clk_set_parent(cpu_clk, info->inter_clk); + clk_set_rate(armpll, old_freq_hz); + clk_set_parent(cpu_clk, armpll); + return ret; + } + } + + return 0; +} + +static void mtk_cpufreq_ready(struct cpufreq_policy *policy) +{ + struct mtk_cpu_dvfs_info *info = policy->driver_data; + struct device_node *np = of_node_get(info->cpu_dev->of_node); + + if (WARN_ON(!np)) + return; + + if (of_find_property(np, "#cooling-cells", NULL)) { + info->cdev = of_cpufreq_cooling_register(np, + policy->related_cpus); + + if (IS_ERR(info->cdev)) { + dev_err(info->cpu_dev, + "running cpufreq without cooling device: %ld\n", + PTR_ERR(info->cdev)); + + info->cdev = NULL; + } + } + + of_node_put(np); +} + +static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) +{ + struct device *cpu_dev; + struct regulator *proc_reg = ERR_PTR(-ENODEV); + struct regulator *sram_reg = ERR_PTR(-ENODEV); + struct clk *cpu_clk = ERR_PTR(-ENODEV); + struct clk *inter_clk = ERR_PTR(-ENODEV); + struct dev_pm_opp *opp; + unsigned long rate; + int ret; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) { + pr_err("failed to get cpu%d device\n", cpu); + return -ENODEV; + } + + cpu_clk = clk_get(cpu_dev, "cpu"); + if (IS_ERR(cpu_clk)) { + if (PTR_ERR(cpu_clk) == -EPROBE_DEFER) + pr_warn("cpu clk for cpu%d not ready, retry.\n", cpu); + else + pr_err("failed to get cpu clk for cpu%d\n", cpu); + + ret = PTR_ERR(cpu_clk); + return ret; + } + + inter_clk = clk_get(cpu_dev, "intermediate"); + if (IS_ERR(inter_clk)) { + if (PTR_ERR(inter_clk) == -EPROBE_DEFER) + pr_warn("intermediate clk for cpu%d not ready, retry.\n", + cpu); + else + pr_err("failed to get intermediate clk for cpu%d\n", + cpu); + + ret = PTR_ERR(inter_clk); + goto out_free_resources; + } + + proc_reg = regulator_get_exclusive(cpu_dev, "proc"); + if 
(IS_ERR(proc_reg)) { + if (PTR_ERR(proc_reg) == -EPROBE_DEFER) + pr_warn("proc regulator for cpu%d not ready, retry.\n", + cpu); + else + pr_err("failed to get proc regulator for cpu%d\n", + cpu); + + ret = PTR_ERR(proc_reg); + goto out_free_resources; + } + + /* Both presence and absence of sram regulator are valid cases. */ + sram_reg = regulator_get_exclusive(cpu_dev, "sram"); + + ret = of_init_opp_table(cpu_dev); + if (ret) { + pr_warn("no OPP table for cpu%d\n", cpu); + goto out_free_resources; + } + + /* Search a safe voltage for intermediate frequency. */ + rate = clk_get_rate(inter_clk); + rcu_read_lock(); + opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate); + if (IS_ERR(opp)) { + rcu_read_unlock(); + pr_err("failed to get intermediate opp for cpu%d\n", cpu); + ret = PTR_ERR(opp); + goto out_free_opp_table; + } + info->intermediate_voltage = dev_pm_opp_get_voltage(opp); + rcu_read_unlock(); + + info->cpu_dev = cpu_dev; + info->proc_reg = proc_reg; + info->sram_reg = IS_ERR(sram_reg) ? NULL : sram_reg; + info->cpu_clk = cpu_clk; + info->inter_clk = inter_clk; + + /* + * If SRAM regulator is present, software "voltage tracking" is needed + * for this CPU power domain. 
+ */ + info->need_voltage_tracking = !IS_ERR(sram_reg); + + return 0; + +out_free_opp_table: + of_free_opp_table(cpu_dev); + +out_free_resources: + if (!IS_ERR(proc_reg)) + regulator_put(proc_reg); + if (!IS_ERR(sram_reg)) + regulator_put(sram_reg); + if (!IS_ERR(cpu_clk)) + clk_put(cpu_clk); + if (!IS_ERR(inter_clk)) + clk_put(inter_clk); + + return ret; +} + +static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) +{ + if (!IS_ERR(info->proc_reg)) + regulator_put(info->proc_reg); + if (!IS_ERR(info->sram_reg)) + regulator_put(info->sram_reg); + if (!IS_ERR(info->cpu_clk)) + clk_put(info->cpu_clk); + if (!IS_ERR(info->inter_clk)) + clk_put(info->inter_clk); + + of_free_opp_table(info->cpu_dev); +} + +static int mtk_cpufreq_init(struct cpufreq_policy *policy) +{ + struct mtk_cpu_dvfs_info *info; + struct cpufreq_frequency_table *freq_table; + int ret; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + ret = mtk_cpu_dvfs_info_init(info, policy->cpu); + if (ret) { + pr_err("%s failed to initialize dvfs info for cpu%d\n", + __func__, policy->cpu); + goto out_free_dvfs_info; + } + + ret = dev_pm_opp_init_cpufreq_table(info->cpu_dev, &freq_table); + if (ret) { + pr_err("failed to init cpufreq table for cpu%d: %d\n", + policy->cpu, ret); + goto out_release_dvfs_info; + } + + ret = cpufreq_table_validate_and_show(policy, freq_table); + if (ret) { + pr_err("%s: invalid frequency table: %d\n", __func__, ret); + goto out_free_cpufreq_table; + } + + /* CPUs in the same cluster share a clock and power domain. 
*/ + cpumask_copy(policy->cpus, &cpu_topology[policy->cpu].core_sibling); + policy->driver_data = info; + policy->clk = info->cpu_clk; + + return 0; + +out_free_cpufreq_table: + dev_pm_opp_free_cpufreq_table(info->cpu_dev, &freq_table); + +out_release_dvfs_info: + mtk_cpu_dvfs_info_release(info); + +out_free_dvfs_info: + kfree(info); + + return ret; +} + +static int mtk_cpufreq_exit(struct cpufreq_policy *policy) +{ + struct mtk_cpu_dvfs_info *info = policy->driver_data; + + cpufreq_cooling_unregister(info->cdev); + dev_pm_opp_free_cpufreq_table(info->cpu_dev, &policy->freq_table); + mtk_cpu_dvfs_info_release(info); + kfree(info); + + return 0; +} + +static struct cpufreq_driver mt8173_cpufreq_driver = { + .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .verify = cpufreq_generic_frequency_table_verify, + .target_index = mtk_cpufreq_set_target, + .get = cpufreq_generic_get, + .init = mtk_cpufreq_init, + .exit = mtk_cpufreq_exit, + .ready = mtk_cpufreq_ready, + .name = "mtk-cpufreq", + .attr = cpufreq_generic_attr, +}; + +static int mt8173_cpufreq_probe(struct platform_device *pdev) +{ + int ret; + + ret = cpufreq_register_driver(&mt8173_cpufreq_driver); + if (ret) + pr_err("failed to register mtk cpufreq driver\n"); + + return ret; +} + +static struct platform_driver mt8173_cpufreq_platdrv = { + .driver = { + .name = "mt8173-cpufreq", + }, + .probe = mt8173_cpufreq_probe, +}; + +static int mt8173_cpufreq_driver_init(void) +{ + struct platform_device *pdev; + int err; + + if (!of_machine_is_compatible("mediatek,mt8173")) + return -ENODEV; + + err = platform_driver_register(&mt8173_cpufreq_platdrv); + if (err) + return err; + + /* + * Since there's no place to hold device registration code and no + * device tree based way to match cpufreq driver yet, both the driver + * and the device registration codes are put here to handle defer + * probing. 
+ */ + pdev = platform_device_register_simple("mt8173-cpufreq", -1, NULL, 0); + if (IS_ERR(pdev)) { + pr_err("failed to register mtk-cpufreq platform device\n"); + return PTR_ERR(pdev); + } + + return 0; +} +device_initcall(mt8173_cpufreq_driver_init); diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index 37c5742482d8..c1ae1999770a 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -421,7 +421,7 @@ static int powernow_acpi_init(void) return 0; err2: - acpi_processor_unregister_performance(acpi_processor_perf, 0); + acpi_processor_unregister_performance(0); err1: free_cpumask_var(acpi_processor_perf->shared_cpu_map); err05: @@ -661,7 +661,7 @@ static int powernow_cpu_exit(struct cpufreq_policy *policy) { #ifdef CONFIG_X86_POWERNOW_K7_ACPI if (acpi_processor_perf) { - acpi_processor_unregister_performance(acpi_processor_perf, 0); + acpi_processor_unregister_performance(0); free_cpumask_var(acpi_processor_perf->shared_cpu_map); kfree(acpi_processor_perf); } diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 5c035d04d827..0b5bf135b090 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -795,7 +795,7 @@ err_out_mem: kfree(powernow_table); err_out: - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + acpi_processor_unregister_performance(data->cpu); /* data->acpi_data.state_count informs us at ->exit() * whether ACPI was used */ @@ -863,8 +863,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { if (data->acpi_data.state_count) - acpi_processor_unregister_performance(&data->acpi_data, - data->cpu); + acpi_processor_unregister_performance(data->cpu); free_cpumask_var(data->acpi_data.shared_cpu_map); } diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index ebef0d8279c7..64994e10638e 100644 --- 
a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -27,20 +27,31 @@ #include <linux/smp.h> #include <linux/of.h> #include <linux/reboot.h> +#include <linux/slab.h> #include <asm/cputhreads.h> #include <asm/firmware.h> #include <asm/reg.h> #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */ +#include <asm/opal.h> #define POWERNV_MAX_PSTATES 256 #define PMSR_PSAFE_ENABLE (1UL << 30) #define PMSR_SPR_EM_DISABLE (1UL << 31) #define PMSR_MAX(x) ((x >> 32) & 0xFF) -#define PMSR_LP(x) ((x >> 48) & 0xFF) static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; -static bool rebooting, throttled; +static bool rebooting, throttled, occ_reset; + +static struct chip { + unsigned int id; + bool throttled; + cpumask_t mask; + struct work_struct throttle; + bool restore; +} *chips; + +static int nr_chips; /* * Note: The set of pstates consists of contiguous integers, the @@ -298,28 +309,35 @@ static inline unsigned int get_nominal_index(void) return powernv_pstate_info.max - powernv_pstate_info.nominal; } -static void powernv_cpufreq_throttle_check(unsigned int cpu) +static void powernv_cpufreq_throttle_check(void *data) { + unsigned int cpu = smp_processor_id(); unsigned long pmsr; - int pmsr_pmax, pmsr_lp; + int pmsr_pmax, i; pmsr = get_pmspr(SPRN_PMSR); + for (i = 0; i < nr_chips; i++) + if (chips[i].id == cpu_to_chip_id(cpu)) + break; + /* Check for Pmax Capping */ pmsr_pmax = (s8)PMSR_MAX(pmsr); if (pmsr_pmax != powernv_pstate_info.max) { - throttled = true; - pr_info("CPU %d Pmax is reduced to %d\n", cpu, pmsr_pmax); - pr_info("Max allowed Pstate is capped\n"); + if (chips[i].throttled) + goto next; + chips[i].throttled = true; + pr_info("CPU %d on Chip %u has Pmax reduced to %d\n", cpu, + chips[i].id, pmsr_pmax); + } else if (chips[i].throttled) { + chips[i].throttled = false; + pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, + chips[i].id, pmsr_pmax); } - /* - * Check for Psafe by reading 
LocalPstate - * or check if Psafe_mode_active is set in PMSR. - */ - pmsr_lp = (s8)PMSR_LP(pmsr); - if ((pmsr_lp < powernv_pstate_info.min) || - (pmsr & PMSR_PSAFE_ENABLE)) { + /* Check if Psafe_mode_active is set in PMSR. */ +next: + if (pmsr & PMSR_PSAFE_ENABLE) { throttled = true; pr_info("Pstate set to safe frequency\n"); } @@ -350,7 +368,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, return 0; if (!throttled) - powernv_cpufreq_throttle_check(smp_processor_id()); + powernv_cpufreq_throttle_check(NULL); freq_data.pstate_id = powernv_freqs[new_index].driver_data; @@ -395,6 +413,119 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { .notifier_call = powernv_cpufreq_reboot_notifier, }; +void powernv_cpufreq_work_fn(struct work_struct *work) +{ + struct chip *chip = container_of(work, struct chip, throttle); + unsigned int cpu; + cpumask_var_t mask; + + smp_call_function_any(&chip->mask, + powernv_cpufreq_throttle_check, NULL, 0); + + if (!chip->restore) + return; + + chip->restore = false; + cpumask_copy(mask, &chip->mask); + for_each_cpu_and(cpu, mask, cpu_online_mask) { + int index, tcpu; + struct cpufreq_policy policy; + + cpufreq_get_policy(&policy, cpu); + cpufreq_frequency_table_target(&policy, policy.freq_table, + policy.cur, + CPUFREQ_RELATION_C, &index); + powernv_cpufreq_target_index(&policy, index); + for_each_cpu(tcpu, policy.cpus) + cpumask_clear_cpu(tcpu, mask); + } +} + +static char throttle_reason[][30] = { + "No throttling", + "Power Cap", + "Processor Over Temperature", + "Power Supply Failure", + "Over Current", + "OCC Reset" + }; + +static int powernv_cpufreq_occ_msg(struct notifier_block *nb, + unsigned long msg_type, void *_msg) +{ + struct opal_msg *msg = _msg; + struct opal_occ_msg omsg; + int i; + + if (msg_type != OPAL_MSG_OCC) + return 0; + + omsg.type = be64_to_cpu(msg->params[0]); + + switch (omsg.type) { + case OCC_RESET: + occ_reset = true; + pr_info("OCC (On Chip Controller - enforces hard 
thermal/power limits) Resetting\n"); + /* + * powernv_cpufreq_throttle_check() is called in + * target() callback which can detect the throttle state + * for governors like ondemand. + * But static governors will not call target() often thus + * report throttling here. + */ + if (!throttled) { + throttled = true; + pr_crit("CPU frequency is throttled for duration\n"); + } + + break; + case OCC_LOAD: + pr_info("OCC Loading, CPU frequency is throttled until OCC is started\n"); + break; + case OCC_THROTTLE: + omsg.chip = be64_to_cpu(msg->params[1]); + omsg.throttle_status = be64_to_cpu(msg->params[2]); + + if (occ_reset) { + occ_reset = false; + throttled = false; + pr_info("OCC Active, CPU frequency is no longer throttled\n"); + + for (i = 0; i < nr_chips; i++) { + chips[i].restore = true; + schedule_work(&chips[i].throttle); + } + + return 0; + } + + if (omsg.throttle_status && + omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) + pr_info("OCC: Chip %u Pmax reduced due to %s\n", + (unsigned int)omsg.chip, + throttle_reason[omsg.throttle_status]); + else if (!omsg.throttle_status) + pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip, + throttle_reason[omsg.throttle_status]); + else + return 0; + + for (i = 0; i < nr_chips; i++) + if (chips[i].id == omsg.chip) { + if (!omsg.throttle_status) + chips[i].restore = true; + schedule_work(&chips[i].throttle); + } + } + return 0; +} + +static struct notifier_block powernv_cpufreq_opal_nb = { + .notifier_call = powernv_cpufreq_occ_msg, + .next = NULL, + .priority = 0, +}; + static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) { struct powernv_smp_call_data freq_data; @@ -414,6 +545,36 @@ static struct cpufreq_driver powernv_cpufreq_driver = { .attr = powernv_cpu_freq_attr, }; +static int init_chip_info(void) +{ + unsigned int chip[256]; + unsigned int cpu, i; + unsigned int prev_chip_id = UINT_MAX; + + for_each_possible_cpu(cpu) { + unsigned int id = cpu_to_chip_id(cpu); + + if (prev_chip_id != id) { + 
prev_chip_id = id; + chip[nr_chips++] = id; + } + } + + chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL); + if (!chips) + return -ENOMEM; + + for (i = 0; i < nr_chips; i++) { + chips[i].id = chip[i]; + chips[i].throttled = false; + cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); + INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); + chips[i].restore = false; + } + + return 0; +} + static int __init powernv_cpufreq_init(void) { int rc = 0; @@ -429,7 +590,13 @@ static int __init powernv_cpufreq_init(void) return rc; } + /* Populate chip info */ + rc = init_chip_info(); + if (rc) + return rc; + register_reboot_notifier(&powernv_cpufreq_reboot_nb); + opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); return cpufreq_register_driver(&powernv_cpufreq_driver); } module_init(powernv_cpufreq_init); @@ -437,6 +604,8 @@ module_init(powernv_cpufreq_init); static void __exit powernv_cpufreq_exit(void) { unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); + opal_message_notifier_unregister(OPAL_MSG_OCC, + &powernv_cpufreq_opal_nb); cpufreq_unregister_driver(&powernv_cpufreq_driver); } module_exit(powernv_cpufreq_exit); diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index d29e8da396a0..7969f7690498 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -97,8 +97,8 @@ static int pmi_notifier(struct notifier_block *nb, struct cpufreq_frequency_table *cbe_freqs; u8 node; - /* Should this really be called for CPUFREQ_ADJUST, CPUFREQ_INCOMPATIBLE - * and CPUFREQ_NOTIFY policy events?) + /* Should this really be called for CPUFREQ_ADJUST and CPUFREQ_NOTIFY + * policy events?) 
*/ if (event == CPUFREQ_START) return 0; diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c index ffa3389e535b..992ce6f9abec 100644 --- a/drivers/cpufreq/sfi-cpufreq.c +++ b/drivers/cpufreq/sfi-cpufreq.c @@ -45,12 +45,10 @@ static int sfi_parse_freq(struct sfi_table_header *table) pentry = (struct sfi_freq_table_entry *)sb->pentry; totallen = num_freq_table_entries * sizeof(*pentry); - sfi_cpufreq_array = kzalloc(totallen, GFP_KERNEL); + sfi_cpufreq_array = kmemdup(pentry, totallen, GFP_KERNEL); if (!sfi_cpufreq_array) return -ENOMEM; - memcpy(sfi_cpufreq_array, pentry, totallen); - return 0; } diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index 4ab7a2156672..15d3214aaa00 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -386,7 +386,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int prev_speed; unsigned int ret = 0; unsigned long flags; - struct timeval tv1, tv2; + ktime_t tv1, tv2; if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) return -EINVAL; @@ -415,14 +415,14 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, /* start latency measurement */ if (transition_latency) - do_gettimeofday(&tv1); + tv1 = ktime_get(); /* switch to high state */ set_state(SPEEDSTEP_HIGH); /* end latency measurement */ if (transition_latency) - do_gettimeofday(&tv2); + tv2 = ktime_get(); *high_speed = speedstep_get_frequency(processor); if (!*high_speed) { @@ -442,8 +442,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, set_state(SPEEDSTEP_LOW); if (transition_latency) { - *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + - tv2.tv_usec - tv1.tv_usec; + *transition_latency = ktime_to_us(ktime_sub(tv2, tv1)); pr_debug("transition latency is %u uSec\n", *transition_latency); /* convert uSec to nSec and add 20% for safety reasons */ diff --git a/drivers/video/fbdev/pxafb.c 
b/drivers/video/fbdev/pxafb.c index 7245611ec963..94813af97f09 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -1668,7 +1668,6 @@ pxafb_freq_policy(struct notifier_block *nb, unsigned long val, void *data) switch (val) { case CPUFREQ_ADJUST: - case CPUFREQ_INCOMPATIBLE: pr_debug("min dma period: %d ps, " "new clock %d kHz\n", pxafb_display_dma_period(var), policy->max); diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c index 89dd7e02197f..dcf774c15889 100644 --- a/drivers/video/fbdev/sa1100fb.c +++ b/drivers/video/fbdev/sa1100fb.c @@ -1042,7 +1042,6 @@ sa1100fb_freq_policy(struct notifier_block *nb, unsigned long val, switch (val) { case CPUFREQ_ADJUST: - case CPUFREQ_INCOMPATIBLE: dev_dbg(fbi->dev, "min dma period: %d ps, " "new clock %d kHz\n", sa1100fb_min_dma_period(fbi), policy->max); diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 59fc190f1e92..70fa438000af 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -560,11 +560,9 @@ static int __init xen_acpi_processor_init(void) return 0; err_unregister: - for_each_possible_cpu(i) { - struct acpi_processor_performance *perf; - perf = per_cpu_ptr(acpi_perf_data, i); - acpi_processor_unregister_performance(perf, i); - } + for_each_possible_cpu(i) + acpi_processor_unregister_performance(i); + err_out: /* Freeing a NULL pointer is OK: alloc_percpu zeroes. 
*/ free_acpi_perf_data(); @@ -579,11 +577,9 @@ static void __exit xen_acpi_processor_exit(void) kfree(acpi_ids_done); kfree(acpi_id_present); kfree(acpi_id_cst_present); - for_each_possible_cpu(i) { - struct acpi_processor_performance *perf; - perf = per_cpu_ptr(acpi_perf_data, i); - acpi_processor_unregister_performance(perf, i); - } + for_each_possible_cpu(i) + acpi_processor_unregister_performance(i); + free_acpi_perf_data(); } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 2c4e7a9c1725..ff5f135f16b1 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -228,10 +228,7 @@ extern int acpi_processor_preregister_performance(struct extern int acpi_processor_register_performance(struct acpi_processor_performance *performance, unsigned int cpu); -extern void acpi_processor_unregister_performance(struct - acpi_processor_performance - *performance, - unsigned int cpu); +extern void acpi_processor_unregister_performance(unsigned int cpu); /* note: this locks both the calling module and the processor module if a _PPC object exists, rmmod is disallowed then */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bde1e567b3a9..6ff6a4d95eea 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -51,11 +51,9 @@ struct cpufreq_cpuinfo { unsigned int transition_latency; }; -struct cpufreq_real_policy { +struct cpufreq_user_policy { unsigned int min; /* in kHz */ unsigned int max; /* in kHz */ - unsigned int policy; /* see above */ - struct cpufreq_governor *governor; /* see below */ }; struct cpufreq_policy { @@ -88,7 +86,7 @@ struct cpufreq_policy { struct work_struct update; /* if update_policy() needs to be * called, but you're in IRQ context */ - struct cpufreq_real_policy user_policy; + struct cpufreq_user_policy user_policy; struct cpufreq_frequency_table *freq_table; struct list_head policy_list; @@ -369,11 +367,10 @@ static inline void cpufreq_resume(void) {} /* Policy Notifiers */ #define 
CPUFREQ_ADJUST (0) -#define CPUFREQ_INCOMPATIBLE (1) -#define CPUFREQ_NOTIFY (2) -#define CPUFREQ_START (3) -#define CPUFREQ_CREATE_POLICY (4) -#define CPUFREQ_REMOVE_POLICY (5) +#define CPUFREQ_NOTIFY (1) +#define CPUFREQ_START (2) +#define CPUFREQ_CREATE_POLICY (3) +#define CPUFREQ_REMOVE_POLICY (4) #ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); |