From bdc21a4d286c6917fe966fd765de99411095b1b4 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Mon, 21 Mar 2022 09:57:22 +0000 Subject: PM: EM: Add .get_cost() callback The Energy Model (EM) supports devices which report abstract power scale, not only real Watts. The primary goal for EM is to enable the Energy Aware Scheduler (EAS) for a given platform. Some of the platforms might not be able to deliver proper power values. The only information that they might have is the relative efficiency between CPU types. Thus, it makes sense to remove some restrictions in the EM framework and introduce a mechanism which would support those platforms. What is crucial for EAS to operate is the 'cost' field in the EM. The 'cost' is calculated internally in EM framework based on knowledge from 'power' values. The 'cost' values must be strictly increasing. The existing API with its 'power' value size restrictions cannot guarantee that the 'cost' will meet this requirement. Since the platform is missing this detailed information, but has only efficiency details, introduce a new custom callback in the EM framework. The new callback would allow to provide the 'cost' values which reflect efficiency of the CPUs. This would allow to provide EAS information which has different relation than what would be forced by the EM internal formulas calculating 'cost' values. Thanks to this new callback it is possible to create a system view for EAS which has no overlapping performance states across many Performance Domains. Signed-off-by: Lukasz Luba Reviewed-by: Ionela Voinescu Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 9f3c400bc52d..0a3a5663177b 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -114,9 +114,30 @@ struct em_data_callback { */ int (*active_power)(unsigned long *power, unsigned long *freq, struct device *dev); + + /** + * get_cost() - Provide the cost at the given performance state of + * a device + * @dev : Device for which we do this operation (can be a CPU) + * @freq : Frequency at the performance state in kHz + * @cost : The cost value for the performance state + * (modified) + * + * In case of CPUs, the cost is the one of a single CPU in the domain. + * It is expected to fit in the [0, EM_MAX_POWER] range due to internal + * usage in EAS calculation. + * + * Return 0 on success, or appropriate error value in case of failure. + */ + int (*get_cost)(struct device *dev, unsigned long freq, + unsigned long *cost); }; -#define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb } #define EM_SET_ACTIVE_POWER_CB(em_cb, cb) ((em_cb).active_power = cb) +#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) \ + { .active_power = _active_power_cb, \ + .get_cost = _cost_cb } +#define EM_DATA_CB(_active_power_cb) \ + EM_ADV_DATA_CB(_active_power_cb, NULL) struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); @@ -264,6 +285,7 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) #else struct em_data_callback {}; +#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { } #define EM_DATA_CB(_active_power_cb) { } #define EM_SET_ACTIVE_POWER_CB(em_cb, cb) do { } while (0) -- cgit v1.2.3 From fc3a9a9858478ab5f8441765a3f9552a0ceba10c Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Mon, 21 Mar 2022 09:57:23 +0000 Subject: PM: EM: Add artificial EM flag The Energy Model (EM) can be used on platforms which are missing real power information. Those platforms would implement .get_cost() which populates needed values for the Energy Aware Scheduler (EAS). The EAS doesn't use 'power' fields from EM, but other frameworks might use them. Thus, to avoid miss-usage of this specific type of EM, introduce a new flags which can be checked by other frameworks. Signed-off-by: Pierre Gondois Signed-off-by: Lukasz Luba Reviewed-by: Ionela Voinescu Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 5 +++++ kernel/power/energy_model.c | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 0a3a5663177b..92e82a322859 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -67,11 +67,16 @@ struct em_perf_domain { * * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating * energy consumption. + * + * EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be + * created by platform missing real power information */ #define EM_PERF_DOMAIN_MILLIWATTS BIT(0) #define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1) +#define EM_PERF_DOMAIN_ARTIFICIAL BIT(2) #define em_span_cpus(em) (to_cpumask((em)->cpus)) +#define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL) #ifdef CONFIG_ENERGY_MODEL #define EM_MAX_POWER 0xFFFF diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 0153b0ca7b23..6ecee99af842 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -384,6 +384,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, if (milliwatts) dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS; + else if (cb->get_cost) + dev->em_pd->flags |= EM_PERF_DOMAIN_ARTIFICIAL; em_cpufreq_update_efficiencies(dev); -- cgit v1.2.3 From 75a3a99a5a9886af13be44e640cb415ebda80db2 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Mon, 21 Mar 2022 09:57:25 +0000 Subject: PM: EM: Change the order of arguments in the .active_power() callback The .active_power() callback passes the device pointer when it's called. Aligned with a convetion present in other subsystems and pass the 'dev' as a first argument. It looks more cleaner. Adjust all affected drivers which implement that API callback. Suggested-by: Ionela Voinescu Signed-off-by: Lukasz Luba Reviewed-by: Ionela Voinescu Signed-off-by: Rafael J. Wysocki --- Documentation/power/energy-model.rst | 4 ++-- drivers/cpufreq/mediatek-cpufreq-hw.c | 4 ++-- drivers/cpufreq/scmi-cpufreq.c | 4 ++-- drivers/opp/of.c | 6 +++--- include/linux/energy_model.h | 6 +++--- kernel/power/energy_model.c | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst index 49549aab41b4..fd29ed2506c0 100644 --- a/Documentation/power/energy-model.rst +++ b/Documentation/power/energy-model.rst @@ -181,8 +181,8 @@ EM framework:: -> drivers/cpufreq/foo_cpufreq.c - 01 static int est_power(unsigned long *mW, unsigned long *KHz, - 02 struct device *dev) + 01 static int est_power(struct device *dev, unsigned long *mW, + 02 unsigned long *KHz) 03 { 04 long freq, power; 05 diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 0a94c56ddad2..813cccbfe934 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -51,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = { }; static int __maybe_unused -mtk_cpufreq_get_cpu_power(unsigned long *mW, - unsigned long *KHz, struct device *cpu_dev) +mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW, + unsigned long *KHz) { struct mtk_cpufreq_data *data; struct cpufreq_policy *policy; diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 919fa6e3f462..6d2a4cf46db7 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -96,8 +96,8 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) } static int __maybe_unused -scmi_get_cpu_power(unsigned long *power, unsigned long *KHz, - struct device *cpu_dev) +scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power, + unsigned long *KHz) { unsigned long Hz; int ret, domain; diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 440ab5a03df9..485ea980bde7 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1448,7 +1448,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node); * Returns 0 on success or a proper -EINVAL value in case of error. */ static int __maybe_unused -_get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev) +_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz) { struct dev_pm_opp *opp; unsigned long opp_freq, opp_power; @@ -1482,8 +1482,8 @@ _get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev) * Returns -EINVAL if the power calculation failed because of missing * parameters, 0 otherwise. */ -static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz, - struct device *dev) +static int __maybe_unused _get_power(struct device *dev, unsigned long *mW, + unsigned long *kHz) { struct dev_pm_opp *opp; struct device_node *np; diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 92e82a322859..8419bffb4398 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -101,11 +101,11 @@ struct em_data_callback { /** * active_power() - Provide power at the next performance state of * a device + * @dev : Device for which we do this operation (can be a CPU) * @power : Active power at the performance state * (modified) * @freq : Frequency at the performance state in kHz * (modified) - * @dev : Device for which we do this operation (can be a CPU) * * active_power() must find the lowest performance state of 'dev' above * 'freq' and update 'power' and 'freq' to the matching active power @@ -117,8 +117,8 @@ struct em_data_callback { * * Return 0 on success. */ - int (*active_power)(unsigned long *power, unsigned long *freq, - struct device *dev); + int (*active_power)(struct device *dev, unsigned long *power, + unsigned long *freq); /** * get_cost() - Provide the cost at the given performance state of diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 95a3b33001f6..babefc72085d 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -140,7 +140,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, * lowest performance state of 'dev' above 'freq' and updates * 'power' and 'freq' accordingly. */ - ret = cb->active_power(&power, &freq, dev); + ret = cb->active_power(dev, &power, &freq); if (ret) { dev_err(dev, "EM: invalid perf. state: %d\n", ret); -- cgit v1.2.3