summary | refs | log | tree | commit | diff
path: root/drivers/cpuidle
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r-- drivers/cpuidle/Makefile 3
-rw-r--r-- drivers/cpuidle/cpuidle-arm.c 8
-rw-r--r-- drivers/cpuidle/cpuidle-big_little.c 2
-rw-r--r-- drivers/cpuidle/cpuidle-psci-domain.c 2
-rw-r--r-- drivers/cpuidle/cpuidle-psci.c 89
-rw-r--r-- drivers/cpuidle/cpuidle-psci.h 4
-rw-r--r-- drivers/cpuidle/cpuidle-qcom-spm.c 2
-rw-r--r-- drivers/cpuidle/cpuidle-riscv-sbi.c 4
-rw-r--r-- drivers/cpuidle/governors/menu.c 131
-rw-r--r-- drivers/cpuidle/governors/teo.c 4
10 files changed, 144 insertions, 105 deletions
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index d103342b7cfc..1de9e92c5b0f 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -3,6 +3,9 @@
# Makefile for cpuidle.
#
+# Branch profiling isn't noinstr-safe
+ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING
+
obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index caba6f4bb1b7..e044fefdb816 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -137,9 +137,9 @@ out_kfree_drv:
/*
* arm_idle_init - Initializes arm cpuidle driver
*
- * Initializes arm cpuidle driver for all CPUs, if any CPU fails
- * to register cpuidle driver then rollback to cancel all CPUs
- * registration.
+ * Initializes arm cpuidle driver for all present CPUs, if any
+ * CPU fails to register cpuidle driver then rollback to cancel
+ * all CPUs registration.
*/
static int __init arm_idle_init(void)
{
@@ -147,7 +147,7 @@ static int __init arm_idle_init(void)
struct cpuidle_driver *drv;
struct cpuidle_device *dev;
- for_each_possible_cpu(cpu) {
+ for_each_present_cpu(cpu) {
ret = arm_idle_init_cpu(cpu);
if (ret)
goto out_fail;
diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c
index 74972deda0ea..4abba42fcc31 100644
--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -148,7 +148,7 @@ static int __init bl_idle_driver_init(struct cpuidle_driver *drv, int part_id)
if (!cpumask)
return -ENOMEM;
- for_each_possible_cpu(cpu)
+ for_each_present_cpu(cpu)
if (smp_cpuid_part(cpu) == part_id)
cpumask_set_cpu(cpu, cpumask);
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index 5fb5228f6bf1..2041f59116ce 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -43,7 +43,7 @@ static int psci_pd_power_off(struct generic_pm_domain *pd)
/* OSI mode is enabled, set the corresponding domain state. */
pd_state = state->data;
- psci_set_domain_state(*pd_state);
+ psci_set_domain_state(pd, pd->state_idx, *pd_state);
return 0;
}
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index 2562dc001fc1..4e1ba35deda9 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -16,7 +16,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/platform_device.h>
+#include <linux/device/faux.h>
#include <linux/psci.h>
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
@@ -25,6 +25,7 @@
#include <linux/syscore_ops.h>
#include <asm/cpuidle.h>
+#include <trace/events/power.h>
#include "cpuidle-psci.h"
#include "dt_idle_states.h"
@@ -35,19 +36,30 @@ struct psci_cpuidle_data {
struct device *dev;
};
+struct psci_cpuidle_domain_state {
+ struct generic_pm_domain *pd;
+ unsigned int state_idx;
+ u32 state;
+};
+
static DEFINE_PER_CPU_READ_MOSTLY(struct psci_cpuidle_data, psci_cpuidle_data);
-static DEFINE_PER_CPU(u32, domain_state);
+static DEFINE_PER_CPU(struct psci_cpuidle_domain_state, psci_domain_state);
static bool psci_cpuidle_use_syscore;
static bool psci_cpuidle_use_cpuhp;
-void psci_set_domain_state(u32 state)
+void psci_set_domain_state(struct generic_pm_domain *pd, unsigned int state_idx,
+ u32 state)
{
- __this_cpu_write(domain_state, state);
+ struct psci_cpuidle_domain_state *ds = this_cpu_ptr(&psci_domain_state);
+
+ ds->pd = pd;
+ ds->state_idx = state_idx;
+ ds->state = state;
}
-static inline u32 psci_get_domain_state(void)
+static inline void psci_clear_domain_state(void)
{
- return __this_cpu_read(domain_state);
+ __this_cpu_write(psci_domain_state.state, 0);
}
static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
@@ -57,7 +69,8 @@ static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data);
u32 *states = data->psci_states;
struct device *pd_dev = data->dev;
- u32 state;
+ struct psci_cpuidle_domain_state *ds;
+ u32 state = states[idx];
int ret;
ret = cpu_pm_enter();
@@ -70,11 +83,13 @@ static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
else
pm_runtime_put_sync_suspend(pd_dev);
- state = psci_get_domain_state();
- if (!state)
- state = states[idx];
+ ds = this_cpu_ptr(&psci_domain_state);
+ if (ds->state)
+ state = ds->state;
+ trace_psci_domain_idle_enter(dev->cpu, state, s2idle);
ret = psci_cpu_suspend_enter(state) ? -1 : idx;
+ trace_psci_domain_idle_exit(dev->cpu, state, s2idle);
if (s2idle)
dev_pm_genpd_resume(pd_dev);
@@ -83,8 +98,12 @@ static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
cpu_pm_exit();
+ /* Correct domain-idlestate statistics if we failed to enter. */
+ if (ret == -1 && ds->state)
+ pm_genpd_inc_rejected(ds->pd, ds->state_idx);
+
/* Clear the domain state to start fresh when back from idle. */
- psci_set_domain_state(0);
+ psci_clear_domain_state();
return ret;
}
@@ -118,7 +137,7 @@ static int psci_idle_cpuhp_down(unsigned int cpu)
if (pd_dev) {
pm_runtime_put_sync(pd_dev);
/* Clear domain state to start fresh at next online. */
- psci_set_domain_state(0);
+ psci_clear_domain_state();
}
return 0;
@@ -144,7 +163,7 @@ static void psci_idle_syscore_switch(bool suspend)
/* Clear domain state to re-start fresh. */
if (!cleared) {
- psci_set_domain_state(0);
+ psci_clear_domain_state();
cleared = true;
}
}
@@ -400,18 +419,18 @@ deinit:
/*
* psci_idle_probe - Initializes PSCI cpuidle driver
*
- * Initializes PSCI cpuidle driver for all CPUs, if any CPU fails
+ * Initializes PSCI cpuidle driver for all present CPUs, if any CPU fails
* to register cpuidle driver then rollback to cancel all CPUs
* registration.
*/
-static int psci_cpuidle_probe(struct platform_device *pdev)
+static int psci_cpuidle_probe(struct faux_device *fdev)
{
int cpu, ret;
struct cpuidle_driver *drv;
struct cpuidle_device *dev;
- for_each_possible_cpu(cpu) {
- ret = psci_idle_init_cpu(&pdev->dev, cpu);
+ for_each_present_cpu(cpu) {
+ ret = psci_idle_init_cpu(&fdev->dev, cpu);
if (ret)
goto out_fail;
}
@@ -431,26 +450,36 @@ out_fail:
return ret;
}
-static struct platform_driver psci_cpuidle_driver = {
+static struct faux_device_ops psci_cpuidle_ops = {
.probe = psci_cpuidle_probe,
- .driver = {
- .name = "psci-cpuidle",
- },
};
+static bool __init dt_idle_state_present(void)
+{
+ struct device_node *cpu_node __free(device_node) =
+ of_cpu_device_node_get(cpumask_first(cpu_possible_mask));
+ if (!cpu_node)
+ return false;
+
+ struct device_node *state_node __free(device_node) =
+ of_get_cpu_state_node(cpu_node, 0);
+ if (!state_node)
+ return false;
+
+ return !!of_match_node(psci_idle_state_match, state_node);
+}
+
static int __init psci_idle_init(void)
{
- struct platform_device *pdev;
- int ret;
+ struct faux_device *fdev;
- ret = platform_driver_register(&psci_cpuidle_driver);
- if (ret)
- return ret;
+ if (!dt_idle_state_present())
+ return 0;
- pdev = platform_device_register_simple("psci-cpuidle", -1, NULL, 0);
- if (IS_ERR(pdev)) {
- platform_driver_unregister(&psci_cpuidle_driver);
- return PTR_ERR(pdev);
+ fdev = faux_device_create("psci-cpuidle", NULL, &psci_cpuidle_ops);
+ if (!fdev) {
+ pr_err("Failed to create psci-cpuidle device\n");
+ return -ENODEV;
}
return 0;
diff --git a/drivers/cpuidle/cpuidle-psci.h b/drivers/cpuidle/cpuidle-psci.h
index ef004ec7a7c5..d29cbd796cd5 100644
--- a/drivers/cpuidle/cpuidle-psci.h
+++ b/drivers/cpuidle/cpuidle-psci.h
@@ -4,8 +4,10 @@
#define __CPUIDLE_PSCI_H
struct device_node;
+struct generic_pm_domain;
-void psci_set_domain_state(u32 state);
+void psci_set_domain_state(struct generic_pm_domain *pd, unsigned int state_idx,
+ u32 state);
int psci_dt_parse_state_node(struct device_node *np, u32 *state);
#endif /* __CPUIDLE_PSCI_H */
diff --git a/drivers/cpuidle/cpuidle-qcom-spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c
index 3ab240e0e122..5f386761b156 100644
--- a/drivers/cpuidle/cpuidle-qcom-spm.c
+++ b/drivers/cpuidle/cpuidle-qcom-spm.c
@@ -135,7 +135,7 @@ static int spm_cpuidle_drv_probe(struct platform_device *pdev)
if (ret)
return dev_err_probe(&pdev->dev, ret, "set warm boot addr failed");
- for_each_possible_cpu(cpu) {
+ for_each_present_cpu(cpu) {
ret = spm_cpuidle_register(&pdev->dev, cpu);
if (ret && ret != -ENODEV) {
dev_err(&pdev->dev,
diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c
index 0c92a628bbd4..0fe1ece9fbdc 100644
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -529,8 +529,8 @@ static int sbi_cpuidle_probe(struct platform_device *pdev)
return ret;
}
- /* Initialize CPU idle driver for each CPU */
- for_each_possible_cpu(cpu) {
+ /* Initialize CPU idle driver for each present CPU */
+ for_each_present_cpu(cpu) {
ret = sbi_cpuidle_init_cpu(&pdev->dev, cpu);
if (ret) {
pr_debug("HART%ld: idle driver init failed\n",
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 28363bfa3e4c..52d5d26fc7c6 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -41,7 +41,7 @@
* the C state is required to actually break even on this cost. CPUIDLE
* provides us this duration in the "target_residency" field. So all that we
* need is a good prediction of how long we'll be idle. Like the traditional
- * menu governor, we start with the actual known "next timer event" time.
+ * menu governor, we take the actual known "next timer event" time.
*
* Since there are other source of wakeups (interrupts for example) than
* the next timer event, this estimation is rather optimistic. To get a
@@ -50,30 +50,21 @@
* duration always was 50% of the next timer tick, the correction factor will
* be 0.5.
*
- * menu uses a running average for this correction factor, however it uses a
- * set of factors, not just a single factor. This stems from the realization
- * that the ratio is dependent on the order of magnitude of the expected
- * duration; if we expect 500 milliseconds of idle time the likelihood of
- * getting an interrupt very early is much higher than if we expect 50 micro
- * seconds of idle time. A second independent factor that has big impact on
- * the actual factor is if there is (disk) IO outstanding or not.
- * (as a special twist, we consider every sleep longer than 50 milliseconds
- * as perfect; there are no power gains for sleeping longer than this)
- *
- * For these two reasons we keep an array of 12 independent factors, that gets
- * indexed based on the magnitude of the expected duration as well as the
- * "is IO outstanding" property.
+ * menu uses a running average for this correction factor, but it uses a set of
+ * factors, not just a single factor. This stems from the realization that the
+ * ratio is dependent on the order of magnitude of the expected duration; if we
+ * expect 500 milliseconds of idle time the likelihood of getting an interrupt
+ * very early is much higher than if we expect 50 micro seconds of idle time.
+ * For this reason, menu keeps an array of 6 independent factors, that gets
+ * indexed based on the magnitude of the expected duration.
*
* Repeatable-interval-detector
* ----------------------------
* There are some cases where "next timer" is a completely unusable predictor:
* Those cases where the interval is fixed, for example due to hardware
- * interrupt mitigation, but also due to fixed transfer rate devices such as
- * mice.
+ * interrupt mitigation, but also due to fixed transfer rate devices like mice.
* For this, we use a different predictor: We track the duration of the last 8
- * intervals and if the stand deviation of these 8 intervals is below a
- * threshold value, we use the average of these intervals as prediction.
- *
+ * intervals and use them to estimate the duration of the next one.
*/
struct menu_device {
@@ -116,53 +107,52 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
*/
static unsigned int get_typical_interval(struct menu_device *data)
{
- int i, divisor;
- unsigned int min, max, thresh, avg;
- uint64_t sum, variance;
-
- thresh = INT_MAX; /* Discard outliers above this value */
+ s64 value, min_thresh = -1, max_thresh = UINT_MAX;
+ unsigned int max, min, divisor;
+ u64 avg, variance, avg_sq;
+ int i;
again:
-
- /* First calculate the average of past intervals */
- min = UINT_MAX;
+ /* Compute the average and variance of past intervals. */
max = 0;
- sum = 0;
+ min = UINT_MAX;
+ avg = 0;
+ variance = 0;
divisor = 0;
for (i = 0; i < INTERVALS; i++) {
- unsigned int value = data->intervals[i];
- if (value <= thresh) {
- sum += value;
- divisor++;
- if (value > max)
- max = value;
-
- if (value < min)
- min = value;
- }
+ value = data->intervals[i];
+ /*
+ * Discard the samples outside the interval between the min and
+ * max thresholds.
+ */
+ if (value <= min_thresh || value >= max_thresh)
+ continue;
+
+ divisor++;
+
+ avg += value;
+ variance += value * value;
+
+ if (value > max)
+ max = value;
+
+ if (value < min)
+ min = value;
}
if (!max)
return UINT_MAX;
- if (divisor == INTERVALS)
- avg = sum >> INTERVAL_SHIFT;
- else
- avg = div_u64(sum, divisor);
-
- /* Then try to determine variance */
- variance = 0;
- for (i = 0; i < INTERVALS; i++) {
- unsigned int value = data->intervals[i];
- if (value <= thresh) {
- int64_t diff = (int64_t)value - avg;
- variance += diff * diff;
- }
- }
- if (divisor == INTERVALS)
+ if (divisor == INTERVALS) {
+ avg >>= INTERVAL_SHIFT;
variance >>= INTERVAL_SHIFT;
- else
+ } else {
+ do_div(avg, divisor);
do_div(variance, divisor);
+ }
+
+ avg_sq = avg * avg;
+ variance -= avg_sq;
/*
* The typical interval is obtained when standard deviation is
@@ -177,25 +167,40 @@ again:
* Use this result only if there is no timer to wake us up sooner.
*/
if (likely(variance <= U64_MAX/36)) {
- if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3))
- || variance <= 400) {
+ if ((avg_sq > variance * 36 && divisor * 4 >= INTERVALS * 3) ||
+ variance <= 400)
return avg;
- }
}
/*
- * If we have outliers to the upside in our distribution, discard
- * those by setting the threshold to exclude these outliers, then
+ * If there are outliers, discard them by setting thresholds to exclude
+ * data points at a large enough distance from the average, then
* calculate the average and standard deviation again. Once we get
- * down to the bottom 3/4 of our samples, stop excluding samples.
+ * down to the last 3/4 of our samples, stop excluding samples.
*
* This can deal with workloads that have long pauses interspersed
* with sporadic activity with a bunch of short pauses.
*/
- if ((divisor * 4) <= INTERVALS * 3)
+ if (divisor * 4 <= INTERVALS * 3) {
+ /*
+ * If there are sufficiently many data points still under
+ * consideration after the outliers have been eliminated,
+ * returning without a prediction would be a mistake because it
+ * is likely that the next interval will not exceed the current
+ * maximum, so return the latter in that case.
+ */
+ if (divisor >= INTERVALS / 2)
+ return max;
+
return UINT_MAX;
+ }
+
+ /* Update the thresholds for the next round. */
+ if (avg - min > max - avg)
+ min_thresh = min;
+ else
+ max_thresh = max;
- thresh = max - 1;
goto again;
}
@@ -250,7 +255,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
*/
data->next_timer_ns = KTIME_MAX;
delta_tick = TICK_NSEC / 2;
- data->bucket = which_bucket(KTIME_MAX);
+ data->bucket = BUCKETS - 1;
}
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 8fe5e1b47ef9..bfa55c1eab5b 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -19,7 +19,7 @@
*
* Of course, non-timer wakeup sources are more important in some use cases,
* but even then it is generally unnecessary to consider idle duration values
- * greater than the time time till the next timer event, referred as the sleep
+ * greater than the time till the next timer event, referred as the sleep
* length in what follows, because the closest timer will ultimately wake up the
* CPU anyway unless it is woken up earlier.
*
@@ -311,7 +311,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
struct cpuidle_state *s = &drv->states[i];
/*
- * Update the sums of idle state mertics for all of the states
+ * Update the sums of idle state metrics for all of the states
* shallower than the current one.
*/
intercept_sum += prev_bin->intercepts;