diff options
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r-- | drivers/cpuidle/Makefile | 3 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-arm.c | 8 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-big_little.c | 2 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-psci-domain.c | 2 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-psci.c | 89 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-psci.h | 4 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-qcom-spm.c | 2 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-riscv-sbi.c | 4 | ||||
-rw-r--r-- | drivers/cpuidle/governors/menu.c | 131 | ||||
-rw-r--r-- | drivers/cpuidle/governors/teo.c | 4 |
10 files changed, 144 insertions, 105 deletions
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index d103342b7cfc..1de9e92c5b0f 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -3,6 +3,9 @@ # Makefile for cpuidle. # +# Branch profiling isn't noinstr-safe +ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING + obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index caba6f4bb1b7..e044fefdb816 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -137,9 +137,9 @@ out_kfree_drv: /* * arm_idle_init - Initializes arm cpuidle driver * - * Initializes arm cpuidle driver for all CPUs, if any CPU fails - * to register cpuidle driver then rollback to cancel all CPUs - * registration. + * Initializes arm cpuidle driver for all present CPUs, if any + * CPU fails to register cpuidle driver then rollback to cancel + * all CPUs registration. */ static int __init arm_idle_init(void) { @@ -147,7 +147,7 @@ static int __init arm_idle_init(void) struct cpuidle_driver *drv; struct cpuidle_device *dev; - for_each_possible_cpu(cpu) { + for_each_present_cpu(cpu) { ret = arm_idle_init_cpu(cpu); if (ret) goto out_fail; diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c index 74972deda0ea..4abba42fcc31 100644 --- a/drivers/cpuidle/cpuidle-big_little.c +++ b/drivers/cpuidle/cpuidle-big_little.c @@ -148,7 +148,7 @@ static int __init bl_idle_driver_init(struct cpuidle_driver *drv, int part_id) if (!cpumask) return -ENOMEM; - for_each_possible_cpu(cpu) + for_each_present_cpu(cpu) if (smp_cpuid_part(cpu) == part_id) cpumask_set_cpu(cpu, cpumask); diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index 5fb5228f6bf1..2041f59116ce 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -43,7 +43,7 @@ static int psci_pd_power_off(struct generic_pm_domain *pd) /* OSI mode is enabled, set the corresponding domain state. */ pd_state = state->data; - psci_set_domain_state(*pd_state); + psci_set_domain_state(pd, pd->state_idx, *pd_state); return 0; } diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index 2562dc001fc1..4e1ba35deda9 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -16,7 +16,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/of.h> -#include <linux/platform_device.h> +#include <linux/device/faux.h> #include <linux/psci.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> @@ -25,6 +25,7 @@ #include <linux/syscore_ops.h> #include <asm/cpuidle.h> +#include <trace/events/power.h> #include "cpuidle-psci.h" #include "dt_idle_states.h" @@ -35,19 +36,30 @@ struct psci_cpuidle_data { struct device *dev; }; +struct psci_cpuidle_domain_state { + struct generic_pm_domain *pd; + unsigned int state_idx; + u32 state; +}; + static DEFINE_PER_CPU_READ_MOSTLY(struct psci_cpuidle_data, psci_cpuidle_data); -static DEFINE_PER_CPU(u32, domain_state); +static DEFINE_PER_CPU(struct psci_cpuidle_domain_state, psci_domain_state); static bool psci_cpuidle_use_syscore; static bool psci_cpuidle_use_cpuhp; -void psci_set_domain_state(u32 state) +void psci_set_domain_state(struct generic_pm_domain *pd, unsigned int state_idx, + u32 state) { - __this_cpu_write(domain_state, state); + struct psci_cpuidle_domain_state *ds = this_cpu_ptr(&psci_domain_state); + + ds->pd = pd; + ds->state_idx = state_idx; + ds->state = state; } -static inline u32 psci_get_domain_state(void) +static inline void psci_clear_domain_state(void) { - return __this_cpu_read(domain_state); + __this_cpu_write(psci_domain_state.state, 0); } static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev, @@ -57,7 +69,8 @@ static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev, struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data); u32 *states = data->psci_states; struct device *pd_dev = data->dev; - u32 state; + struct psci_cpuidle_domain_state *ds; + u32 state = states[idx]; int ret; ret = cpu_pm_enter(); @@ -70,11 +83,13 @@ static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev, else pm_runtime_put_sync_suspend(pd_dev); - state = psci_get_domain_state(); - if (!state) - state = states[idx]; + ds = this_cpu_ptr(&psci_domain_state); + if (ds->state) + state = ds->state; + trace_psci_domain_idle_enter(dev->cpu, state, s2idle); ret = psci_cpu_suspend_enter(state) ? -1 : idx; + trace_psci_domain_idle_exit(dev->cpu, state, s2idle); if (s2idle) dev_pm_genpd_resume(pd_dev); @@ -83,8 +98,12 @@ static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev, cpu_pm_exit(); + /* Correct domain-idlestate statistics if we failed to enter. */ + if (ret == -1 && ds->state) + pm_genpd_inc_rejected(ds->pd, ds->state_idx); + /* Clear the domain state to start fresh when back from idle. */ - psci_set_domain_state(0); + psci_clear_domain_state(); return ret; } @@ -118,7 +137,7 @@ static int psci_idle_cpuhp_down(unsigned int cpu) if (pd_dev) { pm_runtime_put_sync(pd_dev); /* Clear domain state to start fresh at next online. */ - psci_set_domain_state(0); + psci_clear_domain_state(); } return 0; @@ -144,7 +163,7 @@ static void psci_idle_syscore_switch(bool suspend) /* Clear domain state to re-start fresh. */ if (!cleared) { - psci_set_domain_state(0); + psci_clear_domain_state(); cleared = true; } } @@ -400,18 +419,18 @@ deinit: /* * psci_idle_probe - Initializes PSCI cpuidle driver * - * Initializes PSCI cpuidle driver for all CPUs, if any CPU fails + * Initializes PSCI cpuidle driver for all present CPUs, if any CPU fails * to register cpuidle driver then rollback to cancel all CPUs * registration. */ -static int psci_cpuidle_probe(struct platform_device *pdev) +static int psci_cpuidle_probe(struct faux_device *fdev) { int cpu, ret; struct cpuidle_driver *drv; struct cpuidle_device *dev; - for_each_possible_cpu(cpu) { - ret = psci_idle_init_cpu(&pdev->dev, cpu); + for_each_present_cpu(cpu) { + ret = psci_idle_init_cpu(&fdev->dev, cpu); if (ret) goto out_fail; } @@ -431,26 +450,36 @@ out_fail: return ret; } -static struct platform_driver psci_cpuidle_driver = { +static struct faux_device_ops psci_cpuidle_ops = { .probe = psci_cpuidle_probe, - .driver = { - .name = "psci-cpuidle", - }, }; +static bool __init dt_idle_state_present(void) +{ + struct device_node *cpu_node __free(device_node) = + of_cpu_device_node_get(cpumask_first(cpu_possible_mask)); + if (!cpu_node) + return false; + + struct device_node *state_node __free(device_node) = + of_get_cpu_state_node(cpu_node, 0); + if (!state_node) + return false; + + return !!of_match_node(psci_idle_state_match, state_node); +} + static int __init psci_idle_init(void) { - struct platform_device *pdev; - int ret; + struct faux_device *fdev; - ret = platform_driver_register(&psci_cpuidle_driver); - if (ret) - return ret; + if (!dt_idle_state_present()) + return 0; - pdev = platform_device_register_simple("psci-cpuidle", -1, NULL, 0); - if (IS_ERR(pdev)) { - platform_driver_unregister(&psci_cpuidle_driver); - return PTR_ERR(pdev); + fdev = faux_device_create("psci-cpuidle", NULL, &psci_cpuidle_ops); + if (!fdev) { + pr_err("Failed to create psci-cpuidle device\n"); + return -ENODEV; } return 0; diff --git a/drivers/cpuidle/cpuidle-psci.h b/drivers/cpuidle/cpuidle-psci.h index ef004ec7a7c5..d29cbd796cd5 100644 --- a/drivers/cpuidle/cpuidle-psci.h +++ b/drivers/cpuidle/cpuidle-psci.h @@ -4,8 +4,10 @@ #define __CPUIDLE_PSCI_H struct device_node; +struct generic_pm_domain; -void psci_set_domain_state(u32 state); +void psci_set_domain_state(struct generic_pm_domain *pd, unsigned int state_idx, + u32 state); int psci_dt_parse_state_node(struct device_node *np, u32 *state); #endif /* __CPUIDLE_PSCI_H */ diff --git a/drivers/cpuidle/cpuidle-qcom-spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c index 3ab240e0e122..5f386761b156 100644 --- a/drivers/cpuidle/cpuidle-qcom-spm.c +++ b/drivers/cpuidle/cpuidle-qcom-spm.c @@ -135,7 +135,7 @@ static int spm_cpuidle_drv_probe(struct platform_device *pdev) if (ret) return dev_err_probe(&pdev->dev, ret, "set warm boot addr failed"); - for_each_possible_cpu(cpu) { + for_each_present_cpu(cpu) { ret = spm_cpuidle_register(&pdev->dev, cpu); if (ret && ret != -ENODEV) { dev_err(&pdev->dev, diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index 0c92a628bbd4..0fe1ece9fbdc 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -529,8 +529,8 @@ static int sbi_cpuidle_probe(struct platform_device *pdev) return ret; } - /* Initialize CPU idle driver for each CPU */ - for_each_possible_cpu(cpu) { + /* Initialize CPU idle driver for each present CPU */ + for_each_present_cpu(cpu) { ret = sbi_cpuidle_init_cpu(&pdev->dev, cpu); if (ret) { pr_debug("HART%ld: idle driver init failed\n", diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 28363bfa3e4c..52d5d26fc7c6 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -41,7 +41,7 @@ * the C state is required to actually break even on this cost. CPUIDLE * provides us this duration in the "target_residency" field. So all that we * need is a good prediction of how long we'll be idle. Like the traditional - * menu governor, we start with the actual known "next timer event" time. + * menu governor, we take the actual known "next timer event" time. * * Since there are other source of wakeups (interrupts for example) than * the next timer event, this estimation is rather optimistic. To get a @@ -50,30 +50,21 @@ * duration always was 50% of the next timer tick, the correction factor will * be 0.5. * - * menu uses a running average for this correction factor, however it uses a - * set of factors, not just a single factor. This stems from the realization - * that the ratio is dependent on the order of magnitude of the expected - * duration; if we expect 500 milliseconds of idle time the likelihood of - * getting an interrupt very early is much higher than if we expect 50 micro - * seconds of idle time. A second independent factor that has big impact on - * the actual factor is if there is (disk) IO outstanding or not. - * (as a special twist, we consider every sleep longer than 50 milliseconds - * as perfect; there are no power gains for sleeping longer than this) - * - * For these two reasons we keep an array of 12 independent factors, that gets - * indexed based on the magnitude of the expected duration as well as the - * "is IO outstanding" property. + * menu uses a running average for this correction factor, but it uses a set of + * factors, not just a single factor. This stems from the realization that the + * ratio is dependent on the order of magnitude of the expected duration; if we + * expect 500 milliseconds of idle time the likelihood of getting an interrupt + * very early is much higher than if we expect 50 micro seconds of idle time. + * For this reason, menu keeps an array of 6 independent factors, that gets + * indexed based on the magnitude of the expected duration. * * Repeatable-interval-detector * ---------------------------- * There are some cases where "next timer" is a completely unusable predictor: * Those cases where the interval is fixed, for example due to hardware - * interrupt mitigation, but also due to fixed transfer rate devices such as - * mice. + * interrupt mitigation, but also due to fixed transfer rate devices like mice. * For this, we use a different predictor: We track the duration of the last 8 - * intervals and if the stand deviation of these 8 intervals is below a - * threshold value, we use the average of these intervals as prediction. - * + * intervals and use them to estimate the duration of the next one. */ struct menu_device { @@ -116,53 +107,52 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); */ static unsigned int get_typical_interval(struct menu_device *data) { - int i, divisor; - unsigned int min, max, thresh, avg; - uint64_t sum, variance; - - thresh = INT_MAX; /* Discard outliers above this value */ + s64 value, min_thresh = -1, max_thresh = UINT_MAX; + unsigned int max, min, divisor; + u64 avg, variance, avg_sq; + int i; again: - - /* First calculate the average of past intervals */ - min = UINT_MAX; + /* Compute the average and variance of past intervals. */ max = 0; - sum = 0; + min = UINT_MAX; + avg = 0; + variance = 0; divisor = 0; for (i = 0; i < INTERVALS; i++) { - unsigned int value = data->intervals[i]; - if (value <= thresh) { - sum += value; - divisor++; - if (value > max) - max = value; - - if (value < min) - min = value; - } + value = data->intervals[i]; + /* + * Discard the samples outside the interval between the min and + * max thresholds. + */ + if (value <= min_thresh || value >= max_thresh) + continue; + + divisor++; + + avg += value; + variance += value * value; + + if (value > max) + max = value; + + if (value < min) + min = value; } if (!max) return UINT_MAX; - if (divisor == INTERVALS) - avg = sum >> INTERVAL_SHIFT; - else - avg = div_u64(sum, divisor); - - /* Then try to determine variance */ - variance = 0; - for (i = 0; i < INTERVALS; i++) { - unsigned int value = data->intervals[i]; - if (value <= thresh) { - int64_t diff = (int64_t)value - avg; - variance += diff * diff; - } - } - if (divisor == INTERVALS) + if (divisor == INTERVALS) { + avg >>= INTERVAL_SHIFT; variance >>= INTERVAL_SHIFT; - else + } else { + do_div(avg, divisor); do_div(variance, divisor); + } + + avg_sq = avg * avg; + variance -= avg_sq; /* * The typical interval is obtained when standard deviation is @@ -177,25 +167,40 @@ again: * Use this result only if there is no timer to wake us up sooner. */ if (likely(variance <= U64_MAX/36)) { - if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3)) - || variance <= 400) { + if ((avg_sq > variance * 36 && divisor * 4 >= INTERVALS * 3) || + variance <= 400) return avg; - } } /* - * If we have outliers to the upside in our distribution, discard - * those by setting the threshold to exclude these outliers, then + * If there are outliers, discard them by setting thresholds to exclude + * data points at a large enough distance from the average, then * calculate the average and standard deviation again. Once we get - * down to the bottom 3/4 of our samples, stop excluding samples. + * down to the last 3/4 of our samples, stop excluding samples. * * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. */ - if ((divisor * 4) <= INTERVALS * 3) + if (divisor * 4 <= INTERVALS * 3) { + /* + * If there are sufficiently many data points still under + * consideration after the outliers have been eliminated, + * returning without a prediction would be a mistake because it + * is likely that the next interval will not exceed the current + * maximum, so return the latter in that case. + */ + if (divisor >= INTERVALS / 2) + return max; + return UINT_MAX; + } + + /* Update the thresholds for the next round. */ + if (avg - min > max - avg) + min_thresh = min; + else + max_thresh = max; - thresh = max - 1; goto again; } @@ -250,7 +255,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, */ data->next_timer_ns = KTIME_MAX; delta_tick = TICK_NSEC / 2; - data->bucket = which_bucket(KTIME_MAX); + data->bucket = BUCKETS - 1; } if (unlikely(drv->state_count <= 1 || latency_req == 0) || diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 8fe5e1b47ef9..bfa55c1eab5b 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -19,7 +19,7 @@ * * Of course, non-timer wakeup sources are more important in some use cases, * but even then it is generally unnecessary to consider idle duration values - * greater than the time time till the next timer event, referred as the sleep + * greater than the time till the next timer event, referred as the sleep * length in what follows, because the closest timer will ultimately wake up the * CPU anyway unless it is woken up earlier. * @@ -311,7 +311,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, struct cpuidle_state *s = &drv->states[i]; /* - * Update the sums of idle state mertics for all of the states + * Update the sums of idle state metrics for all of the states * shallower than the current one. */ intercept_sum += prev_bin->intercepts; |