diff options
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r-- | drivers/cpuidle/cpuidle-psci.c | 23 | ||||
-rw-r--r-- | drivers/cpuidle/governors/menu.c | 34 | ||||
-rw-r--r-- | drivers/cpuidle/governors/teo.c | 91 |
3 files changed, 89 insertions, 59 deletions
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index 2562dc001fc1..f3c30037dc8e 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -38,7 +38,6 @@ struct psci_cpuidle_data { static DEFINE_PER_CPU_READ_MOSTLY(struct psci_cpuidle_data, psci_cpuidle_data); static DEFINE_PER_CPU(u32, domain_state); static bool psci_cpuidle_use_syscore; -static bool psci_cpuidle_use_cpuhp; void psci_set_domain_state(u32 state) { @@ -105,8 +104,12 @@ static int psci_idle_cpuhp_up(unsigned int cpu) { struct device *pd_dev = __this_cpu_read(psci_cpuidle_data.dev); - if (pd_dev) - pm_runtime_get_sync(pd_dev); + if (pd_dev) { + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + pm_runtime_get_sync(pd_dev); + else + dev_pm_genpd_resume(pd_dev); + } return 0; } @@ -116,7 +119,11 @@ static int psci_idle_cpuhp_down(unsigned int cpu) struct device *pd_dev = __this_cpu_read(psci_cpuidle_data.dev); if (pd_dev) { - pm_runtime_put_sync(pd_dev); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + pm_runtime_put_sync(pd_dev); + else + dev_pm_genpd_suspend(pd_dev); + /* Clear domain state to start fresh at next online. */ psci_set_domain_state(0); } @@ -177,9 +184,6 @@ static void psci_idle_init_cpuhp(void) { int err; - if (!psci_cpuidle_use_cpuhp) - return; - err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING, "cpuidle/psci:online", psci_idle_cpuhp_up, @@ -240,10 +244,8 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv, * s2ram and s2idle. */ drv->states[state_count - 1].enter_s2idle = psci_enter_s2idle_domain_idle_state; - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) drv->states[state_count - 1].enter = psci_enter_domain_idle_state; - psci_cpuidle_use_cpuhp = true; - } return 0; } @@ -320,7 +322,6 @@ static void psci_cpu_deinit_idle(int cpu) dt_idle_detach_cpu(data->dev); psci_cpuidle_use_syscore = false; - psci_cpuidle_use_cpuhp = false; } static int psci_idle_init_cpu(struct device *dev, int cpu) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index f3c9d49f0f2a..01322a905414 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -153,6 +153,14 @@ static inline int performance_multiplier(unsigned int nr_iowaiters) static DEFINE_PER_CPU(struct menu_device, menu_devices); +static void menu_update_intervals(struct menu_device *data, unsigned int interval_us) +{ + /* Update the repeating-pattern data. */ + data->intervals[data->interval_ptr++] = interval_us; + if (data->interval_ptr >= INTERVALS) + data->interval_ptr = 0; +} + static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); /* @@ -239,8 +247,19 @@ again: * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. */ - if ((divisor * 4) <= INTERVALS * 3) + if (divisor * 4 <= INTERVALS * 3) { + /* + * If there are sufficiently many data points still under + * consideration after the outliers have been eliminated, + * returning without a prediction would be a mistake because it + * is likely that the next interval will not exceed the current + * maximum, so return the latter in that case. + */ + if (divisor >= INTERVALS / 2) + return max; + return UINT_MAX; + } thresh = max - 1; goto again; @@ -266,6 +285,14 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (data->needs_update) { menu_update(drv, dev); data->needs_update = 0; + } else if (!dev->last_residency_ns) { + /* + * This happens when the driver rejects the previously selected + * idle state and returns an error, so update the recent + * intervals table to prevent invalid information from being + * used going forward. + */ + menu_update_intervals(data, UINT_MAX); } nr_iowaiters = nr_iowait_cpu(dev->cpu); @@ -535,10 +562,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->correction_factor[data->bucket] = new_factor; - /* update the repeating-pattern data */ - data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns); - if (data->interval_ptr >= INTERVALS) - data->interval_ptr = 0; + menu_update_intervals(data, ktime_to_us(measured_ns)); } /** diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index f2992f92d8db..173ddcac540a 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -10,25 +10,27 @@ * DOC: teo-description * * The idea of this governor is based on the observation that on many systems - * timer events are two or more orders of magnitude more frequent than any - * other interrupts, so they are likely to be the most significant cause of CPU - * wakeups from idle states. Moreover, information about what happened in the - * (relatively recent) past can be used to estimate whether or not the deepest - * idle state with target residency within the (known) time till the closest - * timer event, referred to as the sleep length, is likely to be suitable for - * the upcoming CPU idle period and, if not, then which of the shallower idle - * states to choose instead of it. + * timer interrupts are two or more orders of magnitude more frequent than any + * other interrupt types, so they are likely to dominate CPU wakeup patterns. + * Moreover, in principle, the time when the next timer event is going to occur + * can be determined at the idle state selection time, although doing that may + * be costly, so it can be regarded as the most reliable source of information + * for idle state selection. * - * Of course, non-timer wakeup sources are more important in some use cases - * which can be covered by taking a few most recent idle time intervals of the - * CPU into account. However, even in that context it is not necessary to - * consider idle duration values greater than the sleep length, because the - * closest timer will ultimately wake up the CPU anyway unless it is woken up - * earlier. + * Of course, non-timer wakeup sources are more important in some use cases, + * but even then it is generally unnecessary to consider idle duration values + * greater than the time time till the next timer event, referred as the sleep + * length in what follows, because the closest timer will ultimately wake up the + * CPU anyway unless it is woken up earlier. * - * Thus this governor estimates whether or not the prospective idle duration of - * a CPU is likely to be significantly shorter than the sleep length and selects - * an idle state for it accordingly. + * However, since obtaining the sleep length may be costly, the governor first + * checks if it can select a shallow idle state using wakeup pattern information + * from recent times, in which case it can do without knowing the sleep length + * at all. For this purpose, it counts CPU wakeup events and looks for an idle + * state whose target residency has not exceeded the idle duration (measured + * after wakeup) in the majority of relevant recent cases. If the target + * residency of that state is small enough, it may be used right away and the + * sleep length need not be determined. * * The computations carried out by this governor are based on using bins whose * boundaries are aligned with the target residency parameter values of the CPU @@ -39,7 +41,11 @@ * idle state 2, the third bin spans from the target residency of idle state 2 * up to, but not including, the target residency of idle state 3 and so on. * The last bin spans from the target residency of the deepest idle state - * supplied by the driver to infinity. + * supplied by the driver to the scheduler tick period length or to infinity if + * the tick period length is less than the target residency of that state. In + * the latter case, the governor also counts events with the measured idle + * duration between the tick period length and the target residency of the + * deepest idle state. * * Two metrics called "hits" and "intercepts" are associated with each bin. * They are updated every time before selecting an idle state for the given CPU @@ -49,47 +55,46 @@ * sleep length and the idle duration measured after CPU wakeup fall into the * same bin (that is, the CPU appears to wake up "on time" relative to the sleep * length). In turn, the "intercepts" metric reflects the relative frequency of - * situations in which the measured idle duration is so much shorter than the - * sleep length that the bin it falls into corresponds to an idle state - * shallower than the one whose bin is fallen into by the sleep length (these - * situations are referred to as "intercepts" below). + * non-timer wakeup events for which the measured idle duration falls into a bin + * that corresponds to an idle state shallower than the one whose bin is fallen + * into by the sleep length (these events are also referred to as "intercepts" + * below). * * In order to select an idle state for a CPU, the governor takes the following * steps (modulo the possible latency constraint that must be taken into account * too): * - * 1. Find the deepest CPU idle state whose target residency does not exceed - * the current sleep length (the candidate idle state) and compute 2 sums as - * follows: + * 1. Find the deepest enabled CPU idle state (the candidate idle state) and + * compute 2 sums as follows: * - * - The sum of the "hits" and "intercepts" metrics for the candidate state - * and all of the deeper idle states (it represents the cases in which the - * CPU was idle long enough to avoid being intercepted if the sleep length - * had been equal to the current one). + * - The sum of the "hits" metric for all of the idle states shallower than + * the candidate one (it represents the cases in which the CPU was likely + * woken up by a timer). * - * - The sum of the "intercepts" metrics for all of the idle states shallower - * than the candidate one (it represents the cases in which the CPU was not - * idle long enough to avoid being intercepted if the sleep length had been - * equal to the current one). + * - The sum of the "intercepts" metric for all of the idle states shallower + * than the candidate one (it represents the cases in which the CPU was + * likely woken up by a non-timer wakeup source). * - * 2. If the second sum is greater than the first one the CPU is likely to wake - * up early, so look for an alternative idle state to select. + * 2. If the second sum computed in step 1 is greater than a half of the sum of + * both metrics for the candidate state bin and all subsequent bins(if any), + * a shallower idle state is likely to be more suitable, so look for it. * - * - Traverse the idle states shallower than the candidate one in the + * - Traverse the enabled idle states shallower than the candidate one in the * descending order. * * - For each of them compute the sum of the "intercepts" metrics over all * of the idle states between it and the candidate one (including the * former and excluding the latter). * - * - If each of these sums that needs to be taken into account (because the - * check related to it has indicated that the CPU is likely to wake up - * early) is greater than a half of the corresponding sum computed in step - * 1 (which means that the target residency of the state in question had - * not exceeded the idle duration in over a half of the relevant cases), - * select the given idle state instead of the candidate one. + * - If this sum is greater than a half of the second sum computed in step 1, + * use the given idle state as the new candidate one. * - * 3. By default, select the candidate state. + * 3. If the current candidate state is state 0 or its target residency is short + * enough, return it and prevent the scheduler tick from being stopped. + * + * 4. Obtain the sleep length value and check if it is below the target + * residency of the current candidate state, in which case a new shallower + * candidate state needs to be found, so look for it. */ #include <linux/cpuidle.h> |