summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- drivers/cpuidle/governors/gov.h 5
-rw-r--r-- drivers/cpuidle/governors/menu.c 15
-rw-r--r-- drivers/cpuidle/governors/teo.c 81
-rw-r--r-- drivers/idle/intel_idle.c 42
4 files changed, 90 insertions, 53 deletions
diff --git a/drivers/cpuidle/governors/gov.h b/drivers/cpuidle/governors/gov.h
index 99e067d9668c..cd06a2e7b506 100644
--- a/drivers/cpuidle/governors/gov.h
+++ b/drivers/cpuidle/governors/gov.h
@@ -10,5 +10,10 @@
* check the time till the closest expected timer event.
*/
#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC)
+/*
+ * If the closest timer is in this range, the governor idle state selection need
+ * not be adjusted after the scheduler tick has been stopped.
+ */
+#define SAFE_TIMER_RANGE_NS (2 * TICK_NSEC)
#endif /* __CPUIDLE_GOVERNOR_H */
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 899ff16ff1fe..544a5d593007 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -261,13 +261,16 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
/*
* If the tick is already stopped, the cost of possible short
- * idle duration misprediction is much higher, because the CPU
- * may be stuck in a shallow idle state for a long time as a
- * result of it. In that case, say we might mispredict and use
- * the known time till the closest timer event for the idle
- * state selection.
+ * idle duration misprediction is higher because the CPU may get
+ * stuck in a shallow idle state then. To avoid that, if
+ * predicted_ns is small enough, say it might be mispredicted
+ * and use the known time till the closest timer for idle state
+ * selection unless that timer is going to trigger within
+ * SAFE_TIMER_RANGE_NS in which case it can be regarded as a
+ * sufficient safety net.
*/
- if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC)
+ if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC &&
+ data->next_timer_ns > SAFE_TIMER_RANGE_NS)
predicted_ns = data->next_timer_ns;
} else {
/*
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index bec0142377b8..ac43b9b013b3 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -407,50 +407,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* better choice.
*/
if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
- int min_idx = idx0;
-
- if (tick_nohz_tick_stopped()) {
- /*
- * Look for the shallowest idle state below the current
- * candidate one whose target residency is at least
- * equal to the tick period length.
- */
- while (min_idx < idx &&
- drv->states[min_idx].target_residency_ns < TICK_NSEC)
- min_idx++;
-
- /*
- * Avoid selecting a state with a lower index, but with
- * the same target residency as the current candidate
- * one.
- */
- if (drv->states[min_idx].target_residency_ns ==
- drv->states[idx].target_residency_ns)
- goto constraint;
- }
-
- /*
- * If the minimum state index is greater than or equal to the
- * index of the state with the maximum intercepts metric and
- * the corresponding state is enabled, there is no need to look
- * at the deeper states.
- */
- if (min_idx >= intercept_max_idx &&
- !dev->states_usage[min_idx].disable) {
- idx = min_idx;
- goto constraint;
- }
-
/*
* Look for the deepest enabled idle state, at most as deep as
* the one with the maximum intercepts metric, whose target
* residency had not been greater than the idle duration in over
* a half of the relevant cases in the past.
- *
- * Take the possible duration limitation present if the tick
- * has been stopped already into account.
*/
- for (i = idx - 1, intercept_sum = 0; i >= min_idx; i--) {
+ for (i = idx - 1, intercept_sum = 0; i >= idx0; i--) {
intercept_sum += cpu_data->state_bins[i].intercepts;
if (dev->states_usage[i].disable)
@@ -463,7 +426,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}
}
-constraint:
/*
* If there is a latency constraint, it may be necessary to select an
* idle state shallower than the current candidate one.
@@ -472,13 +434,13 @@ constraint:
idx = constraint_idx;
/*
- * If either the candidate state is state 0 or its target residency is
- * low enough, there is basically nothing more to do, but if the sleep
- * length is not updated, the subsequent wakeup will be counted as an
- * "intercept" which may be problematic in the cases when timer wakeups
- * are dominant. Namely, it may effectively prevent deeper idle states
- * from being selected at one point even if no imminent timers are
- * scheduled.
+ * If the tick has not been stopped and either the candidate state is
+ * state 0 or its target residency is low enough, there is basically
+ * nothing more to do, but if the sleep length is not updated, the
+ * subsequent wakeup will be counted as an "intercept". That may be
+ * problematic in the cases when timer wakeups are dominant because it
+ * may effectively prevent deeper idle states from being selected at one
+ * point even if no imminent timers are scheduled.
*
* However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one
* CPU are unlikely (user space has a default 50 us slack value for
@@ -494,7 +456,8 @@ constraint:
* shallow idle states regardless of the wakeup type, so the sleep
* length need not be known in that case.
*/
- if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
+ if (!tick_nohz_tick_stopped() && (!idx ||
+ drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
(2 * cpu_data->short_idles >= cpu_data->total ||
latency_req < LATENCY_THRESHOLD_NS))
goto out_tick;
@@ -502,6 +465,30 @@ constraint:
duration_ns = tick_nohz_get_sleep_length(&delta_tick);
cpu_data->sleep_length_ns = duration_ns;
+ /*
+ * If the tick has been stopped and the closest timer is too far away,
+ * update the selection to prevent the CPU from getting stuck in a
+ * shallow idle state for too long.
+ */
+ if (tick_nohz_tick_stopped() && duration_ns > SAFE_TIMER_RANGE_NS &&
+ drv->states[idx].target_residency_ns < TICK_NSEC) {
+ /*
+ * Look for the deepest enabled idle state with exit latency
+ * within the PM QoS limit and with target residency within
+ * duration_ns.
+ */
+ for (i = constraint_idx; i > idx; i--) {
+ if (dev->states_usage[i].disable)
+ continue;
+
+ if (drv->states[i].target_residency_ns <= duration_ns) {
+ idx = i;
+ break;
+ }
+ }
+ return idx;
+ }
+
if (!idx)
goto out_tick;
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index f49c939d636f..f49354e37777 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -983,6 +983,43 @@ static struct cpuidle_state mtl_l_cstates[] __initdata = {
.enter = NULL }
};
+static struct cpuidle_state ptl_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00),
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 10,
+ .target_residency = 10,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6S",
+ .desc = "MWAIT 0x21",
+ .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 300,
+ .target_residency = 300,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 370,
+ .target_residency = 2500,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
static struct cpuidle_state gmt_cstates[] __initdata = {
{
.name = "C1",
@@ -1561,6 +1598,10 @@ static const struct idle_cpu idle_cpu_mtl_l __initconst = {
.state_table = mtl_l_cstates,
};
+static const struct idle_cpu idle_cpu_ptl __initconst = {
+ .state_table = ptl_cstates,
+};
+
static const struct idle_cpu idle_cpu_gmt __initconst = {
.state_table = gmt_cstates,
};
@@ -1669,6 +1710,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_VFM(INTEL_ALDERLAKE, &idle_cpu_adl),
X86_MATCH_VFM(INTEL_ALDERLAKE_L, &idle_cpu_adl_l),
X86_MATCH_VFM(INTEL_METEORLAKE_L, &idle_cpu_mtl_l),
+ X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &idle_cpu_ptl),
X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &idle_cpu_gmt),
X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &idle_cpu_spr),
X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &idle_cpu_spr),