summaryrefslogtreecommitdiff
path: root/drivers/idle
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/idle')
-rw-r--r--drivers/idle/Makefile5
-rw-r--r--drivers/idle/intel_idle.c203
2 files changed, 174 insertions, 34 deletions
diff --git a/drivers/idle/Makefile b/drivers/idle/Makefile
index 0a3c37510079..a34af1ba09bd 100644
--- a/drivers/idle/Makefile
+++ b/drivers/idle/Makefile
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_INTEL_IDLE) += intel_idle.o
+# Branch profiling isn't noinstr-safe
+ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING
+
+obj-$(CONFIG_INTEL_IDLE) += intel_idle.o
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 51293efd02ea..91a7b7e7c0c8 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -48,15 +48,20 @@
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/sched/smt.h>
+#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
+#include <linux/sysfs.h>
+#include <asm/cpuid/api.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
+#include <asm/msr.h>
#include <asm/tsc.h>
#include <asm/fpu/api.h>
+#include <asm/smp.h>
#define INTEL_IDLE_VERSION "0.5.1"
@@ -89,11 +94,16 @@ struct idle_cpu {
* Indicate which enable bits to clear here.
*/
unsigned long auto_demotion_disable_flags;
- bool byt_auto_demotion_disable_flag;
bool disable_promotion_to_c1e;
+ bool c1_demotion_supported;
bool use_acpi;
};
+static bool c1_demotion_supported;
+static DEFINE_MUTEX(c1_demotion_mutex);
+
+static struct device *sysfs_root __initdata;
+
static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;
@@ -142,8 +152,8 @@ static __always_inline int __intel_idle(struct cpuidle_device *dev,
int index, bool irqoff)
{
struct cpuidle_state *state = &drv->states[index];
- unsigned long eax = flg2MWAIT(state->flags);
- unsigned long ecx = 1*irqoff; /* break on interrupt flag */
+ unsigned int eax = flg2MWAIT(state->flags);
+ unsigned int ecx = 1*irqoff; /* break on interrupt flag */
mwait_idle_with_hints(eax, ecx);
@@ -216,9 +226,9 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- unsigned long ecx = 1; /* break on interrupt flag */
struct cpuidle_state *state = &drv->states[index];
- unsigned long eax = flg2MWAIT(state->flags);
+ unsigned int eax = flg2MWAIT(state->flags);
+ unsigned int ecx = 1; /* break on interrupt flag */
if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
fpu_idle_fpregs();
@@ -228,6 +238,15 @@ static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
return 0;
}
+static void intel_idle_enter_dead(struct cpuidle_device *dev, int index)
+{
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+ struct cpuidle_state *state = &drv->states[index];
+ unsigned long eax = flg2MWAIT(state->flags);
+
+ mwait_play_dead(eax);
+}
+
/*
* States are indexed by the cstate number,
* which is also the index into the MWAIT hint array.
@@ -1463,13 +1482,11 @@ static const struct idle_cpu idle_cpu_snx __initconst = {
static const struct idle_cpu idle_cpu_byt __initconst = {
.state_table = byt_cstates,
.disable_promotion_to_c1e = true,
- .byt_auto_demotion_disable_flag = true,
};
static const struct idle_cpu idle_cpu_cht __initconst = {
.state_table = cht_cstates,
.disable_promotion_to_c1e = true,
- .byt_auto_demotion_disable_flag = true,
};
static const struct idle_cpu idle_cpu_ivb __initconst = {
@@ -1541,18 +1558,21 @@ static const struct idle_cpu idle_cpu_gmt __initconst = {
static const struct idle_cpu idle_cpu_spr __initconst = {
.state_table = spr_cstates,
.disable_promotion_to_c1e = true,
+ .c1_demotion_supported = true,
.use_acpi = true,
};
static const struct idle_cpu idle_cpu_gnr __initconst = {
.state_table = gnr_cstates,
.disable_promotion_to_c1e = true,
+ .c1_demotion_supported = true,
.use_acpi = true,
};
static const struct idle_cpu idle_cpu_gnrd __initconst = {
.state_table = gnrd_cstates,
.disable_promotion_to_c1e = true,
+ .c1_demotion_supported = true,
.use_acpi = true,
};
@@ -1591,12 +1611,14 @@ static const struct idle_cpu idle_cpu_snr __initconst = {
static const struct idle_cpu idle_cpu_grr __initconst = {
.state_table = grr_cstates,
.disable_promotion_to_c1e = true,
+ .c1_demotion_supported = true,
.use_acpi = true,
};
static const struct idle_cpu idle_cpu_srf __initconst = {
.state_table = srf_cstates,
.disable_promotion_to_c1e = true,
+ .c1_demotion_supported = true,
.use_acpi = true,
};
@@ -1652,11 +1674,12 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &idle_cpu_snr),
X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &idle_cpu_grr),
X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &idle_cpu_srf),
+ X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &idle_cpu_srf),
{}
};
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
- X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
+ X86_MATCH_VENDOR_FAM_FEATURE(INTEL, X86_FAMILY_ANY, X86_FEATURE_MWAIT, NULL),
{}
};
@@ -1694,6 +1717,10 @@ static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
+static bool no_native __read_mostly; /* No effect if no_acpi is set. */
+module_param_named(no_native, no_native, bool, 0444);
+MODULE_PARM_DESC(no_native, "Ignore cpu specific (native) idle states in lieu of ACPI idle states");
+
static struct acpi_processor_power acpi_state_table __initdata;
/**
@@ -1802,6 +1829,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
mark_tsc_unstable("TSC halts in idle");
state->enter = intel_idle;
+ state->enter_dead = intel_idle_enter_dead;
state->enter_s2idle = intel_idle_s2idle;
}
}
@@ -1836,6 +1864,11 @@ static bool __init intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
}
return true;
}
+
+static inline bool ignore_native(void)
+{
+ return no_native && !no_acpi;
+}
#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
#define force_use_acpi (false)
@@ -1845,6 +1878,7 @@ static inline bool intel_idle_off_by_default(unsigned int flags, u32 mwait_hint)
{
return false;
}
+static inline bool ignore_native(void) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
/**
@@ -1908,35 +1942,35 @@ static void __init bxt_idle_state_table_update(void)
unsigned long long msr;
unsigned int usec;
- rdmsrl(MSR_PKGC6_IRTL, msr);
+ rdmsrq(MSR_PKGC6_IRTL, msr);
usec = irtl_2_usec(msr);
if (usec) {
bxt_cstates[2].exit_latency = usec;
bxt_cstates[2].target_residency = usec;
}
- rdmsrl(MSR_PKGC7_IRTL, msr);
+ rdmsrq(MSR_PKGC7_IRTL, msr);
usec = irtl_2_usec(msr);
if (usec) {
bxt_cstates[3].exit_latency = usec;
bxt_cstates[3].target_residency = usec;
}
- rdmsrl(MSR_PKGC8_IRTL, msr);
+ rdmsrq(MSR_PKGC8_IRTL, msr);
usec = irtl_2_usec(msr);
if (usec) {
bxt_cstates[4].exit_latency = usec;
bxt_cstates[4].target_residency = usec;
}
- rdmsrl(MSR_PKGC9_IRTL, msr);
+ rdmsrq(MSR_PKGC9_IRTL, msr);
usec = irtl_2_usec(msr);
if (usec) {
bxt_cstates[5].exit_latency = usec;
bxt_cstates[5].target_residency = usec;
}
- rdmsrl(MSR_PKGC10_IRTL, msr);
+ rdmsrq(MSR_PKGC10_IRTL, msr);
usec = irtl_2_usec(msr);
if (usec) {
bxt_cstates[6].exit_latency = usec;
@@ -1964,7 +1998,7 @@ static void __init sklh_idle_state_table_update(void)
if ((mwait_substates & (0xF << 28)) == 0)
return;
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+ rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
/* PC10 is not enabled in PKG C-state limit */
if ((msr & 0xF) != 8)
@@ -1976,7 +2010,7 @@ static void __init sklh_idle_state_table_update(void)
/* if SGX is present */
if (ebx & (1 << 2)) {
- rdmsrl(MSR_IA32_FEAT_CTL, msr);
+ rdmsrq(MSR_IA32_FEAT_CTL, msr);
/* if SGX is enabled */
if (msr & (1 << 18))
@@ -1995,7 +2029,7 @@ static void __init skx_idle_state_table_update(void)
{
unsigned long long msr;
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+ rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
/*
* 000b: C0/C1 (no package C-state support)
@@ -2048,7 +2082,7 @@ static void __init spr_idle_state_table_update(void)
* C6. However, if PC6 is disabled, we update the numbers to match
* core C6.
*/
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+ rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr);
/* Limit value 2 and above allow for PC6. */
if ((msr & 0x7) < 2) {
@@ -2057,6 +2091,15 @@ static void __init spr_idle_state_table_update(void)
}
}
+/**
+ * byt_cht_auto_demotion_disable - Disable Bay/Cherry Trail auto-demotion.
+ */
+static void __init byt_cht_auto_demotion_disable(void)
+{
+ wrmsrq(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
+ wrmsrq(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
+}
+
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
unsigned int mwait_cstate = (MWAIT_HINT2CSTATE(mwait_hint) + 1) &
@@ -2138,6 +2181,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
case INTEL_ATOM_GRACEMONT:
adl_idle_state_table_update();
break;
+ case INTEL_ATOM_SILVERMONT:
+ case INTEL_ATOM_AIRMONT:
+ byt_cht_auto_demotion_disable();
+ break;
}
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
@@ -2151,6 +2198,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
!cpuidle_state_table[cstate].enter_s2idle)
break;
+ if (!cpuidle_state_table[cstate].enter_dead)
+ cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead;
+
/* If marked as unusable, skip this state. */
if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
pr_debug("state %s is disabled\n",
@@ -2180,11 +2230,6 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
drv->state_count++;
}
-
- if (icpu->byt_auto_demotion_disable_flag) {
- wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
- wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
- }
}
/**
@@ -2210,27 +2255,27 @@ static void auto_demotion_disable(void)
{
unsigned long long msr_bits;
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
+ rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
msr_bits &= ~auto_demotion_disable_flags;
- wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
+ wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}
static void c1e_promotion_enable(void)
{
unsigned long long msr_bits;
- rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+ rdmsrq(MSR_IA32_POWER_CTL, msr_bits);
msr_bits |= 0x2;
- wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+ wrmsrq(MSR_IA32_POWER_CTL, msr_bits);
}
static void c1e_promotion_disable(void)
{
unsigned long long msr_bits;
- rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+ rdmsrq(MSR_IA32_POWER_CTL, msr_bits);
msr_bits &= ~0x2;
- wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+ wrmsrq(MSR_IA32_POWER_CTL, msr_bits);
}
/**
@@ -2293,6 +2338,88 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}
+static void intel_c1_demotion_toggle(void *enable)
+{
+ unsigned long long msr_val;
+
+ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
+ /*
+ * Enable/disable C1 undemotion along with C1 demotion, as this is the
+ * most sensible configuration in general.
+ */
+ if (enable)
+ msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE;
+ else
+ msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE);
+ wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
+}
+
+static ssize_t intel_c1_demotion_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ bool enable;
+ int err;
+
+ err = kstrtobool(buf, &enable);
+ if (err)
+ return err;
+
+ mutex_lock(&c1_demotion_mutex);
+ /* Enable/disable C1 demotion on all CPUs */
+ on_each_cpu(intel_c1_demotion_toggle, (void *)enable, 1);
+ mutex_unlock(&c1_demotion_mutex);
+
+ return count;
+}
+
+static ssize_t intel_c1_demotion_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ unsigned long long msr_val;
+
+ /*
+ * Read the MSR value for a CPU and assume it is the same for all CPUs. Any other
+ * configuration would be a BIOS bug.
+ */
+ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
+ return sysfs_emit(buf, "%d\n", !!(msr_val & NHM_C1_AUTO_DEMOTE));
+}
+static DEVICE_ATTR_RW(intel_c1_demotion);
+
+static int __init intel_idle_sysfs_init(void)
+{
+ int err;
+
+ if (!c1_demotion_supported)
+ return 0;
+
+ sysfs_root = bus_get_dev_root(&cpu_subsys);
+ if (!sysfs_root)
+ return 0;
+
+ err = sysfs_add_file_to_group(&sysfs_root->kobj,
+ &dev_attr_intel_c1_demotion.attr,
+ "cpuidle");
+ if (err) {
+ put_device(sysfs_root);
+ return err;
+ }
+
+ return 0;
+}
+
+static void __init intel_idle_sysfs_uninit(void)
+{
+ if (!sysfs_root)
+ return;
+
+ sysfs_remove_file_from_group(&sysfs_root->kobj,
+ &dev_attr_intel_c1_demotion.attr,
+ "cpuidle");
+ put_device(sysfs_root);
+}
+
static int __init intel_idle_init(void)
{
const struct x86_cpu_id *id;
@@ -2320,10 +2447,7 @@ static int __init intel_idle_init(void)
return -ENODEV;
}
- if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
- return -ENODEV;
-
- cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
+ cpuid(CPUID_LEAF_MWAIT, &eax, &ebx, &ecx, &mwait_substates);
if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
!(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
@@ -2333,6 +2457,10 @@ static int __init intel_idle_init(void)
pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
icpu = (const struct idle_cpu *)id->driver_data;
+ if (icpu && ignore_native()) {
+ pr_debug("ignoring native CPU idle states\n");
+ icpu = NULL;
+ }
if (icpu) {
if (icpu->state_table)
cpuidle_state_table = icpu->state_table;
@@ -2342,6 +2470,8 @@ static int __init intel_idle_init(void)
auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
if (icpu->disable_promotion_to_c1e)
c1e_promotion = C1E_PROMOTION_DISABLE;
+ if (icpu->c1_demotion_supported)
+ c1_demotion_supported = true;
if (icpu->use_acpi || force_use_acpi)
intel_idle_acpi_cst_extract();
} else if (!intel_idle_acpi_cst_extract()) {
@@ -2355,6 +2485,10 @@ static int __init intel_idle_init(void)
if (!intel_idle_cpuidle_devices)
return -ENOMEM;
+ retval = intel_idle_sysfs_init();
+ if (retval)
+ pr_warn("failed to initialized sysfs");
+
intel_idle_cpuidle_driver_init(&intel_idle_driver);
retval = cpuidle_register_driver(&intel_idle_driver);
@@ -2373,17 +2507,20 @@ static int __init intel_idle_init(void)
pr_debug("Local APIC timer is reliable in %s\n",
boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
+ arch_cpu_rescan_dead_smt_siblings();
+
return 0;
hp_setup_fail:
intel_idle_cpuidle_devices_uninit();
cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
+ intel_idle_sysfs_uninit();
free_percpu(intel_idle_cpuidle_devices);
return retval;
}
-device_initcall(intel_idle_init);
+subsys_initcall_sync(intel_idle_init);
/*
* We are not really modular, but we used to support that. Meaning we also