diff options
Diffstat (limited to 'drivers/idle')
-rw-r--r-- | drivers/idle/Makefile | 5 | ||||
-rw-r--r-- | drivers/idle/intel_idle.c | 203 |
2 files changed, 174 insertions, 34 deletions
diff --git a/drivers/idle/Makefile b/drivers/idle/Makefile index 0a3c37510079..a34af1ba09bd 100644 --- a/drivers/idle/Makefile +++ b/drivers/idle/Makefile @@ -1,3 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_INTEL_IDLE) += intel_idle.o +# Branch profiling isn't noinstr-safe +ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING + +obj-$(CONFIG_INTEL_IDLE) += intel_idle.o diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 51293efd02ea..91a7b7e7c0c8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -48,15 +48,20 @@ #include <trace/events/power.h> #include <linux/sched.h> #include <linux/sched/smt.h> +#include <linux/mutex.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/moduleparam.h> +#include <linux/sysfs.h> +#include <asm/cpuid/api.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include <asm/mwait.h> #include <asm/spec-ctrl.h> +#include <asm/msr.h> #include <asm/tsc.h> #include <asm/fpu/api.h> +#include <asm/smp.h> #define INTEL_IDLE_VERSION "0.5.1" @@ -89,11 +94,16 @@ struct idle_cpu { * Indicate which enable bits to clear here. */ unsigned long auto_demotion_disable_flags; - bool byt_auto_demotion_disable_flag; bool disable_promotion_to_c1e; + bool c1_demotion_supported; bool use_acpi; }; +static bool c1_demotion_supported; +static DEFINE_MUTEX(c1_demotion_mutex); + +static struct device *sysfs_root __initdata; + static const struct idle_cpu *icpu __initdata; static struct cpuidle_state *cpuidle_state_table __initdata; @@ -142,8 +152,8 @@ static __always_inline int __intel_idle(struct cpuidle_device *dev, int index, bool irqoff) { struct cpuidle_state *state = &drv->states[index]; - unsigned long eax = flg2MWAIT(state->flags); - unsigned long ecx = 1*irqoff; /* break on interrupt flag */ + unsigned int eax = flg2MWAIT(state->flags); + unsigned int ecx = 1*irqoff; /* break on interrupt flag */ mwait_idle_with_hints(eax, ecx); @@ -216,9 +226,9 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - unsigned long ecx = 1; /* break on interrupt flag */ struct cpuidle_state *state = &drv->states[index]; - unsigned long eax = flg2MWAIT(state->flags); + unsigned int eax = flg2MWAIT(state->flags); + unsigned int ecx = 1; /* break on interrupt flag */ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) fpu_idle_fpregs(); @@ -228,6 +238,15 @@ static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, return 0; } +static void intel_idle_enter_dead(struct cpuidle_device *dev, int index) +{ + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + struct cpuidle_state *state = &drv->states[index]; + unsigned long eax = flg2MWAIT(state->flags); + + mwait_play_dead(eax); +} + /* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. @@ -1463,13 +1482,11 @@ static const struct idle_cpu idle_cpu_snx __initconst = { static const struct idle_cpu idle_cpu_byt __initconst = { .state_table = byt_cstates, .disable_promotion_to_c1e = true, - .byt_auto_demotion_disable_flag = true, }; static const struct idle_cpu idle_cpu_cht __initconst = { .state_table = cht_cstates, .disable_promotion_to_c1e = true, - .byt_auto_demotion_disable_flag = true, }; static const struct idle_cpu idle_cpu_ivb __initconst = { @@ -1541,18 +1558,21 @@ static const struct idle_cpu idle_cpu_gmt __initconst = { static const struct idle_cpu idle_cpu_spr __initconst = { .state_table = spr_cstates, .disable_promotion_to_c1e = true, + .c1_demotion_supported = true, .use_acpi = true, }; static const struct idle_cpu idle_cpu_gnr __initconst = { .state_table = gnr_cstates, .disable_promotion_to_c1e = true, + .c1_demotion_supported = true, .use_acpi = true, }; static const struct idle_cpu idle_cpu_gnrd __initconst = { .state_table = gnrd_cstates, .disable_promotion_to_c1e = true, + .c1_demotion_supported = true, .use_acpi = true, }; @@ -1591,12 +1611,14 @@ static const struct idle_cpu idle_cpu_snr __initconst = { static const struct idle_cpu idle_cpu_grr __initconst = { .state_table = grr_cstates, .disable_promotion_to_c1e = true, + .c1_demotion_supported = true, .use_acpi = true, }; static const struct idle_cpu idle_cpu_srf __initconst = { .state_table = srf_cstates, .disable_promotion_to_c1e = true, + .c1_demotion_supported = true, .use_acpi = true, }; @@ -1652,11 +1674,12 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &idle_cpu_snr), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &idle_cpu_grr), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &idle_cpu_srf), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &idle_cpu_srf), {} }; static const struct x86_cpu_id intel_mwait_ids[] __initconst = { - X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), + X86_MATCH_VENDOR_FAM_FEATURE(INTEL, X86_FAMILY_ANY, X86_FEATURE_MWAIT, NULL), {} }; @@ -1694,6 +1717,10 @@ static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ module_param_named(use_acpi, force_use_acpi, bool, 0444); MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); +static bool no_native __read_mostly; /* No effect if no_acpi is set. */ +module_param_named(no_native, no_native, bool, 0444); +MODULE_PARM_DESC(no_native, "Ignore cpu specific (native) idle states in lieu of ACPI idle states"); + static struct acpi_processor_power acpi_state_table __initdata; /** @@ -1802,6 +1829,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) mark_tsc_unstable("TSC halts in idle"); state->enter = intel_idle; + state->enter_dead = intel_idle_enter_dead; state->enter_s2idle = intel_idle_s2idle; } } @@ -1836,6 +1864,11 @@ static bool __init intel_idle_off_by_default(unsigned int flags, u32 mwait_hint) } return true; } + +static inline bool ignore_native(void) +{ + return no_native && !no_acpi; +} #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ #define force_use_acpi (false) @@ -1845,6 +1878,7 @@ static inline bool intel_idle_off_by_default(unsigned int flags, u32 mwait_hint) { return false; } +static inline bool ignore_native(void) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ /** @@ -1908,35 +1942,35 @@ static void __init bxt_idle_state_table_update(void) unsigned long long msr; unsigned int usec; - rdmsrl(MSR_PKGC6_IRTL, msr); + rdmsrq(MSR_PKGC6_IRTL, msr); usec = irtl_2_usec(msr); if (usec) { bxt_cstates[2].exit_latency = usec; bxt_cstates[2].target_residency = usec; } - rdmsrl(MSR_PKGC7_IRTL, msr); + rdmsrq(MSR_PKGC7_IRTL, msr); usec = irtl_2_usec(msr); if (usec) { bxt_cstates[3].exit_latency = usec; bxt_cstates[3].target_residency = usec; } - rdmsrl(MSR_PKGC8_IRTL, msr); + rdmsrq(MSR_PKGC8_IRTL, msr); usec = irtl_2_usec(msr); if (usec) { bxt_cstates[4].exit_latency = usec; bxt_cstates[4].target_residency = usec; } - rdmsrl(MSR_PKGC9_IRTL, msr); + rdmsrq(MSR_PKGC9_IRTL, msr); usec = irtl_2_usec(msr); if (usec) { bxt_cstates[5].exit_latency = usec; bxt_cstates[5].target_residency = usec; } - rdmsrl(MSR_PKGC10_IRTL, msr); + rdmsrq(MSR_PKGC10_IRTL, msr); usec = irtl_2_usec(msr); if (usec) { bxt_cstates[6].exit_latency = usec; @@ -1964,7 +1998,7 @@ static void __init sklh_idle_state_table_update(void) if ((mwait_substates & (0xF << 28)) == 0) return; - rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr); /* PC10 is not enabled in PKG C-state limit */ if ((msr & 0xF) != 8) @@ -1976,7 +2010,7 @@ static void __init sklh_idle_state_table_update(void) /* if SGX is present */ if (ebx & (1 << 2)) { - rdmsrl(MSR_IA32_FEAT_CTL, msr); + rdmsrq(MSR_IA32_FEAT_CTL, msr); /* if SGX is enabled */ if (msr & (1 << 18)) @@ -1995,7 +2029,7 @@ static void __init skx_idle_state_table_update(void) { unsigned long long msr; - rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr); /* * 000b: C0/C1 (no package C-state support) @@ -2048,7 +2082,7 @@ static void __init spr_idle_state_table_update(void) * C6. However, if PC6 is disabled, we update the numbers to match * core C6. */ - rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr); /* Limit value 2 and above allow for PC6. */ if ((msr & 0x7) < 2) { @@ -2057,6 +2091,15 @@ static void __init spr_idle_state_table_update(void) } } +/** + * byt_cht_auto_demotion_disable - Disable Bay/Cherry Trail auto-demotion. + */ +static void __init byt_cht_auto_demotion_disable(void) +{ + wrmsrq(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); + wrmsrq(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); +} + static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) { unsigned int mwait_cstate = (MWAIT_HINT2CSTATE(mwait_hint) + 1) & @@ -2138,6 +2181,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) case INTEL_ATOM_GRACEMONT: adl_idle_state_table_update(); break; + case INTEL_ATOM_SILVERMONT: + case INTEL_ATOM_AIRMONT: + byt_cht_auto_demotion_disable(); + break; } for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { @@ -2151,6 +2198,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) !cpuidle_state_table[cstate].enter_s2idle) break; + if (!cpuidle_state_table[cstate].enter_dead) + cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead; + /* If marked as unusable, skip this state. */ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { pr_debug("state %s is disabled\n", @@ -2180,11 +2230,6 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) drv->state_count++; } - - if (icpu->byt_auto_demotion_disable_flag) { - wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); - wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); - } } /** @@ -2210,27 +2255,27 @@ static void auto_demotion_disable(void) { unsigned long long msr_bits; - rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); + rdmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); msr_bits &= ~auto_demotion_disable_flags; - wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); + wrmsrq(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); } static void c1e_promotion_enable(void) { unsigned long long msr_bits; - rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + rdmsrq(MSR_IA32_POWER_CTL, msr_bits); msr_bits |= 0x2; - wrmsrl(MSR_IA32_POWER_CTL, msr_bits); + wrmsrq(MSR_IA32_POWER_CTL, msr_bits); } static void c1e_promotion_disable(void) { unsigned long long msr_bits; - rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + rdmsrq(MSR_IA32_POWER_CTL, msr_bits); msr_bits &= ~0x2; - wrmsrl(MSR_IA32_POWER_CTL, msr_bits); + wrmsrq(MSR_IA32_POWER_CTL, msr_bits); } /** @@ -2293,6 +2338,88 @@ static void __init intel_idle_cpuidle_devices_uninit(void) cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); } +static void intel_c1_demotion_toggle(void *enable) +{ + unsigned long long msr_val; + + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val); + /* + * Enable/disable C1 undemotion along with C1 demotion, as this is the + * most sensible configuration in general. + */ + if (enable) + msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE; + else + msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE); + wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val); +} + +static ssize_t intel_c1_demotion_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + bool enable; + int err; + + err = kstrtobool(buf, &enable); + if (err) + return err; + + mutex_lock(&c1_demotion_mutex); + /* Enable/disable C1 demotion on all CPUs */ + on_each_cpu(intel_c1_demotion_toggle, (void *)enable, 1); + mutex_unlock(&c1_demotion_mutex); + + return count; +} + +static ssize_t intel_c1_demotion_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long long msr_val; + + /* + * Read the MSR value for a CPU and assume it is the same for all CPUs. Any other + * configuration would be a BIOS bug. + */ + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val); + return sysfs_emit(buf, "%d\n", !!(msr_val & NHM_C1_AUTO_DEMOTE)); +} +static DEVICE_ATTR_RW(intel_c1_demotion); + +static int __init intel_idle_sysfs_init(void) +{ + int err; + + if (!c1_demotion_supported) + return 0; + + sysfs_root = bus_get_dev_root(&cpu_subsys); + if (!sysfs_root) + return 0; + + err = sysfs_add_file_to_group(&sysfs_root->kobj, + &dev_attr_intel_c1_demotion.attr, + "cpuidle"); + if (err) { + put_device(sysfs_root); + return err; + } + + return 0; +} + +static void __init intel_idle_sysfs_uninit(void) +{ + if (!sysfs_root) + return; + + sysfs_remove_file_from_group(&sysfs_root->kobj, + &dev_attr_intel_c1_demotion.attr, + "cpuidle"); + put_device(sysfs_root); +} + static int __init intel_idle_init(void) { const struct x86_cpu_id *id; @@ -2320,10 +2447,7 @@ static int __init intel_idle_init(void) return -ENODEV; } - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return -ENODEV; - - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); + cpuid(CPUID_LEAF_MWAIT, &eax, &ebx, &ecx, &mwait_substates); if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || @@ -2333,6 +2457,10 @@ static int __init intel_idle_init(void) pr_debug("MWAIT substates: 0x%x\n", mwait_substates); icpu = (const struct idle_cpu *)id->driver_data; + if (icpu && ignore_native()) { + pr_debug("ignoring native CPU idle states\n"); + icpu = NULL; + } if (icpu) { if (icpu->state_table) cpuidle_state_table = icpu->state_table; @@ -2342,6 +2470,8 @@ static int __init intel_idle_init(void) auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; if (icpu->disable_promotion_to_c1e) c1e_promotion = C1E_PROMOTION_DISABLE; + if (icpu->c1_demotion_supported) + c1_demotion_supported = true; if (icpu->use_acpi || force_use_acpi) intel_idle_acpi_cst_extract(); } else if (!intel_idle_acpi_cst_extract()) { @@ -2355,6 +2485,10 @@ static int __init intel_idle_init(void) if (!intel_idle_cpuidle_devices) return -ENOMEM; + retval = intel_idle_sysfs_init(); + if (retval) + pr_warn("failed to initialized sysfs"); + intel_idle_cpuidle_driver_init(&intel_idle_driver); retval = cpuidle_register_driver(&intel_idle_driver); @@ -2373,17 +2507,20 @@ static int __init intel_idle_init(void) pr_debug("Local APIC timer is reliable in %s\n", boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); + arch_cpu_rescan_dead_smt_siblings(); + return 0; hp_setup_fail: intel_idle_cpuidle_devices_uninit(); cpuidle_unregister_driver(&intel_idle_driver); init_driver_fail: + intel_idle_sysfs_uninit(); free_percpu(intel_idle_cpuidle_devices); return retval; } -device_initcall(intel_idle_init); +subsys_initcall_sync(intel_idle_init); /* * We are not really modular, but we used to support that. Meaning we also |