diff options
Diffstat (limited to 'drivers')
29 files changed, 1707 insertions, 147 deletions
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index e9ac5ea1dfc9..a9d2de403c0c 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -118,6 +118,8 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr); */ #define NUM_RETRIES 500ULL +#define OVER_16BTS_MASK ~0xFFFFULL + #define define_one_cppc_ro(_name) \ static struct kobj_attribute _name = \ __ATTR(_name, 0444, show_##_name, NULL) @@ -412,7 +414,7 @@ bool acpi_cpc_valid(void) struct cpc_desc *cpc_ptr; int cpu; - for_each_possible_cpu(cpu) { + for_each_present_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); if (!cpc_ptr) return false; @@ -730,9 +732,26 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) goto out_free; cpc_ptr->cpc_regs[i-2].sys_mem_vaddr = addr; } + } else if (gas_t->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { + if (gas_t->access_width < 1 || gas_t->access_width > 3) { + /* + * 1 = 8-bit, 2 = 16-bit, and 3 = 32-bit. + * SystemIO doesn't implement 64-bit + * registers. + */ + pr_debug("Invalid access width %d for SystemIO register\n", + gas_t->access_width); + goto out_free; + } + if (gas_t->address & OVER_16BTS_MASK) { + /* SystemIO registers use 16-bit integer addresses */ + pr_debug("Invalid IO port %llu for SystemIO register\n", + gas_t->address); + goto out_free; + } } else { if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) { - /* Support only PCC ,SYS MEM and FFH type regs */ + /* Support only PCC, SystemMemory, SystemIO, and FFH type regs. */ pr_debug("Unsupported register type: %d\n", gas_t->space_id); goto out_free; } @@ -907,7 +926,21 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) } *val = 0; - if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { + u32 width = 8 << (reg->access_width - 1); + acpi_status status; + + status = acpi_os_read_port((acpi_io_address)reg->address, + (u32 *)val, width); + if (ACPI_FAILURE(status)) { + pr_debug("Error: Failed to read SystemIO port %llx\n", + reg->address); + return -EFAULT; + } + + return 0; + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; @@ -946,7 +979,20 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; - if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { + u32 width = 8 << (reg->access_width - 1); + acpi_status status; + + status = acpi_os_write_port((acpi_io_address)reg->address, + (u32)val, width); + if (ACPI_FAILURE(status)) { + pr_debug("Error: Failed to write SystemIO port %llx\n", + reg->address); + return -EFAULT; + } + + return 0; + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; @@ -1214,6 +1260,51 @@ out_err: EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs); /** + * cppc_set_enable - Set to enable CPPC on the processor by writing the + * Continuous Performance Control package EnableRegister field. + * @cpu: CPU for which to enable CPPC register. + * @enable: 0 - disable, 1 - enable CPPC feature on the processor. + * + * Return: 0 for success, -ERRNO or -EIO otherwise. + */ +int cppc_set_enable(int cpu, bool enable) +{ + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cpc_register_resource *enable_reg; + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret = -EINVAL; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpu); + return -EINVAL; + } + + enable_reg = &cpc_desc->cpc_regs[ENABLE]; + + if (CPC_IN_PCC(enable_reg)) { + + if (pcc_ss_id < 0) + return -EIO; + + ret = cpc_write(cpu, enable_reg, enable); + if (ret) + return ret; + + pcc_ss_data = pcc_data[pcc_ss_id]; + + down_write(&pcc_ss_data->pcc_lock); + /* after writing CPC, transfer the ownership of PCC to platfrom */ + ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE); + up_write(&pcc_ss_data->pcc_lock); + return ret; + } + + return cpc_write(cpu, enable_reg, enable); +} +EXPORT_SYMBOL_GPL(cppc_set_enable); + +/** * cppc_set_perf - Set a CPU's performance controls. * @cpu: CPU for which to set performance controls. * @perf_ctrls: ptr to cppc_perf_ctrls. See cppc_acpi.h diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 4b8454f26ca1..a60ff5dfed3a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -874,11 +874,11 @@ static inline void acpi_sleep_syscore_init(void) {} #ifdef CONFIG_HIBERNATION static unsigned long s4_hardware_signature; static struct acpi_table_facs *facs; -static bool nosigcheck; +static int sigcheck = -1; /* Default behaviour is just to warn */ -void __init acpi_no_s4_hw_signature(void) +void __init acpi_check_s4_hw_signature(int check) { - nosigcheck = true; + sigcheck = check; } static int acpi_hibernation_begin(pm_message_t stage) @@ -1004,12 +1004,28 @@ static void acpi_sleep_hibernate_setup(void) hibernation_set_ops(old_suspend_ordering ? &acpi_hibernation_ops_old : &acpi_hibernation_ops); sleep_states[ACPI_STATE_S4] = 1; - if (nosigcheck) + if (!sigcheck) return; acpi_get_table(ACPI_SIG_FACS, 1, (struct acpi_table_header **)&facs); - if (facs) + if (facs) { + /* + * s4_hardware_signature is the local variable which is just + * used to warn about mismatch after we're attempting to + * resume (in violation of the ACPI specification.) + */ s4_hardware_signature = facs->hardware_signature; + + if (sigcheck > 0) { + /* + * If we're actually obeying the ACPI specification + * then the signature is written out as part of the + * swsusp header, in order to allow the boot kernel + * to gracefully decline to resume. + */ + swsusp_hardware_signature = facs->hardware_signature; + } + } } #else /* !CONFIG_HIBERNATION */ static inline void acpi_sleep_hibernate_setup(void) {} diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index ff16a36a908b..976154140f0b 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -22,6 +22,7 @@ static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; static bool scale_freq_invariant; +static DEFINE_PER_CPU(u32, freq_factor) = 1; static bool supports_scale_freq_counters(const struct cpumask *cpus) { @@ -155,15 +156,49 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) DEFINE_PER_CPU(unsigned long, thermal_pressure); -void topology_set_thermal_pressure(const struct cpumask *cpus, - unsigned long th_pressure) +/** + * topology_update_thermal_pressure() - Update thermal pressure for CPUs + * @cpus : The related CPUs for which capacity has been reduced + * @capped_freq : The maximum allowed frequency that CPUs can run at + * + * Update the value of thermal pressure for all @cpus in the mask. The + * cpumask should include all (online+offline) affected CPUs, to avoid + * operating on stale data when hot-plug is used for some CPUs. The + * @capped_freq reflects the currently allowed max CPUs frequency due to + * thermal capping. It might be also a boost frequency value, which is bigger + * than the internal 'freq_factor' max frequency. In such case the pressure + * value should simply be removed, since this is an indication that there is + * no thermal throttling. The @capped_freq must be provided in kHz. + */ +void topology_update_thermal_pressure(const struct cpumask *cpus, + unsigned long capped_freq) { + unsigned long max_capacity, capacity, th_pressure; + u32 max_freq; int cpu; + cpu = cpumask_first(cpus); + max_capacity = arch_scale_cpu_capacity(cpu); + max_freq = per_cpu(freq_factor, cpu); + + /* Convert to MHz scale which is used in 'freq_factor' */ + capped_freq /= 1000; + + /* + * Handle properly the boost frequencies, which should simply clean + * the thermal pressure value. + */ + if (max_freq <= capped_freq) + capacity = max_capacity; + else + capacity = mult_frac(max_capacity, capped_freq, max_freq); + + th_pressure = max_capacity - capacity; + for_each_cpu(cpu, cpus) WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); } -EXPORT_SYMBOL_GPL(topology_set_thermal_pressure); +EXPORT_SYMBOL_GPL(topology_update_thermal_pressure); static ssize_t cpu_capacity_show(struct device *dev, struct device_attribute *attr, @@ -217,7 +252,6 @@ static void update_topology_flags_workfn(struct work_struct *work) update_topology = 0; } -static DEFINE_PER_CPU(u32, freq_factor) = 1; static u32 *raw_capacity; static int free_raw_capacity(void) diff --git a/drivers/base/core.c b/drivers/base/core.c index fd034d742447..b191bd17de89 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -485,8 +485,7 @@ static void device_link_release_fn(struct work_struct *work) /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put(link->supplier); + pm_runtime_release_supplier(link, true); put_device(link->consumer); put_device(link->supplier); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index d504cd4ab3cb..2f3cce17219b 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -305,19 +305,40 @@ static int rpm_get_suppliers(struct device *dev) return 0; } +/** + * pm_runtime_release_supplier - Drop references to device link's supplier. + * @link: Target device link. + * @check_idle: Whether or not to check if the supplier device is idle. + * + * Drop all runtime PM references associated with @link to its supplier device + * and if @check_idle is set, check if that device is idle (and so it can be + * suspended). + */ +void pm_runtime_release_supplier(struct device_link *link, bool check_idle) +{ + struct device *supplier = link->supplier; + + /* + * The additional power.usage_count check is a safety net in case + * the rpm_active refcount becomes saturated, in which case + * refcount_dec_not_one() would return true forever, but it is not + * strictly necessary. + */ + while (refcount_dec_not_one(&link->rpm_active) && + atomic_read(&supplier->power.usage_count) > 0) + pm_runtime_put_noidle(supplier); + + if (check_idle) + pm_request_idle(supplier); +} + static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) { struct device_link *link; list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) { - - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put_noidle(link->supplier); - - if (try_to_suspend) - pm_request_idle(link->supplier); - } + device_links_read_lock_held()) + pm_runtime_release_supplier(link, try_to_suspend); } static void rpm_put_suppliers(struct device *dev) @@ -742,13 +763,15 @@ static int rpm_resume(struct device *dev, int rpmflags) trace_rpm_resume_rcuidle(dev, rpmflags); repeat: - if (dev->power.runtime_error) + if (dev->power.runtime_error) { retval = -EINVAL; - else if (dev->power.disable_depth == 1 && dev->power.is_suspended - && dev->power.runtime_status == RPM_ACTIVE) - retval = 1; - else if (dev->power.disable_depth > 0) - retval = -EACCES; + } else if (dev->power.disable_depth > 0) { + if (dev->power.runtime_status == RPM_ACTIVE && + dev->power.last_status == RPM_ACTIVE) + retval = 1; + else + retval = -EACCES; + } if (retval) goto out; @@ -1410,8 +1433,10 @@ void __pm_runtime_disable(struct device *dev, bool check_resume) /* Update time accounting before disabling PM-runtime. */ update_pm_runtime_accounting(dev); - if (!dev->power.disable_depth++) + if (!dev->power.disable_depth++) { __pm_runtime_barrier(dev); + dev->power.last_status = dev->power.runtime_status; + } out: spin_unlock_irq(&dev->power.lock); @@ -1428,23 +1453,23 @@ void pm_runtime_enable(struct device *dev) spin_lock_irqsave(&dev->power.lock, flags); - if (dev->power.disable_depth > 0) { - dev->power.disable_depth--; - - /* About to enable runtime pm, set accounting_timestamp to now */ - if (!dev->power.disable_depth) - dev->power.accounting_timestamp = ktime_get_mono_fast_ns(); - } else { + if (!dev->power.disable_depth) { dev_warn(dev, "Unbalanced %s!\n", __func__); + goto out; } - WARN(!dev->power.disable_depth && - dev->power.runtime_status == RPM_SUSPENDED && - !dev->power.ignore_children && - atomic_read(&dev->power.child_count) > 0, - "Enabling runtime PM for inactive device (%s) with active children\n", - dev_name(dev)); + if (--dev->power.disable_depth > 0) + goto out; + + dev->power.last_status = RPM_INVALID; + dev->power.accounting_timestamp = ktime_get_mono_fast_ns(); + + if (dev->power.runtime_status == RPM_SUSPENDED && + !dev->power.ignore_children && + atomic_read(&dev->power.child_count) > 0) + dev_warn(dev, "Enabling runtime PM for inactive device with active children\n"); +out: spin_unlock_irqrestore(&dev->power.lock, flags); } EXPORT_SYMBOL_GPL(pm_runtime_enable); @@ -1640,6 +1665,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend); void pm_runtime_init(struct device *dev) { dev->power.runtime_status = RPM_SUSPENDED; + dev->power.last_status = RPM_INVALID; dev->power.idle_notification = false; dev->power.disable_depth = 1; @@ -1722,8 +1748,6 @@ void pm_runtime_get_suppliers(struct device *dev) void pm_runtime_put_suppliers(struct device *dev) { struct device_link *link; - unsigned long flags; - bool put; int idx; idx = device_links_read_lock(); @@ -1731,11 +1755,17 @@ void pm_runtime_put_suppliers(struct device *dev) list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) if (link->supplier_preactivated) { + bool put; + link->supplier_preactivated = false; - spin_lock_irqsave(&dev->power.lock, flags); + + spin_lock_irq(&dev->power.lock); + put = pm_runtime_status_suspended(dev) && refcount_dec_not_one(&link->rpm_active); - spin_unlock_irqrestore(&dev->power.lock, flags); + + spin_unlock_irq(&dev->power.lock); + if (put) pm_runtime_put(link->supplier); } @@ -1772,9 +1802,7 @@ void pm_runtime_drop_link(struct device_link *link) return; pm_runtime_drop_link_count(link->consumer); - - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put(link->supplier); + pm_runtime_release_supplier(link, true); } static bool pm_runtime_need_not_resume(struct device *dev) diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 92701a18bdd9..55516043b656 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -34,6 +34,23 @@ config X86_PCC_CPUFREQ If in doubt, say N. +config X86_AMD_PSTATE + tristate "AMD Processor P-State driver" + depends on X86 && ACPI + select ACPI_PROCESSOR + select ACPI_CPPC_LIB if X86_64 + select CPU_FREQ_GOV_SCHEDUTIL if SMP + help + This driver adds a CPUFreq driver which utilizes a fine grain + processor performance frequency control range instead of legacy + performance levels. _CPC needs to be present in the ACPI tables + of the system. + + For details, take a look at: + <file:Documentation/admin-guide/pm/amd-pstate.rst>. + + If in doubt, say N. + config X86_ACPI_CPUFREQ tristate "ACPI Processor P-States driver" depends on ACPI_PROCESSOR diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 48ee5859030c..285de70af877 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -17,6 +17,10 @@ obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o +# Traces +CFLAGS_amd-pstate-trace.o := -I$(src) +amd_pstate-y := amd-pstate.o amd-pstate-trace.o + ################################################################################## # x86 drivers. # Link order matters. K8 is preferred to ACPI because of firmware bugs in early @@ -25,6 +29,7 @@ obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o # speedstep-* is preferred over p4-clockmod. obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +obj-$(CONFIG_X86_AMD_PSTATE) += amd_pstate.o obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o diff --git a/drivers/cpufreq/amd-pstate-trace.c b/drivers/cpufreq/amd-pstate-trace.c new file mode 100644 index 000000000000..891b696dcd69 --- /dev/null +++ b/drivers/cpufreq/amd-pstate-trace.c @@ -0,0 +1,2 @@ +#define CREATE_TRACE_POINTS +#include "amd-pstate-trace.h" diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h new file mode 100644 index 000000000000..647505957d4f --- /dev/null +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * amd-pstate-trace.h - AMD Processor P-state Frequency Driver Tracer + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Author: Huang Rui <ray.huang@amd.com> + */ + +#if !defined(_AMD_PSTATE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _AMD_PSTATE_TRACE_H + +#include <linux/cpufreq.h> +#include <linux/tracepoint.h> +#include <linux/trace_events.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM amd_cpu + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE amd-pstate-trace + +#define TPS(x) tracepoint_string(x) + +TRACE_EVENT(amd_pstate_perf, + + TP_PROTO(unsigned long min_perf, + unsigned long target_perf, + unsigned long capacity, + unsigned int cpu_id, + bool changed, + bool fast_switch + ), + + TP_ARGS(min_perf, + target_perf, + capacity, + cpu_id, + changed, + fast_switch + ), + + TP_STRUCT__entry( + __field(unsigned long, min_perf) + __field(unsigned long, target_perf) + __field(unsigned long, capacity) + __field(unsigned int, cpu_id) + __field(bool, changed) + __field(bool, fast_switch) + ), + + TP_fast_assign( + __entry->min_perf = min_perf; + __entry->target_perf = target_perf; + __entry->capacity = capacity; + __entry->cpu_id = cpu_id; + __entry->changed = changed; + __entry->fast_switch = fast_switch; + ), + + TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s", + (unsigned long)__entry->min_perf, + (unsigned long)__entry->target_perf, + (unsigned long)__entry->capacity, + (unsigned int)__entry->cpu_id, + (__entry->changed) ? "true" : "false", + (__entry->fast_switch) ? "true" : "false" + ) +); + +#endif /* _AMD_PSTATE_TRACE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . + +#include <trace/define_trace.h> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c new file mode 100644 index 000000000000..9ce75ed11f8e --- /dev/null +++ b/drivers/cpufreq/amd-pstate.c @@ -0,0 +1,645 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * amd-pstate.c - AMD Processor P-state Frequency Driver + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Author: Huang Rui <ray.huang@amd.com> + * + * AMD P-State introduces a new CPU performance scaling design for AMD + * processors using the ACPI Collaborative Performance and Power Control (CPPC) + * feature which works with the AMD SMU firmware providing a finer grained + * frequency control range. It is to replace the legacy ACPI P-States control, + * allows a flexible, low-latency interface for the Linux kernel to directly + * communicate the performance hints to hardware. + * + * AMD P-State is supported on recent AMD Zen base CPU series include some of + * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of AMD + * P-State supported system. And there are two types of hardware implementations + * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution. + * X86_FEATURE_CPPC CPU feature flag is used to distinguish the different types. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/sched.h> +#include <linux/cpufreq.h> +#include <linux/compiler.h> +#include <linux/dmi.h> +#include <linux/slab.h> +#include <linux/acpi.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/static_call.h> + +#include <acpi/processor.h> +#include <acpi/cppc_acpi.h> + +#include <asm/msr.h> +#include <asm/processor.h> +#include <asm/cpufeature.h> +#include <asm/cpu_device_id.h> +#include "amd-pstate-trace.h" + +#define AMD_PSTATE_TRANSITION_LATENCY 0x20000 +#define AMD_PSTATE_TRANSITION_DELAY 500 + +/* + * TODO: We need more time to fine tune processors with shared memory solution + * with community together. + * + * There are some performance drops on the CPU benchmarks which reports from + * Suse. We are co-working with them to fine tune the shared memory solution. So + * we disable it by default to go acpi-cpufreq on these processors and add a + * module parameter to be able to enable it manually for debugging. + */ +static bool shared_mem = false; +module_param(shared_mem, bool, 0444); +MODULE_PARM_DESC(shared_mem, + "enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)"); + +static struct cpufreq_driver amd_pstate_driver; + +/** + * struct amd_cpudata - private CPU data for AMD P-State + * @cpu: CPU number + * @req: constraint request to apply + * @cppc_req_cached: cached performance request hints + * @highest_perf: the maximum performance an individual processor may reach, + * assuming ideal conditions + * @nominal_perf: the maximum sustained performance level of the processor, + * assuming ideal operating conditions + * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power + * savings are achieved + * @lowest_perf: the absolute lowest performance level of the processor + * @max_freq: the frequency that mapped to highest_perf + * @min_freq: the frequency that mapped to lowest_perf + * @nominal_freq: the frequency that mapped to nominal_perf + * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf + * @boost_supported: check whether the Processor or SBIOS supports boost mode + * + * The amd_cpudata is key private data for each CPU thread in AMD P-State, and + * represents all the attributes and goals that AMD P-State requests at runtime. + */ +struct amd_cpudata { + int cpu; + + struct freq_qos_request req[2]; + u64 cppc_req_cached; + + u32 highest_perf; + u32 nominal_perf; + u32 lowest_nonlinear_perf; + u32 lowest_perf; + + u32 max_freq; + u32 min_freq; + u32 nominal_freq; + u32 lowest_nonlinear_freq; + + bool boost_supported; +}; + +static inline int pstate_enable(bool enable) +{ + return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); +} + +static int cppc_enable(bool enable) +{ + int cpu, ret = 0; + + for_each_present_cpu(cpu) { + ret = cppc_set_enable(cpu, enable); + if (ret) + return ret; + } + + return ret; +} + +DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable); + +static inline int amd_pstate_enable(bool enable) +{ + return static_call(amd_pstate_enable)(enable); +} + +static int pstate_init_perf(struct amd_cpudata *cpudata) +{ + u64 cap1; + + int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, + &cap1); + if (ret) + return ret; + + /* + * TODO: Introduce AMD specific power feature. + * + * CPPC entry doesn't indicate the highest performance in some ASICs. + */ + WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf()); + + WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); + WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); + WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); + + return 0; +} + +static int cppc_init_perf(struct amd_cpudata *cpudata) +{ + struct cppc_perf_caps cppc_perf; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf()); + + WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); + WRITE_ONCE(cpudata->lowest_nonlinear_perf, + cppc_perf.lowest_nonlinear_perf); + WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + + return 0; +} + +DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); + +static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) +{ + return static_call(amd_pstate_init_perf)(cpudata); +} + +static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, + u32 des_perf, u32 max_perf, bool fast_switch) +{ + if (fast_switch) + wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); + else + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, + READ_ONCE(cpudata->cppc_req_cached)); +} + +static void cppc_update_perf(struct amd_cpudata *cpudata, + u32 min_perf, u32 des_perf, + u32 max_perf, bool fast_switch) +{ + struct cppc_perf_ctrls perf_ctrls; + + perf_ctrls.max_perf = max_perf; + perf_ctrls.min_perf = min_perf; + perf_ctrls.desired_perf = des_perf; + + cppc_set_perf(cpudata->cpu, &perf_ctrls); +} + +DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); + +static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, + u32 min_perf, u32 des_perf, + u32 max_perf, bool fast_switch) +{ + static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, + max_perf, fast_switch); +} + +static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, + u32 des_perf, u32 max_perf, bool fast_switch) +{ + u64 prev = READ_ONCE(cpudata->cppc_req_cached); + u64 value = prev; + + value &= ~AMD_CPPC_MIN_PERF(~0L); + value |= AMD_CPPC_MIN_PERF(min_perf); + + value &= ~AMD_CPPC_DES_PERF(~0L); + value |= AMD_CPPC_DES_PERF(des_perf); + + value &= ~AMD_CPPC_MAX_PERF(~0L); + value |= AMD_CPPC_MAX_PERF(max_perf); + + trace_amd_pstate_perf(min_perf, des_perf, max_perf, + cpudata->cpu, (value != prev), fast_switch); + + if (value == prev) + return; + + WRITE_ONCE(cpudata->cppc_req_cached, value); + + amd_pstate_update_perf(cpudata, min_perf, des_perf, + max_perf, fast_switch); +} + +static int amd_pstate_verify(struct cpufreq_policy_data *policy) +{ + cpufreq_verify_within_cpu_limits(policy); + + return 0; +} + +static int amd_pstate_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + struct cpufreq_freqs freqs; + struct amd_cpudata *cpudata = policy->driver_data; + unsigned long max_perf, min_perf, des_perf, cap_perf; + + if (!cpudata->max_freq) + return -ENODEV; + + cap_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); + max_perf = cap_perf; + + freqs.old = policy->cur; + freqs.new = target_freq; + + des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf, + cpudata->max_freq); + + cpufreq_freq_transition_begin(policy, &freqs); + amd_pstate_update(cpudata, min_perf, des_perf, + max_perf, false); + cpufreq_freq_transition_end(policy, &freqs, false); + + return 0; +} + +static void amd_pstate_adjust_perf(unsigned int cpu, + unsigned long _min_perf, + unsigned long target_perf, + unsigned long capacity) +{ + unsigned long max_perf, min_perf, des_perf, + cap_perf, lowest_nonlinear_perf; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; + + cap_perf = READ_ONCE(cpudata->highest_perf); + lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); + + des_perf = cap_perf; + if (target_perf < capacity) + des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); + + min_perf = READ_ONCE(cpudata->highest_perf); + if (_min_perf < capacity) + min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); + + if (min_perf < lowest_nonlinear_perf) + min_perf = lowest_nonlinear_perf; + + max_perf = cap_perf; + if (max_perf < min_perf) + max_perf = min_perf; + + des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + + amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true); +} + +static int amd_get_min_freq(struct amd_cpudata *cpudata) +{ + struct cppc_perf_caps cppc_perf; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + /* Switch to khz */ + return cppc_perf.lowest_freq * 1000; +} + +static int amd_get_max_freq(struct amd_cpudata *cpudata) +{ + struct cppc_perf_caps cppc_perf; + u32 max_perf, max_freq, nominal_freq, nominal_perf; + u64 boost_ratio; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + nominal_freq = cppc_perf.nominal_freq; + nominal_perf = READ_ONCE(cpudata->nominal_perf); + max_perf = READ_ONCE(cpudata->highest_perf); + + boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, + nominal_perf); + + max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT; + + /* Switch to khz */ + return max_freq * 1000; +} + +static int amd_get_nominal_freq(struct amd_cpudata *cpudata) +{ + struct cppc_perf_caps cppc_perf; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + /* Switch to khz */ + return cppc_perf.nominal_freq * 1000; +} + +static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) +{ + struct cppc_perf_caps cppc_perf; + u32 lowest_nonlinear_freq, lowest_nonlinear_perf, + nominal_freq, nominal_perf; + u64 lowest_nonlinear_ratio; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + nominal_freq = cppc_perf.nominal_freq; + nominal_perf = READ_ONCE(cpudata->nominal_perf); + + lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; + + lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, + nominal_perf); + + lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT; + + /* Switch to khz */ + return lowest_nonlinear_freq * 1000; +} + +static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) +{ + struct amd_cpudata *cpudata = policy->driver_data; + int ret; + + if (!cpudata->boost_supported) { + pr_err("Boost mode is not supported by this processor or SBIOS\n"); + return -EINVAL; + } + + if (state) + policy->cpuinfo.max_freq = cpudata->max_freq; + else + policy->cpuinfo.max_freq = cpudata->nominal_freq; + + policy->max = policy->cpuinfo.max_freq; + + ret = freq_qos_update_request(&cpudata->req[1], + policy->cpuinfo.max_freq); + if (ret < 0) + return ret; + + return 0; +} + +static void amd_pstate_boost_init(struct amd_cpudata *cpudata) +{ + u32 highest_perf, nominal_perf; + + highest_perf = READ_ONCE(cpudata->highest_perf); + nominal_perf = READ_ONCE(cpudata->nominal_perf); + + if (highest_perf <= nominal_perf) + return; + + cpudata->boost_supported = true; + amd_pstate_driver.boost_enabled = true; +} + +static int amd_pstate_cpu_init(struct cpufreq_policy *policy) +{ + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; + struct device *dev; + struct amd_cpudata *cpudata; + + dev = get_cpu_device(policy->cpu); + if (!dev) + return -ENODEV; + + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); + if (!cpudata) + return -ENOMEM; + + cpudata->cpu = policy->cpu; + + ret = amd_pstate_init_perf(cpudata); + if (ret) + goto free_cpudata1; + + min_freq = amd_get_min_freq(cpudata); + max_freq = amd_get_max_freq(cpudata); + nominal_freq = amd_get_nominal_freq(cpudata); + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); + + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { + dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", + min_freq, max_freq); + ret = -EINVAL; + goto free_cpudata1; + } + + policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; + policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; + + policy->min = min_freq; + policy->max = max_freq; + + policy->cpuinfo.min_freq = min_freq; + policy->cpuinfo.max_freq = max_freq; + + /* It will be updated by governor */ + policy->cur = policy->cpuinfo.min_freq; + + if (boot_cpu_has(X86_FEATURE_CPPC)) + policy->fast_switch_possible = true; + + ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], + FREQ_QOS_MIN, policy->cpuinfo.min_freq); + if (ret < 0) { + dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); + goto free_cpudata1; + } + + ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1], + FREQ_QOS_MAX, policy->cpuinfo.max_freq); + if (ret < 0) { + dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret); + goto free_cpudata2; + } + + /* Initial processor data capability frequencies */ + cpudata->max_freq = max_freq; + cpudata->min_freq = min_freq; + cpudata->nominal_freq = nominal_freq; + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; + + policy->driver_data = cpudata; + + amd_pstate_boost_init(cpudata); + + return 0; + +free_cpudata2: + freq_qos_remove_request(&cpudata->req[0]); +free_cpudata1: + kfree(cpudata); + return ret; +} + +static int amd_pstate_cpu_exit(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata; + + cpudata = policy->driver_data; + + freq_qos_remove_request(&cpudata->req[1]); + freq_qos_remove_request(&cpudata->req[0]); + kfree(cpudata); + + return 0; +} + +/* Sysfs attributes */ + +/* + * This frequency is to indicate the maximum hardware frequency. + * If boost is not active but supported, the frequency will be larger than the + * one in cpuinfo. + */ +static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, + char *buf) +{ + int max_freq; + struct amd_cpudata *cpudata; + + cpudata = policy->driver_data; + + max_freq = amd_get_max_freq(cpudata); + if (max_freq < 0) + return max_freq; + + return sprintf(&buf[0], "%u\n", max_freq); +} + +static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, + char *buf) +{ + int freq; + struct amd_cpudata *cpudata; + + cpudata = policy->driver_data; + + freq = amd_get_lowest_nonlinear_freq(cpudata); + if (freq < 0) + return freq; + + return sprintf(&buf[0], "%u\n", freq); +} + +/* + * In some of ASICs, the highest_perf is not the one in the _CPC table, so we + * need to expose it to sysfs. + */ +static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, + char *buf) +{ + u32 perf; + struct amd_cpudata *cpudata = policy->driver_data; + + perf = READ_ONCE(cpudata->highest_perf); + + return sprintf(&buf[0], "%u\n", perf); +} + +cpufreq_freq_attr_ro(amd_pstate_max_freq); +cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); + +cpufreq_freq_attr_ro(amd_pstate_highest_perf); + +static struct freq_attr *amd_pstate_attr[] = { + &amd_pstate_max_freq, + &amd_pstate_lowest_nonlinear_freq, + &amd_pstate_highest_perf, + NULL, +}; + +static struct cpufreq_driver amd_pstate_driver = { + .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, + .verify = amd_pstate_verify, + .target = amd_pstate_target, + .init = amd_pstate_cpu_init, + .exit = amd_pstate_cpu_exit, + .set_boost = amd_pstate_set_boost, + .name = "amd-pstate", + .attr = amd_pstate_attr, +}; + +static int __init amd_pstate_init(void) +{ + int ret; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return -ENODEV; + + if (!acpi_cpc_valid()) { + pr_debug("the _CPC object is not present in SBIOS\n"); + return -ENODEV; + } + + /* don't keep reloading if cpufreq_driver exists */ + if (cpufreq_get_current_driver()) + return -EEXIST; + + /* capability check */ + if (boot_cpu_has(X86_FEATURE_CPPC)) { + pr_debug("AMD CPPC MSR based functionality is supported\n"); + amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; + } else if (shared_mem) { + static_call_update(amd_pstate_enable, cppc_enable); + static_call_update(amd_pstate_init_perf, cppc_init_perf); + static_call_update(amd_pstate_update_perf, cppc_update_perf); + } else { + pr_info("This processor supports shared memory solution, you can enable it with amd_pstate.shared_mem=1\n"); + return -ENODEV; + } + + /* enable amd pstate feature */ + ret = amd_pstate_enable(true); + if (ret) { + pr_err("failed to enable amd-pstate with return %d\n", ret); + return ret; + } + + ret = cpufreq_register_driver(&amd_pstate_driver); + if (ret) + pr_err("failed to register amd_pstate_driver with return %d\n", + ret); + + return ret; +} + +static void __exit amd_pstate_exit(void) +{ + cpufreq_unregister_driver(&amd_pstate_driver); + + amd_pstate_enable(false); +} + +module_init(amd_pstate_init); +module_exit(amd_pstate_exit); + +MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); +MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 096c3848fa41..b8d95536ee22 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -924,7 +924,7 @@ cpufreq_freq_attr_rw(scaling_max_freq); cpufreq_freq_attr_rw(scaling_governor); cpufreq_freq_attr_rw(scaling_setspeed); -static struct attribute *default_attrs[] = { +static struct attribute *cpufreq_attrs[] = { &cpuinfo_min_freq.attr, &cpuinfo_max_freq.attr, &cpuinfo_transition_latency.attr, @@ -938,6 +938,7 @@ static struct attribute *default_attrs[] = { &scaling_setspeed.attr, NULL }; +ATTRIBUTE_GROUPS(cpufreq); #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) #define to_attr(a) container_of(a, struct freq_attr, attr) @@ -1000,7 +1001,7 @@ static const struct sysfs_ops sysfs_ops = { static struct kobj_type ktype_cpufreq = { .sysfs_ops = &sysfs_ops, - .default_attrs = default_attrs, + .default_groups = cpufreq_groups, .release = cpufreq_sysfs_release, }; @@ -1403,7 +1404,7 @@ static int cpufreq_online(unsigned int cpu) ret = freq_qos_add_request(&policy->constraints, policy->min_freq_req, FREQ_QOS_MIN, - policy->min); + FREQ_QOS_MIN_DEFAULT_VALUE); if (ret < 0) { /* * So we don't call freq_qos_remove_request() for an @@ -1423,7 +1424,7 @@ static int cpufreq_online(unsigned int cpu) ret = freq_qos_add_request(&policy->constraints, policy->max_freq_req, FREQ_QOS_MAX, - policy->max); + FREQ_QOS_MAX_DEFAULT_VALUE); if (ret < 0) { policy->max_freq_req = NULL; goto out_destroy_policy; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 0879ec3c170c..08515f7e515f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -257,7 +257,7 @@ gov_attr_rw(ignore_nice_load); gov_attr_rw(down_threshold); gov_attr_rw(freq_step); -static struct attribute *cs_attributes[] = { +static struct attribute *cs_attrs[] = { &sampling_rate.attr, &sampling_down_factor.attr, &up_threshold.attr, @@ -266,6 +266,7 @@ static struct attribute *cs_attributes[] = { &freq_step.attr, NULL }; +ATTRIBUTE_GROUPS(cs); /************************** sysfs end ************************/ @@ -315,7 +316,7 @@ static void cs_start(struct cpufreq_policy *policy) static struct dbs_governor cs_governor = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), - .kobj_type = { .default_attrs = cs_attributes }, + .kobj_type = { .default_groups = cs_groups }, .gov_dbs_update = cs_dbs_update, .alloc = cs_alloc, .free = cs_free, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 3b8f924771b4..6a41ea4729b8 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -328,7 +328,7 @@ gov_attr_rw(sampling_down_factor); gov_attr_rw(ignore_nice_load); gov_attr_rw(powersave_bias); -static struct attribute *od_attributes[] = { +static struct attribute *od_attrs[] = { &sampling_rate.attr, &up_threshold.attr, &sampling_down_factor.attr, @@ -337,6 +337,7 @@ static struct attribute *od_attributes[] = { &io_is_busy.attr, NULL }; +ATTRIBUTE_GROUPS(od); /************************** sysfs end ************************/ @@ -401,7 +402,7 @@ static struct od_ops od_ops = { static struct dbs_governor od_dbs_gov = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"), - .kobj_type = { .default_attrs = od_attributes }, + .kobj_type = { .default_groups = od_groups }, .gov_dbs_update = od_dbs_update, .alloc = od_alloc, .free = od_free, diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index dec2a5649ac1..bc7f7e6759bd 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -664,19 +664,29 @@ static int intel_pstate_set_epb(int cpu, s16 pref) * 3 balance_power * 4 power */ + +enum energy_perf_value_index { + EPP_INDEX_DEFAULT = 0, + EPP_INDEX_PERFORMANCE, + EPP_INDEX_BALANCE_PERFORMANCE, + EPP_INDEX_BALANCE_POWERSAVE, + EPP_INDEX_POWERSAVE, +}; + static const char * const energy_perf_strings[] = { - "default", - "performance", - "balance_performance", - "balance_power", - "power", + [EPP_INDEX_DEFAULT] = "default", + [EPP_INDEX_PERFORMANCE] = "performance", + [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance", + [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power", + [EPP_INDEX_POWERSAVE] = "power", NULL }; -static const unsigned int epp_values[] = { - HWP_EPP_PERFORMANCE, - HWP_EPP_BALANCE_PERFORMANCE, - HWP_EPP_BALANCE_POWERSAVE, - HWP_EPP_POWERSAVE +static unsigned int epp_values[] = { + [EPP_INDEX_DEFAULT] = 0, /* Unused index */ + [EPP_INDEX_PERFORMANCE] = HWP_EPP_PERFORMANCE, + [EPP_INDEX_BALANCE_PERFORMANCE] = HWP_EPP_BALANCE_PERFORMANCE, + [EPP_INDEX_BALANCE_POWERSAVE] = HWP_EPP_BALANCE_POWERSAVE, + [EPP_INDEX_POWERSAVE] = HWP_EPP_POWERSAVE, }; static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw_epp) @@ -690,14 +700,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw return epp; if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { - if (epp == HWP_EPP_PERFORMANCE) - return 1; - if (epp == HWP_EPP_BALANCE_PERFORMANCE) - return 2; - if (epp == HWP_EPP_BALANCE_POWERSAVE) - return 3; - if (epp == HWP_EPP_POWERSAVE) - return 4; + if (epp == epp_values[EPP_INDEX_PERFORMANCE]) + return EPP_INDEX_PERFORMANCE; + if (epp == epp_values[EPP_INDEX_BALANCE_PERFORMANCE]) + return EPP_INDEX_BALANCE_PERFORMANCE; + if (epp == epp_values[EPP_INDEX_BALANCE_POWERSAVE]) + return EPP_INDEX_BALANCE_POWERSAVE; + if (epp == epp_values[EPP_INDEX_POWERSAVE]) + return EPP_INDEX_POWERSAVE; *raw_epp = epp; return 0; } else if (boot_cpu_has(X86_FEATURE_EPB)) { @@ -757,7 +767,7 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, if (use_raw) epp = raw_epp; else if (epp == -EINVAL) - epp = epp_values[pref_index - 1]; + epp = epp_values[pref_index]; /* * To avoid confusion, refuse to set EPP to any values different @@ -843,7 +853,7 @@ static ssize_t store_energy_performance_preference( * upfront. */ if (!raw) - epp = ret ? epp_values[ret - 1] : cpu->epp_default; + epp = ret ? epp_values[ret] : cpu->epp_default; if (cpu->epp_cached != epp) { int err; @@ -1124,19 +1134,22 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } +static void __intel_pstate_update_max_freq(struct cpudata *cpudata, + struct cpufreq_policy *policy) +{ + policy->cpuinfo.max_freq = global.turbo_disabled_mf ? + cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; + refresh_frequency_limits(policy); +} + static void intel_pstate_update_max_freq(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - struct cpudata *cpudata; if (!policy) return; - cpudata = all_cpu_data[cpu]; - policy->cpuinfo.max_freq = global.turbo_disabled_mf ? - cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; - - refresh_frequency_limits(policy); + __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); cpufreq_cpu_release(policy); } @@ -1584,8 +1597,15 @@ static void intel_pstate_notify_work(struct work_struct *work) { struct cpudata *cpudata = container_of(to_delayed_work(work), struct cpudata, hwp_notify_work); + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu); + + if (policy) { + intel_pstate_get_hwp_cap(cpudata); + __intel_pstate_update_max_freq(cpudata, policy); + + cpufreq_cpu_release(policy); + } - cpufreq_update_policy(cpudata->cpu); wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); } @@ -1679,10 +1699,18 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); - if (cpudata->epp_default == -EINVAL) - cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); intel_pstate_enable_hwp_interrupt(cpudata); + + if (cpudata->epp_default >= 0) + return; + + if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) { + cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); + } else { + cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE]; + intel_pstate_set_epp(cpudata, cpudata->epp_default); + } } static int atom_get_min_pstate(void) @@ -2486,18 +2514,14 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, * HWP needs some special consideration, because HWP_REQUEST uses * abstract values to represent performance rather than pure ratios. */ - if (hwp_active) { - intel_pstate_get_hwp_cap(cpu); + if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) { + int scaling = cpu->pstate.scaling; + int freq; - if (cpu->pstate.scaling != perf_ctl_scaling) { - int scaling = cpu->pstate.scaling; - int freq; - - freq = max_policy_perf * perf_ctl_scaling; - max_policy_perf = DIV_ROUND_UP(freq, scaling); - freq = min_policy_perf * perf_ctl_scaling; - min_policy_perf = DIV_ROUND_UP(freq, scaling); - } + freq = max_policy_perf * perf_ctl_scaling; + max_policy_perf = DIV_ROUND_UP(freq, scaling); + freq = min_policy_perf * perf_ctl_scaling; + min_policy_perf = DIV_ROUND_UP(freq, scaling); } pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n", @@ -3349,6 +3373,16 @@ static bool intel_pstate_hwp_is_enabled(void) return !!(value & 0x1); } +static const struct x86_cpu_id intel_epp_balance_perf[] = { + /* + * Set EPP value as 102, this is the max suggested EPP + * which can result in one core turbo frequency for + * AlderLake Mobile CPUs. + */ + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102), + {} +}; + static int __init intel_pstate_init(void) { static struct cpudata **_all_cpu_data; @@ -3438,6 +3472,13 @@ hwp_cpu_matched: intel_pstate_sysfs_expose_params(); + if (hwp_active) { + const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf); + + if (id) + epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data; + } + mutex_lock(&intel_pstate_driver_lock); rc = intel_pstate_register_driver(default_driver); mutex_unlock(&intel_pstate_driver_lock); diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 8ddbd0c5ce37..0a94c56ddad2 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -36,6 +36,8 @@ enum { struct mtk_cpufreq_data { struct cpufreq_frequency_table *table; void __iomem *reg_bases[REG_ARRAY_SIZE]; + struct resource *res; + void __iomem *base; int nr_opp; }; @@ -156,6 +158,7 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, { struct mtk_cpufreq_data *data; struct device *dev = &pdev->dev; + struct resource *res; void __iomem *base; int ret, i; int index; @@ -170,9 +173,26 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, if (index < 0) return index; - base = devm_platform_ioremap_resource(pdev, index); - if (IS_ERR(base)) - return PTR_ERR(base); + res = platform_get_resource(pdev, IORESOURCE_MEM, index); + if (!res) { + dev_err(dev, "failed to get mem resource %d\n", index); + return -ENODEV; + } + + if (!request_mem_region(res->start, resource_size(res), res->name)) { + dev_err(dev, "failed to request resource %pR\n", res); + return -EBUSY; + } + + base = ioremap(res->start, resource_size(res)); + if (!base) { + dev_err(dev, "failed to map resource %pR\n", res); + ret = -ENOMEM; + goto release_region; + } + + data->base = base; + data->res = res; for (i = REG_FREQ_LUT_TABLE; i < REG_ARRAY_SIZE; i++) data->reg_bases[i] = base + offsets[i]; @@ -187,6 +207,9 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, policy->driver_data = data; return 0; +release_region: + release_mem_region(res->start, resource_size(res)); + return ret; } static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) @@ -233,9 +256,13 @@ static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) static int mtk_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) { struct mtk_cpufreq_data *data = policy->driver_data; + struct resource *res = data->res; + void __iomem *base = data->base; /* HW should be in paused state now */ writel_relaxed(0x0, data->reg_bases[REG_FREQ_ENABLE]); + iounmap(base); + release_mem_region(res->start, resource_size(res)); return 0; } diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index a2be0df7e174..05f3d7876e44 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -46,6 +46,7 @@ struct qcom_cpufreq_data { */ struct mutex throttle_lock; int throttle_irq; + char irq_name[15]; bool cancel_throttle; struct delayed_work throttle_work; struct cpufreq_policy *policy; @@ -275,10 +276,10 @@ static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) { - unsigned long max_capacity, capacity, freq_hz, throttled_freq; struct cpufreq_policy *policy = data->policy; int cpu = cpumask_first(policy->cpus); struct device *dev = get_cpu_device(cpu); + unsigned long freq_hz, throttled_freq; struct dev_pm_opp *opp; unsigned int freq; @@ -295,16 +296,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) throttled_freq = freq_hz / HZ_PER_KHZ; - /* Update thermal pressure */ - - max_capacity = arch_scale_cpu_capacity(cpu); - capacity = mult_frac(max_capacity, throttled_freq, policy->cpuinfo.max_freq); - - /* Don't pass boost capacity to scheduler */ - if (capacity > max_capacity) - capacity = max_capacity; - - arch_set_thermal_pressure(policy->cpus, max_capacity - capacity); + /* Update thermal pressure (the boost frequencies are accepted) */ + arch_update_thermal_pressure(policy->related_cpus, throttled_freq); /* * In the unlikely case policy is unregistered do not enable @@ -342,9 +335,9 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data) /* Disable interrupt and enable polling */ disable_irq_nosync(c_data->throttle_irq); - qcom_lmh_dcvs_notify(c_data); + schedule_delayed_work(&c_data->throttle_work, 0); - return 0; + return IRQ_HANDLED; } static const struct qcom_cpufreq_soc_data qcom_soc_data = { @@ -375,16 +368,17 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) { struct qcom_cpufreq_data *data = policy->driver_data; struct platform_device *pdev = cpufreq_get_driver_data(); - char irq_name[15]; int ret; /* * Look for LMh interrupt. If no interrupt line is specified / * if there is an error, allow cpufreq to be enabled as usual. */ - data->throttle_irq = platform_get_irq(pdev, index); - if (data->throttle_irq <= 0) - return data->throttle_irq == -EPROBE_DEFER ? -EPROBE_DEFER : 0; + data->throttle_irq = platform_get_irq_optional(pdev, index); + if (data->throttle_irq == -ENXIO) + return 0; + if (data->throttle_irq < 0) + return data->throttle_irq; data->cancel_throttle = false; data->policy = policy; @@ -392,14 +386,19 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) mutex_init(&data->throttle_lock); INIT_DEFERRABLE_WORK(&data->throttle_work, qcom_lmh_dcvs_poll); - snprintf(irq_name, sizeof(irq_name), "dcvsh-irq-%u", policy->cpu); + snprintf(data->irq_name, sizeof(data->irq_name), "dcvsh-irq-%u", policy->cpu); ret = request_threaded_irq(data->throttle_irq, NULL, qcom_lmh_dcvs_handle_irq, - IRQF_ONESHOT, irq_name, data); + IRQF_ONESHOT, data->irq_name, data); if (ret) { - dev_err(&pdev->dev, "Error registering %s: %d\n", irq_name, ret); + dev_err(&pdev->dev, "Error registering %s: %d\n", data->irq_name, ret); return 0; } + ret = irq_set_affinity_hint(data->throttle_irq, policy->cpus); + if (ret) + dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n", + data->irq_name, data->throttle_irq); + return 0; } diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 2e5670446991..c4922684f305 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -34,7 +34,7 @@ * 1) Energy break even point * 2) Performance impact * 3) Latency tolerance (from pmqos infrastructure) - * These these three factors are treated independently. + * These three factors are treated independently. * * Energy break even point * ----------------------- diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 469e18547d06..2b496a53cbca 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -335,6 +335,7 @@ static struct attribute *cpuidle_state_default_attrs[] = { &attr_default_status.attr, NULL }; +ATTRIBUTE_GROUPS(cpuidle_state_default); struct cpuidle_state_kobj { struct cpuidle_state *state; @@ -448,7 +449,7 @@ static void cpuidle_state_sysfs_release(struct kobject *kobj) static struct kobj_type ktype_state_cpuidle = { .sysfs_ops = &cpuidle_state_sysfs_ops, - .default_attrs = cpuidle_state_default_attrs, + .default_groups = cpuidle_state_default_groups, .release = cpuidle_state_sysfs_release, }; @@ -505,7 +506,7 @@ error_state: } /** - * cpuidle_remove_driver_sysfs - removes the cpuidle states sysfs attributes + * cpuidle_remove_state_sysfs - removes the cpuidle states sysfs attributes * @device: the target device */ static void cpuidle_remove_state_sysfs(struct cpuidle_device *device) @@ -591,10 +592,11 @@ static struct attribute *cpuidle_driver_default_attrs[] = { &attr_driver_name.attr, NULL }; +ATTRIBUTE_GROUPS(cpuidle_driver_default); static struct kobj_type ktype_driver_cpuidle = { .sysfs_ops = &cpuidle_driver_sysfs_ops, - .default_attrs = cpuidle_driver_default_attrs, + .default_groups = cpuidle_driver_default_groups, .release = cpuidle_driver_sysfs_release, }; diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index e87d01c0b76a..87eb2b837e68 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -132,6 +132,15 @@ config ARM_RK3399_DMC_DEVFREQ It sets the frequency for the memory controller and reads the usage counts from hardware. +config ARM_SUN8I_A33_MBUS_DEVFREQ + tristate "sun8i/sun50i MBUS DEVFREQ Driver" + depends on ARCH_SUNXI || COMPILE_TEST + depends on COMMON_CLK + select DEVFREQ_GOV_SIMPLE_ONDEMAND + help + This adds the DEVFREQ driver for the MBUS controller in some + Allwinner sun8i (A33 through H3) and sun50i (A64 and H5) SoCs. + source "drivers/devfreq/event/Kconfig" endif # PM_DEVFREQ diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile index a16333ea7034..0b6be92a25d9 100644 --- a/drivers/devfreq/Makefile +++ b/drivers/devfreq/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o obj-$(CONFIG_ARM_IMX_BUS_DEVFREQ) += imx-bus.o obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ) += imx8m-ddrc.o obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o +obj-$(CONFIG_ARM_SUN8I_A33_MBUS_DEVFREQ) += sun8i-a33-mbus.o obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra30-devfreq.o # DEVFREQ Event Drivers diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 7906220d025c..a525a609dfc6 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -382,8 +382,8 @@ static int devfreq_set_target(struct devfreq *devfreq, unsigned long new_freq, devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE); if (devfreq_update_status(devfreq, new_freq)) - dev_err(&devfreq->dev, - "Couldn't update frequency transition information.\n"); + dev_warn(&devfreq->dev, + "Couldn't update frequency transition information.\n"); devfreq->previous_freq = new_freq; diff --git a/drivers/devfreq/sun8i-a33-mbus.c b/drivers/devfreq/sun8i-a33-mbus.c new file mode 100644 index 000000000000..13d32213139f --- /dev/null +++ b/drivers/devfreq/sun8i-a33-mbus.c @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Copyright (C) 2020-2021 Samuel Holland <samuel@sholland.org> +// + +#include <linux/clk.h> +#include <linux/devfreq.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/property.h> + +#define MBUS_CR 0x0000 +#define MBUS_CR_GET_DRAM_TYPE(x) (((x) >> 16) & 0x7) +#define MBUS_CR_DRAM_TYPE_DDR2 2 +#define MBUS_CR_DRAM_TYPE_DDR3 3 +#define MBUS_CR_DRAM_TYPE_DDR4 4 +#define MBUS_CR_DRAM_TYPE_LPDDR2 6 +#define MBUS_CR_DRAM_TYPE_LPDDR3 7 + +#define MBUS_TMR 0x000c +#define MBUS_TMR_PERIOD(x) ((x) - 1) + +#define MBUS_PMU_CFG 0x009c +#define MBUS_PMU_CFG_PERIOD(x) (((x) - 1) << 16) +#define MBUS_PMU_CFG_UNIT (0x3 << 1) +#define MBUS_PMU_CFG_UNIT_B (0x0 << 1) +#define MBUS_PMU_CFG_UNIT_KB (0x1 << 1) +#define MBUS_PMU_CFG_UNIT_MB (0x2 << 1) +#define MBUS_PMU_CFG_ENABLE (0x1 << 0) + +#define MBUS_PMU_BWCR(n) (0x00a0 + (0x04 * (n))) + +#define MBUS_TOTAL_BWCR MBUS_PMU_BWCR(5) +#define MBUS_TOTAL_BWCR_H616 MBUS_PMU_BWCR(13) + +#define MBUS_MDFSCR 0x0100 +#define MBUS_MDFSCR_BUFFER_TIMING (0x1 << 15) +#define MBUS_MDFSCR_PAD_HOLD (0x1 << 13) +#define MBUS_MDFSCR_BYPASS (0x1 << 4) +#define MBUS_MDFSCR_MODE (0x1 << 1) +#define MBUS_MDFSCR_MODE_DFS (0x0 << 1) +#define MBUS_MDFSCR_MODE_CFS (0x1 << 1) +#define MBUS_MDFSCR_START (0x1 << 0) + +#define MBUS_MDFSMRMR 0x0108 + +#define DRAM_PWRCTL 0x0004 +#define DRAM_PWRCTL_SELFREF_EN (0x1 << 0) + +#define DRAM_RFSHTMG 0x0090 +#define DRAM_RFSHTMG_TREFI(x) ((x) << 16) +#define DRAM_RFSHTMG_TRFC(x) ((x) << 0) + +#define DRAM_VTFCR 0x00b8 +#define DRAM_VTFCR_VTF_ENABLE (0x3 << 8) + +#define DRAM_ODTMAP 0x0120 + +#define DRAM_DX_MAX 4 + +#define DRAM_DXnGCR0(n) (0x0344 + 0x80 * (n)) +#define DRAM_DXnGCR0_DXODT (0x3 << 4) +#define DRAM_DXnGCR0_DXODT_DYNAMIC (0x0 << 4) +#define DRAM_DXnGCR0_DXODT_ENABLED (0x1 << 4) +#define DRAM_DXnGCR0_DXODT_DISABLED (0x2 << 4) +#define DRAM_DXnGCR0_DXEN (0x1 << 0) + +struct sun8i_a33_mbus_variant { + u32 min_dram_divider; + u32 max_dram_divider; + u32 odt_freq_mhz; +}; + +struct sun8i_a33_mbus { + const struct sun8i_a33_mbus_variant *variant; + void __iomem *reg_dram; + void __iomem *reg_mbus; + struct clk *clk_bus; + struct clk *clk_dram; + struct clk *clk_mbus; + struct devfreq *devfreq_dram; + struct devfreq_simple_ondemand_data gov_data; + struct devfreq_dev_profile profile; + u32 data_width; + u32 nominal_bw; + u32 odtmap; + u32 tREFI_ns; + u32 tRFC_ns; + unsigned long freq_table[]; +}; + +/* + * The unit for this value is (MBUS clock cycles / MBUS_TMR_PERIOD). When + * MBUS_TMR_PERIOD is programmed to match the MBUS clock frequency in MHz, as + * it is during DRAM init and during probe, the resulting unit is microseconds. + */ +static int pmu_period = 50000; +module_param(pmu_period, int, 0644); +MODULE_PARM_DESC(pmu_period, "Bandwidth measurement period (microseconds)"); + +static u32 sun8i_a33_mbus_get_peak_bw(struct sun8i_a33_mbus *priv) +{ + /* Returns the peak transfer (in KiB) during any single PMU period. */ + return readl_relaxed(priv->reg_mbus + MBUS_TOTAL_BWCR); +} + +static void sun8i_a33_mbus_restart_pmu_counters(struct sun8i_a33_mbus *priv) +{ + u32 pmu_cfg = MBUS_PMU_CFG_PERIOD(pmu_period) | MBUS_PMU_CFG_UNIT_KB; + + /* All PMU counters are cleared on a disable->enable transition. */ + writel_relaxed(pmu_cfg, + priv->reg_mbus + MBUS_PMU_CFG); + writel_relaxed(pmu_cfg | MBUS_PMU_CFG_ENABLE, + priv->reg_mbus + MBUS_PMU_CFG); + +} + +static void sun8i_a33_mbus_update_nominal_bw(struct sun8i_a33_mbus *priv, + u32 ddr_freq_mhz) +{ + /* + * Nominal bandwidth (KiB per PMU period): + * + * DDR transfers microseconds KiB + * ------------- * ------------ * -------- + * microsecond PMU period transfer + */ + priv->nominal_bw = ddr_freq_mhz * pmu_period * priv->data_width / 1024; +} + +static int sun8i_a33_mbus_set_dram_freq(struct sun8i_a33_mbus *priv, + unsigned long freq) +{ + u32 ddr_freq_mhz = freq / USEC_PER_SEC; /* DDR */ + u32 dram_freq_mhz = ddr_freq_mhz / 2; /* SDR */ + u32 mctl_freq_mhz = dram_freq_mhz / 2; /* HDR */ + u32 dxodt, mdfscr, pwrctl, vtfcr; + u32 i, tREFI_32ck, tRFC_ck; + int ret; + + /* The rate change is not effective until the MDFS process runs. */ + ret = clk_set_rate(priv->clk_dram, freq); + if (ret) + return ret; + + /* Disable automatic self-refesh and VTF before starting MDFS. */ + pwrctl = readl_relaxed(priv->reg_dram + DRAM_PWRCTL) & + ~DRAM_PWRCTL_SELFREF_EN; + writel_relaxed(pwrctl, priv->reg_dram + DRAM_PWRCTL); + vtfcr = readl_relaxed(priv->reg_dram + DRAM_VTFCR); + writel_relaxed(vtfcr & ~DRAM_VTFCR_VTF_ENABLE, + priv->reg_dram + DRAM_VTFCR); + + /* Set up MDFS and enable double buffering for timing registers. */ + mdfscr = MBUS_MDFSCR_MODE_DFS | + MBUS_MDFSCR_BYPASS | + MBUS_MDFSCR_PAD_HOLD | + MBUS_MDFSCR_BUFFER_TIMING; + writel(mdfscr, priv->reg_mbus + MBUS_MDFSCR); + + /* Update the buffered copy of RFSHTMG. */ + tREFI_32ck = priv->tREFI_ns * mctl_freq_mhz / 1000 / 32; + tRFC_ck = DIV_ROUND_UP(priv->tRFC_ns * mctl_freq_mhz, 1000); + writel(DRAM_RFSHTMG_TREFI(tREFI_32ck) | DRAM_RFSHTMG_TRFC(tRFC_ck), + priv->reg_dram + DRAM_RFSHTMG); + + /* Enable ODT if needed, or disable it to save power. */ + if (priv->odtmap && dram_freq_mhz > priv->variant->odt_freq_mhz) { + dxodt = DRAM_DXnGCR0_DXODT_DYNAMIC; + writel(priv->odtmap, priv->reg_dram + DRAM_ODTMAP); + } else { + dxodt = DRAM_DXnGCR0_DXODT_DISABLED; + writel(0, priv->reg_dram + DRAM_ODTMAP); + } + for (i = 0; i < DRAM_DX_MAX; ++i) { + void __iomem *reg = priv->reg_dram + DRAM_DXnGCR0(i); + + writel((readl(reg) & ~DRAM_DXnGCR0_DXODT) | dxodt, reg); + } + + dev_dbg(priv->devfreq_dram->dev.parent, + "Setting DRAM to %u MHz, tREFI=%u, tRFC=%u, ODT=%s\n", + dram_freq_mhz, tREFI_32ck, tRFC_ck, + dxodt == DRAM_DXnGCR0_DXODT_DYNAMIC ? "dynamic" : "disabled"); + + /* Trigger hardware MDFS. */ + writel(mdfscr | MBUS_MDFSCR_START, priv->reg_mbus + MBUS_MDFSCR); + ret = readl_poll_timeout_atomic(priv->reg_mbus + MBUS_MDFSCR, mdfscr, + !(mdfscr & MBUS_MDFSCR_START), 10, 1000); + if (ret) + return ret; + + /* Disable double buffering. */ + writel(0, priv->reg_mbus + MBUS_MDFSCR); + + /* Restore VTF configuration. */ + writel_relaxed(vtfcr, priv->reg_dram + DRAM_VTFCR); + + /* Enable automatic self-refresh at the lowest frequency only. */ + if (freq == priv->freq_table[0]) + pwrctl |= DRAM_PWRCTL_SELFREF_EN; + writel_relaxed(pwrctl, priv->reg_dram + DRAM_PWRCTL); + + sun8i_a33_mbus_restart_pmu_counters(priv); + sun8i_a33_mbus_update_nominal_bw(priv, ddr_freq_mhz); + + return 0; +} + +static int sun8i_a33_mbus_set_dram_target(struct device *dev, + unsigned long *freq, u32 flags) +{ + struct sun8i_a33_mbus *priv = dev_get_drvdata(dev); + struct devfreq *devfreq = priv->devfreq_dram; + struct dev_pm_opp *opp; + int ret; + + opp = devfreq_recommended_opp(dev, freq, flags); + if (IS_ERR(opp)) + return PTR_ERR(opp); + + dev_pm_opp_put(opp); + + if (*freq == devfreq->previous_freq) + return 0; + + ret = sun8i_a33_mbus_set_dram_freq(priv, *freq); + if (ret) { + dev_warn(dev, "failed to set DRAM frequency: %d\n", ret); + *freq = devfreq->previous_freq; + } + + return ret; +} + +static int sun8i_a33_mbus_get_dram_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct sun8i_a33_mbus *priv = dev_get_drvdata(dev); + + stat->busy_time = sun8i_a33_mbus_get_peak_bw(priv); + stat->total_time = priv->nominal_bw; + stat->current_frequency = priv->devfreq_dram->previous_freq; + + sun8i_a33_mbus_restart_pmu_counters(priv); + + dev_dbg(dev, "Using %lu/%lu (%lu%%) at %lu MHz\n", + stat->busy_time, stat->total_time, + DIV_ROUND_CLOSEST(stat->busy_time * 100, stat->total_time), + stat->current_frequency / USEC_PER_SEC); + + return 0; +} + +static int sun8i_a33_mbus_hw_init(struct device *dev, + struct sun8i_a33_mbus *priv, + unsigned long ddr_freq) +{ + u32 i, mbus_cr, mbus_freq_mhz; + + /* Choose tREFI and tRFC to match the configured DRAM type. */ + mbus_cr = readl_relaxed(priv->reg_mbus + MBUS_CR); + switch (MBUS_CR_GET_DRAM_TYPE(mbus_cr)) { + case MBUS_CR_DRAM_TYPE_DDR2: + case MBUS_CR_DRAM_TYPE_DDR3: + case MBUS_CR_DRAM_TYPE_DDR4: + priv->tREFI_ns = 7800; + priv->tRFC_ns = 350; + break; + case MBUS_CR_DRAM_TYPE_LPDDR2: + case MBUS_CR_DRAM_TYPE_LPDDR3: + priv->tREFI_ns = 3900; + priv->tRFC_ns = 210; + break; + default: + return -EINVAL; + } + + /* Save ODTMAP so it can be restored when raising the frequency. */ + priv->odtmap = readl_relaxed(priv->reg_dram + DRAM_ODTMAP); + + /* Compute the DRAM data bus width by counting enabled DATx8 blocks. */ + for (i = 0; i < DRAM_DX_MAX; ++i) { + void __iomem *reg = priv->reg_dram + DRAM_DXnGCR0(i); + + if (!(readl_relaxed(reg) & DRAM_DXnGCR0_DXEN)) + break; + } + priv->data_width = i; + + dev_dbg(dev, "Detected %u-bit %sDDRx with%s ODT\n", + priv->data_width * 8, + MBUS_CR_GET_DRAM_TYPE(mbus_cr) > 4 ? "LP" : "", + priv->odtmap ? "" : "out"); + + /* Program MBUS_TMR such that the PMU period unit is microseconds. */ + mbus_freq_mhz = clk_get_rate(priv->clk_mbus) / USEC_PER_SEC; + writel_relaxed(MBUS_TMR_PERIOD(mbus_freq_mhz), + priv->reg_mbus + MBUS_TMR); + + /* "Master Ready Mask Register" bits must be set or MDFS will block. */ + writel_relaxed(0xffffffff, priv->reg_mbus + MBUS_MDFSMRMR); + + sun8i_a33_mbus_restart_pmu_counters(priv); + sun8i_a33_mbus_update_nominal_bw(priv, ddr_freq / USEC_PER_SEC); + + return 0; +} + +static int __maybe_unused sun8i_a33_mbus_suspend(struct device *dev) +{ + struct sun8i_a33_mbus *priv = dev_get_drvdata(dev); + + clk_disable_unprepare(priv->clk_bus); + + return 0; +} + +static int __maybe_unused sun8i_a33_mbus_resume(struct device *dev) +{ + struct sun8i_a33_mbus *priv = dev_get_drvdata(dev); + + return clk_prepare_enable(priv->clk_bus); +} + +static int sun8i_a33_mbus_probe(struct platform_device *pdev) +{ + const struct sun8i_a33_mbus_variant *variant; + struct device *dev = &pdev->dev; + struct sun8i_a33_mbus *priv; + unsigned long base_freq; + unsigned int max_state; + const char *err; + int i, ret; + + variant = device_get_match_data(dev); + if (!variant) + return -EINVAL; + + max_state = variant->max_dram_divider - variant->min_dram_divider + 1; + + priv = devm_kzalloc(dev, struct_size(priv, freq_table, max_state), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + platform_set_drvdata(pdev, priv); + + priv->variant = variant; + + priv->reg_dram = devm_platform_ioremap_resource_byname(pdev, "dram"); + if (IS_ERR(priv->reg_dram)) + return PTR_ERR(priv->reg_dram); + + priv->reg_mbus = devm_platform_ioremap_resource_byname(pdev, "mbus"); + if (IS_ERR(priv->reg_mbus)) + return PTR_ERR(priv->reg_mbus); + + priv->clk_bus = devm_clk_get(dev, "bus"); + if (IS_ERR(priv->clk_bus)) + return dev_err_probe(dev, PTR_ERR(priv->clk_bus), + "failed to get bus clock\n"); + + priv->clk_dram = devm_clk_get(dev, "dram"); + if (IS_ERR(priv->clk_dram)) + return dev_err_probe(dev, PTR_ERR(priv->clk_dram), + "failed to get dram clock\n"); + + priv->clk_mbus = devm_clk_get(dev, "mbus"); + if (IS_ERR(priv->clk_mbus)) + return dev_err_probe(dev, PTR_ERR(priv->clk_mbus), + "failed to get mbus clock\n"); + + ret = clk_prepare_enable(priv->clk_bus); + if (ret) + return dev_err_probe(dev, ret, + "failed to enable bus clock\n"); + + /* Lock the DRAM clock rate to keep priv->nominal_bw in sync. */ + ret = clk_rate_exclusive_get(priv->clk_dram); + if (ret) { + err = "failed to lock dram clock rate\n"; + goto err_disable_bus; + } + + /* Lock the MBUS clock rate to keep MBUS_TMR_PERIOD in sync. */ + ret = clk_rate_exclusive_get(priv->clk_mbus); + if (ret) { + err = "failed to lock mbus clock rate\n"; + goto err_unlock_dram; + } + + priv->gov_data.upthreshold = 10; + priv->gov_data.downdifferential = 5; + + priv->profile.initial_freq = clk_get_rate(priv->clk_dram); + priv->profile.polling_ms = 1000; + priv->profile.target = sun8i_a33_mbus_set_dram_target; + priv->profile.get_dev_status = sun8i_a33_mbus_get_dram_status; + priv->profile.freq_table = priv->freq_table; + priv->profile.max_state = max_state; + + ret = devm_pm_opp_set_clkname(dev, "dram"); + if (ret) { + err = "failed to add OPP table\n"; + goto err_unlock_mbus; + } + + base_freq = clk_get_rate(clk_get_parent(priv->clk_dram)); + for (i = 0; i < max_state; ++i) { + unsigned int div = variant->max_dram_divider - i; + + priv->freq_table[i] = base_freq / div; + + ret = dev_pm_opp_add(dev, priv->freq_table[i], 0); + if (ret) { + err = "failed to add OPPs\n"; + goto err_remove_opps; + } + } + + ret = sun8i_a33_mbus_hw_init(dev, priv, priv->profile.initial_freq); + if (ret) { + err = "failed to init hardware\n"; + goto err_remove_opps; + } + + priv->devfreq_dram = devfreq_add_device(dev, &priv->profile, + DEVFREQ_GOV_SIMPLE_ONDEMAND, + &priv->gov_data); + if (IS_ERR(priv->devfreq_dram)) { + ret = PTR_ERR(priv->devfreq_dram); + err = "failed to add devfreq device\n"; + goto err_remove_opps; + } + + /* + * This must be set manually after registering the devfreq device, + * because there is no way to select a dynamic OPP as the suspend OPP. + */ + priv->devfreq_dram->suspend_freq = priv->freq_table[0]; + + return 0; + +err_remove_opps: + dev_pm_opp_remove_all_dynamic(dev); +err_unlock_mbus: + clk_rate_exclusive_put(priv->clk_mbus); +err_unlock_dram: + clk_rate_exclusive_put(priv->clk_dram); +err_disable_bus: + clk_disable_unprepare(priv->clk_bus); + + return dev_err_probe(dev, ret, err); +} + +static int sun8i_a33_mbus_remove(struct platform_device *pdev) +{ + struct sun8i_a33_mbus *priv = platform_get_drvdata(pdev); + unsigned long initial_freq = priv->profile.initial_freq; + struct device *dev = &pdev->dev; + int ret; + + devfreq_remove_device(priv->devfreq_dram); + + ret = sun8i_a33_mbus_set_dram_freq(priv, initial_freq); + if (ret) + dev_warn(dev, "failed to restore DRAM frequency: %d\n", ret); + + dev_pm_opp_remove_all_dynamic(dev); + clk_rate_exclusive_put(priv->clk_mbus); + clk_rate_exclusive_put(priv->clk_dram); + clk_disable_unprepare(priv->clk_bus); + + return 0; +} + +static const struct sun8i_a33_mbus_variant sun50i_a64_mbus = { + .min_dram_divider = 1, + .max_dram_divider = 4, + .odt_freq_mhz = 400, +}; + +static const struct of_device_id sun8i_a33_mbus_of_match[] = { + { .compatible = "allwinner,sun50i-a64-mbus", .data = &sun50i_a64_mbus }, + { .compatible = "allwinner,sun50i-h5-mbus", .data = &sun50i_a64_mbus }, + { }, +}; +MODULE_DEVICE_TABLE(of, sun8i_a33_mbus_of_match); + +static SIMPLE_DEV_PM_OPS(sun8i_a33_mbus_pm_ops, + sun8i_a33_mbus_suspend, sun8i_a33_mbus_resume); + +static struct platform_driver sun8i_a33_mbus_driver = { + .probe = sun8i_a33_mbus_probe, + .remove = sun8i_a33_mbus_remove, + .driver = { + .name = "sun8i-a33-mbus", + .of_match_table = sun8i_a33_mbus_of_match, + .pm = pm_ptr(&sun8i_a33_mbus_pm_ops), + }, +}; +module_platform_driver(sun8i_a33_mbus_driver); + +MODULE_AUTHOR("Samuel Holland <samuel@sholland.org>"); +MODULE_DESCRIPTION("Allwinner sun8i/sun50i MBUS DEVFREQ Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 80a2c270d502..bb612fce7ead 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -1103,17 +1103,17 @@ static int jz4740_mmc_remove(struct platform_device *pdev) return 0; } -static int __maybe_unused jz4740_mmc_suspend(struct device *dev) +static int jz4740_mmc_suspend(struct device *dev) { return pinctrl_pm_select_sleep_state(dev); } -static int __maybe_unused jz4740_mmc_resume(struct device *dev) +static int jz4740_mmc_resume(struct device *dev) { return pinctrl_select_default_state(dev); } -static SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend, +DEFINE_SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend, jz4740_mmc_resume); static struct platform_driver jz4740_mmc_driver = { @@ -1123,7 +1123,7 @@ static struct platform_driver jz4740_mmc_driver = { .name = "jz4740-mmc", .probe_type = PROBE_PREFER_ASYNCHRONOUS, .of_match_table = of_match_ptr(jz4740_mmc_of_match), - .pm = pm_ptr(&jz4740_mmc_pm_ops), + .pm = pm_sleep_ptr(&jz4740_mmc_pm_ops), }, }; diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index 2fe6fcdbb1b3..98c218bd6669 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -1183,7 +1183,6 @@ static int mxcmci_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP static int mxcmci_suspend(struct device *dev) { struct mmc_host *mmc = dev_get_drvdata(dev); @@ -1210,9 +1209,8 @@ static int mxcmci_resume(struct device *dev) return ret; } -#endif -static SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume); +DEFINE_SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume); static struct platform_driver mxcmci_driver = { .probe = mxcmci_probe, @@ -1220,7 +1218,7 @@ static struct platform_driver mxcmci_driver = { .driver = { .name = DRIVER_NAME, .probe_type = PROBE_PREFER_ASYNCHRONOUS, - .pm = &mxcmci_pm_ops, + .pm = pm_sleep_ptr(&mxcmci_pm_ops), .of_match_table = mxcmci_of_match, } }; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 7161a5b1c701..19e2621e0645 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5460,7 +5460,9 @@ static struct pci_driver rtl8169_pci_driver = { .probe = rtl_init_one, .remove = rtl_remove_one, .shutdown = rtl_shutdown, - .driver.pm = pm_ptr(&rtl8169_pm_ops), +#ifdef CONFIG_PM + .driver.pm = &rtl8169_pm_ops, +#endif }; module_pci_driver(rtl8169_pci_driver); diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 2a5c1829aab7..8cb45f2d3d78 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -382,7 +382,7 @@ void dtpm_unregister(struct dtpm *dtpm) { powercap_unregister_zone(pct, &dtpm->zone); - pr_info("Unregistered dtpm node '%s'\n", dtpm->zone.name); + pr_debug("Unregistered dtpm node '%s'\n", dtpm->zone.name); } /** @@ -453,8 +453,8 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) dtpm->power_limit = dtpm->power_max; } - pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n", - dtpm->zone.name, dtpm->power_min, dtpm->power_max); + pr_debug("Registered dtpm node '%s' / %llu-%llu uW, \n", + dtpm->zone.name, dtpm->power_min, dtpm->power_max); mutex_unlock(&dtpm_lock); diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index 6e1a0043c411..a20bf12f3ce3 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -12,7 +12,7 @@ * * All of the kthreads used for idle injection are created at init time. * - * Next, the users of the the idle injection framework provide a cpumask via + * Next, the users of the idle injection framework provide a cpumask via * its register function. The kthreads will be synchronized with respect to * this cpumask. * diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 7c0099e7a6d7..07611a00b78f 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -61,6 +61,20 @@ #define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff #define PP_POLICY_MASK 0x1F +/* + * SPR has different layout for Psys Domain PowerLimit registers. + * There are 17 bits of PL1 and PL2 instead of 15 bits. + * The Enable bits and TimeWindow bits are also shifted as a result. + */ +#define PSYS_POWER_LIMIT1_MASK 0x1FFFF +#define PSYS_POWER_LIMIT1_ENABLE BIT(17) + +#define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32) +#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49) + +#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19) +#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51) + /* Non HW constants */ #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ #define RAPL_PRIMITIVE_DUMMY BIT(2) @@ -97,6 +111,7 @@ struct rapl_defaults { bool to_raw); unsigned int dram_domain_energy_unit; unsigned int psys_domain_energy_unit; + bool spr_psys_bits; }; static struct rapl_defaults *rapl_defaults; @@ -669,12 +684,51 @@ static struct rapl_primitive_info rpi[] = { RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0, + RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32, + RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17, + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49, + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19, + RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), + PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51, + RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), /* non-hardware */ PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, RAPL_PRIMITIVE_DERIVED), {NULL, 0, 0, 0}, }; +static enum rapl_primitives +prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim) +{ + if (!rapl_defaults->spr_psys_bits) + return prim; + + if (rd->id != RAPL_DOMAIN_PLATFORM) + return prim; + + switch (prim) { + case POWER_LIMIT1: + return PSYS_POWER_LIMIT1; + case POWER_LIMIT2: + return PSYS_POWER_LIMIT2; + case PL1_ENABLE: + return PSYS_PL1_ENABLE; + case PL2_ENABLE: + return PSYS_PL2_ENABLE; + case TIME_WINDOW1: + return PSYS_TIME_WINDOW1; + case TIME_WINDOW2: + return PSYS_TIME_WINDOW2; + default: + return prim; + } +} + /* Read primitive data based on its related struct rapl_primitive_info. * if xlate flag is set, return translated data based on data units, i.e. * time, energy, and power. @@ -692,7 +746,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd, enum rapl_primitives prim, bool xlate, u64 *data) { u64 value; - struct rapl_primitive_info *rp = &rpi[prim]; + enum rapl_primitives prim_fixed = prim_fixups(rd, prim); + struct rapl_primitive_info *rp = &rpi[prim_fixed]; struct reg_action ra; int cpu; @@ -738,7 +793,8 @@ static int rapl_write_data_raw(struct rapl_domain *rd, enum rapl_primitives prim, unsigned long long value) { - struct rapl_primitive_info *rp = &rpi[prim]; + enum rapl_primitives prim_fixed = prim_fixups(rd, prim); + struct rapl_primitive_info *rp = &rpi[prim_fixed]; int cpu; u64 bits; struct reg_action ra; @@ -981,6 +1037,7 @@ static const struct rapl_defaults rapl_defaults_spr_server = { .compute_time_window = rapl_compute_time_window_core, .dram_domain_energy_unit = 15300, .psys_domain_energy_unit = 1000000000, + .spr_psys_bits = true, }; static const struct rapl_defaults rapl_defaults_byt = { diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index 43b1ae8a7789..0bfb8eebd126 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -462,7 +462,6 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; struct cpumask *cpus; unsigned int frequency; - unsigned long max_capacity, capacity; int ret; /* Request state should be less than max_level */ @@ -479,10 +478,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, if (ret >= 0) { cpufreq_cdev->cpufreq_state = state; cpus = cpufreq_cdev->policy->related_cpus; - max_capacity = arch_scale_cpu_capacity(cpumask_first(cpus)); - capacity = frequency * max_capacity; - capacity /= cpufreq_cdev->policy->cpuinfo.max_freq; - arch_set_thermal_pressure(cpus, max_capacity - capacity); + arch_update_thermal_pressure(cpus, frequency); ret = 0; } |