From 658fa7b1c47a857af484c5c5dff8d0164b7c7bfb Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:52 +0530 Subject: ACPI: CPPC: Add cppc_get_perf() API to read performance controls Add cppc_get_perf() function to read values of performance control registers including desired_perf, min_perf, max_perf, energy_perf, and auto_sel. This provides a read interface to complement the existing cppc_set_perf() write interface for performance control registers. Note that auto_sel is read by cppc_get_perf() but not written by cppc_set_perf() to avoid unintended mode changes during performance updates. It can be updated with existing dedicated cppc_set_auto_sel() API. Use cppc_get_perf() in cppc_cpufreq_get_cpu_data() to initialize perf_ctrls with current hardware register values during cpufreq policy initialization. Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-2-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki --- include/acpi/cppc_acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 4d644f03098e..3fc796c0d902 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -151,6 +151,7 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); +extern int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); @@ -193,6 +194,10 @@ static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ { return -EOPNOTSUPP; } +static inline int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) +{ + return -EOPNOTSUPP; +} static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { return -EOPNOTSUPP; -- cgit v1.2.3 From 13c45a26635fa51a68911aa57e6778bdad18b103 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:57 +0530 Subject: ACPI: CPPC: add APIs and sysfs interface for perf_limited Add sysfs interface to read/write the Performance Limited register. The Performance Limited register indicates to the OS that an unpredictable event (like thermal throttling) has limited processor performance. It contains two sticky bits set by the platform: - Bit 0 (Desired_Excursion): Set when delivered performance is constrained below desired performance. Not used when Autonomous Selection is enabled. - Bit 1 (Minimum_Excursion): Set when delivered performance is constrained below minimum performance. These bits remain set until OSPM explicitly clears them. The write operation accepts a bitmask of bits to clear: - Write 0x1 to clear bit 0 - Write 0x2 to clear bit 1 - Write 0x3 to clear both bits This enables users to detect if platform throttling impacted a workload. Users clear the register before execution, run the workload, then check afterward - if set, hardware throttling occurred during that time window. The interface is exposed as: /sys/devices/system/cpu/cpuX/cpufreq/perf_limited Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-7-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 56 ++++++++++++++++++++++++++++++++++++++++++ drivers/cpufreq/cppc_cpufreq.c | 5 ++++ include/acpi/cppc_acpi.h | 15 +++++++++++ 3 files changed, 76 insertions(+) (limited to 'include') diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 94a7ffa8be3c..53a6ffd995a1 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1978,6 +1978,62 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) } EXPORT_SYMBOL_GPL(cppc_set_perf); +/** + * cppc_get_perf_limited - Get the Performance Limited register value. + * @cpu: CPU from which to get Performance Limited register. + * @perf_limited: Pointer to store the Performance Limited value. + * + * The returned value contains sticky status bits indicating platform-imposed + * performance limitations. + * + * Return: 0 for success, -EIO on failure, -EOPNOTSUPP if not supported. + */ +int cppc_get_perf_limited(int cpu, u64 *perf_limited) +{ + return cppc_get_reg_val(cpu, PERF_LIMITED, perf_limited); +} +EXPORT_SYMBOL_GPL(cppc_get_perf_limited); + +/** + * cppc_set_perf_limited() - Clear bits in the Performance Limited register. + * @cpu: CPU on which to write register. + * @bits_to_clear: Bitmask of bits to clear in the perf_limited register. + * + * The Performance Limited register contains two sticky bits set by platform: + * - Bit 0 (Desired_Excursion): Set when delivered performance is constrained + * below desired performance. Not used when Autonomous Selection is enabled. + * - Bit 1 (Minimum_Excursion): Set when delivered performance is constrained + * below minimum performance. + * + * These bits are sticky and remain set until OSPM explicitly clears them. + * This function only allows clearing bits (the platform sets them). + * + * Return: 0 for success, -EINVAL for invalid bits, -EIO on register + * access failure, -EOPNOTSUPP if not supported. + */ +int cppc_set_perf_limited(int cpu, u64 bits_to_clear) +{ + u64 current_val, new_val; + int ret; + + /* Only bits 0 and 1 are valid */ + if (bits_to_clear & ~CPPC_PERF_LIMITED_MASK) + return -EINVAL; + + if (!bits_to_clear) + return 0; + + ret = cppc_get_perf_limited(cpu, ¤t_val); + if (ret) + return ret; + + /* Clear the specified bits */ + new_val = current_val & ~bits_to_clear; + + return cppc_set_reg_val(cpu, PERF_LIMITED, new_val); +} +EXPORT_SYMBOL_GPL(cppc_set_perf_limited); + /** * cppc_get_transition_latency - returns frequency transition latency in ns * @cpu_num: CPU number for per_cpu(). diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 8a8cf76828ee..94d489a4c90d 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -985,16 +985,21 @@ store_energy_performance_preference_val(struct cpufreq_policy *policy, return count; } +CPPC_CPUFREQ_ATTR_RW_U64(perf_limited, cppc_get_perf_limited, + cppc_set_perf_limited) + cpufreq_freq_attr_ro(freqdomain_cpus); cpufreq_freq_attr_rw(auto_select); cpufreq_freq_attr_rw(auto_act_window); cpufreq_freq_attr_rw(energy_performance_preference_val); +cpufreq_freq_attr_rw(perf_limited); static struct freq_attr *cppc_cpufreq_attr[] = { &freqdomain_cpus, &auto_select, &auto_act_window, &energy_performance_preference_val, + &perf_limited, NULL, }; diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 3fc796c0d902..f7afa20b8ad9 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -42,6 +42,11 @@ #define CPPC_EPP_PERFORMANCE_PREF 0x00 #define CPPC_EPP_ENERGY_EFFICIENCY_PREF 0xFF +#define CPPC_PERF_LIMITED_DESIRED_EXCURSION BIT(0) +#define CPPC_PERF_LIMITED_MINIMUM_EXCURSION BIT(1) +#define CPPC_PERF_LIMITED_MASK (CPPC_PERF_LIMITED_DESIRED_EXCURSION | \ + CPPC_PERF_LIMITED_MINIMUM_EXCURSION) + /* Each register has the folowing format. */ struct cpc_reg { u8 descriptor; @@ -174,6 +179,8 @@ extern int cppc_get_auto_act_window(int cpu, u64 *auto_act_window); extern int cppc_set_auto_act_window(int cpu, u64 auto_act_window); extern int cppc_get_auto_sel(int cpu, bool *enable); extern int cppc_set_auto_sel(int cpu, bool enable); +extern int cppc_get_perf_limited(int cpu, u64 *perf_limited); +extern int cppc_set_perf_limited(int cpu, u64 bits_to_clear); extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); extern int amd_detect_prefcore(bool *detected); @@ -270,6 +277,14 @@ static inline int cppc_set_auto_sel(int cpu, bool enable) { return -EOPNOTSUPP; } +static inline int cppc_get_perf_limited(int cpu, u64 *perf_limited) +{ + return -EOPNOTSUPP; +} +static inline int cppc_set_perf_limited(int cpu, u64 bits_to_clear) +{ + return -EOPNOTSUPP; +} static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) { return -ENODEV; -- cgit v1.2.3 From 8505bfb4e4eca28ef1b20d3369435ec2d6a125c6 Mon Sep 17 00:00:00 2001 From: Pengjie Zhang Date: Fri, 13 Feb 2026 18:09:35 +0800 Subject: ACPI: CPPC: Move reference performance to capabilities Currently, the `Reference Performance` register is read every time the CPU frequency is sampled in `cppc_get_perf_ctrs()`. This function is on the hot path of the cppc_cpufreq driver. Reference Performance indicates the performance level that corresponds to the Reference Counter incrementing and is not expected to change dynamically during runtime (unlike the Delivered and Reference counters). Reading this register in the hot path incurs unnecessary overhead, particularly on platforms where CPC registers are located in the PCC (Platform Communication Channel) subspace. This patch moves `reference_perf` from the dynamic feedback counters structure (`cppc_perf_fb_ctrs`) to the static capabilities structure (`cppc_perf_caps`). Signed-off-by: Pengjie Zhang [ rjw: Changelog adjustment ] Link: https://patch.msgid.link/20260213100935.19111-1-zhangpengjie2@huawei.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 55 ++++++++++++++++++------------------------ drivers/cpufreq/cppc_cpufreq.c | 21 +++++++++------- include/acpi/cppc_acpi.h | 2 +- 3 files changed, 37 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 53a6ffd995a1..07bbf5b366a4 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -177,12 +177,12 @@ __ATTR(_name, 0444, show_##_name, NULL) show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, highest_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_perf); +show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, reference_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_nonlinear_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, guaranteed_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_freq); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_freq); -show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, reference_perf); show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); /* Check for valid access_width, otherwise, fallback to using bit_width */ @@ -1352,9 +1352,10 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); struct cpc_register_resource *highest_reg, *lowest_reg, - *lowest_non_linear_reg, *nominal_reg, *guaranteed_reg, - *low_freq_reg = NULL, *nom_freq_reg = NULL; - u64 high, low, guaranteed, nom, min_nonlinear, low_f = 0, nom_f = 0; + *lowest_non_linear_reg, *nominal_reg, *reference_reg, + *guaranteed_reg, *low_freq_reg = NULL, *nom_freq_reg = NULL; + u64 high, low, guaranteed, nom, ref, min_nonlinear, + low_f = 0, nom_f = 0; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); struct cppc_pcc_data *pcc_ss_data = NULL; int ret = 0, regs_in_pcc = 0; @@ -1368,6 +1369,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) lowest_reg = &cpc_desc->cpc_regs[LOWEST_PERF]; lowest_non_linear_reg = &cpc_desc->cpc_regs[LOW_NON_LINEAR_PERF]; nominal_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; + reference_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; low_freq_reg = &cpc_desc->cpc_regs[LOWEST_FREQ]; nom_freq_reg = &cpc_desc->cpc_regs[NOMINAL_FREQ]; guaranteed_reg = &cpc_desc->cpc_regs[GUARANTEED_PERF]; @@ -1375,6 +1377,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) /* Are any of the regs PCC ?*/ if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) || CPC_IN_PCC(lowest_non_linear_reg) || CPC_IN_PCC(nominal_reg) || + (CPC_SUPPORTED(reference_reg) && CPC_IN_PCC(reference_reg)) || CPC_IN_PCC(low_freq_reg) || CPC_IN_PCC(nom_freq_reg) || CPC_IN_PCC(guaranteed_reg)) { if (pcc_ss_id < 0) { @@ -1400,6 +1403,17 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) cpc_read(cpunum, nominal_reg, &nom); perf_caps->nominal_perf = nom; + /* + * If reference perf register is not supported then we should + * use the nominal perf value + */ + if (CPC_SUPPORTED(reference_reg)) { + cpc_read(cpunum, reference_reg, &ref); + perf_caps->reference_perf = ref; + } else { + perf_caps->reference_perf = nom; + } + if (guaranteed_reg->type != ACPI_TYPE_BUFFER || IS_NULL_REG(&guaranteed_reg->cpc_entry.reg)) { perf_caps->guaranteed_perf = 0; @@ -1411,7 +1425,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear); perf_caps->lowest_nonlinear_perf = min_nonlinear; - if (!high || !low || !nom || !min_nonlinear) + if (!high || !low || !nom || !ref || !min_nonlinear) ret = -EFAULT; /* Read optional lowest and nominal frequencies if present */ @@ -1441,20 +1455,10 @@ EXPORT_SYMBOL_GPL(cppc_get_perf_caps); bool cppc_perf_ctrs_in_pcc_cpu(unsigned int cpu) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); - struct cpc_register_resource *ref_perf_reg; - - /* - * If reference perf register is not supported then we should use the - * nominal perf value - */ - ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; - if (!CPC_SUPPORTED(ref_perf_reg)) - ref_perf_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; return CPC_IN_PCC(&cpc_desc->cpc_regs[DELIVERED_CTR]) || CPC_IN_PCC(&cpc_desc->cpc_regs[REFERENCE_CTR]) || - CPC_IN_PCC(&cpc_desc->cpc_regs[CTR_WRAP_TIME]) || - CPC_IN_PCC(ref_perf_reg); + CPC_IN_PCC(&cpc_desc->cpc_regs[CTR_WRAP_TIME]); } EXPORT_SYMBOL_GPL(cppc_perf_ctrs_in_pcc_cpu); @@ -1491,10 +1495,10 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); struct cpc_register_resource *delivered_reg, *reference_reg, - *ref_perf_reg, *ctr_wrap_reg; + *ctr_wrap_reg; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); struct cppc_pcc_data *pcc_ss_data = NULL; - u64 delivered, reference, ref_perf, ctr_wrap_time; + u64 delivered, reference, ctr_wrap_time; int ret = 0, regs_in_pcc = 0; if (!cpc_desc) { @@ -1504,19 +1508,11 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) delivered_reg = &cpc_desc->cpc_regs[DELIVERED_CTR]; reference_reg = &cpc_desc->cpc_regs[REFERENCE_CTR]; - ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; ctr_wrap_reg = &cpc_desc->cpc_regs[CTR_WRAP_TIME]; - /* - * If reference perf register is not supported then we should - * use the nominal perf value - */ - if (!CPC_SUPPORTED(ref_perf_reg)) - ref_perf_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; - /* Are any of the regs PCC ?*/ if (CPC_IN_PCC(delivered_reg) || CPC_IN_PCC(reference_reg) || - CPC_IN_PCC(ctr_wrap_reg) || CPC_IN_PCC(ref_perf_reg)) { + CPC_IN_PCC(ctr_wrap_reg)) { if (pcc_ss_id < 0) { pr_debug("Invalid pcc_ss_id\n"); return -ENODEV; @@ -1533,8 +1529,6 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) cpc_read(cpunum, delivered_reg, &delivered); cpc_read(cpunum, reference_reg, &reference); - cpc_read(cpunum, ref_perf_reg, &ref_perf); - /* * Per spec, if ctr_wrap_time optional register is unsupported, then the * performance counters are assumed to never wrap during the lifetime of @@ -1544,14 +1538,13 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) if (CPC_SUPPORTED(ctr_wrap_reg)) cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time); - if (!delivered || !reference || !ref_perf) { + if (!delivered || !reference) { ret = -EFAULT; goto out_err; } perf_fb_ctrs->delivered = delivered; perf_fb_ctrs->reference = reference; - perf_fb_ctrs->reference_perf = ref_perf; perf_fb_ctrs->wraparound_time = ctr_wrap_time; out_err: if (regs_in_pcc) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 94d489a4c90d..5dfb109cf1f4 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -50,7 +50,8 @@ struct cppc_freq_invariance { static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); static struct kthread_worker *kworker_fie; -static int cppc_perf_from_fbctrs(struct cppc_perf_fb_ctrs *fb_ctrs_t0, +static int cppc_perf_from_fbctrs(u64 reference_perf, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, struct cppc_perf_fb_ctrs *fb_ctrs_t1); /** @@ -70,7 +71,7 @@ static void __cppc_scale_freq_tick(struct cppc_freq_invariance *cppc_fi) struct cppc_perf_fb_ctrs fb_ctrs = {0}; struct cppc_cpudata *cpu_data; unsigned long local_freq_scale; - u64 perf; + u64 perf, ref_perf; cpu_data = cppc_fi->cpu_data; @@ -79,7 +80,9 @@ static void __cppc_scale_freq_tick(struct cppc_freq_invariance *cppc_fi) return; } - perf = cppc_perf_from_fbctrs(&cppc_fi->prev_perf_fb_ctrs, &fb_ctrs); + ref_perf = cpu_data->perf_caps.reference_perf; + perf = cppc_perf_from_fbctrs(ref_perf, + &cppc_fi->prev_perf_fb_ctrs, &fb_ctrs); if (!perf) return; @@ -747,13 +750,11 @@ static inline u64 get_delta(u64 t1, u64 t0) return (u32)t1 - (u32)t0; } -static int cppc_perf_from_fbctrs(struct cppc_perf_fb_ctrs *fb_ctrs_t0, +static int cppc_perf_from_fbctrs(u64 reference_perf, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, struct cppc_perf_fb_ctrs *fb_ctrs_t1) { u64 delta_reference, delta_delivered; - u64 reference_perf; - - reference_perf = fb_ctrs_t0->reference_perf; delta_reference = get_delta(fb_ctrs_t1->reference, fb_ctrs_t0->reference); @@ -790,7 +791,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; struct cppc_cpudata *cpu_data; - u64 delivered_perf; + u64 delivered_perf, reference_perf; int ret; if (!policy) @@ -807,7 +808,9 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) return 0; } - delivered_perf = cppc_perf_from_fbctrs(&fb_ctrs_t0, &fb_ctrs_t1); + reference_perf = cpu_data->perf_caps.reference_perf; + delivered_perf = cppc_perf_from_fbctrs(reference_perf, + &fb_ctrs_t0, &fb_ctrs_t1); if (!delivered_perf) goto out_invalid_counters; diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index f7afa20b8ad9..d8e405becdc3 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -121,6 +121,7 @@ struct cppc_perf_caps { u32 guaranteed_perf; u32 highest_perf; u32 nominal_perf; + u32 reference_perf; u32 lowest_perf; u32 lowest_nonlinear_perf; u32 lowest_freq; @@ -138,7 +139,6 @@ struct cppc_perf_ctrls { struct cppc_perf_fb_ctrs { u64 reference; u64 delivered; - u64 reference_perf; u64 wraparound_time; }; -- cgit v1.2.3 From a4c6c18e93a1d24df9ab95794cef471c89daefe4 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Tue, 7 Apr 2026 16:11:40 +0800 Subject: cpuidle: Extract and export no-lock variants of cpuidle_unregister_device() The cpuidle_unregister_device() function always acquires the internal cpuidle_lock (or pause/resume idle) during their execution. However, in some power notification scenarios (e.g., when old idle states may become unavailable), it is necessary to efficiently disable cpuidle first, then remove and re-create all cpuidle devices for all CPUs. To avoid frequent lock overhead and ensure atomicity across the entire batch operation, the caller needs to hold the cpuidle_lock once outside the loop. To address this, extract the core logic into the new function cpuidle_unregister_device_no_lock() and export it. Signed-off-by: Huisong Li [ rjw: Added missing "inline", subject and changelog tweaks ] Link: https://patch.msgid.link/20260407081141.2493581-2-lihuisong@huawei.com Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 22 +++++++++++++++------- include/linux/cpuidle.h | 2 ++ 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index c7876e9e024f..1a55542efead 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -714,16 +714,12 @@ out_unregister: EXPORT_SYMBOL_GPL(cpuidle_register_device); -/** - * cpuidle_unregister_device - unregisters a CPU's idle PM feature - * @dev: the cpu - */ -void cpuidle_unregister_device(struct cpuidle_device *dev) +void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev) { if (!dev || dev->registered == 0) return; - cpuidle_pause_and_lock(); + lockdep_assert_held(&cpuidle_lock); cpuidle_disable_device(dev); @@ -732,10 +728,22 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) __cpuidle_unregister_device(dev); cpuidle_coupled_unregister_device(dev); +} +EXPORT_SYMBOL_GPL(cpuidle_unregister_device_no_lock); + +/** + * cpuidle_unregister_device - unregisters a CPU's idle PM feature + * @dev: the cpu + */ +void cpuidle_unregister_device(struct cpuidle_device *dev) +{ + if (!dev || dev->registered == 0) + return; + cpuidle_pause_and_lock(); + cpuidle_unregister_device_no_lock(dev); cpuidle_resume_and_unlock(); } - EXPORT_SYMBOL_GPL(cpuidle_unregister_device); /** diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 4073690504a7..a2485348def3 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -188,6 +188,7 @@ extern void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx, extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); extern int cpuidle_register_device(struct cpuidle_device *dev); extern void cpuidle_unregister_device(struct cpuidle_device *dev); +extern void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev); extern int cpuidle_register(struct cpuidle_driver *drv, const struct cpumask *const coupled_cpus); extern void cpuidle_unregister(struct cpuidle_driver *drv); @@ -226,6 +227,7 @@ static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { } static inline int cpuidle_register_device(struct cpuidle_device *dev) {return -ENODEV; } static inline void cpuidle_unregister_device(struct cpuidle_device *dev) { } +static inline void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev) {} static inline int cpuidle_register(struct cpuidle_driver *drv, const struct cpumask *const coupled_cpus) {return -ENODEV; } -- cgit v1.2.3