diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/cpufreq/powernv-cpufreq.c | 16 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-pseries.c | 77 |
2 files changed, 60 insertions, 33 deletions
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 23a06cba392c..5a2cf5f91ccb 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -36,6 +36,7 @@ #define MAX_PSTATE_SHIFT 32 #define LPSTATE_SHIFT 48 #define GPSTATE_SHIFT 56 +#define MAX_NR_CHIPS 32 #define MAX_RAMP_DOWN_TIME 5120 /* @@ -1046,12 +1047,20 @@ static int init_chip_info(void) unsigned int *chip; unsigned int cpu, i; unsigned int prev_chip_id = UINT_MAX; + cpumask_t *chip_cpu_mask; int ret = 0; chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL); if (!chip) return -ENOMEM; + /* Allocate a chip cpu mask large enough to fit mask for all chips */ + chip_cpu_mask = kcalloc(MAX_NR_CHIPS, sizeof(cpumask_t), GFP_KERNEL); + if (!chip_cpu_mask) { + ret = -ENOMEM; + goto free_and_return; + } + for_each_possible_cpu(cpu) { unsigned int id = cpu_to_chip_id(cpu); @@ -1059,22 +1068,25 @@ static int init_chip_info(void) prev_chip_id = id; chip[nr_chips++] = id; } + cpumask_set_cpu(cpu, &chip_cpu_mask[nr_chips-1]); } chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); if (!chips) { ret = -ENOMEM; - goto free_and_return; + goto out_free_chip_cpu_mask; } for (i = 0; i < nr_chips; i++) { chips[i].id = chip[i]; - cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); + cpumask_copy(&chips[i].mask, &chip_cpu_mask[i]); INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); for_each_cpu(cpu, &chips[i].mask) per_cpu(chip_info, cpu) = &chips[i]; } +out_free_chip_cpu_mask: + kfree(chip_cpu_mask); free_and_return: kfree(chip); return ret; diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index a2b5c6f60cf0..7e7ab5597d7a 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -346,11 +346,9 @@ static int pseries_cpuidle_driver_init(void) static void __init fixup_cede0_latency(void) { struct xcede_latency_payload *payload; - u64 min_latency_us; + u64 min_xcede_latency_us = UINT_MAX; int i; - min_latency_us = dedicated_states[1].exit_latency; // CEDE latency - if (parse_cede_parameters()) return; @@ -358,42 +356,45 @@ static void __init fixup_cede0_latency(void) nr_xcede_records); payload = &xcede_latency_parameter.payload; + + /* + * The CEDE idle state maps to CEDE(0). While the hypervisor + * does not advertise CEDE(0) exit latency values, it does + * advertise the latency values of the extended CEDE states. + * We use the lowest advertised exit latency value as a proxy + * for the exit latency of CEDE(0). + */ for (i = 0; i < nr_xcede_records; i++) { struct xcede_latency_record *record = &payload->records[i]; + u8 hint = record->hint; u64 latency_tb = be64_to_cpu(record->latency_ticks); u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC); - if (latency_us == 0) - pr_warn("cpuidle: xcede record %d has an unrealistic latency of 0us.\n", i); - - if (latency_us < min_latency_us) - min_latency_us = latency_us; - } - - /* - * By default, we assume that CEDE(0) has exit latency 10us, - * since there is no way for us to query from the platform. - * - * However, if the wakeup latency of an Extended CEDE state is - * smaller than 10us, then we can be sure that CEDE(0) - * requires no more than that. - * - * Perform the fix-up. - */ - if (min_latency_us < dedicated_states[1].exit_latency) { /* - * We set a minimum of 1us wakeup latency for cede0 to - * distinguish it from snooze + * We expect the exit latency of an extended CEDE + * state to be non-zero, it to since it takes at least + * a few nanoseconds to wakeup the idle CPU and + * dispatch the virtual processor into the Linux + * Guest. + * + * So we consider only non-zero value for performing + * the fixup of CEDE(0) latency. */ - u64 cede0_latency = 1; + if (latency_us == 0) { + pr_warn("cpuidle: Skipping xcede record %d [hint=%d]. Exit latency = 0us\n", + i, hint); + continue; + } - if (min_latency_us > cede0_latency) - cede0_latency = min_latency_us - 1; + if (latency_us < min_xcede_latency_us) + min_xcede_latency_us = latency_us; + } - dedicated_states[1].exit_latency = cede0_latency; - dedicated_states[1].target_residency = 10 * (cede0_latency); + if (min_xcede_latency_us != UINT_MAX) { + dedicated_states[1].exit_latency = min_xcede_latency_us; + dedicated_states[1].target_residency = 10 * (min_xcede_latency_us); pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n", - cede0_latency); + min_xcede_latency_us); } } @@ -402,7 +403,7 @@ static void __init fixup_cede0_latency(void) * pseries_idle_probe() * Choose state table for shared versus dedicated partition */ -static int pseries_idle_probe(void) +static int __init pseries_idle_probe(void) { if (cpuidle_disable != IDLE_NO_OVERRIDE) @@ -419,7 +420,21 @@ static int pseries_idle_probe(void) cpuidle_state_table = shared_states; max_idle_state = ARRAY_SIZE(shared_states); } else { - fixup_cede0_latency(); + /* + * Use firmware provided latency values + * starting with POWER10 platforms. In the + * case that we are running on a POWER10 + * platform but in an earlier compat mode, we + * can still use the firmware provided values. + * + * However, on platforms prior to POWER10, we + * cannot rely on the accuracy of the firmware + * provided latency values. On such platforms, + * go with the conservative default estimate + * of 10us. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10)) + fixup_cede0_latency(); cpuidle_state_table = dedicated_states; max_idle_state = NR_DEDICATED_STATES; } |