From dd3b3d160ea7004091051da60e86f36f40970888 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 27 Jan 2023 19:17:03 +0100 Subject: thermal: ACPI: Make helpers retrieve temperature only It is slightly better to make the ACPI thermal helper functions retrieve the trip point temperature only instead of doing the full trip point initialization, because they are also used for updating some already registered trip points, in which case initializing a new trip just in order to update the temperature of an existing one is somewhat wasteful. Modify the ACPI thermal helpers accordingly and update their users. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano --- .../intel/int340x_thermal/int340x_thermal_zone.c | 40 +++++--- drivers/thermal/intel/intel_pch_thermal.c | 7 +- drivers/thermal/thermal_acpi.c | 108 +++++++-------------- 3 files changed, 66 insertions(+), 89 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index 09b1b51eb6a5..a675f79f792b 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -70,24 +70,35 @@ static int int340x_thermal_read_trips(struct acpi_device *zone_adev, { int i, ret; - ret = thermal_acpi_trip_critical(zone_adev, &zone_trips[trip_cnt]); - if (!ret) + ret = thermal_acpi_critical_trip_temp(zone_adev, + &zone_trips[trip_cnt].temperature); + if (!ret) { + zone_trips[trip_cnt].type = THERMAL_TRIP_CRITICAL; trip_cnt++; + } - ret = thermal_acpi_trip_hot(zone_adev, &zone_trips[trip_cnt]); - if (!ret) + ret = thermal_acpi_hot_trip_temp(zone_adev, + &zone_trips[trip_cnt].temperature); + if (!ret) { + zone_trips[trip_cnt].type = THERMAL_TRIP_HOT; trip_cnt++; + } - ret = thermal_acpi_trip_passive(zone_adev, &zone_trips[trip_cnt]); - if (!ret) + ret = thermal_acpi_passive_trip_temp(zone_adev, + &zone_trips[trip_cnt].temperature); + if (!ret) { + zone_trips[trip_cnt].type = THERMAL_TRIP_PASSIVE; trip_cnt++; + } for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) { - ret = thermal_acpi_trip_active(zone_adev, i, &zone_trips[trip_cnt]); + ret = thermal_acpi_active_trip_temp(zone_adev, i, + &zone_trips[trip_cnt].temperature); if (ret) break; + zone_trips[trip_cnt].type = THERMAL_TRIP_ACTIVE; trip_cnt++; } @@ -213,22 +224,21 @@ void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone) mutex_lock(&int34x_zone->zone->lock); for (i = int34x_zone->aux_trip_nr; i < trip_cnt; i++) { - struct thermal_trip trip; - int err; + int temp, err; switch (zone_trips[i].type) { case THERMAL_TRIP_CRITICAL: - err = thermal_acpi_trip_critical(zone_adev, &trip); + err = thermal_acpi_critical_trip_temp(zone_adev, &temp); break; case THERMAL_TRIP_HOT: - err = thermal_acpi_trip_hot(zone_adev, &trip); + err = thermal_acpi_hot_trip_temp(zone_adev, &temp); break; case THERMAL_TRIP_PASSIVE: - err = thermal_acpi_trip_passive(zone_adev, &trip); + err = thermal_acpi_passive_trip_temp(zone_adev, &temp); break; case THERMAL_TRIP_ACTIVE: - err = thermal_acpi_trip_active(zone_adev, act_trip_nr++, - &trip); + err = thermal_acpi_active_trip_temp(zone_adev, act_trip_nr++, + &temp); break; default: err = -ENODEV; @@ -238,7 +248,7 @@ void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone) continue; } - zone_trips[i].temperature = trip.temperature; + zone_trips[i].temperature = temp; } mutex_unlock(&int34x_zone->zone->lock); diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 45a9ea86907e..c529c05c1853 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -100,16 +100,17 @@ static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int *nr_trips) { struct acpi_device *adev; - int ret; + int temp; adev = ACPI_COMPANION(&ptd->pdev->dev); if (!adev) return; - ret = thermal_acpi_trip_passive(adev, &ptd->trips[*nr_trips]); - if (ret || ptd->trips[*nr_trips].temperature <= 0) + if (thermal_acpi_passive_trip_temp(adev, &temp) || temp <= 0) return; + ptd->trips[*nr_trips].type = THERMAL_TRIP_PASSIVE; + ptd->trips[*nr_trips].temperature = temp; ++(*nr_trips); } #else diff --git a/drivers/thermal/thermal_acpi.c b/drivers/thermal/thermal_acpi.c index 671f774a7621..0e5698818f69 100644 --- a/drivers/thermal/thermal_acpi.c +++ b/drivers/thermal/thermal_acpi.c @@ -21,42 +21,11 @@ #define TEMP_MIN_DECIK 2180 #define TEMP_MAX_DECIK 4480 -static int thermal_acpi_trip_init(struct acpi_device *adev, - enum thermal_trip_type type, int id, - struct thermal_trip *trip) +static int thermal_acpi_trip_temp(struct acpi_device *adev, char *obj_name, + int *ret_temp) { unsigned long long temp; acpi_status status; - char obj_name[5]; - - switch (type) { - case THERMAL_TRIP_ACTIVE: - if (id < 0 || id > 9) - return -EINVAL; - - obj_name[1] = 'A'; - obj_name[2] = 'C'; - obj_name[3] = '0' + id; - break; - case THERMAL_TRIP_PASSIVE: - obj_name[1] = 'P'; - obj_name[2] = 'S'; - obj_name[3] = 'V'; - break; - case THERMAL_TRIP_HOT: - obj_name[1] = 'H'; - obj_name[2] = 'O'; - obj_name[3] = 'T'; - break; - case THERMAL_TRIP_CRITICAL: - obj_name[1] = 'C'; - obj_name[2] = 'R'; - obj_name[3] = 'T'; - break; - } - - obj_name[0] = '_'; - obj_name[4] = '\0'; status = acpi_evaluate_integer(adev->handle, obj_name, NULL, &temp); if (ACPI_FAILURE(status)) { @@ -65,87 +34,84 @@ static int thermal_acpi_trip_init(struct acpi_device *adev, } if (temp >= TEMP_MIN_DECIK && temp <= TEMP_MAX_DECIK) { - trip->temperature = deci_kelvin_to_millicelsius(temp); + *ret_temp = deci_kelvin_to_millicelsius(temp); } else { acpi_handle_debug(adev->handle, "%s result %llu out of range\n", obj_name, temp); - trip->temperature = THERMAL_TEMP_INVALID; + *ret_temp = THERMAL_TEMP_INVALID; } - trip->hysteresis = 0; - trip->type = type; - return 0; } /** - * thermal_acpi_trip_active - Get the specified active trip point - * @adev: Thermal zone ACPI device object to get the description from. + * thermal_acpi_active_trip_temp - Retrieve active trip point temperature + * @adev: Target thermal zone ACPI device object. * @id: Active cooling level (0 - 9). - * @trip: Trip point structure to be populated on success. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _ACx object for the thermal zone represented by @adev to obtain * the temperature of the active cooling trip point corresponding to the active - * cooling level given by @id and initialize @trip as an active trip point using - * that temperature value. + * cooling level given by @id. * * Return 0 on success or a negative error value on failure. */ -int thermal_acpi_trip_active(struct acpi_device *adev, int id, - struct thermal_trip *trip) +int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_ACTIVE, id, trip); + char obj_name[] = {'_', 'A', 'C', '0' + id, '\0'}; + + if (id < 0 || id > 9) + return -EINVAL; + + return thermal_acpi_trip_temp(adev, obj_name, ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_active); +EXPORT_SYMBOL_GPL(thermal_acpi_active_trip_temp); /** - * thermal_acpi_trip_passive - Get the passive trip point - * @adev: Thermal zone ACPI device object to get the description from. - * @trip: Trip point structure to be populated on success. + * thermal_acpi_passive_trip_temp - Retrieve passive trip point temperature + * @adev: Target thermal zone ACPI device object. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _PSV object for the thermal zone represented by @adev to obtain - * the temperature of the passive cooling trip point and initialize @trip as a - * passive trip point using that temperature value. + * the temperature of the passive cooling trip point. * * Return 0 on success or -ENODATA on failure. */ -int thermal_acpi_trip_passive(struct acpi_device *adev, struct thermal_trip *trip) +int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_PASSIVE, INT_MAX, trip); + return thermal_acpi_trip_temp(adev, "_PSV", ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_passive); +EXPORT_SYMBOL_GPL(thermal_acpi_passive_trip_temp); /** - * thermal_acpi_trip_hot - Get the near critical trip point - * @adev: the ACPI device to get the description from. - * @trip: a &struct thermal_trip to be filled if the function succeed. + * thermal_acpi_hot_trip_temp - Retrieve hot trip point temperature + * @adev: Target thermal zone ACPI device object. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _HOT object for the thermal zone represented by @adev to obtain * the temperature of the trip point at which the system is expected to be put - * into the S4 sleep state and initialize @trip as a hot trip point using that - * temperature value. + * into the S4 sleep state. * * Return 0 on success or -ENODATA on failure. */ -int thermal_acpi_trip_hot(struct acpi_device *adev, struct thermal_trip *trip) +int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_HOT, INT_MAX, trip); + return thermal_acpi_trip_temp(adev, "_HOT", ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_hot); +EXPORT_SYMBOL_GPL(thermal_acpi_hot_trip_temp); /** - * thermal_acpi_trip_critical - Get the critical trip point - * @adev: the ACPI device to get the description from. - * @trip: a &struct thermal_trip to be filled if the function succeed. + * thermal_acpi_critical_trip_temp - Retrieve critical trip point temperature + * @adev: Target thermal zone ACPI device object. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _CRT object for the thermal zone represented by @adev to obtain - * the temperature of the critical cooling trip point and initialize @trip as a - * critical trip point using that temperature value. + * the temperature of the critical cooling trip point. * * Return 0 on success or -ENODATA on failure. */ -int thermal_acpi_trip_critical(struct acpi_device *adev, struct thermal_trip *trip) +int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_CRITICAL, INT_MAX, trip); + return thermal_acpi_trip_temp(adev, "_CRT", ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_critical); +EXPORT_SYMBOL_GPL(thermal_acpi_critical_trip_temp); -- cgit v1.2.3 From be014c789c717a4328f49d58b53170923bc475f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:42:16 +0100 Subject: thermal: intel: int340x: Assorted minor cleanups Improve some inconsistent usage of white space in int340x_thermal_zone.c, fix up one coding style issue in it (missing braces around an else branch of a conditional) and while at it replace a !ACPI_FAILURE() check with an equivalent ACPI_SUCCESS() one. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- .../thermal/intel/int340x_thermal/int340x_thermal_zone.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index a675f79f792b..30c05b436700 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -30,15 +30,16 @@ static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone, return conv_temp; *temp = (unsigned long)conv_temp * 10; - } else + } else { /* _TMP returns the temperature in tenths of degrees Kelvin */ *temp = deci_kelvin_to_millicelsius(tmp); + } return 0; } static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, - int trip, int temp) + int trip, int temp) { struct int34x_thermal_zone *d = zone->devdata; acpi_status status; @@ -46,7 +47,7 @@ static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, snprintf(name, sizeof(name), "PAT%d", trip); status = acpi_execute_simple_method(d->adev->handle, name, - millicelsius_to_deci_kelvin(temp)); + millicelsius_to_deci_kelvin(temp)); if (ACPI_FAILURE(status)) return -EIO; @@ -92,7 +93,6 @@ static int int340x_thermal_read_trips(struct acpi_device *zone_adev, } for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) { - ret = thermal_acpi_active_trip_temp(zone_adev, i, &zone_trips[trip_cnt].temperature); if (ret) @@ -121,8 +121,7 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, acpi_status status; int i, ret; - int34x_thermal_zone = kzalloc(sizeof(*int34x_thermal_zone), - GFP_KERNEL); + int34x_thermal_zone = kzalloc(sizeof(*int34x_thermal_zone), GFP_KERNEL); if (!int34x_thermal_zone) return ERR_PTR(-ENOMEM); @@ -139,7 +138,7 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, int34x_thermal_zone->ops->get_temp = get_temp; status = acpi_evaluate_integer(adev->handle, "PATC", NULL, &trip_cnt); - if (!ACPI_FAILURE(status)) { + if (ACPI_SUCCESS(status)) { int34x_thermal_zone->aux_trip_nr = trip_cnt; trip_mask = BIT(trip_cnt) - 1; } @@ -169,8 +168,7 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, int34x_thermal_zone->trips = zone_trips; - int34x_thermal_zone->lpat_table = acpi_lpat_get_conversion_table( - adev->handle); + int34x_thermal_zone->lpat_table = acpi_lpat_get_conversion_table(adev->handle); int34x_thermal_zone->zone = thermal_zone_device_register_with_trips( acpi_device_bid(adev), -- cgit v1.2.3 From 67c6945867145edda3092c336aa4cf56f9986ed7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:43:32 +0100 Subject: thermal: intel: int340x: Rename variable in int340x_thermal_zone_add() Rename local variables int34x_thermal_zone in int340x_thermal_zone_add() and int340x_thermal_zone_remove() to int34x_zone which allows a number of code lines to be shorter and easier to read and adjust some white space for consistency. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- .../intel/int340x_thermal/int340x_thermal_zone.c | 67 +++++++++++----------- 1 file changed, 33 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index 30c05b436700..020e681759d8 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -113,7 +113,7 @@ static struct thermal_zone_params int340x_thermal_params = { struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, int (*get_temp) (struct thermal_zone_device *, int *)) { - struct int34x_thermal_zone *int34x_thermal_zone; + struct int34x_thermal_zone *int34x_zone; struct thermal_trip *zone_trips; unsigned long long trip_cnt = 0; unsigned long long hyst; @@ -121,25 +121,25 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, acpi_status status; int i, ret; - int34x_thermal_zone = kzalloc(sizeof(*int34x_thermal_zone), GFP_KERNEL); - if (!int34x_thermal_zone) + int34x_zone = kzalloc(sizeof(*int34x_zone), GFP_KERNEL); + if (!int34x_zone) return ERR_PTR(-ENOMEM); - int34x_thermal_zone->adev = adev; + int34x_zone->adev = adev; - int34x_thermal_zone->ops = kmemdup(&int340x_thermal_zone_ops, - sizeof(int340x_thermal_zone_ops), GFP_KERNEL); - if (!int34x_thermal_zone->ops) { + int34x_zone->ops = kmemdup(&int340x_thermal_zone_ops, + sizeof(int340x_thermal_zone_ops), GFP_KERNEL); + if (!int34x_zone->ops) { ret = -ENOMEM; goto err_ops_alloc; } if (get_temp) - int34x_thermal_zone->ops->get_temp = get_temp; + int34x_zone->ops->get_temp = get_temp; status = acpi_evaluate_integer(adev->handle, "PATC", NULL, &trip_cnt); if (ACPI_SUCCESS(status)) { - int34x_thermal_zone->aux_trip_nr = trip_cnt; + int34x_zone->aux_trip_nr = trip_cnt; trip_mask = BIT(trip_cnt) - 1; } @@ -166,48 +166,47 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, for (i = 0; i < trip_cnt; ++i) zone_trips[i].hysteresis = hyst; - int34x_thermal_zone->trips = zone_trips; + int34x_zone->trips = zone_trips; - int34x_thermal_zone->lpat_table = acpi_lpat_get_conversion_table(adev->handle); + int34x_zone->lpat_table = acpi_lpat_get_conversion_table(adev->handle); - int34x_thermal_zone->zone = thermal_zone_device_register_with_trips( - acpi_device_bid(adev), - zone_trips, trip_cnt, - trip_mask, int34x_thermal_zone, - int34x_thermal_zone->ops, - &int340x_thermal_params, - 0, 0); - if (IS_ERR(int34x_thermal_zone->zone)) { - ret = PTR_ERR(int34x_thermal_zone->zone); + int34x_zone->zone = thermal_zone_device_register_with_trips( + acpi_device_bid(adev), + zone_trips, trip_cnt, + trip_mask, int34x_zone, + int34x_zone->ops, + &int340x_thermal_params, + 0, 0); + if (IS_ERR(int34x_zone->zone)) { + ret = PTR_ERR(int34x_zone->zone); goto err_thermal_zone; } - ret = thermal_zone_device_enable(int34x_thermal_zone->zone); + ret = thermal_zone_device_enable(int34x_zone->zone); if (ret) goto err_enable; - return int34x_thermal_zone; + return int34x_zone; err_enable: - thermal_zone_device_unregister(int34x_thermal_zone->zone); + thermal_zone_device_unregister(int34x_zone->zone); err_thermal_zone: - kfree(int34x_thermal_zone->trips); - acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); + kfree(int34x_zone->trips); + acpi_lpat_free_conversion_table(int34x_zone->lpat_table); err_trips_alloc: - kfree(int34x_thermal_zone->ops); + kfree(int34x_zone->ops); err_ops_alloc: - kfree(int34x_thermal_zone); + kfree(int34x_zone); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(int340x_thermal_zone_add); -void int340x_thermal_zone_remove(struct int34x_thermal_zone - *int34x_thermal_zone) +void int340x_thermal_zone_remove(struct int34x_thermal_zone *int34x_zone) { - thermal_zone_device_unregister(int34x_thermal_zone->zone); - acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); - kfree(int34x_thermal_zone->trips); - kfree(int34x_thermal_zone->ops); - kfree(int34x_thermal_zone); + thermal_zone_device_unregister(int34x_zone->zone); + acpi_lpat_free_conversion_table(int34x_zone->lpat_table); + kfree(int34x_zone->trips); + kfree(int34x_zone->ops); + kfree(int34x_zone); } EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove); -- cgit v1.2.3 From d0009d14e9853bb5f553a36df9fb7791deffc594 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:45:17 +0100 Subject: thermal: intel: int340x: Drop pointless cast to unsigned long The explicit casting from int to unsigned long in int340x_thermal_get_zone_temp() is pointless, becuase the multiplication result is cast back to int by the assignment in the same statement, so drop it. No expected functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index 020e681759d8..0e622bb560af 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -29,7 +29,7 @@ static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone, if (conv_temp < 0) return conv_temp; - *temp = (unsigned long)conv_temp * 10; + *temp = conv_temp * 10; } else { /* _TMP returns the temperature in tenths of degrees Kelvin */ *temp = deci_kelvin_to_millicelsius(tmp); -- cgit v1.2.3 From 1bcebcab887ba4c4d790d7ccb055303c5dc7dbbb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:47:43 +0100 Subject: thermal: intel: int340x: Improve int340x_thermal_set_trip_temp() Instead of using snprintf() to populate the ACPI object name in int340x_thermal_set_trip_temp(), use an appropriate initializer and make the function fail if its trip argument is greater than 9, because ACPI object names can only be 4 characters long and it does not make sense to even try to evaluate objects with longer names (that argument is guaranteed to be non-negative, because it comes from the thermal code that will not pass negative trip numbers to zone callbacks). Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index 0e622bb560af..00665967ca52 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -42,10 +42,12 @@ static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, int trip, int temp) { struct int34x_thermal_zone *d = zone->devdata; + char name[] = {'P', 'A', 'T', '0' + trip, '\0'}; acpi_status status; - char name[10]; - snprintf(name, sizeof(name), "PAT%d", trip); + if (trip > 9) + return -EINVAL; + status = acpi_execute_simple_method(d->adev->handle, name, millicelsius_to_deci_kelvin(temp)); if (ACPI_FAILURE(status)) -- cgit v1.2.3 From 2cee73568e8d2f9d63793cf84e9aa65c9dfd85e2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:58:32 +0100 Subject: thermal: intel: intel_pch: Make pch_wpt_add_acpi_psv_trip() return int Modify pch_wpt_add_acpi_psv_trip() to return an int value instead of using a return pointer for that. While at it, drop an excessive empty code line. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index c529c05c1853..e5ce4d03c3c1 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -90,34 +90,31 @@ struct pch_thermal_device { }; #ifdef CONFIG_ACPI - /* * On some platforms, there is a companion ACPI device, which adds * passive trip temperature using _PSV method. There is no specific * passive temperature setting in MMIO interface of this PCI device. */ -static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, - int *nr_trips) +static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) { struct acpi_device *adev; int temp; adev = ACPI_COMPANION(&ptd->pdev->dev); if (!adev) - return; + return 0; if (thermal_acpi_passive_trip_temp(adev, &temp) || temp <= 0) - return; + return 0; - ptd->trips[*nr_trips].type = THERMAL_TRIP_PASSIVE; - ptd->trips[*nr_trips].temperature = temp; - ++(*nr_trips); + ptd->trips[trip].type = THERMAL_TRIP_PASSIVE; + ptd->trips[trip].temperature = temp; + return 1; } #else -static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, - int *nr_trips) +static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) { - + return 0; } #endif @@ -167,7 +164,7 @@ read_trips: ++(*nr_trips); } - pch_wpt_add_acpi_psv_trip(ptd, nr_trips); + *nr_trips += pch_wpt_add_acpi_psv_trip(ptd, *nr_trips); return 0; } -- cgit v1.2.3 From 558718f4d3798fa80a9288addc09240910c07df1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:59:48 +0100 Subject: thermal: intel: intel_pch: Eliminate redundant return pointers Both pch_wpt_init() and pch_wpt_get_temp() can return the proper result via their return values, so they do not need to use return pointers. Modify them accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 40 ++++++++++++++----------------- 1 file changed, 18 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index e5ce4d03c3c1..02bd7a048159 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -118,12 +118,11 @@ static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) } #endif -static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips) +static int pch_wpt_init(struct pch_thermal_device *ptd) { - u8 tsel; + int nr_trips = 0; u16 trip_temp; - - *nr_trips = 0; + u8 tsel; /* Check if BIOS has already enabled thermal sensor */ if (WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL)) { @@ -151,29 +150,23 @@ read_trips: trip_temp = readw(ptd->hw_base + WPT_CTT); trip_temp &= 0x1FF; if (trip_temp) { - ptd->trips[*nr_trips].temperature = GET_WPT_TEMP(trip_temp); - ptd->trips[*nr_trips].type = THERMAL_TRIP_CRITICAL; - ++(*nr_trips); + ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); + ptd->trips[nr_trips++].type = THERMAL_TRIP_CRITICAL; } trip_temp = readw(ptd->hw_base + WPT_PHL); trip_temp &= 0x1FF; if (trip_temp) { - ptd->trips[*nr_trips].temperature = GET_WPT_TEMP(trip_temp); - ptd->trips[*nr_trips].type = THERMAL_TRIP_HOT; - ++(*nr_trips); + ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); + ptd->trips[nr_trips++].type = THERMAL_TRIP_HOT; } - *nr_trips += pch_wpt_add_acpi_psv_trip(ptd, *nr_trips); - - return 0; + return nr_trips + pch_wpt_add_acpi_psv_trip(ptd, nr_trips); } -static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp) +static int pch_wpt_get_temp(struct pch_thermal_device *ptd) { - *temp = GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); - - return 0; + return GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); } /* Cool the PCH when it's overheat in .suspend_noirq phase */ @@ -259,8 +252,8 @@ static int pch_wpt_resume(struct pch_thermal_device *ptd) } struct pch_dev_ops { - int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips); - int (*get_temp)(struct pch_thermal_device *ptd, int *temp); + int (*hw_init)(struct pch_thermal_device *ptd); + int (*get_temp)(struct pch_thermal_device *ptd); int (*suspend)(struct pch_thermal_device *ptd); int (*resume)(struct pch_thermal_device *ptd); }; @@ -278,7 +271,8 @@ static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) { struct pch_thermal_device *ptd = tzd->devdata; - return ptd->ops->get_temp(ptd, temp); + *temp = ptd->ops->get_temp(ptd); + return 0; } static void pch_critical(struct thermal_zone_device *tzd) @@ -372,9 +366,11 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, goto error_release; } - err = ptd->ops->hw_init(ptd, &nr_trips); - if (err) + nr_trips = ptd->ops->hw_init(ptd); + if (nr_trips < 0) { + err = nr_trips; goto error_cleanup; + } ptd->tzd = thermal_zone_device_register_with_trips(bi->name, ptd->trips, nr_trips, 0, ptd, -- cgit v1.2.3 From 1aa4f925d80c44bd70442a8e94718fc921b8a55f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 20:00:48 +0100 Subject: thermal: intel: intel_pch: Rename device operations callbacks Because the same device operations callbacks are used for all supported boards, they are in fact generic, so rename them to reflect that. Also rename the operations object itself for consistency. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 34 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 02bd7a048159..b15d1f5fd51e 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -118,7 +118,7 @@ static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) } #endif -static int pch_wpt_init(struct pch_thermal_device *ptd) +static int pch_hw_init(struct pch_thermal_device *ptd) { int nr_trips = 0; u16 trip_temp; @@ -164,13 +164,13 @@ read_trips: return nr_trips + pch_wpt_add_acpi_psv_trip(ptd, nr_trips); } -static int pch_wpt_get_temp(struct pch_thermal_device *ptd) +static int pch_get_temp(struct pch_thermal_device *ptd) { return GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); } /* Cool the PCH when it's overheat in .suspend_noirq phase */ -static int pch_wpt_suspend(struct pch_thermal_device *ptd) +static int pch_suspend(struct pch_thermal_device *ptd) { u8 tsel; int pch_delay_cnt = 0; @@ -237,7 +237,7 @@ static int pch_wpt_suspend(struct pch_thermal_device *ptd) return 0; } -static int pch_wpt_resume(struct pch_thermal_device *ptd) +static int pch_resume(struct pch_thermal_device *ptd) { u8 tsel; @@ -258,13 +258,11 @@ struct pch_dev_ops { int (*resume)(struct pch_thermal_device *ptd); }; - -/* dev ops for Wildcat Point */ -static const struct pch_dev_ops pch_dev_ops_wpt = { - .hw_init = pch_wpt_init, - .get_temp = pch_wpt_get_temp, - .suspend = pch_wpt_suspend, - .resume = pch_wpt_resume, +static const struct pch_dev_ops pch_dev_ops = { + .hw_init = pch_hw_init, + .get_temp = pch_get_temp, + .suspend = pch_suspend, + .resume = pch_resume, }; static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) @@ -301,31 +299,31 @@ static const struct board_info { } board_info[] = { [board_hsw] = { .name = "pch_haswell", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_wpt] = { .name = "pch_wildcat_point", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_skl] = { .name = "pch_skylake", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_cnl] = { .name = "pch_cannonlake", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_cml] = { .name = "pch_cometlake", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_lwb] = { .name = "pch_lewisburg", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_wbg] = { .name = "pch_wellsburg", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, }; -- cgit v1.2.3 From 86cb1004b6f71b2f79f5fb08502a5bd10c852bf5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 20:02:19 +0100 Subject: thermal: intel: intel_pch: Eliminate device operations object The same device operations object is pointed to by all of the board configurations in the driver, so effectively the same operations callbacks are used by all of them which only adds overhead (that can be significant due to retpolines) for no real purpose. For this reason, drop the device operations object and replace the respective callback invocations by direct calls to the specific functions that were previously pointed to by callback pointers. No intentional change in behavior. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 33 ++++--------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index b15d1f5fd51e..4b2545191fe1 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -82,7 +82,6 @@ static char driver_name[] = "Intel PCH thermal driver"; struct pch_thermal_device { void __iomem *hw_base; - const struct pch_dev_ops *ops; struct pci_dev *pdev; struct thermal_zone_device *tzd; struct thermal_trip trips[PCH_MAX_TRIPS]; @@ -251,25 +250,11 @@ static int pch_resume(struct pch_thermal_device *ptd) return 0; } -struct pch_dev_ops { - int (*hw_init)(struct pch_thermal_device *ptd); - int (*get_temp)(struct pch_thermal_device *ptd); - int (*suspend)(struct pch_thermal_device *ptd); - int (*resume)(struct pch_thermal_device *ptd); -}; - -static const struct pch_dev_ops pch_dev_ops = { - .hw_init = pch_hw_init, - .get_temp = pch_get_temp, - .suspend = pch_suspend, - .resume = pch_resume, -}; - static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) { struct pch_thermal_device *ptd = tzd->devdata; - *temp = ptd->ops->get_temp(ptd); + *temp = pch_get_temp(ptd); return 0; } @@ -295,35 +280,27 @@ enum board_ids { static const struct board_info { const char *name; - const struct pch_dev_ops *ops; } board_info[] = { [board_hsw] = { .name = "pch_haswell", - .ops = &pch_dev_ops, }, [board_wpt] = { .name = "pch_wildcat_point", - .ops = &pch_dev_ops, }, [board_skl] = { .name = "pch_skylake", - .ops = &pch_dev_ops, }, [board_cnl] = { .name = "pch_cannonlake", - .ops = &pch_dev_ops, }, [board_cml] = { .name = "pch_cometlake", - .ops = &pch_dev_ops, }, [board_lwb] = { .name = "pch_lewisburg", - .ops = &pch_dev_ops, }, [board_wbg] = { .name = "pch_wellsburg", - .ops = &pch_dev_ops, }, }; @@ -340,8 +317,6 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, if (!ptd) return -ENOMEM; - ptd->ops = bi->ops; - pci_set_drvdata(pdev, ptd); ptd->pdev = pdev; @@ -364,7 +339,7 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, goto error_release; } - nr_trips = ptd->ops->hw_init(ptd); + nr_trips = pch_hw_init(ptd); if (nr_trips < 0) { err = nr_trips; goto error_cleanup; @@ -412,14 +387,14 @@ static int intel_pch_thermal_suspend_noirq(struct device *device) { struct pch_thermal_device *ptd = dev_get_drvdata(device); - return ptd->ops->suspend(ptd); + return pch_suspend(ptd); } static int intel_pch_thermal_resume(struct device *device) { struct pch_thermal_device *ptd = dev_get_drvdata(device); - return ptd->ops->resume(ptd); + return pch_resume(ptd); } static const struct pci_device_id intel_pch_thermal_id[] = { -- cgit v1.2.3 From 35c87f948d315ebc820a8784a962a506c1a3d299 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 20:03:11 +0100 Subject: thermal: intel: intel_pch: Fold two functions into their callers Fold two functions, pch_hw_init() and pch_get_temp(), that each have only one caller, into their respective callers to make the code somewhat easier to follow. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 98 +++++++++++++------------------ 1 file changed, 42 insertions(+), 56 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 4b2545191fe1..fa6cd4dd2f56 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -117,57 +117,6 @@ static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) } #endif -static int pch_hw_init(struct pch_thermal_device *ptd) -{ - int nr_trips = 0; - u16 trip_temp; - u8 tsel; - - /* Check if BIOS has already enabled thermal sensor */ - if (WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL)) { - ptd->bios_enabled = true; - goto read_trips; - } - - tsel = readb(ptd->hw_base + WPT_TSEL); - /* - * When TSEL's Policy Lock-Down bit is 1, TSEL become RO. - * If so, thermal sensor cannot enable. Bail out. - */ - if (tsel & WPT_TSEL_PLDB) { - dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); - return -ENODEV; - } - - writeb(tsel|WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); - if (!(WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL))) { - dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); - return -ENODEV; - } - -read_trips: - trip_temp = readw(ptd->hw_base + WPT_CTT); - trip_temp &= 0x1FF; - if (trip_temp) { - ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); - ptd->trips[nr_trips++].type = THERMAL_TRIP_CRITICAL; - } - - trip_temp = readw(ptd->hw_base + WPT_PHL); - trip_temp &= 0x1FF; - if (trip_temp) { - ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); - ptd->trips[nr_trips++].type = THERMAL_TRIP_HOT; - } - - return nr_trips + pch_wpt_add_acpi_psv_trip(ptd, nr_trips); -} - -static int pch_get_temp(struct pch_thermal_device *ptd) -{ - return GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); -} - /* Cool the PCH when it's overheat in .suspend_noirq phase */ static int pch_suspend(struct pch_thermal_device *ptd) { @@ -254,7 +203,7 @@ static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) { struct pch_thermal_device *ptd = tzd->devdata; - *temp = pch_get_temp(ptd); + *temp = GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); return 0; } @@ -310,8 +259,10 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, enum board_ids board_id = id->driver_data; const struct board_info *bi = &board_info[board_id]; struct pch_thermal_device *ptd; + int nr_trips = 0; + u16 trip_temp; + u8 tsel; int err; - int nr_trips; ptd = devm_kzalloc(&pdev->dev, sizeof(*ptd), GFP_KERNEL); if (!ptd) @@ -339,12 +290,47 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, goto error_release; } - nr_trips = pch_hw_init(ptd); - if (nr_trips < 0) { - err = nr_trips; + /* Check if BIOS has already enabled thermal sensor */ + if (WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL)) { + ptd->bios_enabled = true; + goto read_trips; + } + + tsel = readb(ptd->hw_base + WPT_TSEL); + /* + * When TSEL's Policy Lock-Down bit is 1, TSEL become RO. + * If so, thermal sensor cannot enable. Bail out. + */ + if (tsel & WPT_TSEL_PLDB) { + dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); + err = -ENODEV; goto error_cleanup; } + writeb(tsel|WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); + if (!(WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL))) { + dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); + err = -ENODEV; + goto error_cleanup; + } + +read_trips: + trip_temp = readw(ptd->hw_base + WPT_CTT); + trip_temp &= 0x1FF; + if (trip_temp) { + ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); + ptd->trips[nr_trips++].type = THERMAL_TRIP_CRITICAL; + } + + trip_temp = readw(ptd->hw_base + WPT_PHL); + trip_temp &= 0x1FF; + if (trip_temp) { + ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); + ptd->trips[nr_trips++].type = THERMAL_TRIP_HOT; + } + + nr_trips += pch_wpt_add_acpi_psv_trip(ptd, nr_trips); + ptd->tzd = thermal_zone_device_register_with_trips(bi->name, ptd->trips, nr_trips, 0, ptd, &tzd_ops, NULL, 0, 0); -- cgit v1.2.3 From c5f43242f48ac81d57f50592acf7d425640d9f83 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 20:04:05 +0100 Subject: thermal: intel: intel_pch: Fold suspend and resume routines into their callers Fold pch_suspend() and pch_resume(), that each have only one caller, into their respective callers to make the code somewhat easier to follow. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 155 ++++++++++++++---------------- 1 file changed, 71 insertions(+), 84 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index fa6cd4dd2f56..ffb2acd0fd45 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -117,88 +117,6 @@ static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) } #endif -/* Cool the PCH when it's overheat in .suspend_noirq phase */ -static int pch_suspend(struct pch_thermal_device *ptd) -{ - u8 tsel; - int pch_delay_cnt = 0; - u16 pch_thr_temp, pch_cur_temp; - - /* Shutdown the thermal sensor if it is not enabled by BIOS */ - if (!ptd->bios_enabled) { - tsel = readb(ptd->hw_base + WPT_TSEL); - writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL); - return 0; - } - - /* Do not check temperature if it is not s2idle */ - if (pm_suspend_via_firmware()) - return 0; - - /* Get the PCH temperature threshold value */ - pch_thr_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TSPM)); - - /* Get the PCH current temperature value */ - pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); - - /* - * If current PCH temperature is higher than configured PCH threshold - * value, run some delay loop with sleep to let the current temperature - * go down below the threshold value which helps to allow system enter - * lower power S0ix suspend state. Even after delay loop if PCH current - * temperature stays above threshold, notify the warning message - * which helps to indentify the reason why S0ix entry was rejected. - */ - while (pch_delay_cnt < delay_cnt) { - if (pch_cur_temp < pch_thr_temp) - break; - - if (pm_wakeup_pending()) { - dev_warn(&ptd->pdev->dev, "Wakeup event detected, abort cooling\n"); - return 0; - } - - pch_delay_cnt++; - dev_dbg(&ptd->pdev->dev, - "CPU-PCH current temp [%dC] higher than the threshold temp [%dC], sleep %d times for %d ms duration\n", - pch_cur_temp, pch_thr_temp, pch_delay_cnt, delay_timeout); - msleep(delay_timeout); - /* Read the PCH current temperature for next cycle. */ - pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); - } - - if (pch_cur_temp >= pch_thr_temp) - dev_warn(&ptd->pdev->dev, - "CPU-PCH is hot [%dC] after %d ms delay. S0ix might fail\n", - pch_cur_temp, pch_delay_cnt * delay_timeout); - else { - if (pch_delay_cnt) - dev_info(&ptd->pdev->dev, - "CPU-PCH is cool [%dC] after %d ms delay\n", - pch_cur_temp, pch_delay_cnt * delay_timeout); - else - dev_info(&ptd->pdev->dev, - "CPU-PCH is cool [%dC]\n", - pch_cur_temp); - } - - return 0; -} - -static int pch_resume(struct pch_thermal_device *ptd) -{ - u8 tsel; - - if (ptd->bios_enabled) - return 0; - - tsel = readb(ptd->hw_base + WPT_TSEL); - - writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); - - return 0; -} - static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) { struct pch_thermal_device *ptd = tzd->devdata; @@ -372,15 +290,84 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev) static int intel_pch_thermal_suspend_noirq(struct device *device) { struct pch_thermal_device *ptd = dev_get_drvdata(device); + u16 pch_thr_temp, pch_cur_temp; + int pch_delay_cnt = 0; + u8 tsel; + + /* Shutdown the thermal sensor if it is not enabled by BIOS */ + if (!ptd->bios_enabled) { + tsel = readb(ptd->hw_base + WPT_TSEL); + writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL); + return 0; + } + + /* Do not check temperature if it is not s2idle */ + if (pm_suspend_via_firmware()) + return 0; + + /* Get the PCH temperature threshold value */ + pch_thr_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TSPM)); + + /* Get the PCH current temperature value */ + pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); - return pch_suspend(ptd); + /* + * If current PCH temperature is higher than configured PCH threshold + * value, run some delay loop with sleep to let the current temperature + * go down below the threshold value which helps to allow system enter + * lower power S0ix suspend state. Even after delay loop if PCH current + * temperature stays above threshold, notify the warning message + * which helps to indentify the reason why S0ix entry was rejected. + */ + while (pch_delay_cnt < delay_cnt) { + if (pch_cur_temp < pch_thr_temp) + break; + + if (pm_wakeup_pending()) { + dev_warn(&ptd->pdev->dev, "Wakeup event detected, abort cooling\n"); + return 0; + } + + pch_delay_cnt++; + dev_dbg(&ptd->pdev->dev, + "CPU-PCH current temp [%dC] higher than the threshold temp [%dC], sleep %d times for %d ms duration\n", + pch_cur_temp, pch_thr_temp, pch_delay_cnt, delay_timeout); + msleep(delay_timeout); + /* Read the PCH current temperature for next cycle. */ + pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); + } + + if (pch_cur_temp >= pch_thr_temp) + dev_warn(&ptd->pdev->dev, + "CPU-PCH is hot [%dC] after %d ms delay. S0ix might fail\n", + pch_cur_temp, pch_delay_cnt * delay_timeout); + else { + if (pch_delay_cnt) + dev_info(&ptd->pdev->dev, + "CPU-PCH is cool [%dC] after %d ms delay\n", + pch_cur_temp, pch_delay_cnt * delay_timeout); + else + dev_info(&ptd->pdev->dev, + "CPU-PCH is cool [%dC]\n", + pch_cur_temp); + } + + return 0; } static int intel_pch_thermal_resume(struct device *device) { struct pch_thermal_device *ptd = dev_get_drvdata(device); + u8 tsel; - return pch_resume(ptd); + if (ptd->bios_enabled) + return 0; + + tsel = readb(ptd->hw_base + WPT_TSEL); + + writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); + + return 0; } static const struct pci_device_id intel_pch_thermal_id[] = { -- cgit v1.2.3 From ae98e57a6e829dee26c1573134a24709d2cb82a3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 20:04:55 +0100 Subject: thermal: intel: intel_pch: Rename board ID symbols Use capitals in the names of the board ID symbols and add the PCH_ prefix to each of them for consistency. Also rename the board_ids enum accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 54 +++++++++++++++---------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index ffb2acd0fd45..12600bca89b6 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -135,38 +135,38 @@ static struct thermal_zone_device_ops tzd_ops = { .critical = pch_critical, }; -enum board_ids { - board_hsw, - board_wpt, - board_skl, - board_cnl, - board_cml, - board_lwb, - board_wbg, +enum pch_board_ids { + PCH_BOARD_HSW = 0, + PCH_BOARD_WPT, + PCH_BOARD_SKL, + PCH_BOARD_CNL, + PCH_BOARD_CML, + PCH_BOARD_LWB, + PCH_BOARD_WBG, }; static const struct board_info { const char *name; } board_info[] = { - [board_hsw] = { + [PCH_BOARD_HSW] = { .name = "pch_haswell", }, - [board_wpt] = { + [PCH_BOARD_WPT] = { .name = "pch_wildcat_point", }, - [board_skl] = { + [PCH_BOARD_SKL] = { .name = "pch_skylake", }, - [board_cnl] = { + [PCH_BOARD_CNL] = { .name = "pch_cannonlake", }, - [board_cml] = { + [PCH_BOARD_CML] = { .name = "pch_cometlake", }, - [board_lwb] = { + [PCH_BOARD_LWB] = { .name = "pch_lewisburg", }, - [board_wbg] = { + [PCH_BOARD_WBG] = { .name = "pch_wellsburg", }, }; @@ -174,7 +174,7 @@ static const struct board_info { static int intel_pch_thermal_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - enum board_ids board_id = id->driver_data; + enum pch_board_ids board_id = id->driver_data; const struct board_info *bi = &board_info[board_id]; struct pch_thermal_device *ptd; int nr_trips = 0; @@ -372,27 +372,27 @@ static int intel_pch_thermal_resume(struct device *device) static const struct pci_device_id intel_pch_thermal_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_1), - .driver_data = board_hsw, }, + .driver_data = PCH_BOARD_HSW, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_2), - .driver_data = board_hsw, }, + .driver_data = PCH_BOARD_HSW, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT), - .driver_data = board_wpt, }, + .driver_data = PCH_BOARD_WPT, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL), - .driver_data = board_skl, }, + .driver_data = PCH_BOARD_SKL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL_H), - .driver_data = board_skl, }, + .driver_data = PCH_BOARD_SKL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL), - .driver_data = board_cnl, }, + .driver_data = PCH_BOARD_CNL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL_H), - .driver_data = board_cnl, }, + .driver_data = PCH_BOARD_CNL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL_LP), - .driver_data = board_cnl, }, + .driver_data = PCH_BOARD_CNL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CML_H), - .driver_data = board_cml, }, + .driver_data = PCH_BOARD_CML, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_LWB), - .driver_data = board_lwb, }, + .driver_data = PCH_BOARD_LWB, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WBG), - .driver_data = board_wbg, }, + .driver_data = PCH_BOARD_WBG, }, { 0, }, }; MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id); -- cgit v1.2.3 From 2153a87ff9ef5537b8a96e7c94a9d79a78c7a30c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 31 Jan 2023 14:08:20 +0100 Subject: thermal: intel: intel_pch: Drop struct board_info Because the only member of struct board_info is the name, the board_info[] array of struct board_info elements can be replaced with an array of strings. Modify the code accordingly and drop struct board_info. No intentional functional impact. Suggested-by: Zhang Rui Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 42 ++++++++++--------------------- 1 file changed, 13 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 12600bca89b6..b855d031a855 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -145,37 +145,20 @@ enum pch_board_ids { PCH_BOARD_WBG, }; -static const struct board_info { - const char *name; -} board_info[] = { - [PCH_BOARD_HSW] = { - .name = "pch_haswell", - }, - [PCH_BOARD_WPT] = { - .name = "pch_wildcat_point", - }, - [PCH_BOARD_SKL] = { - .name = "pch_skylake", - }, - [PCH_BOARD_CNL] = { - .name = "pch_cannonlake", - }, - [PCH_BOARD_CML] = { - .name = "pch_cometlake", - }, - [PCH_BOARD_LWB] = { - .name = "pch_lewisburg", - }, - [PCH_BOARD_WBG] = { - .name = "pch_wellsburg", - }, +static const char *board_names[] = { + [PCH_BOARD_HSW] = "pch_haswell", + [PCH_BOARD_WPT] = "pch_wildcat_point", + [PCH_BOARD_SKL] = "pch_skylake", + [PCH_BOARD_CNL] = "pch_cannonlake", + [PCH_BOARD_CML] = "pch_cometlake", + [PCH_BOARD_LWB] = "pch_lewisburg", + [PCH_BOARD_WBG] = "pch_wellsburg", }; static int intel_pch_thermal_probe(struct pci_dev *pdev, const struct pci_device_id *id) { enum pch_board_ids board_id = id->driver_data; - const struct board_info *bi = &board_info[board_id]; struct pch_thermal_device *ptd; int nr_trips = 0; u16 trip_temp; @@ -249,12 +232,13 @@ read_trips: nr_trips += pch_wpt_add_acpi_psv_trip(ptd, nr_trips); - ptd->tzd = thermal_zone_device_register_with_trips(bi->name, ptd->trips, - nr_trips, 0, ptd, - &tzd_ops, NULL, 0, 0); + ptd->tzd = thermal_zone_device_register_with_trips(board_names[board_id], + ptd->trips, nr_trips, + 0, ptd, &tzd_ops, + NULL, 0, 0); if (IS_ERR(ptd->tzd)) { dev_err(&pdev->dev, "Failed to register thermal zone %s\n", - bi->name); + board_names[board_id]); err = PTR_ERR(ptd->tzd); goto error_cleanup; } -- cgit v1.2.3 From 8e47363588377e1bdb65e2b020b409cfb44dd260 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 12:39:41 -0800 Subject: thermal: intel: powerclamp: Fix cur_state for multi package system The powerclamp cooling device cur_state shows actual idle observed by package C-state idle counters. But the implementation is not sufficient for multi package or multi die system. The cur_state value is incorrect. On these systems, these counters must be read from each package/die and somehow aggregate them. But there is no good method for aggregation. It was not a problem when explicit CPU model addition was required to enable intel powerclamp. In this way certain CPU models could have been avoided. But with the removal of CPU model check with the availability of Package C-state counters, the driver is loaded on most of the recent systems. For multi package/die systems, just show the actual target idle state, the system is trying to achieve. In powerclamp this is the user set state minus one. Also there is no use of starting a worker thread for polling package C-state counters and applying any compensation for multiple package or multiple die systems. Fixes: b721ca0d1927 ("thermal/powerclamp: remove cpu whitelist") Signed-off-by: Srinivas Pandruvada Cc: 4.14+ # 4.14+ Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index b80e25ec1261..2f4cbfdf26a0 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -57,6 +57,7 @@ static unsigned int target_mwait; static struct dentry *debug_dir; +static bool poll_pkg_cstate_enable; /* user selected target */ static unsigned int set_target_ratio; @@ -261,6 +262,9 @@ static unsigned int get_compensation(int ratio) { unsigned int comp = 0; + if (!poll_pkg_cstate_enable) + return 0; + /* we only use compensation if all adjacent ones are good */ if (ratio == 1 && cal_data[ratio].confidence >= CONFIDENCE_OK && @@ -519,7 +523,8 @@ static int start_power_clamp(void) control_cpu = cpumask_first(cpu_online_mask); clamping = true; - schedule_delayed_work(&poll_pkg_cstate_work, 0); + if (poll_pkg_cstate_enable) + schedule_delayed_work(&poll_pkg_cstate_work, 0); /* start one kthread worker per online cpu */ for_each_online_cpu(cpu) { @@ -585,11 +590,15 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { - if (true == clamping) - *state = pkg_cstate_ratio_cur; - else + if (clamping) { + if (poll_pkg_cstate_enable) + *state = pkg_cstate_ratio_cur; + else + *state = set_target_ratio; + } else { /* to save power, do not poll idle ratio while not clamping */ *state = -1; /* indicates invalid state */ + } return 0; } @@ -712,6 +721,9 @@ static int __init powerclamp_init(void) goto exit_unregister; } + if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) + poll_pkg_cstate_enable = true; + cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, &powerclamp_cooling_ops); if (IS_ERR(cooling_dev)) { -- cgit v1.2.3 From bbfc3349c4e77a27ea83c765bf593d935f8f5599 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 10:28:51 -0800 Subject: powercap: idle_inject: Export symbols Export symbols for external interfaces, so that they can be used in other loadable modules. Export is done under name space IDLE_INJECT. Signed-off-by: Srinivas Pandruvada Acked-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/powercap/idle_inject.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index fe86a09e3b67..dfa989182e71 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -160,6 +160,7 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev, WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); } } +EXPORT_SYMBOL_NS_GPL(idle_inject_set_duration, IDLE_INJECT); /** * idle_inject_get_duration - idle and run duration retrieval helper @@ -174,6 +175,7 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev, *run_duration_us = READ_ONCE(ii_dev->run_duration_us); *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); } +EXPORT_SYMBOL_NS_GPL(idle_inject_get_duration, IDLE_INJECT); /** * idle_inject_set_latency - set the maximum latency allowed @@ -185,6 +187,7 @@ void idle_inject_set_latency(struct idle_inject_device *ii_dev, { WRITE_ONCE(ii_dev->latency_us, latency_us); } +EXPORT_SYMBOL_NS_GPL(idle_inject_set_latency, IDLE_INJECT); /** * idle_inject_start - start idle injections @@ -216,6 +219,7 @@ int idle_inject_start(struct idle_inject_device *ii_dev) return 0; } +EXPORT_SYMBOL_NS_GPL(idle_inject_start, IDLE_INJECT); /** * idle_inject_stop - stops idle injections @@ -262,6 +266,7 @@ void idle_inject_stop(struct idle_inject_device *ii_dev) cpu_hotplug_enable(); } +EXPORT_SYMBOL_NS_GPL(idle_inject_stop, IDLE_INJECT); /** * idle_inject_setup - prepare the current task for idle injection @@ -337,6 +342,7 @@ out_rollback: return NULL; } +EXPORT_SYMBOL_NS_GPL(idle_inject_register, IDLE_INJECT); /** * idle_inject_unregister - unregister idle injection control device @@ -357,6 +363,7 @@ void idle_inject_unregister(struct idle_inject_device *ii_dev) kfree(ii_dev); } +EXPORT_SYMBOL_NS_GPL(idle_inject_unregister, IDLE_INJECT); static struct smp_hotplug_thread idle_inject_threads = { .store = &idle_inject_thread.tsk, -- cgit v1.2.3 From acbc661032b8aa0e8359ac77074769ade34a176c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 10:28:52 -0800 Subject: powercap: idle_inject: Add update callback The powercap/idle_inject core uses play_idle_precise() to inject idle time. But play_idle_precise() can't ensure that the CPU is fully idle for the specified duration because of wakeups due to interrupts. To compensate for the reduced idle time due to these wakes, the caller can adjust requested idle time for the next cycle. The goal of idle injection is to keep system at some idle percent on average, so this is fine to overshoot or undershoot instantaneous idle times. The idle inject core provides an interface idle_inject_set_duration() to set idle and runtime duration. Some architectures provide interface to get actual idle time observed by the hardware. So, the effective idle percent can be adjusted using the hardware feedback. For example, Intel CPUs provides package idle counters, which is currently used by Intel powerclamp driver to readjust runtime duration. When the caller's desired idle time over a period is less or greater than the actual CPU idle time observed by the hardware, caller can readjust idle and runtime duration for the next cycle. The only way this can be done currently is by monitoring hardware idle time from a different software thread and readjust idle and runtime duration using idle_inject_set_duration(). This can be avoided by adding a callback which callers can register and readjust from this callback function. Add a capability to register an optional update() callback, which can be called from the idle inject core before waking up CPUs for idle injection. This callback can be registered via a new interface: idle_inject_register_full(). During this process of constantly adjusting idle and runtime duration there can be some cases where actual idle time is more than the desired. In this case idle inject can be skipped for a cycle. If update() callback returns false, then the idle inject core skips waking up CPUs for the idle injection. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/powercap/idle_inject.c | 52 +++++++++++++++++++++++++++++++++++++----- include/linux/idle_inject.h | 3 +++ 2 files changed, 49 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index dfa989182e71..6862e6c2e87d 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -63,13 +63,29 @@ struct idle_inject_thread { * @idle_duration_us: duration of CPU idle time to inject * @run_duration_us: duration of CPU run time to allow * @latency_us: max allowed latency + * @update: Optional callback deciding whether or not to skip idle + * injection in the given cycle. * @cpumask: mask of CPUs affected by idle injection + * + * This structure is used to define per instance idle inject device data. Each + * instance has an idle duration, a run duration and mask of CPUs to inject + * idle. + * + * Actual CPU idle time is injected by calling kernel scheduler interface + * play_idle_precise(). There is one optional callback that can be registered + * by calling idle_inject_register_full(): + * + * update() - This callback is invoked just before waking up CPUs to inject + * idle. If it returns false, CPUs are not woken up to inject idle in the given + * cycle. It also allows the caller to readjust the idle and run duration by + * calling idle_inject_set_duration() for the next cycle. */ struct idle_inject_device { struct hrtimer timer; unsigned int idle_duration_us; unsigned int run_duration_us; unsigned int latency_us; + bool (*update)(void); unsigned long cpumask[]; }; @@ -111,11 +127,12 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) struct idle_inject_device *ii_dev = container_of(timer, struct idle_inject_device, timer); + if (!ii_dev->update || (ii_dev->update && ii_dev->update())) + idle_inject_wakeup(ii_dev); + duration_us = READ_ONCE(ii_dev->run_duration_us); duration_us += READ_ONCE(ii_dev->idle_duration_us); - idle_inject_wakeup(ii_dev); - hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC)); return HRTIMER_RESTART; @@ -295,17 +312,22 @@ static int idle_inject_should_run(unsigned int cpu) } /** - * idle_inject_register - initialize idle injection on a set of CPUs + * idle_inject_register_full - initialize idle injection on a set of CPUs * @cpumask: CPUs to be affected by idle injection + * @update: This callback is called just before waking up CPUs to inject + * idle * * This function creates an idle injection control device structure for the - * given set of CPUs and initializes the timer associated with it. It does not - * start any injection cycles. + * given set of CPUs and initializes the timer associated with it. This + * function also allows to register update()callback. + * It does not start any injection cycles. * * Return: NULL if memory allocation fails, idle injection control device * pointer on success. */ -struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) + +struct idle_inject_device *idle_inject_register_full(struct cpumask *cpumask, + bool (*update)(void)) { struct idle_inject_device *ii_dev; int cpu, cpu_rb; @@ -318,6 +340,7 @@ struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ii_dev->timer.function = idle_inject_timer_fn; ii_dev->latency_us = UINT_MAX; + ii_dev->update = update; for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { @@ -342,6 +365,23 @@ out_rollback: return NULL; } +EXPORT_SYMBOL_NS_GPL(idle_inject_register_full, IDLE_INJECT); + +/** + * idle_inject_register - initialize idle injection on a set of CPUs + * @cpumask: CPUs to be affected by idle injection + * + * This function creates an idle injection control device structure for the + * given set of CPUs and initializes the timer associated with it. It does not + * start any injection cycles. + * + * Return: NULL if memory allocation fails, idle injection control device + * pointer on success. + */ +struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) +{ + return idle_inject_register_full(cpumask, NULL); +} EXPORT_SYMBOL_NS_GPL(idle_inject_register, IDLE_INJECT); /** diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h index fb88e23a99d3..a85d5dd40f72 100644 --- a/include/linux/idle_inject.h +++ b/include/linux/idle_inject.h @@ -13,6 +13,9 @@ struct idle_inject_device; struct idle_inject_device *idle_inject_register(struct cpumask *cpumask); +struct idle_inject_device *idle_inject_register_full(struct cpumask *cpumask, + bool (*update)(void)); + void idle_inject_unregister(struct idle_inject_device *ii_dev); int idle_inject_start(struct idle_inject_device *ii_dev); -- cgit v1.2.3 From 8526eb7fc75abcd09d8bd061610215baf0ca948a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 10:28:53 -0800 Subject: thermal: intel: powerclamp: Use powercap idle-inject feature There are two idle injection implementation in the Linux kernel. One via intel_powerclamp and the other using powercap/idle_inject. Both implementation end up in calling play_idle* function from a FIFO priority thread. Both can't be used at the same time. It is better to use one idle injection framework for better maintainability. In this way, there is only one caller for play_idle. Here powercap/idle_inject can be used for both per-core and for system wide idle injection. This framework has a well defined interface which allow registry for per-core or for all CPUs (system wide). This reduces code complexity in the intel powerclamp driver as all the per CPU kthreads, delayed work and calls to play_idle can be removed. The changes include: - Remove unneeded include files - Remove per CPU kthread workers: balancing_work and idle_injection_work. - Reuse the compensation related code by moving from previous worker thread to idle_injection callback. - Adjust the idle_duration and runtime by using powercap/idle_inject interface. - Remove all variables, which are not required once powercap/idle_inject is used. - Add mutex to avoid race during removal of idle injection during module unload and user action to change idle inject percent. Also for protection during dynamic adjustment of run and idle time from update() callback. - Remove online/offline callbacks to designate control CPU - Use cpu_present_mask global variable for CPU mask - Remove hot plug locks Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/Kconfig | 3 + drivers/thermal/intel/intel_powerclamp.c | 379 +++++++++++++------------------ 2 files changed, 159 insertions(+), 223 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig index e50fd260484a..b5808f92702d 100644 --- a/drivers/thermal/intel/Kconfig +++ b/drivers/thermal/intel/Kconfig @@ -3,6 +3,9 @@ config INTEL_POWERCLAMP tristate "Intel PowerClamp idle injection driver" depends on X86 depends on CPU_SUP_INTEL + depends on CPU_IDLE + select POWERCAP + select IDLE_INJECT help Enable this to enable Intel PowerClamp idle injection driver. This enforce idle time which results in more package C-state residency. The diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 2f4cbfdf26a0..209bb5f5ae17 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -2,7 +2,7 @@ /* * intel_powerclamp.c - package c-state idle injection * - * Copyright (c) 2012, Intel Corporation. + * Copyright (c) 2012-2023, Intel Corporation. * * Authors: * Arjan van de Ven @@ -27,21 +27,15 @@ #include #include #include -#include #include #include -#include -#include #include #include -#include -#include +#include -#include #include #include #include -#include #define MAX_TARGET_RATIO (50U) /* For each undisturbed clamping period (no extra wake ups during idle time), @@ -59,35 +53,26 @@ static unsigned int target_mwait; static struct dentry *debug_dir; static bool poll_pkg_cstate_enable; -/* user selected target */ -static unsigned int set_target_ratio; +/* Idle ratio observed using package C-state counters */ static unsigned int current_ratio; -static bool should_skip; -static unsigned int control_cpu; /* The cpu assigned to collect stat and update - * control parameters. default to BSP but BSP - * can be offlined. - */ -static bool clamping; +/* Skip the idle injection till set to true */ +static bool should_skip; -struct powerclamp_worker_data { - struct kthread_worker *worker; - struct kthread_work balancing_work; - struct kthread_delayed_work idle_injection_work; +struct powerclamp_data { unsigned int cpu; unsigned int count; unsigned int guard; unsigned int window_size_now; unsigned int target_ratio; - unsigned int duration_jiffies; bool clamping; }; -static struct powerclamp_worker_data __percpu *worker_data; +static struct powerclamp_data powerclamp_data; + static struct thermal_cooling_device *cooling_dev; -static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu - * clamping kthread worker - */ + +static DEFINE_MUTEX(powerclamp_lock); static unsigned int duration; static unsigned int pkg_cstate_ratio_cur; @@ -306,7 +291,7 @@ static void adjust_compensation(int target_ratio, unsigned int win) if (d->confidence >= CONFIDENCE_OK) return; - delta = set_target_ratio - current_ratio; + delta = powerclamp_data.target_ratio - current_ratio; /* filter out bad data */ if (delta >= 0 && delta <= (1+target_ratio/10)) { if (d->steady_comp) @@ -345,82 +330,39 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio, adjust_compensation(target_ratio, win); /* if we are above target+guard, skip */ - return set_target_ratio + guard <= current_ratio; + return powerclamp_data.target_ratio + guard <= current_ratio; } -static void clamp_balancing_func(struct kthread_work *work) +/* + * This function calculates runtime from the current target ratio. + * This function gets called under powerclamp_lock. + */ +static unsigned int get_run_time(void) { - struct powerclamp_worker_data *w_data; - int sleeptime; - unsigned long target_jiffies; unsigned int compensated_ratio; - int interval; /* jiffies to sleep for each attempt */ - - w_data = container_of(work, struct powerclamp_worker_data, - balancing_work); + unsigned int runtime; /* * make sure user selected ratio does not take effect until * the next round. adjust target_ratio if user has changed * target such that we can converge quickly. */ - w_data->target_ratio = READ_ONCE(set_target_ratio); - w_data->guard = 1 + w_data->target_ratio / 20; - w_data->window_size_now = window_size; - w_data->duration_jiffies = msecs_to_jiffies(duration); - w_data->count++; + powerclamp_data.guard = 1 + powerclamp_data.target_ratio / 20; + powerclamp_data.window_size_now = window_size; /* * systems may have different ability to enter package level * c-states, thus we need to compensate the injected idle ratio * to achieve the actual target reported by the HW. */ - compensated_ratio = w_data->target_ratio + - get_compensation(w_data->target_ratio); + compensated_ratio = powerclamp_data.target_ratio + + get_compensation(powerclamp_data.target_ratio); if (compensated_ratio <= 0) compensated_ratio = 1; - interval = w_data->duration_jiffies * 100 / compensated_ratio; - - /* align idle time */ - target_jiffies = roundup(jiffies, interval); - sleeptime = target_jiffies - jiffies; - if (sleeptime <= 0) - sleeptime = 1; - - if (clamping && w_data->clamping && cpu_online(w_data->cpu)) - kthread_queue_delayed_work(w_data->worker, - &w_data->idle_injection_work, - sleeptime); -} -static void clamp_idle_injection_func(struct kthread_work *work) -{ - struct powerclamp_worker_data *w_data; + runtime = duration * 100 / compensated_ratio - duration; - w_data = container_of(work, struct powerclamp_worker_data, - idle_injection_work.work); - - /* - * only elected controlling cpu can collect stats and update - * control parameters. - */ - if (w_data->cpu == control_cpu && - !(w_data->count % w_data->window_size_now)) { - should_skip = - powerclamp_adjust_controls(w_data->target_ratio, - w_data->guard, - w_data->window_size_now); - smp_mb(); - } - - if (should_skip) - goto balance; - - play_idle(jiffies_to_usecs(w_data->duration_jiffies)); - -balance: - if (clamping && w_data->clamping && cpu_online(w_data->cpu)) - kthread_queue_work(w_data->worker, &w_data->balancing_work); + return runtime; } /* @@ -456,127 +398,135 @@ static void poll_pkg_cstate(struct work_struct *dummy) msr_last = msr_now; tsc_last = tsc_now; - if (true == clamping) + mutex_lock(&powerclamp_lock); + if (powerclamp_data.clamping) schedule_delayed_work(&poll_pkg_cstate_work, HZ); + mutex_unlock(&powerclamp_lock); } -static void start_power_clamp_worker(unsigned long cpu) -{ - struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu); - struct kthread_worker *worker; - - worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu); - if (IS_ERR(worker)) - return; - - w_data->worker = worker; - w_data->count = 0; - w_data->cpu = cpu; - w_data->clamping = true; - set_bit(cpu, cpu_clamping_mask); - sched_set_fifo(worker->task); - kthread_init_work(&w_data->balancing_work, clamp_balancing_func); - kthread_init_delayed_work(&w_data->idle_injection_work, - clamp_idle_injection_func); - kthread_queue_work(w_data->worker, &w_data->balancing_work); -} +static struct idle_inject_device *ii_dev; -static void stop_power_clamp_worker(unsigned long cpu) +/* + * This function is called from idle injection core on timer expiry + * for the run duration. This allows powerclamp to readjust or skip + * injecting idle for this cycle. + */ +static bool idle_inject_update(void) { - struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu); + bool update = false; - if (!w_data->worker) - return; + /* We can't sleep in this callback */ + if (!mutex_trylock(&powerclamp_lock)) + return true; - w_data->clamping = false; - /* - * Make sure that all works that get queued after this point see - * the clamping disabled. The counter part is not needed because - * there is an implicit memory barrier when the queued work - * is proceed. - */ - smp_wmb(); - kthread_cancel_work_sync(&w_data->balancing_work); - kthread_cancel_delayed_work_sync(&w_data->idle_injection_work); - /* - * The balancing work still might be queued here because - * the handling of the "clapming" variable, cancel, and queue - * operations are not synchronized via a lock. But it is not - * a big deal. The balancing work is fast and destroy kthread - * will wait for it. - */ - clear_bit(w_data->cpu, cpu_clamping_mask); - kthread_destroy_worker(w_data->worker); + if (!(powerclamp_data.count % powerclamp_data.window_size_now)) { - w_data->worker = NULL; -} + should_skip = powerclamp_adjust_controls(powerclamp_data.target_ratio, + powerclamp_data.guard, + powerclamp_data.window_size_now); + update = true; + } -static int start_power_clamp(void) -{ - unsigned long cpu; + if (update) { + unsigned int runtime = get_run_time(); - set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); - /* prevent cpu hotplug */ - cpus_read_lock(); + idle_inject_set_duration(ii_dev, runtime, duration); + } - /* prefer BSP */ - control_cpu = cpumask_first(cpu_online_mask); + powerclamp_data.count++; - clamping = true; - if (poll_pkg_cstate_enable) - schedule_delayed_work(&poll_pkg_cstate_work, 0); + mutex_unlock(&powerclamp_lock); - /* start one kthread worker per online cpu */ - for_each_online_cpu(cpu) { - start_power_clamp_worker(cpu); - } - cpus_read_unlock(); + if (should_skip) + return false; - return 0; + return true; } -static void end_power_clamp(void) +/* This function starts idle injection by calling idle_inject_start() */ +static void trigger_idle_injection(void) { - int i; + unsigned int runtime = get_run_time(); + idle_inject_set_duration(ii_dev, runtime, duration); + idle_inject_start(ii_dev); + powerclamp_data.clamping = true; +} + +/* + * This function is called from start_power_clamp() to register + * CPUS with powercap idle injection register and set default + * idle duration and latency. + */ +static int powerclamp_idle_injection_register(void) +{ /* - * Block requeuing in all the kthread workers. They will flush and - * stop faster. + * The idle inject core will only inject for online CPUs, + * So we can register for all present CPUs. In this way + * if some CPU goes online/offline while idle inject + * is registered, nothing additional calls are required. + * The same runtime and idle time is applicable for + * newly onlined CPUs if any. + * + * Here cpu_present_mask can be used as is. + * cast to (struct cpumask *) is required as the + * cpu_present_mask is const struct cpumask *, otherwise + * there will be compiler warnings. */ - clamping = false; - for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { - pr_debug("clamping worker for cpu %d alive, destroy\n", i); - stop_power_clamp_worker(i); + ii_dev = idle_inject_register_full((struct cpumask *)cpu_present_mask, + idle_inject_update); + if (!ii_dev) { + pr_err("powerclamp: idle_inject_register failed\n"); + return -EAGAIN; } + + idle_inject_set_duration(ii_dev, TICK_USEC, duration); + idle_inject_set_latency(ii_dev, UINT_MAX); + + return 0; } -static int powerclamp_cpu_online(unsigned int cpu) +/* + * This function is called from end_power_clamp() to stop idle injection + * and unregister CPUS from powercap idle injection core. + */ +static void remove_idle_injection(void) { - if (clamping == false) - return 0; - start_power_clamp_worker(cpu); - /* prefer BSP as controlling CPU */ - if (cpu == 0) { - control_cpu = 0; - smp_mb(); - } - return 0; + if (!powerclamp_data.clamping) + return; + + powerclamp_data.clamping = false; + idle_inject_stop(ii_dev); } -static int powerclamp_cpu_predown(unsigned int cpu) +/* + * This function is called when user change the cooling device + * state from zero to some other value. + */ +static int start_power_clamp(void) { - if (clamping == false) - return 0; + int ret; - stop_power_clamp_worker(cpu); - if (cpu != control_cpu) - return 0; + ret = powerclamp_idle_injection_register(); + if (!ret) { + trigger_idle_injection(); + if (poll_pkg_cstate_enable) + schedule_delayed_work(&poll_pkg_cstate_work, 0); + } - control_cpu = cpumask_first(cpu_online_mask); - if (control_cpu == cpu) - control_cpu = cpumask_next(cpu, cpu_online_mask); - smp_mb(); - return 0; + return ret; +} + +/* + * This function is called when user change the cooling device + * state from non zero value zero. + */ +static void end_power_clamp(void) +{ + if (powerclamp_data.clamping) { + remove_idle_injection(); + idle_inject_unregister(ii_dev); + } } static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, @@ -590,16 +540,20 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { - if (clamping) { + mutex_lock(&powerclamp_lock); + + if (powerclamp_data.clamping) { if (poll_pkg_cstate_enable) *state = pkg_cstate_ratio_cur; else - *state = set_target_ratio; + *state = powerclamp_data.target_ratio; } else { /* to save power, do not poll idle ratio while not clamping */ *state = -1; /* indicates invalid state */ } + mutex_unlock(&powerclamp_lock); + return 0; } @@ -608,24 +562,32 @@ static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, { int ret = 0; + mutex_lock(&powerclamp_lock); + new_target_ratio = clamp(new_target_ratio, 0UL, - (unsigned long) (MAX_TARGET_RATIO-1)); - if (set_target_ratio == 0 && new_target_ratio > 0) { + (unsigned long) (MAX_TARGET_RATIO - 1)); + if (!powerclamp_data.target_ratio && new_target_ratio > 0) { pr_info("Start idle injection to reduce power\n"); - set_target_ratio = new_target_ratio; + powerclamp_data.target_ratio = new_target_ratio; ret = start_power_clamp(); + if (ret) + powerclamp_data.target_ratio = 0; goto exit_set; - } else if (set_target_ratio > 0 && new_target_ratio == 0) { + } else if (powerclamp_data.target_ratio > 0 && new_target_ratio == 0) { pr_info("Stop forced idle injection\n"); end_power_clamp(); - set_target_ratio = 0; + powerclamp_data.target_ratio = 0; } else /* adjust currently running */ { - set_target_ratio = new_target_ratio; - /* make new set_target_ratio visible to other cpus */ - smp_mb(); + unsigned int runtime; + + powerclamp_data.target_ratio = new_target_ratio; + runtime = get_run_time(); + idle_inject_set_duration(ii_dev, runtime, duration); } exit_set: + mutex_unlock(&powerclamp_lock); + return ret; } @@ -666,7 +628,6 @@ static int powerclamp_debug_show(struct seq_file *m, void *unused) { int i = 0; - seq_printf(m, "controlling cpu: %d\n", control_cpu); seq_printf(m, "pct confidence steady dynamic (compensation)\n"); for (i = 0; i < MAX_TARGET_RATIO; i++) { seq_printf(m, "%d\t%lu\t%lu\t%lu\n", @@ -689,78 +650,50 @@ static inline void powerclamp_create_debug_files(void) &powerclamp_debug_fops); } -static enum cpuhp_state hp_state; - static int __init powerclamp_init(void) { int retval; - cpu_clamping_mask = bitmap_zalloc(num_possible_cpus(), GFP_KERNEL); - if (!cpu_clamping_mask) - return -ENOMEM; - /* probe cpu features and ids here */ retval = powerclamp_probe(); if (retval) - goto exit_free; + return retval; /* set default limit, maybe adjusted during runtime based on feedback */ window_size = 2; - retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "thermal/intel_powerclamp:online", - powerclamp_cpu_online, - powerclamp_cpu_predown); - if (retval < 0) - goto exit_free; - - hp_state = retval; - - worker_data = alloc_percpu(struct powerclamp_worker_data); - if (!worker_data) { - retval = -ENOMEM; - goto exit_unregister; - } if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) poll_pkg_cstate_enable = true; cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, - &powerclamp_cooling_ops); - if (IS_ERR(cooling_dev)) { - retval = -ENODEV; - goto exit_free_thread; - } + &powerclamp_cooling_ops); + if (IS_ERR(cooling_dev)) + return -ENODEV; if (!duration) - duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES); + duration = jiffies_to_usecs(DEFAULT_DURATION_JIFFIES); powerclamp_create_debug_files(); return 0; - -exit_free_thread: - free_percpu(worker_data); -exit_unregister: - cpuhp_remove_state_nocalls(hp_state); -exit_free: - bitmap_free(cpu_clamping_mask); - return retval; } module_init(powerclamp_init); static void __exit powerclamp_exit(void) { + mutex_lock(&powerclamp_lock); end_power_clamp(); - cpuhp_remove_state_nocalls(hp_state); - free_percpu(worker_data); + mutex_unlock(&powerclamp_lock); + thermal_cooling_device_unregister(cooling_dev); - bitmap_free(cpu_clamping_mask); cancel_delayed_work_sync(&poll_pkg_cstate_work); debugfs_remove_recursive(debug_dir); } module_exit(powerclamp_exit); +MODULE_IMPORT_NS(IDLE_INJECT); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arjan van de Ven "); MODULE_AUTHOR("Jacob Pan "); -- cgit v1.2.3 From 72ffc28f2fe8bce4e5b682caedf7a26c4998c756 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 1 Feb 2023 23:36:16 +0100 Subject: thermal: intel: quark_dts: Use generic trip points Make the intel_quark_dts_thermal driver register an array of generic trip points along with the thermal zone and drop the trip points thermal zone callbacks that are not used any more from it. Signed-off-by: Daniel Lezcano [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_quark_dts_thermal.c | 55 +++++++++---------------- 1 file changed, 20 insertions(+), 35 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_quark_dts_thermal.c b/drivers/thermal/intel/intel_quark_dts_thermal.c index 3eafc6b0e6c3..97b843fa7568 100644 --- a/drivers/thermal/intel/intel_quark_dts_thermal.c +++ b/drivers/thermal/intel/intel_quark_dts_thermal.c @@ -84,6 +84,7 @@ #define QRK_DTS_MASK_TP_THRES 0xFF #define QRK_DTS_SHIFT_TP 8 #define QRK_DTS_ID_TP_CRITICAL 0 +#define QRK_DTS_ID_TP_HOT 1 #define QRK_DTS_SAFE_TP_THRES 105 /* Thermal Sensor Register Lock */ @@ -104,6 +105,7 @@ struct soc_sensor_entry { u32 store_ptps; u32 store_dts_enable; struct thermal_zone_device *tzone; + struct thermal_trip trips[QRK_MAX_DTS_TRIPS]; }; static struct soc_sensor_entry *soc_dts; @@ -172,9 +174,9 @@ static int soc_dts_disable(struct thermal_zone_device *tzd) return ret; } -static int _get_trip_temp(int trip, int *temp) +static int get_trip_temp(int trip) { - int status; + int status, temp; u32 out; mutex_lock(&dts_update_mutex); @@ -183,7 +185,7 @@ static int _get_trip_temp(int trip, int *temp) mutex_unlock(&dts_update_mutex); if (status) - return status; + return THERMAL_TEMP_INVALID; /* * Thermal Sensor Programmable Trip Point Register has 8-bit @@ -191,21 +193,10 @@ static int _get_trip_temp(int trip, int *temp) * thresholds. The threshold value is always offset by its * temperature base (50 degree Celsius). */ - *temp = (out >> (trip * QRK_DTS_SHIFT_TP)) & QRK_DTS_MASK_TP_THRES; - *temp -= QRK_DTS_TEMP_BASE; + temp = (out >> (trip * QRK_DTS_SHIFT_TP)) & QRK_DTS_MASK_TP_THRES; + temp -= QRK_DTS_TEMP_BASE; - return 0; -} - -static inline int sys_get_trip_temp(struct thermal_zone_device *tzd, - int trip, int *temp) -{ - return _get_trip_temp(trip, temp); -} - -static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, int *temp) -{ - return _get_trip_temp(QRK_DTS_ID_TP_CRITICAL, temp); + return temp; } static int update_trip_temp(struct soc_sensor_entry *aux_entry, @@ -262,17 +253,6 @@ static inline int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, return update_trip_temp(tzd->devdata, trip, temp); } -static int sys_get_trip_type(struct thermal_zone_device *thermal, - int trip, enum thermal_trip_type *type) -{ - if (trip) - *type = THERMAL_TRIP_HOT; - else - *type = THERMAL_TRIP_CRITICAL; - - return 0; -} - static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp) { @@ -315,10 +295,7 @@ static int sys_change_mode(struct thermal_zone_device *tzd, static struct thermal_zone_device_ops tzone_ops = { .get_temp = sys_get_curr_temp, - .get_trip_temp = sys_get_trip_temp, - .get_trip_type = sys_get_trip_type, .set_trip_temp = sys_set_trip_temp, - .get_crit_temp = sys_get_crit_temp, .change_mode = sys_change_mode, }; @@ -385,10 +362,18 @@ static struct soc_sensor_entry *alloc_soc_dts(void) goto err_ret; } - aux_entry->tzone = thermal_zone_device_register("quark_dts", - QRK_MAX_DTS_TRIPS, - wr_mask, - aux_entry, &tzone_ops, NULL, 0, polling_delay); + aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = get_trip_temp(QRK_DTS_ID_TP_CRITICAL); + aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL; + + aux_entry->trips[QRK_DTS_ID_TP_HOT].temperature = get_trip_temp(QRK_DTS_ID_TP_HOT); + aux_entry->trips[QRK_DTS_ID_TP_HOT].type = THERMAL_TRIP_HOT; + + aux_entry->tzone = thermal_zone_device_register_with_trips("quark_dts", + aux_entry->trips, + QRK_MAX_DTS_TRIPS, + wr_mask, + aux_entry, &tzone_ops, + NULL, 0, polling_delay); if (IS_ERR(aux_entry->tzone)) { err = PTR_ERR(aux_entry->tzone); goto err_ret; -- cgit v1.2.3 From 621084965459d2b9e2713844aac4d803d5bb6d67 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 3 Feb 2023 20:29:02 -0800 Subject: thermal: intel: powerclamp: Return last requested state as cur_state When the user is reading cur_state from the thermal cooling device for Intel powerclamp device: - It returns the idle ratio from Package C-state counters when there is active idle injection session. - -1, when there is no active idle injection session. This information is not very useful as the package C-state counters vary a lot from read to read. Instead just return the last requested cur_state. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 209bb5f5ae17..1390748706a6 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -541,17 +541,7 @@ static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { mutex_lock(&powerclamp_lock); - - if (powerclamp_data.clamping) { - if (poll_pkg_cstate_enable) - *state = pkg_cstate_ratio_cur; - else - *state = powerclamp_data.target_ratio; - } else { - /* to save power, do not poll idle ratio while not clamping */ - *state = -1; /* indicates invalid state */ - } - + *state = powerclamp_data.target_ratio; mutex_unlock(&powerclamp_lock); return 0; -- cgit v1.2.3 From 966d0ab67350c6206f8053a3e6ed0b892bdc42a5 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Feb 2023 23:09:08 -0800 Subject: thermal: intel: powerclamp: Fix duration module parameter After the switch to use the powercap/idle-inject framework in the Intel powerclamp driver, the idle duration unit is microsecond. However, the module parameter for idle duration is in milliseconds, so convert it to microseconds in the "set" callback and back to milliseconds in a new "get" callback. While here, also use mutex protection for setting and getting "duration". The other uses of "duration" are already protected by the mutex. Fixes: 8526eb7fc75a ("thermal: intel: powerclamp: Use powercap idle-inject feature") Signed-off-by: Srinivas Pandruvada [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 1390748706a6..5995d10b5699 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -74,6 +74,7 @@ static struct thermal_cooling_device *cooling_dev; static DEFINE_MUTEX(powerclamp_lock); +/* This duration is in microseconds */ static unsigned int duration; static unsigned int pkg_cstate_ratio_cur; static unsigned int window_size; @@ -90,23 +91,34 @@ static int duration_set(const char *arg, const struct kernel_param *kp) pr_err("Out of recommended range %lu, between 6-25ms\n", new_duration); ret = -EINVAL; + goto exit; } - duration = clamp(new_duration, 6ul, 25ul); - smp_mb(); - + mutex_lock(&powerclamp_lock); + duration = clamp(new_duration, 6ul, 25ul) * 1000; + mutex_unlock(&powerclamp_lock); exit: return ret; } +static int duration_get(char *buf, const struct kernel_param *kp) +{ + int ret; + + mutex_lock(&powerclamp_lock); + ret = sysfs_emit(buf, "%d\n", duration / 1000); + mutex_unlock(&powerclamp_lock); + + return ret; +} + static const struct kernel_param_ops duration_ops = { .set = duration_set, - .get = param_get_int, + .get = duration_get, }; - -module_param_cb(duration, &duration_ops, &duration, 0644); +module_param_cb(duration, &duration_ops, NULL, 0644); MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec."); struct powerclamp_calibration_data { -- cgit v1.2.3 From ebf519710218814cf827adbf9111af081344c969 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Feb 2023 09:35:34 -0800 Subject: thermal: intel: powerclamp: Add two module parameters In some use cases, it is desirable to only inject idle on certain set of CPUs. For example on Alder Lake systems, it is possible that we force idle only on P-Cores for thermal reasons. Also the idle percent can be more than 50% if we only choose partial set of CPUs in the system. Introduce 2 new module parameters for this purpose. They can be only changed when the cooling device is inactive. cpumask (Read/Write): A bit mask of CPUs to inject idle. The format of this bitmask is same as used in other subsystems like in /proc/irq/*/smp_affinity. The mask is comma separated 32 bit groups. Each CPU is one bit. For example for 256 CPU system the full mask is: ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff The rightmost mask is for CPU 0-32. max_idle (Read/Write): Maximum injected idle time to the total CPU time ratio in percent range from 1 to 100. Even if the cooling device max_state is always 100 (100%), this parameter allows to add a max idle percent limit. The default is 50, to match the current implementation of powerclamp driver. Also doesn't allow value more than 75, if the cpumask includes every CPU present in the system. Also when the cpumask doesn't include every CPU, there is no use of compensation using package C-state idle counters. Hence don't start package C-state polling thread even for a single package or a single die system in this case. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- .../admin-guide/thermal/intel_powerclamp.rst | 22 +++ drivers/thermal/intel/intel_powerclamp.c | 176 ++++++++++++++++++--- 2 files changed, 178 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/Documentation/admin-guide/thermal/intel_powerclamp.rst b/Documentation/admin-guide/thermal/intel_powerclamp.rst index 3f6dfb0b3ea6..2d9d2d739f02 100644 --- a/Documentation/admin-guide/thermal/intel_powerclamp.rst +++ b/Documentation/admin-guide/thermal/intel_powerclamp.rst @@ -26,6 +26,8 @@ By: - Generic Thermal Layer (sysfs) - Kernel APIs (TBD) + (*) Module Parameters + INTRODUCTION ============ @@ -318,3 +320,23 @@ device, a PID based userspace thermal controller can manage to control CPU temperature effectively, when no other thermal influence is added. For example, a UltraBook user can compile the kernel under certain temperature (below most active trip points). + +Module Parameters +================= + +``cpumask`` (RW) + A bit mask of CPUs to inject idle. The format of the bitmask is same as + used in other subsystems like in /proc/irq/*/smp_affinity. The mask is + comma separated 32 bit groups. Each CPU is one bit. For example for a 256 + CPU system the full mask is: + ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff + + The rightmost mask is for CPU 0-32. + +``max_idle`` (RW) + Maximum injected idle time to the total CPU time ratio in percent range + from 1 to 100. Even if the cooling device max_state is always 100 (100%), + this parameter allows to add a max idle percent limit. The default is 50, + to match the current implementation of powerclamp driver. Also doesn't + allow value more than 75, if the cpumask includes every CPU present in + the system. diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 5995d10b5699..c7ba5680cd48 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -37,7 +37,7 @@ #include #include -#define MAX_TARGET_RATIO (50U) +#define MAX_TARGET_RATIO (100U) /* For each undisturbed clamping period (no extra wake ups during idle time), * we increment the confidence counter for the given target ratio. * CONFIDENCE_OK defines the level where runtime calibration results are @@ -121,6 +121,141 @@ static const struct kernel_param_ops duration_ops = { module_param_cb(duration, &duration_ops, NULL, 0644); MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec."); +#define DEFAULT_MAX_IDLE 50 +#define MAX_ALL_CPU_IDLE 75 + +static u8 max_idle = DEFAULT_MAX_IDLE; + +static cpumask_var_t idle_injection_cpu_mask; + +static int allocate_copy_idle_injection_mask(const struct cpumask *copy_mask) +{ + if (cpumask_available(idle_injection_cpu_mask)) + goto copy_mask; + + /* This mask is allocated only one time and freed during module exit */ + if (!alloc_cpumask_var(&idle_injection_cpu_mask, GFP_KERNEL)) + return -ENOMEM; + +copy_mask: + cpumask_copy(idle_injection_cpu_mask, copy_mask); + + return 0; +} + +/* Return true if the cpumask and idle percent combination is invalid */ +static bool check_invalid(cpumask_var_t mask, u8 idle) +{ + if (cpumask_equal(cpu_present_mask, mask) && idle > MAX_ALL_CPU_IDLE) + return true; + + return false; +} + +static int cpumask_set(const char *arg, const struct kernel_param *kp) +{ + cpumask_var_t new_mask; + int ret; + + mutex_lock(&powerclamp_lock); + + /* Can't set mask when cooling device is in use */ + if (powerclamp_data.clamping) { + ret = -EAGAIN; + goto skip_cpumask_set; + } + + ret = alloc_cpumask_var(&new_mask, GFP_KERNEL); + if (!ret) + goto skip_cpumask_set; + + ret = bitmap_parse(arg, strlen(arg), cpumask_bits(new_mask), + nr_cpumask_bits); + if (ret) + goto free_cpumask_set; + + if (cpumask_empty(new_mask) || check_invalid(new_mask, max_idle)) { + ret = -EINVAL; + goto free_cpumask_set; + } + + /* + * When module parameters are passed from kernel command line + * during insmod, the module parameter callback is called + * before powerclamp_init(), so we can't assume that some + * cpumask can be allocated and copied before here. Also + * in this case this cpumask is used as the default mask. + */ + ret = allocate_copy_idle_injection_mask(new_mask); + +free_cpumask_set: + free_cpumask_var(new_mask); +skip_cpumask_set: + mutex_unlock(&powerclamp_lock); + + return ret; +} + +static int cpumask_get(char *buf, const struct kernel_param *kp) +{ + if (!cpumask_available(idle_injection_cpu_mask)) + return -ENODEV; + + return bitmap_print_to_pagebuf(false, buf, cpumask_bits(idle_injection_cpu_mask), + nr_cpumask_bits); +} + +static const struct kernel_param_ops cpumask_ops = { + .set = cpumask_set, + .get = cpumask_get, +}; + +module_param_cb(cpumask, &cpumask_ops, NULL, 0644); +MODULE_PARM_DESC(cpumask, "Mask of CPUs to use for idle injection."); + +static int max_idle_set(const char *arg, const struct kernel_param *kp) +{ + u8 new_max_idle; + int ret = 0; + + mutex_lock(&powerclamp_lock); + + /* Can't set mask when cooling device is in use */ + if (powerclamp_data.clamping) { + ret = -EAGAIN; + goto skip_limit_set; + } + + ret = kstrtou8(arg, 10, &new_max_idle); + if (ret) + goto skip_limit_set; + + if (new_max_idle > MAX_TARGET_RATIO) { + ret = -EINVAL; + goto skip_limit_set; + } + + if (check_invalid(idle_injection_cpu_mask, new_max_idle)) { + ret = -EINVAL; + goto skip_limit_set; + } + + max_idle = new_max_idle; + +skip_limit_set: + mutex_unlock(&powerclamp_lock); + + return ret; +} + +static const struct kernel_param_ops max_idle_ops = { + .set = max_idle_set, + .get = param_get_int, +}; + +module_param_cb(max_idle, &max_idle_ops, &max_idle, 0644); +MODULE_PARM_DESC(max_idle, "maximum injected idle time to the total CPU time ratio in percent range:1-100"); + struct powerclamp_calibration_data { unsigned long confidence; /* used for calibration, basically a counter * gets incremented each time a clamping @@ -472,21 +607,15 @@ static void trigger_idle_injection(void) */ static int powerclamp_idle_injection_register(void) { - /* - * The idle inject core will only inject for online CPUs, - * So we can register for all present CPUs. In this way - * if some CPU goes online/offline while idle inject - * is registered, nothing additional calls are required. - * The same runtime and idle time is applicable for - * newly onlined CPUs if any. - * - * Here cpu_present_mask can be used as is. - * cast to (struct cpumask *) is required as the - * cpu_present_mask is const struct cpumask *, otherwise - * there will be compiler warnings. - */ - ii_dev = idle_inject_register_full((struct cpumask *)cpu_present_mask, - idle_inject_update); + poll_pkg_cstate_enable = false; + if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) { + ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update); + if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) + poll_pkg_cstate_enable = true; + } else { + ii_dev = idle_inject_register(idle_injection_cpu_mask); + } + if (!ii_dev) { pr_err("powerclamp: idle_inject_register failed\n"); return -EAGAIN; @@ -567,7 +696,7 @@ static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, mutex_lock(&powerclamp_lock); new_target_ratio = clamp(new_target_ratio, 0UL, - (unsigned long) (MAX_TARGET_RATIO - 1)); + (unsigned long) (max_idle - 1)); if (!powerclamp_data.target_ratio && new_target_ratio > 0) { pr_info("Start idle injection to reduce power\n"); powerclamp_data.target_ratio = new_target_ratio; @@ -658,15 +787,19 @@ static int __init powerclamp_init(void) /* probe cpu features and ids here */ retval = powerclamp_probe(); + if (retval) + return retval; + + mutex_lock(&powerclamp_lock); + retval = allocate_copy_idle_injection_mask(cpu_present_mask); + mutex_unlock(&powerclamp_lock); + if (retval) return retval; /* set default limit, maybe adjusted during runtime based on feedback */ window_size = 2; - if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) - poll_pkg_cstate_enable = true; - cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, &powerclamp_cooling_ops); if (IS_ERR(cooling_dev)) @@ -691,6 +824,9 @@ static void __exit powerclamp_exit(void) cancel_delayed_work_sync(&poll_pkg_cstate_work); debugfs_remove_recursive(debug_dir); + + if (cpumask_available(idle_injection_cpu_mask)) + free_cpumask_var(idle_injection_cpu_mask); } module_exit(powerclamp_exit); -- cgit v1.2.3