From dd3b3d160ea7004091051da60e86f36f40970888 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 27 Jan 2023 19:17:03 +0100 Subject: [PATCH 01/25] thermal: ACPI: Make helpers retrieve temperature only It is slightly better to make the ACPI thermal helper functions retrieve the trip point temperature only instead of doing the full trip point initialization, because they are also used for updating some already registered trip points, in which case initializing a new trip just in order to update the temperature of an existing one is somewhat wasteful. Modify the ACPI thermal helpers accordingly and update their users. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano --- .../int340x_thermal/int340x_thermal_zone.c | 40 ++++--- drivers/thermal/intel/intel_pch_thermal.c | 7 +- drivers/thermal/thermal_acpi.c | 108 ++++++------------ include/linux/thermal.h | 9 +- 4 files changed, 70 insertions(+), 94 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index 09b1b51eb6a5..a675f79f792b 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -70,24 +70,35 @@ static int int340x_thermal_read_trips(struct acpi_device *zone_adev, { int i, ret; - ret = thermal_acpi_trip_critical(zone_adev, &zone_trips[trip_cnt]); - if (!ret) + ret = thermal_acpi_critical_trip_temp(zone_adev, + &zone_trips[trip_cnt].temperature); + if (!ret) { + zone_trips[trip_cnt].type = THERMAL_TRIP_CRITICAL; trip_cnt++; + } - ret = thermal_acpi_trip_hot(zone_adev, &zone_trips[trip_cnt]); - if (!ret) + ret = thermal_acpi_hot_trip_temp(zone_adev, + &zone_trips[trip_cnt].temperature); + if (!ret) { + zone_trips[trip_cnt].type = THERMAL_TRIP_HOT; trip_cnt++; + } - ret = thermal_acpi_trip_passive(zone_adev, &zone_trips[trip_cnt]); - if (!ret) + ret = thermal_acpi_passive_trip_temp(zone_adev, + &zone_trips[trip_cnt].temperature); + if (!ret) { + zone_trips[trip_cnt].type = THERMAL_TRIP_PASSIVE; trip_cnt++; + } for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) { - ret = thermal_acpi_trip_active(zone_adev, i, &zone_trips[trip_cnt]); + ret = thermal_acpi_active_trip_temp(zone_adev, i, + &zone_trips[trip_cnt].temperature); if (ret) break; + zone_trips[trip_cnt].type = THERMAL_TRIP_ACTIVE; trip_cnt++; } @@ -213,22 +224,21 @@ void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone) mutex_lock(&int34x_zone->zone->lock); for (i = int34x_zone->aux_trip_nr; i < trip_cnt; i++) { - struct thermal_trip trip; - int err; + int temp, err; switch (zone_trips[i].type) { case THERMAL_TRIP_CRITICAL: - err = thermal_acpi_trip_critical(zone_adev, &trip); + err = thermal_acpi_critical_trip_temp(zone_adev, &temp); break; case THERMAL_TRIP_HOT: - err = thermal_acpi_trip_hot(zone_adev, &trip); + err = thermal_acpi_hot_trip_temp(zone_adev, &temp); break; case THERMAL_TRIP_PASSIVE: - err = thermal_acpi_trip_passive(zone_adev, &trip); + err = thermal_acpi_passive_trip_temp(zone_adev, &temp); break; case THERMAL_TRIP_ACTIVE: - err = thermal_acpi_trip_active(zone_adev, act_trip_nr++, - &trip); + err = thermal_acpi_active_trip_temp(zone_adev, act_trip_nr++, + &temp); break; default: err = -ENODEV; @@ -238,7 +248,7 @@ void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone) continue; } - zone_trips[i].temperature = trip.temperature; + zone_trips[i].temperature = temp; } mutex_unlock(&int34x_zone->zone->lock); diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 45a9ea86907e..c529c05c1853 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -100,16 +100,17 @@ static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int *nr_trips) { struct acpi_device *adev; - int ret; + int temp; adev = ACPI_COMPANION(&ptd->pdev->dev); if (!adev) return; - ret = thermal_acpi_trip_passive(adev, &ptd->trips[*nr_trips]); - if (ret || ptd->trips[*nr_trips].temperature <= 0) + if (thermal_acpi_passive_trip_temp(adev, &temp) || temp <= 0) return; + ptd->trips[*nr_trips].type = THERMAL_TRIP_PASSIVE; + ptd->trips[*nr_trips].temperature = temp; ++(*nr_trips); } #else diff --git a/drivers/thermal/thermal_acpi.c b/drivers/thermal/thermal_acpi.c index 671f774a7621..0e5698818f69 100644 --- a/drivers/thermal/thermal_acpi.c +++ b/drivers/thermal/thermal_acpi.c @@ -21,42 +21,11 @@ #define TEMP_MIN_DECIK 2180 #define TEMP_MAX_DECIK 4480 -static int thermal_acpi_trip_init(struct acpi_device *adev, - enum thermal_trip_type type, int id, - struct thermal_trip *trip) +static int thermal_acpi_trip_temp(struct acpi_device *adev, char *obj_name, + int *ret_temp) { unsigned long long temp; acpi_status status; - char obj_name[5]; - - switch (type) { - case THERMAL_TRIP_ACTIVE: - if (id < 0 || id > 9) - return -EINVAL; - - obj_name[1] = 'A'; - obj_name[2] = 'C'; - obj_name[3] = '0' + id; - break; - case THERMAL_TRIP_PASSIVE: - obj_name[1] = 'P'; - obj_name[2] = 'S'; - obj_name[3] = 'V'; - break; - case THERMAL_TRIP_HOT: - obj_name[1] = 'H'; - obj_name[2] = 'O'; - obj_name[3] = 'T'; - break; - case THERMAL_TRIP_CRITICAL: - obj_name[1] = 'C'; - obj_name[2] = 'R'; - obj_name[3] = 'T'; - break; - } - - obj_name[0] = '_'; - obj_name[4] = '\0'; status = acpi_evaluate_integer(adev->handle, obj_name, NULL, &temp); if (ACPI_FAILURE(status)) { @@ -65,87 +34,84 @@ static int thermal_acpi_trip_init(struct acpi_device *adev, } if (temp >= TEMP_MIN_DECIK && temp <= TEMP_MAX_DECIK) { - trip->temperature = deci_kelvin_to_millicelsius(temp); + *ret_temp = deci_kelvin_to_millicelsius(temp); } else { acpi_handle_debug(adev->handle, "%s result %llu out of range\n", obj_name, temp); - trip->temperature = THERMAL_TEMP_INVALID; + *ret_temp = THERMAL_TEMP_INVALID; } - trip->hysteresis = 0; - trip->type = type; - return 0; } /** - * thermal_acpi_trip_active - Get the specified active trip point - * @adev: Thermal zone ACPI device object to get the description from. + * thermal_acpi_active_trip_temp - Retrieve active trip point temperature + * @adev: Target thermal zone ACPI device object. * @id: Active cooling level (0 - 9). - * @trip: Trip point structure to be populated on success. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _ACx object for the thermal zone represented by @adev to obtain * the temperature of the active cooling trip point corresponding to the active - * cooling level given by @id and initialize @trip as an active trip point using - * that temperature value. + * cooling level given by @id. * * Return 0 on success or a negative error value on failure. */ -int thermal_acpi_trip_active(struct acpi_device *adev, int id, - struct thermal_trip *trip) +int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_ACTIVE, id, trip); + char obj_name[] = {'_', 'A', 'C', '0' + id, '\0'}; + + if (id < 0 || id > 9) + return -EINVAL; + + return thermal_acpi_trip_temp(adev, obj_name, ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_active); +EXPORT_SYMBOL_GPL(thermal_acpi_active_trip_temp); /** - * thermal_acpi_trip_passive - Get the passive trip point - * @adev: Thermal zone ACPI device object to get the description from. - * @trip: Trip point structure to be populated on success. + * thermal_acpi_passive_trip_temp - Retrieve passive trip point temperature + * @adev: Target thermal zone ACPI device object. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _PSV object for the thermal zone represented by @adev to obtain - * the temperature of the passive cooling trip point and initialize @trip as a - * passive trip point using that temperature value. + * the temperature of the passive cooling trip point. * * Return 0 on success or -ENODATA on failure. */ -int thermal_acpi_trip_passive(struct acpi_device *adev, struct thermal_trip *trip) +int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_PASSIVE, INT_MAX, trip); + return thermal_acpi_trip_temp(adev, "_PSV", ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_passive); +EXPORT_SYMBOL_GPL(thermal_acpi_passive_trip_temp); /** - * thermal_acpi_trip_hot - Get the near critical trip point - * @adev: the ACPI device to get the description from. - * @trip: a &struct thermal_trip to be filled if the function succeed. + * thermal_acpi_hot_trip_temp - Retrieve hot trip point temperature + * @adev: Target thermal zone ACPI device object. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _HOT object for the thermal zone represented by @adev to obtain * the temperature of the trip point at which the system is expected to be put - * into the S4 sleep state and initialize @trip as a hot trip point using that - * temperature value. + * into the S4 sleep state. * * Return 0 on success or -ENODATA on failure. */ -int thermal_acpi_trip_hot(struct acpi_device *adev, struct thermal_trip *trip) +int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_HOT, INT_MAX, trip); + return thermal_acpi_trip_temp(adev, "_HOT", ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_hot); +EXPORT_SYMBOL_GPL(thermal_acpi_hot_trip_temp); /** - * thermal_acpi_trip_critical - Get the critical trip point - * @adev: the ACPI device to get the description from. - * @trip: a &struct thermal_trip to be filled if the function succeed. + * thermal_acpi_critical_trip_temp - Retrieve critical trip point temperature + * @adev: Target thermal zone ACPI device object. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _CRT object for the thermal zone represented by @adev to obtain - * the temperature of the critical cooling trip point and initialize @trip as a - * critical trip point using that temperature value. + * the temperature of the critical cooling trip point. * * Return 0 on success or -ENODATA on failure. */ -int thermal_acpi_trip_critical(struct acpi_device *adev, struct thermal_trip *trip) +int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_CRITICAL, INT_MAX, trip); + return thermal_acpi_trip_temp(adev, "_CRT", ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_critical); +EXPORT_SYMBOL_GPL(thermal_acpi_critical_trip_temp); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 23c2617156b5..2bb4bf33f4f3 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -347,11 +347,10 @@ int thermal_zone_get_num_trips(struct thermal_zone_device *tz); int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); #ifdef CONFIG_THERMAL_ACPI -int thermal_acpi_trip_active(struct acpi_device *adev, int id, - struct thermal_trip *trip); -int thermal_acpi_trip_passive(struct acpi_device *adev, struct thermal_trip *trip); -int thermal_acpi_trip_hot(struct acpi_device *adev, struct thermal_trip *trip); -int thermal_acpi_trip_critical(struct acpi_device *adev, struct thermal_trip *trip); +int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp); +int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp); +int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp); +int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); #endif #ifdef CONFIG_THERMAL From be014c789c717a4328f49d58b53170923bc475f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:42:16 +0100 Subject: [PATCH 02/25] thermal: intel: int340x: Assorted minor cleanups Improve some inconsistent usage of white space in int340x_thermal_zone.c, fix up one coding style issue in it (missing braces around an else branch of a conditional) and while at it replace a !ACPI_FAILURE() check with an equivalent ACPI_SUCCESS() one. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- .../intel/int340x_thermal/int340x_thermal_zone.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index a675f79f792b..30c05b436700 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -30,15 +30,16 @@ static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone, return conv_temp; *temp = (unsigned long)conv_temp * 10; - } else + } else { /* _TMP returns the temperature in tenths of degrees Kelvin */ *temp = deci_kelvin_to_millicelsius(tmp); + } return 0; } static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, - int trip, int temp) + int trip, int temp) { struct int34x_thermal_zone *d = zone->devdata; acpi_status status; @@ -46,7 +47,7 @@ static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, snprintf(name, sizeof(name), "PAT%d", trip); status = acpi_execute_simple_method(d->adev->handle, name, - millicelsius_to_deci_kelvin(temp)); + millicelsius_to_deci_kelvin(temp)); if (ACPI_FAILURE(status)) return -EIO; @@ -92,7 +93,6 @@ static int int340x_thermal_read_trips(struct acpi_device *zone_adev, } for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) { - ret = thermal_acpi_active_trip_temp(zone_adev, i, &zone_trips[trip_cnt].temperature); if (ret) @@ -121,8 +121,7 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, acpi_status status; int i, ret; - int34x_thermal_zone = kzalloc(sizeof(*int34x_thermal_zone), - GFP_KERNEL); + int34x_thermal_zone = kzalloc(sizeof(*int34x_thermal_zone), GFP_KERNEL); if (!int34x_thermal_zone) return ERR_PTR(-ENOMEM); @@ -139,7 +138,7 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, int34x_thermal_zone->ops->get_temp = get_temp; status = acpi_evaluate_integer(adev->handle, "PATC", NULL, &trip_cnt); - if (!ACPI_FAILURE(status)) { + if (ACPI_SUCCESS(status)) { int34x_thermal_zone->aux_trip_nr = trip_cnt; trip_mask = BIT(trip_cnt) - 1; } @@ -169,8 +168,7 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, int34x_thermal_zone->trips = zone_trips; - int34x_thermal_zone->lpat_table = acpi_lpat_get_conversion_table( - adev->handle); + int34x_thermal_zone->lpat_table = acpi_lpat_get_conversion_table(adev->handle); int34x_thermal_zone->zone = thermal_zone_device_register_with_trips( acpi_device_bid(adev), From 67c6945867145edda3092c336aa4cf56f9986ed7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:43:32 +0100 Subject: [PATCH 03/25] thermal: intel: int340x: Rename variable in int340x_thermal_zone_add() Rename local variables int34x_thermal_zone in int340x_thermal_zone_add() and int340x_thermal_zone_remove() to int34x_zone which allows a number of code lines to be shorter and easier to read and adjust some white space for consistency. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- .../int340x_thermal/int340x_thermal_zone.c | 67 +++++++++---------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index 30c05b436700..020e681759d8 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -113,7 +113,7 @@ static struct thermal_zone_params int340x_thermal_params = { struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, int (*get_temp) (struct thermal_zone_device *, int *)) { - struct int34x_thermal_zone *int34x_thermal_zone; + struct int34x_thermal_zone *int34x_zone; struct thermal_trip *zone_trips; unsigned long long trip_cnt = 0; unsigned long long hyst; @@ -121,25 +121,25 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, acpi_status status; int i, ret; - int34x_thermal_zone = kzalloc(sizeof(*int34x_thermal_zone), GFP_KERNEL); - if (!int34x_thermal_zone) + int34x_zone = kzalloc(sizeof(*int34x_zone), GFP_KERNEL); + if (!int34x_zone) return ERR_PTR(-ENOMEM); - int34x_thermal_zone->adev = adev; + int34x_zone->adev = adev; - int34x_thermal_zone->ops = kmemdup(&int340x_thermal_zone_ops, - sizeof(int340x_thermal_zone_ops), GFP_KERNEL); - if (!int34x_thermal_zone->ops) { + int34x_zone->ops = kmemdup(&int340x_thermal_zone_ops, + sizeof(int340x_thermal_zone_ops), GFP_KERNEL); + if (!int34x_zone->ops) { ret = -ENOMEM; goto err_ops_alloc; } if (get_temp) - int34x_thermal_zone->ops->get_temp = get_temp; + int34x_zone->ops->get_temp = get_temp; status = acpi_evaluate_integer(adev->handle, "PATC", NULL, &trip_cnt); if (ACPI_SUCCESS(status)) { - int34x_thermal_zone->aux_trip_nr = trip_cnt; + int34x_zone->aux_trip_nr = trip_cnt; trip_mask = BIT(trip_cnt) - 1; } @@ -166,48 +166,47 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, for (i = 0; i < trip_cnt; ++i) zone_trips[i].hysteresis = hyst; - int34x_thermal_zone->trips = zone_trips; + int34x_zone->trips = zone_trips; - int34x_thermal_zone->lpat_table = acpi_lpat_get_conversion_table(adev->handle); + int34x_zone->lpat_table = acpi_lpat_get_conversion_table(adev->handle); - int34x_thermal_zone->zone = thermal_zone_device_register_with_trips( - acpi_device_bid(adev), - zone_trips, trip_cnt, - trip_mask, int34x_thermal_zone, - int34x_thermal_zone->ops, - &int340x_thermal_params, - 0, 0); - if (IS_ERR(int34x_thermal_zone->zone)) { - ret = PTR_ERR(int34x_thermal_zone->zone); + int34x_zone->zone = thermal_zone_device_register_with_trips( + acpi_device_bid(adev), + zone_trips, trip_cnt, + trip_mask, int34x_zone, + int34x_zone->ops, + &int340x_thermal_params, + 0, 0); + if (IS_ERR(int34x_zone->zone)) { + ret = PTR_ERR(int34x_zone->zone); goto err_thermal_zone; } - ret = thermal_zone_device_enable(int34x_thermal_zone->zone); + ret = thermal_zone_device_enable(int34x_zone->zone); if (ret) goto err_enable; - return int34x_thermal_zone; + return int34x_zone; err_enable: - thermal_zone_device_unregister(int34x_thermal_zone->zone); + thermal_zone_device_unregister(int34x_zone->zone); err_thermal_zone: - kfree(int34x_thermal_zone->trips); - acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); + kfree(int34x_zone->trips); + acpi_lpat_free_conversion_table(int34x_zone->lpat_table); err_trips_alloc: - kfree(int34x_thermal_zone->ops); + kfree(int34x_zone->ops); err_ops_alloc: - kfree(int34x_thermal_zone); + kfree(int34x_zone); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(int340x_thermal_zone_add); -void int340x_thermal_zone_remove(struct int34x_thermal_zone - *int34x_thermal_zone) +void int340x_thermal_zone_remove(struct int34x_thermal_zone *int34x_zone) { - thermal_zone_device_unregister(int34x_thermal_zone->zone); - acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); - kfree(int34x_thermal_zone->trips); - kfree(int34x_thermal_zone->ops); - kfree(int34x_thermal_zone); + thermal_zone_device_unregister(int34x_zone->zone); + acpi_lpat_free_conversion_table(int34x_zone->lpat_table); + kfree(int34x_zone->trips); + kfree(int34x_zone->ops); + kfree(int34x_zone); } EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove); From d0009d14e9853bb5f553a36df9fb7791deffc594 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:45:17 +0100 Subject: [PATCH 04/25] thermal: intel: int340x: Drop pointless cast to unsigned long The explicit casting from int to unsigned long in int340x_thermal_get_zone_temp() is pointless, becuase the multiplication result is cast back to int by the assignment in the same statement, so drop it. No expected functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index 020e681759d8..0e622bb560af 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -29,7 +29,7 @@ static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone, if (conv_temp < 0) return conv_temp; - *temp = (unsigned long)conv_temp * 10; + *temp = conv_temp * 10; } else { /* _TMP returns the temperature in tenths of degrees Kelvin */ *temp = deci_kelvin_to_millicelsius(tmp); From 1bcebcab887ba4c4d790d7ccb055303c5dc7dbbb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:47:43 +0100 Subject: [PATCH 05/25] thermal: intel: int340x: Improve int340x_thermal_set_trip_temp() Instead of using snprintf() to populate the ACPI object name in int340x_thermal_set_trip_temp(), use an appropriate initializer and make the function fail if its trip argument is greater than 9, because ACPI object names can only be 4 characters long and it does not make sense to even try to evaluate objects with longer names (that argument is guaranteed to be non-negative, because it comes from the thermal code that will not pass negative trip numbers to zone callbacks). Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- .../thermal/intel/int340x_thermal/int340x_thermal_zone.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index 0e622bb560af..00665967ca52 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -42,10 +42,12 @@ static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, int trip, int temp) { struct int34x_thermal_zone *d = zone->devdata; + char name[] = {'P', 'A', 'T', '0' + trip, '\0'}; acpi_status status; - char name[10]; - snprintf(name, sizeof(name), "PAT%d", trip); + if (trip > 9) + return -EINVAL; + status = acpi_execute_simple_method(d->adev->handle, name, millicelsius_to_deci_kelvin(temp)); if (ACPI_FAILURE(status)) From 2cee73568e8d2f9d63793cf84e9aa65c9dfd85e2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:58:32 +0100 Subject: [PATCH 06/25] thermal: intel: intel_pch: Make pch_wpt_add_acpi_psv_trip() return int Modify pch_wpt_add_acpi_psv_trip() to return an int value instead of using a return pointer for that. While at it, drop an excessive empty code line. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index c529c05c1853..e5ce4d03c3c1 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -90,34 +90,31 @@ struct pch_thermal_device { }; #ifdef CONFIG_ACPI - /* * On some platforms, there is a companion ACPI device, which adds * passive trip temperature using _PSV method. There is no specific * passive temperature setting in MMIO interface of this PCI device. */ -static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, - int *nr_trips) +static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) { struct acpi_device *adev; int temp; adev = ACPI_COMPANION(&ptd->pdev->dev); if (!adev) - return; + return 0; if (thermal_acpi_passive_trip_temp(adev, &temp) || temp <= 0) - return; + return 0; - ptd->trips[*nr_trips].type = THERMAL_TRIP_PASSIVE; - ptd->trips[*nr_trips].temperature = temp; - ++(*nr_trips); + ptd->trips[trip].type = THERMAL_TRIP_PASSIVE; + ptd->trips[trip].temperature = temp; + return 1; } #else -static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, - int *nr_trips) +static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) { - + return 0; } #endif @@ -167,7 +164,7 @@ read_trips: ++(*nr_trips); } - pch_wpt_add_acpi_psv_trip(ptd, nr_trips); + *nr_trips += pch_wpt_add_acpi_psv_trip(ptd, *nr_trips); return 0; } From 558718f4d3798fa80a9288addc09240910c07df1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 19:59:48 +0100 Subject: [PATCH 07/25] thermal: intel: intel_pch: Eliminate redundant return pointers Both pch_wpt_init() and pch_wpt_get_temp() can return the proper result via their return values, so they do not need to use return pointers. Modify them accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 40 ++++++++++------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index e5ce4d03c3c1..02bd7a048159 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -118,12 +118,11 @@ static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) } #endif -static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips) +static int pch_wpt_init(struct pch_thermal_device *ptd) { - u8 tsel; + int nr_trips = 0; u16 trip_temp; - - *nr_trips = 0; + u8 tsel; /* Check if BIOS has already enabled thermal sensor */ if (WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL)) { @@ -151,29 +150,23 @@ read_trips: trip_temp = readw(ptd->hw_base + WPT_CTT); trip_temp &= 0x1FF; if (trip_temp) { - ptd->trips[*nr_trips].temperature = GET_WPT_TEMP(trip_temp); - ptd->trips[*nr_trips].type = THERMAL_TRIP_CRITICAL; - ++(*nr_trips); + ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); + ptd->trips[nr_trips++].type = THERMAL_TRIP_CRITICAL; } trip_temp = readw(ptd->hw_base + WPT_PHL); trip_temp &= 0x1FF; if (trip_temp) { - ptd->trips[*nr_trips].temperature = GET_WPT_TEMP(trip_temp); - ptd->trips[*nr_trips].type = THERMAL_TRIP_HOT; - ++(*nr_trips); + ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); + ptd->trips[nr_trips++].type = THERMAL_TRIP_HOT; } - *nr_trips += pch_wpt_add_acpi_psv_trip(ptd, *nr_trips); - - return 0; + return nr_trips + pch_wpt_add_acpi_psv_trip(ptd, nr_trips); } -static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp) +static int pch_wpt_get_temp(struct pch_thermal_device *ptd) { - *temp = GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); - - return 0; + return GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); } /* Cool the PCH when it's overheat in .suspend_noirq phase */ @@ -259,8 +252,8 @@ static int pch_wpt_resume(struct pch_thermal_device *ptd) } struct pch_dev_ops { - int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips); - int (*get_temp)(struct pch_thermal_device *ptd, int *temp); + int (*hw_init)(struct pch_thermal_device *ptd); + int (*get_temp)(struct pch_thermal_device *ptd); int (*suspend)(struct pch_thermal_device *ptd); int (*resume)(struct pch_thermal_device *ptd); }; @@ -278,7 +271,8 @@ static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) { struct pch_thermal_device *ptd = tzd->devdata; - return ptd->ops->get_temp(ptd, temp); + *temp = ptd->ops->get_temp(ptd); + return 0; } static void pch_critical(struct thermal_zone_device *tzd) @@ -372,9 +366,11 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, goto error_release; } - err = ptd->ops->hw_init(ptd, &nr_trips); - if (err) + nr_trips = ptd->ops->hw_init(ptd); + if (nr_trips < 0) { + err = nr_trips; goto error_cleanup; + } ptd->tzd = thermal_zone_device_register_with_trips(bi->name, ptd->trips, nr_trips, 0, ptd, From 1aa4f925d80c44bd70442a8e94718fc921b8a55f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 20:00:48 +0100 Subject: [PATCH 08/25] thermal: intel: intel_pch: Rename device operations callbacks Because the same device operations callbacks are used for all supported boards, they are in fact generic, so rename them to reflect that. Also rename the operations object itself for consistency. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 34 +++++++++++------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 02bd7a048159..b15d1f5fd51e 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -118,7 +118,7 @@ static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) } #endif -static int pch_wpt_init(struct pch_thermal_device *ptd) +static int pch_hw_init(struct pch_thermal_device *ptd) { int nr_trips = 0; u16 trip_temp; @@ -164,13 +164,13 @@ read_trips: return nr_trips + pch_wpt_add_acpi_psv_trip(ptd, nr_trips); } -static int pch_wpt_get_temp(struct pch_thermal_device *ptd) +static int pch_get_temp(struct pch_thermal_device *ptd) { return GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); } /* Cool the PCH when it's overheat in .suspend_noirq phase */ -static int pch_wpt_suspend(struct pch_thermal_device *ptd) +static int pch_suspend(struct pch_thermal_device *ptd) { u8 tsel; int pch_delay_cnt = 0; @@ -237,7 +237,7 @@ static int pch_wpt_suspend(struct pch_thermal_device *ptd) return 0; } -static int pch_wpt_resume(struct pch_thermal_device *ptd) +static int pch_resume(struct pch_thermal_device *ptd) { u8 tsel; @@ -258,13 +258,11 @@ struct pch_dev_ops { int (*resume)(struct pch_thermal_device *ptd); }; - -/* dev ops for Wildcat Point */ -static const struct pch_dev_ops pch_dev_ops_wpt = { - .hw_init = pch_wpt_init, - .get_temp = pch_wpt_get_temp, - .suspend = pch_wpt_suspend, - .resume = pch_wpt_resume, +static const struct pch_dev_ops pch_dev_ops = { + .hw_init = pch_hw_init, + .get_temp = pch_get_temp, + .suspend = pch_suspend, + .resume = pch_resume, }; static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) @@ -301,31 +299,31 @@ static const struct board_info { } board_info[] = { [board_hsw] = { .name = "pch_haswell", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_wpt] = { .name = "pch_wildcat_point", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_skl] = { .name = "pch_skylake", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_cnl] = { .name = "pch_cannonlake", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_cml] = { .name = "pch_cometlake", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_lwb] = { .name = "pch_lewisburg", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, [board_wbg] = { .name = "pch_wellsburg", - .ops = &pch_dev_ops_wpt, + .ops = &pch_dev_ops, }, }; From 86cb1004b6f71b2f79f5fb08502a5bd10c852bf5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 20:02:19 +0100 Subject: [PATCH 09/25] thermal: intel: intel_pch: Eliminate device operations object The same device operations object is pointed to by all of the board configurations in the driver, so effectively the same operations callbacks are used by all of them which only adds overhead (that can be significant due to retpolines) for no real purpose. For this reason, drop the device operations object and replace the respective callback invocations by direct calls to the specific functions that were previously pointed to by callback pointers. No intentional change in behavior. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 33 +++-------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index b15d1f5fd51e..4b2545191fe1 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -82,7 +82,6 @@ static char driver_name[] = "Intel PCH thermal driver"; struct pch_thermal_device { void __iomem *hw_base; - const struct pch_dev_ops *ops; struct pci_dev *pdev; struct thermal_zone_device *tzd; struct thermal_trip trips[PCH_MAX_TRIPS]; @@ -251,25 +250,11 @@ static int pch_resume(struct pch_thermal_device *ptd) return 0; } -struct pch_dev_ops { - int (*hw_init)(struct pch_thermal_device *ptd); - int (*get_temp)(struct pch_thermal_device *ptd); - int (*suspend)(struct pch_thermal_device *ptd); - int (*resume)(struct pch_thermal_device *ptd); -}; - -static const struct pch_dev_ops pch_dev_ops = { - .hw_init = pch_hw_init, - .get_temp = pch_get_temp, - .suspend = pch_suspend, - .resume = pch_resume, -}; - static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) { struct pch_thermal_device *ptd = tzd->devdata; - *temp = ptd->ops->get_temp(ptd); + *temp = pch_get_temp(ptd); return 0; } @@ -295,35 +280,27 @@ enum board_ids { static const struct board_info { const char *name; - const struct pch_dev_ops *ops; } board_info[] = { [board_hsw] = { .name = "pch_haswell", - .ops = &pch_dev_ops, }, [board_wpt] = { .name = "pch_wildcat_point", - .ops = &pch_dev_ops, }, [board_skl] = { .name = "pch_skylake", - .ops = &pch_dev_ops, }, [board_cnl] = { .name = "pch_cannonlake", - .ops = &pch_dev_ops, }, [board_cml] = { .name = "pch_cometlake", - .ops = &pch_dev_ops, }, [board_lwb] = { .name = "pch_lewisburg", - .ops = &pch_dev_ops, }, [board_wbg] = { .name = "pch_wellsburg", - .ops = &pch_dev_ops, }, }; @@ -340,8 +317,6 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, if (!ptd) return -ENOMEM; - ptd->ops = bi->ops; - pci_set_drvdata(pdev, ptd); ptd->pdev = pdev; @@ -364,7 +339,7 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, goto error_release; } - nr_trips = ptd->ops->hw_init(ptd); + nr_trips = pch_hw_init(ptd); if (nr_trips < 0) { err = nr_trips; goto error_cleanup; @@ -412,14 +387,14 @@ static int intel_pch_thermal_suspend_noirq(struct device *device) { struct pch_thermal_device *ptd = dev_get_drvdata(device); - return ptd->ops->suspend(ptd); + return pch_suspend(ptd); } static int intel_pch_thermal_resume(struct device *device) { struct pch_thermal_device *ptd = dev_get_drvdata(device); - return ptd->ops->resume(ptd); + return pch_resume(ptd); } static const struct pci_device_id intel_pch_thermal_id[] = { From 35c87f948d315ebc820a8784a962a506c1a3d299 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 20:03:11 +0100 Subject: [PATCH 10/25] thermal: intel: intel_pch: Fold two functions into their callers Fold two functions, pch_hw_init() and pch_get_temp(), that each have only one caller, into their respective callers to make the code somewhat easier to follow. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 98 ++++++++++------------- 1 file changed, 42 insertions(+), 56 deletions(-) diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 4b2545191fe1..fa6cd4dd2f56 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -117,57 +117,6 @@ static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) } #endif -static int pch_hw_init(struct pch_thermal_device *ptd) -{ - int nr_trips = 0; - u16 trip_temp; - u8 tsel; - - /* Check if BIOS has already enabled thermal sensor */ - if (WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL)) { - ptd->bios_enabled = true; - goto read_trips; - } - - tsel = readb(ptd->hw_base + WPT_TSEL); - /* - * When TSEL's Policy Lock-Down bit is 1, TSEL become RO. - * If so, thermal sensor cannot enable. Bail out. - */ - if (tsel & WPT_TSEL_PLDB) { - dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); - return -ENODEV; - } - - writeb(tsel|WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); - if (!(WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL))) { - dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); - return -ENODEV; - } - -read_trips: - trip_temp = readw(ptd->hw_base + WPT_CTT); - trip_temp &= 0x1FF; - if (trip_temp) { - ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); - ptd->trips[nr_trips++].type = THERMAL_TRIP_CRITICAL; - } - - trip_temp = readw(ptd->hw_base + WPT_PHL); - trip_temp &= 0x1FF; - if (trip_temp) { - ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); - ptd->trips[nr_trips++].type = THERMAL_TRIP_HOT; - } - - return nr_trips + pch_wpt_add_acpi_psv_trip(ptd, nr_trips); -} - -static int pch_get_temp(struct pch_thermal_device *ptd) -{ - return GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); -} - /* Cool the PCH when it's overheat in .suspend_noirq phase */ static int pch_suspend(struct pch_thermal_device *ptd) { @@ -254,7 +203,7 @@ static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) { struct pch_thermal_device *ptd = tzd->devdata; - *temp = pch_get_temp(ptd); + *temp = GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); return 0; } @@ -310,8 +259,10 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, enum board_ids board_id = id->driver_data; const struct board_info *bi = &board_info[board_id]; struct pch_thermal_device *ptd; + int nr_trips = 0; + u16 trip_temp; + u8 tsel; int err; - int nr_trips; ptd = devm_kzalloc(&pdev->dev, sizeof(*ptd), GFP_KERNEL); if (!ptd) @@ -339,12 +290,47 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, goto error_release; } - nr_trips = pch_hw_init(ptd); - if (nr_trips < 0) { - err = nr_trips; + /* Check if BIOS has already enabled thermal sensor */ + if (WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL)) { + ptd->bios_enabled = true; + goto read_trips; + } + + tsel = readb(ptd->hw_base + WPT_TSEL); + /* + * When TSEL's Policy Lock-Down bit is 1, TSEL become RO. + * If so, thermal sensor cannot enable. Bail out. + */ + if (tsel & WPT_TSEL_PLDB) { + dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); + err = -ENODEV; goto error_cleanup; } + writeb(tsel|WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); + if (!(WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL))) { + dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); + err = -ENODEV; + goto error_cleanup; + } + +read_trips: + trip_temp = readw(ptd->hw_base + WPT_CTT); + trip_temp &= 0x1FF; + if (trip_temp) { + ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); + ptd->trips[nr_trips++].type = THERMAL_TRIP_CRITICAL; + } + + trip_temp = readw(ptd->hw_base + WPT_PHL); + trip_temp &= 0x1FF; + if (trip_temp) { + ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); + ptd->trips[nr_trips++].type = THERMAL_TRIP_HOT; + } + + nr_trips += pch_wpt_add_acpi_psv_trip(ptd, nr_trips); + ptd->tzd = thermal_zone_device_register_with_trips(bi->name, ptd->trips, nr_trips, 0, ptd, &tzd_ops, NULL, 0, 0); From c5f43242f48ac81d57f50592acf7d425640d9f83 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 20:04:05 +0100 Subject: [PATCH 11/25] thermal: intel: intel_pch: Fold suspend and resume routines into their callers Fold pch_suspend() and pch_resume(), that each have only one caller, into their respective callers to make the code somewhat easier to follow. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 155 ++++++++++------------ 1 file changed, 71 insertions(+), 84 deletions(-) diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index fa6cd4dd2f56..ffb2acd0fd45 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -117,88 +117,6 @@ static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) } #endif -/* Cool the PCH when it's overheat in .suspend_noirq phase */ -static int pch_suspend(struct pch_thermal_device *ptd) -{ - u8 tsel; - int pch_delay_cnt = 0; - u16 pch_thr_temp, pch_cur_temp; - - /* Shutdown the thermal sensor if it is not enabled by BIOS */ - if (!ptd->bios_enabled) { - tsel = readb(ptd->hw_base + WPT_TSEL); - writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL); - return 0; - } - - /* Do not check temperature if it is not s2idle */ - if (pm_suspend_via_firmware()) - return 0; - - /* Get the PCH temperature threshold value */ - pch_thr_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TSPM)); - - /* Get the PCH current temperature value */ - pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); - - /* - * If current PCH temperature is higher than configured PCH threshold - * value, run some delay loop with sleep to let the current temperature - * go down below the threshold value which helps to allow system enter - * lower power S0ix suspend state. Even after delay loop if PCH current - * temperature stays above threshold, notify the warning message - * which helps to indentify the reason why S0ix entry was rejected. - */ - while (pch_delay_cnt < delay_cnt) { - if (pch_cur_temp < pch_thr_temp) - break; - - if (pm_wakeup_pending()) { - dev_warn(&ptd->pdev->dev, "Wakeup event detected, abort cooling\n"); - return 0; - } - - pch_delay_cnt++; - dev_dbg(&ptd->pdev->dev, - "CPU-PCH current temp [%dC] higher than the threshold temp [%dC], sleep %d times for %d ms duration\n", - pch_cur_temp, pch_thr_temp, pch_delay_cnt, delay_timeout); - msleep(delay_timeout); - /* Read the PCH current temperature for next cycle. */ - pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); - } - - if (pch_cur_temp >= pch_thr_temp) - dev_warn(&ptd->pdev->dev, - "CPU-PCH is hot [%dC] after %d ms delay. S0ix might fail\n", - pch_cur_temp, pch_delay_cnt * delay_timeout); - else { - if (pch_delay_cnt) - dev_info(&ptd->pdev->dev, - "CPU-PCH is cool [%dC] after %d ms delay\n", - pch_cur_temp, pch_delay_cnt * delay_timeout); - else - dev_info(&ptd->pdev->dev, - "CPU-PCH is cool [%dC]\n", - pch_cur_temp); - } - - return 0; -} - -static int pch_resume(struct pch_thermal_device *ptd) -{ - u8 tsel; - - if (ptd->bios_enabled) - return 0; - - tsel = readb(ptd->hw_base + WPT_TSEL); - - writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); - - return 0; -} - static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) { struct pch_thermal_device *ptd = tzd->devdata; @@ -372,15 +290,84 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev) static int intel_pch_thermal_suspend_noirq(struct device *device) { struct pch_thermal_device *ptd = dev_get_drvdata(device); + u16 pch_thr_temp, pch_cur_temp; + int pch_delay_cnt = 0; + u8 tsel; - return pch_suspend(ptd); + /* Shutdown the thermal sensor if it is not enabled by BIOS */ + if (!ptd->bios_enabled) { + tsel = readb(ptd->hw_base + WPT_TSEL); + writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL); + return 0; + } + + /* Do not check temperature if it is not s2idle */ + if (pm_suspend_via_firmware()) + return 0; + + /* Get the PCH temperature threshold value */ + pch_thr_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TSPM)); + + /* Get the PCH current temperature value */ + pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); + + /* + * If current PCH temperature is higher than configured PCH threshold + * value, run some delay loop with sleep to let the current temperature + * go down below the threshold value which helps to allow system enter + * lower power S0ix suspend state. Even after delay loop if PCH current + * temperature stays above threshold, notify the warning message + * which helps to indentify the reason why S0ix entry was rejected. + */ + while (pch_delay_cnt < delay_cnt) { + if (pch_cur_temp < pch_thr_temp) + break; + + if (pm_wakeup_pending()) { + dev_warn(&ptd->pdev->dev, "Wakeup event detected, abort cooling\n"); + return 0; + } + + pch_delay_cnt++; + dev_dbg(&ptd->pdev->dev, + "CPU-PCH current temp [%dC] higher than the threshold temp [%dC], sleep %d times for %d ms duration\n", + pch_cur_temp, pch_thr_temp, pch_delay_cnt, delay_timeout); + msleep(delay_timeout); + /* Read the PCH current temperature for next cycle. */ + pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); + } + + if (pch_cur_temp >= pch_thr_temp) + dev_warn(&ptd->pdev->dev, + "CPU-PCH is hot [%dC] after %d ms delay. S0ix might fail\n", + pch_cur_temp, pch_delay_cnt * delay_timeout); + else { + if (pch_delay_cnt) + dev_info(&ptd->pdev->dev, + "CPU-PCH is cool [%dC] after %d ms delay\n", + pch_cur_temp, pch_delay_cnt * delay_timeout); + else + dev_info(&ptd->pdev->dev, + "CPU-PCH is cool [%dC]\n", + pch_cur_temp); + } + + return 0; } static int intel_pch_thermal_resume(struct device *device) { struct pch_thermal_device *ptd = dev_get_drvdata(device); + u8 tsel; - return pch_resume(ptd); + if (ptd->bios_enabled) + return 0; + + tsel = readb(ptd->hw_base + WPT_TSEL); + + writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); + + return 0; } static const struct pci_device_id intel_pch_thermal_id[] = { From ae98e57a6e829dee26c1573134a24709d2cb82a3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 30 Jan 2023 20:04:55 +0100 Subject: [PATCH 12/25] thermal: intel: intel_pch: Rename board ID symbols Use capitals in the names of the board ID symbols and add the PCH_ prefix to each of them for consistency. Also rename the board_ids enum accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 54 +++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index ffb2acd0fd45..12600bca89b6 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -135,38 +135,38 @@ static struct thermal_zone_device_ops tzd_ops = { .critical = pch_critical, }; -enum board_ids { - board_hsw, - board_wpt, - board_skl, - board_cnl, - board_cml, - board_lwb, - board_wbg, +enum pch_board_ids { + PCH_BOARD_HSW = 0, + PCH_BOARD_WPT, + PCH_BOARD_SKL, + PCH_BOARD_CNL, + PCH_BOARD_CML, + PCH_BOARD_LWB, + PCH_BOARD_WBG, }; static const struct board_info { const char *name; } board_info[] = { - [board_hsw] = { + [PCH_BOARD_HSW] = { .name = "pch_haswell", }, - [board_wpt] = { + [PCH_BOARD_WPT] = { .name = "pch_wildcat_point", }, - [board_skl] = { + [PCH_BOARD_SKL] = { .name = "pch_skylake", }, - [board_cnl] = { + [PCH_BOARD_CNL] = { .name = "pch_cannonlake", }, - [board_cml] = { + [PCH_BOARD_CML] = { .name = "pch_cometlake", }, - [board_lwb] = { + [PCH_BOARD_LWB] = { .name = "pch_lewisburg", }, - [board_wbg] = { + [PCH_BOARD_WBG] = { .name = "pch_wellsburg", }, }; @@ -174,7 +174,7 @@ static const struct board_info { static int intel_pch_thermal_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - enum board_ids board_id = id->driver_data; + enum pch_board_ids board_id = id->driver_data; const struct board_info *bi = &board_info[board_id]; struct pch_thermal_device *ptd; int nr_trips = 0; @@ -372,27 +372,27 @@ static int intel_pch_thermal_resume(struct device *device) static const struct pci_device_id intel_pch_thermal_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_1), - .driver_data = board_hsw, }, + .driver_data = PCH_BOARD_HSW, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_2), - .driver_data = board_hsw, }, + .driver_data = PCH_BOARD_HSW, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT), - .driver_data = board_wpt, }, + .driver_data = PCH_BOARD_WPT, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL), - .driver_data = board_skl, }, + .driver_data = PCH_BOARD_SKL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL_H), - .driver_data = board_skl, }, + .driver_data = PCH_BOARD_SKL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL), - .driver_data = board_cnl, }, + .driver_data = PCH_BOARD_CNL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL_H), - .driver_data = board_cnl, }, + .driver_data = PCH_BOARD_CNL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL_LP), - .driver_data = board_cnl, }, + .driver_data = PCH_BOARD_CNL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CML_H), - .driver_data = board_cml, }, + .driver_data = PCH_BOARD_CML, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_LWB), - .driver_data = board_lwb, }, + .driver_data = PCH_BOARD_LWB, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WBG), - .driver_data = board_wbg, }, + .driver_data = PCH_BOARD_WBG, }, { 0, }, }; MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id); From 2153a87ff9ef5537b8a96e7c94a9d79a78c7a30c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 31 Jan 2023 14:08:20 +0100 Subject: [PATCH 13/25] thermal: intel: intel_pch: Drop struct board_info Because the only member of struct board_info is the name, the board_info[] array of struct board_info elements can be replaced with an array of strings. Modify the code accordingly and drop struct board_info. No intentional functional impact. Suggested-by: Zhang Rui Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 42 +++++++---------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 12600bca89b6..b855d031a855 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -145,37 +145,20 @@ enum pch_board_ids { PCH_BOARD_WBG, }; -static const struct board_info { - const char *name; -} board_info[] = { - [PCH_BOARD_HSW] = { - .name = "pch_haswell", - }, - [PCH_BOARD_WPT] = { - .name = "pch_wildcat_point", - }, - [PCH_BOARD_SKL] = { - .name = "pch_skylake", - }, - [PCH_BOARD_CNL] = { - .name = "pch_cannonlake", - }, - [PCH_BOARD_CML] = { - .name = "pch_cometlake", - }, - [PCH_BOARD_LWB] = { - .name = "pch_lewisburg", - }, - [PCH_BOARD_WBG] = { - .name = "pch_wellsburg", - }, +static const char *board_names[] = { + [PCH_BOARD_HSW] = "pch_haswell", + [PCH_BOARD_WPT] = "pch_wildcat_point", + [PCH_BOARD_SKL] = "pch_skylake", + [PCH_BOARD_CNL] = "pch_cannonlake", + [PCH_BOARD_CML] = "pch_cometlake", + [PCH_BOARD_LWB] = "pch_lewisburg", + [PCH_BOARD_WBG] = "pch_wellsburg", }; static int intel_pch_thermal_probe(struct pci_dev *pdev, const struct pci_device_id *id) { enum pch_board_ids board_id = id->driver_data; - const struct board_info *bi = &board_info[board_id]; struct pch_thermal_device *ptd; int nr_trips = 0; u16 trip_temp; @@ -249,12 +232,13 @@ read_trips: nr_trips += pch_wpt_add_acpi_psv_trip(ptd, nr_trips); - ptd->tzd = thermal_zone_device_register_with_trips(bi->name, ptd->trips, - nr_trips, 0, ptd, - &tzd_ops, NULL, 0, 0); + ptd->tzd = thermal_zone_device_register_with_trips(board_names[board_id], + ptd->trips, nr_trips, + 0, ptd, &tzd_ops, + NULL, 0, 0); if (IS_ERR(ptd->tzd)) { dev_err(&pdev->dev, "Failed to register thermal zone %s\n", - bi->name); + board_names[board_id]); err = PTR_ERR(ptd->tzd); goto error_cleanup; } From 8e47363588377e1bdb65e2b020b409cfb44dd260 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 12:39:41 -0800 Subject: [PATCH 14/25] thermal: intel: powerclamp: Fix cur_state for multi package system The powerclamp cooling device cur_state shows actual idle observed by package C-state idle counters. But the implementation is not sufficient for multi package or multi die system. The cur_state value is incorrect. On these systems, these counters must be read from each package/die and somehow aggregate them. But there is no good method for aggregation. It was not a problem when explicit CPU model addition was required to enable intel powerclamp. In this way certain CPU models could have been avoided. But with the removal of CPU model check with the availability of Package C-state counters, the driver is loaded on most of the recent systems. For multi package/die systems, just show the actual target idle state, the system is trying to achieve. In powerclamp this is the user set state minus one. Also there is no use of starting a worker thread for polling package C-state counters and applying any compensation for multiple package or multiple die systems. Fixes: b721ca0d1927 ("thermal/powerclamp: remove cpu whitelist") Signed-off-by: Srinivas Pandruvada Cc: 4.14+ # 4.14+ Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index b80e25ec1261..2f4cbfdf26a0 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -57,6 +57,7 @@ static unsigned int target_mwait; static struct dentry *debug_dir; +static bool poll_pkg_cstate_enable; /* user selected target */ static unsigned int set_target_ratio; @@ -261,6 +262,9 @@ static unsigned int get_compensation(int ratio) { unsigned int comp = 0; + if (!poll_pkg_cstate_enable) + return 0; + /* we only use compensation if all adjacent ones are good */ if (ratio == 1 && cal_data[ratio].confidence >= CONFIDENCE_OK && @@ -519,7 +523,8 @@ static int start_power_clamp(void) control_cpu = cpumask_first(cpu_online_mask); clamping = true; - schedule_delayed_work(&poll_pkg_cstate_work, 0); + if (poll_pkg_cstate_enable) + schedule_delayed_work(&poll_pkg_cstate_work, 0); /* start one kthread worker per online cpu */ for_each_online_cpu(cpu) { @@ -585,11 +590,15 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { - if (true == clamping) - *state = pkg_cstate_ratio_cur; - else + if (clamping) { + if (poll_pkg_cstate_enable) + *state = pkg_cstate_ratio_cur; + else + *state = set_target_ratio; + } else { /* to save power, do not poll idle ratio while not clamping */ *state = -1; /* indicates invalid state */ + } return 0; } @@ -712,6 +721,9 @@ static int __init powerclamp_init(void) goto exit_unregister; } + if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) + poll_pkg_cstate_enable = true; + cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, &powerclamp_cooling_ops); if (IS_ERR(cooling_dev)) { From bbfc3349c4e77a27ea83c765bf593d935f8f5599 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 10:28:51 -0800 Subject: [PATCH 15/25] powercap: idle_inject: Export symbols Export symbols for external interfaces, so that they can be used in other loadable modules. Export is done under name space IDLE_INJECT. Signed-off-by: Srinivas Pandruvada Acked-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/powercap/idle_inject.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index fe86a09e3b67..dfa989182e71 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -160,6 +160,7 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev, WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); } } +EXPORT_SYMBOL_NS_GPL(idle_inject_set_duration, IDLE_INJECT); /** * idle_inject_get_duration - idle and run duration retrieval helper @@ -174,6 +175,7 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev, *run_duration_us = READ_ONCE(ii_dev->run_duration_us); *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); } +EXPORT_SYMBOL_NS_GPL(idle_inject_get_duration, IDLE_INJECT); /** * idle_inject_set_latency - set the maximum latency allowed @@ -185,6 +187,7 @@ void idle_inject_set_latency(struct idle_inject_device *ii_dev, { WRITE_ONCE(ii_dev->latency_us, latency_us); } +EXPORT_SYMBOL_NS_GPL(idle_inject_set_latency, IDLE_INJECT); /** * idle_inject_start - start idle injections @@ -216,6 +219,7 @@ int idle_inject_start(struct idle_inject_device *ii_dev) return 0; } +EXPORT_SYMBOL_NS_GPL(idle_inject_start, IDLE_INJECT); /** * idle_inject_stop - stops idle injections @@ -262,6 +266,7 @@ void idle_inject_stop(struct idle_inject_device *ii_dev) cpu_hotplug_enable(); } +EXPORT_SYMBOL_NS_GPL(idle_inject_stop, IDLE_INJECT); /** * idle_inject_setup - prepare the current task for idle injection @@ -337,6 +342,7 @@ out_rollback: return NULL; } +EXPORT_SYMBOL_NS_GPL(idle_inject_register, IDLE_INJECT); /** * idle_inject_unregister - unregister idle injection control device @@ -357,6 +363,7 @@ void idle_inject_unregister(struct idle_inject_device *ii_dev) kfree(ii_dev); } +EXPORT_SYMBOL_NS_GPL(idle_inject_unregister, IDLE_INJECT); static struct smp_hotplug_thread idle_inject_threads = { .store = &idle_inject_thread.tsk, From acbc661032b8aa0e8359ac77074769ade34a176c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 10:28:52 -0800 Subject: [PATCH 16/25] powercap: idle_inject: Add update callback The powercap/idle_inject core uses play_idle_precise() to inject idle time. But play_idle_precise() can't ensure that the CPU is fully idle for the specified duration because of wakeups due to interrupts. To compensate for the reduced idle time due to these wakes, the caller can adjust requested idle time for the next cycle. The goal of idle injection is to keep system at some idle percent on average, so this is fine to overshoot or undershoot instantaneous idle times. The idle inject core provides an interface idle_inject_set_duration() to set idle and runtime duration. Some architectures provide interface to get actual idle time observed by the hardware. So, the effective idle percent can be adjusted using the hardware feedback. For example, Intel CPUs provides package idle counters, which is currently used by Intel powerclamp driver to readjust runtime duration. When the caller's desired idle time over a period is less or greater than the actual CPU idle time observed by the hardware, caller can readjust idle and runtime duration for the next cycle. The only way this can be done currently is by monitoring hardware idle time from a different software thread and readjust idle and runtime duration using idle_inject_set_duration(). This can be avoided by adding a callback which callers can register and readjust from this callback function. Add a capability to register an optional update() callback, which can be called from the idle inject core before waking up CPUs for idle injection. This callback can be registered via a new interface: idle_inject_register_full(). During this process of constantly adjusting idle and runtime duration there can be some cases where actual idle time is more than the desired. In this case idle inject can be skipped for a cycle. If update() callback returns false, then the idle inject core skips waking up CPUs for the idle injection. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/powercap/idle_inject.c | 52 ++++++++++++++++++++++++++++++---- include/linux/idle_inject.h | 3 ++ 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index dfa989182e71..6862e6c2e87d 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -63,13 +63,29 @@ struct idle_inject_thread { * @idle_duration_us: duration of CPU idle time to inject * @run_duration_us: duration of CPU run time to allow * @latency_us: max allowed latency + * @update: Optional callback deciding whether or not to skip idle + * injection in the given cycle. * @cpumask: mask of CPUs affected by idle injection + * + * This structure is used to define per instance idle inject device data. Each + * instance has an idle duration, a run duration and mask of CPUs to inject + * idle. + * + * Actual CPU idle time is injected by calling kernel scheduler interface + * play_idle_precise(). There is one optional callback that can be registered + * by calling idle_inject_register_full(): + * + * update() - This callback is invoked just before waking up CPUs to inject + * idle. If it returns false, CPUs are not woken up to inject idle in the given + * cycle. It also allows the caller to readjust the idle and run duration by + * calling idle_inject_set_duration() for the next cycle. */ struct idle_inject_device { struct hrtimer timer; unsigned int idle_duration_us; unsigned int run_duration_us; unsigned int latency_us; + bool (*update)(void); unsigned long cpumask[]; }; @@ -111,11 +127,12 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) struct idle_inject_device *ii_dev = container_of(timer, struct idle_inject_device, timer); + if (!ii_dev->update || (ii_dev->update && ii_dev->update())) + idle_inject_wakeup(ii_dev); + duration_us = READ_ONCE(ii_dev->run_duration_us); duration_us += READ_ONCE(ii_dev->idle_duration_us); - idle_inject_wakeup(ii_dev); - hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC)); return HRTIMER_RESTART; @@ -295,17 +312,22 @@ static int idle_inject_should_run(unsigned int cpu) } /** - * idle_inject_register - initialize idle injection on a set of CPUs + * idle_inject_register_full - initialize idle injection on a set of CPUs * @cpumask: CPUs to be affected by idle injection + * @update: This callback is called just before waking up CPUs to inject + * idle * * This function creates an idle injection control device structure for the - * given set of CPUs and initializes the timer associated with it. It does not - * start any injection cycles. + * given set of CPUs and initializes the timer associated with it. This + * function also allows to register update()callback. + * It does not start any injection cycles. * * Return: NULL if memory allocation fails, idle injection control device * pointer on success. */ -struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) + +struct idle_inject_device *idle_inject_register_full(struct cpumask *cpumask, + bool (*update)(void)) { struct idle_inject_device *ii_dev; int cpu, cpu_rb; @@ -318,6 +340,7 @@ struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ii_dev->timer.function = idle_inject_timer_fn; ii_dev->latency_us = UINT_MAX; + ii_dev->update = update; for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { @@ -342,6 +365,23 @@ out_rollback: return NULL; } +EXPORT_SYMBOL_NS_GPL(idle_inject_register_full, IDLE_INJECT); + +/** + * idle_inject_register - initialize idle injection on a set of CPUs + * @cpumask: CPUs to be affected by idle injection + * + * This function creates an idle injection control device structure for the + * given set of CPUs and initializes the timer associated with it. It does not + * start any injection cycles. + * + * Return: NULL if memory allocation fails, idle injection control device + * pointer on success. + */ +struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) +{ + return idle_inject_register_full(cpumask, NULL); +} EXPORT_SYMBOL_NS_GPL(idle_inject_register, IDLE_INJECT); /** diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h index fb88e23a99d3..a85d5dd40f72 100644 --- a/include/linux/idle_inject.h +++ b/include/linux/idle_inject.h @@ -13,6 +13,9 @@ struct idle_inject_device; struct idle_inject_device *idle_inject_register(struct cpumask *cpumask); +struct idle_inject_device *idle_inject_register_full(struct cpumask *cpumask, + bool (*update)(void)); + void idle_inject_unregister(struct idle_inject_device *ii_dev); int idle_inject_start(struct idle_inject_device *ii_dev); From 8526eb7fc75abcd09d8bd061610215baf0ca948a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 1 Feb 2023 10:28:53 -0800 Subject: [PATCH 17/25] thermal: intel: powerclamp: Use powercap idle-inject feature There are two idle injection implementation in the Linux kernel. One via intel_powerclamp and the other using powercap/idle_inject. Both implementation end up in calling play_idle* function from a FIFO priority thread. Both can't be used at the same time. It is better to use one idle injection framework for better maintainability. In this way, there is only one caller for play_idle. Here powercap/idle_inject can be used for both per-core and for system wide idle injection. This framework has a well defined interface which allow registry for per-core or for all CPUs (system wide). This reduces code complexity in the intel powerclamp driver as all the per CPU kthreads, delayed work and calls to play_idle can be removed. The changes include: - Remove unneeded include files - Remove per CPU kthread workers: balancing_work and idle_injection_work. - Reuse the compensation related code by moving from previous worker thread to idle_injection callback. - Adjust the idle_duration and runtime by using powercap/idle_inject interface. - Remove all variables, which are not required once powercap/idle_inject is used. - Add mutex to avoid race during removal of idle injection during module unload and user action to change idle inject percent. Also for protection during dynamic adjustment of run and idle time from update() callback. - Remove online/offline callbacks to designate control CPU - Use cpu_present_mask global variable for CPU mask - Remove hot plug locks Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/Kconfig | 3 + drivers/thermal/intel/intel_powerclamp.c | 401 ++++++++++------------- 2 files changed, 170 insertions(+), 234 deletions(-) diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig index e50fd260484a..b5808f92702d 100644 --- a/drivers/thermal/intel/Kconfig +++ b/drivers/thermal/intel/Kconfig @@ -3,6 +3,9 @@ config INTEL_POWERCLAMP tristate "Intel PowerClamp idle injection driver" depends on X86 depends on CPU_SUP_INTEL + depends on CPU_IDLE + select POWERCAP + select IDLE_INJECT help Enable this to enable Intel PowerClamp idle injection driver. This enforce idle time which results in more package C-state residency. The diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 2f4cbfdf26a0..209bb5f5ae17 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -2,7 +2,7 @@ /* * intel_powerclamp.c - package c-state idle injection * - * Copyright (c) 2012, Intel Corporation. + * Copyright (c) 2012-2023, Intel Corporation. * * Authors: * Arjan van de Ven @@ -27,21 +27,15 @@ #include #include #include -#include #include #include -#include -#include #include #include -#include -#include +#include -#include #include #include #include -#include #define MAX_TARGET_RATIO (50U) /* For each undisturbed clamping period (no extra wake ups during idle time), @@ -59,35 +53,26 @@ static unsigned int target_mwait; static struct dentry *debug_dir; static bool poll_pkg_cstate_enable; -/* user selected target */ -static unsigned int set_target_ratio; +/* Idle ratio observed using package C-state counters */ static unsigned int current_ratio; + +/* Skip the idle injection till set to true */ static bool should_skip; -static unsigned int control_cpu; /* The cpu assigned to collect stat and update - * control parameters. default to BSP but BSP - * can be offlined. - */ -static bool clamping; - -struct powerclamp_worker_data { - struct kthread_worker *worker; - struct kthread_work balancing_work; - struct kthread_delayed_work idle_injection_work; +struct powerclamp_data { unsigned int cpu; unsigned int count; unsigned int guard; unsigned int window_size_now; unsigned int target_ratio; - unsigned int duration_jiffies; bool clamping; }; -static struct powerclamp_worker_data __percpu *worker_data; +static struct powerclamp_data powerclamp_data; + static struct thermal_cooling_device *cooling_dev; -static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu - * clamping kthread worker - */ + +static DEFINE_MUTEX(powerclamp_lock); static unsigned int duration; static unsigned int pkg_cstate_ratio_cur; @@ -306,7 +291,7 @@ static void adjust_compensation(int target_ratio, unsigned int win) if (d->confidence >= CONFIDENCE_OK) return; - delta = set_target_ratio - current_ratio; + delta = powerclamp_data.target_ratio - current_ratio; /* filter out bad data */ if (delta >= 0 && delta <= (1+target_ratio/10)) { if (d->steady_comp) @@ -345,82 +330,39 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio, adjust_compensation(target_ratio, win); /* if we are above target+guard, skip */ - return set_target_ratio + guard <= current_ratio; + return powerclamp_data.target_ratio + guard <= current_ratio; } -static void clamp_balancing_func(struct kthread_work *work) +/* + * This function calculates runtime from the current target ratio. + * This function gets called under powerclamp_lock. + */ +static unsigned int get_run_time(void) { - struct powerclamp_worker_data *w_data; - int sleeptime; - unsigned long target_jiffies; unsigned int compensated_ratio; - int interval; /* jiffies to sleep for each attempt */ - - w_data = container_of(work, struct powerclamp_worker_data, - balancing_work); + unsigned int runtime; /* * make sure user selected ratio does not take effect until * the next round. adjust target_ratio if user has changed * target such that we can converge quickly. */ - w_data->target_ratio = READ_ONCE(set_target_ratio); - w_data->guard = 1 + w_data->target_ratio / 20; - w_data->window_size_now = window_size; - w_data->duration_jiffies = msecs_to_jiffies(duration); - w_data->count++; + powerclamp_data.guard = 1 + powerclamp_data.target_ratio / 20; + powerclamp_data.window_size_now = window_size; /* * systems may have different ability to enter package level * c-states, thus we need to compensate the injected idle ratio * to achieve the actual target reported by the HW. */ - compensated_ratio = w_data->target_ratio + - get_compensation(w_data->target_ratio); + compensated_ratio = powerclamp_data.target_ratio + + get_compensation(powerclamp_data.target_ratio); if (compensated_ratio <= 0) compensated_ratio = 1; - interval = w_data->duration_jiffies * 100 / compensated_ratio; - /* align idle time */ - target_jiffies = roundup(jiffies, interval); - sleeptime = target_jiffies - jiffies; - if (sleeptime <= 0) - sleeptime = 1; + runtime = duration * 100 / compensated_ratio - duration; - if (clamping && w_data->clamping && cpu_online(w_data->cpu)) - kthread_queue_delayed_work(w_data->worker, - &w_data->idle_injection_work, - sleeptime); -} - -static void clamp_idle_injection_func(struct kthread_work *work) -{ - struct powerclamp_worker_data *w_data; - - w_data = container_of(work, struct powerclamp_worker_data, - idle_injection_work.work); - - /* - * only elected controlling cpu can collect stats and update - * control parameters. - */ - if (w_data->cpu == control_cpu && - !(w_data->count % w_data->window_size_now)) { - should_skip = - powerclamp_adjust_controls(w_data->target_ratio, - w_data->guard, - w_data->window_size_now); - smp_mb(); - } - - if (should_skip) - goto balance; - - play_idle(jiffies_to_usecs(w_data->duration_jiffies)); - -balance: - if (clamping && w_data->clamping && cpu_online(w_data->cpu)) - kthread_queue_work(w_data->worker, &w_data->balancing_work); + return runtime; } /* @@ -456,129 +398,137 @@ static void poll_pkg_cstate(struct work_struct *dummy) msr_last = msr_now; tsc_last = tsc_now; - if (true == clamping) + mutex_lock(&powerclamp_lock); + if (powerclamp_data.clamping) schedule_delayed_work(&poll_pkg_cstate_work, HZ); + mutex_unlock(&powerclamp_lock); } -static void start_power_clamp_worker(unsigned long cpu) -{ - struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu); - struct kthread_worker *worker; +static struct idle_inject_device *ii_dev; - worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu); - if (IS_ERR(worker)) +/* + * This function is called from idle injection core on timer expiry + * for the run duration. This allows powerclamp to readjust or skip + * injecting idle for this cycle. + */ +static bool idle_inject_update(void) +{ + bool update = false; + + /* We can't sleep in this callback */ + if (!mutex_trylock(&powerclamp_lock)) + return true; + + if (!(powerclamp_data.count % powerclamp_data.window_size_now)) { + + should_skip = powerclamp_adjust_controls(powerclamp_data.target_ratio, + powerclamp_data.guard, + powerclamp_data.window_size_now); + update = true; + } + + if (update) { + unsigned int runtime = get_run_time(); + + idle_inject_set_duration(ii_dev, runtime, duration); + } + + powerclamp_data.count++; + + mutex_unlock(&powerclamp_lock); + + if (should_skip) + return false; + + return true; +} + +/* This function starts idle injection by calling idle_inject_start() */ +static void trigger_idle_injection(void) +{ + unsigned int runtime = get_run_time(); + + idle_inject_set_duration(ii_dev, runtime, duration); + idle_inject_start(ii_dev); + powerclamp_data.clamping = true; +} + +/* + * This function is called from start_power_clamp() to register + * CPUS with powercap idle injection register and set default + * idle duration and latency. + */ +static int powerclamp_idle_injection_register(void) +{ + /* + * The idle inject core will only inject for online CPUs, + * So we can register for all present CPUs. In this way + * if some CPU goes online/offline while idle inject + * is registered, nothing additional calls are required. + * The same runtime and idle time is applicable for + * newly onlined CPUs if any. + * + * Here cpu_present_mask can be used as is. + * cast to (struct cpumask *) is required as the + * cpu_present_mask is const struct cpumask *, otherwise + * there will be compiler warnings. + */ + ii_dev = idle_inject_register_full((struct cpumask *)cpu_present_mask, + idle_inject_update); + if (!ii_dev) { + pr_err("powerclamp: idle_inject_register failed\n"); + return -EAGAIN; + } + + idle_inject_set_duration(ii_dev, TICK_USEC, duration); + idle_inject_set_latency(ii_dev, UINT_MAX); + + return 0; +} + +/* + * This function is called from end_power_clamp() to stop idle injection + * and unregister CPUS from powercap idle injection core. + */ +static void remove_idle_injection(void) +{ + if (!powerclamp_data.clamping) return; - w_data->worker = worker; - w_data->count = 0; - w_data->cpu = cpu; - w_data->clamping = true; - set_bit(cpu, cpu_clamping_mask); - sched_set_fifo(worker->task); - kthread_init_work(&w_data->balancing_work, clamp_balancing_func); - kthread_init_delayed_work(&w_data->idle_injection_work, - clamp_idle_injection_func); - kthread_queue_work(w_data->worker, &w_data->balancing_work); -} - -static void stop_power_clamp_worker(unsigned long cpu) -{ - struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu); - - if (!w_data->worker) - return; - - w_data->clamping = false; - /* - * Make sure that all works that get queued after this point see - * the clamping disabled. The counter part is not needed because - * there is an implicit memory barrier when the queued work - * is proceed. - */ - smp_wmb(); - kthread_cancel_work_sync(&w_data->balancing_work); - kthread_cancel_delayed_work_sync(&w_data->idle_injection_work); - /* - * The balancing work still might be queued here because - * the handling of the "clapming" variable, cancel, and queue - * operations are not synchronized via a lock. But it is not - * a big deal. The balancing work is fast and destroy kthread - * will wait for it. - */ - clear_bit(w_data->cpu, cpu_clamping_mask); - kthread_destroy_worker(w_data->worker); - - w_data->worker = NULL; + powerclamp_data.clamping = false; + idle_inject_stop(ii_dev); } +/* + * This function is called when user change the cooling device + * state from zero to some other value. + */ static int start_power_clamp(void) { - unsigned long cpu; + int ret; - set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); - /* prevent cpu hotplug */ - cpus_read_lock(); - - /* prefer BSP */ - control_cpu = cpumask_first(cpu_online_mask); - - clamping = true; - if (poll_pkg_cstate_enable) - schedule_delayed_work(&poll_pkg_cstate_work, 0); - - /* start one kthread worker per online cpu */ - for_each_online_cpu(cpu) { - start_power_clamp_worker(cpu); + ret = powerclamp_idle_injection_register(); + if (!ret) { + trigger_idle_injection(); + if (poll_pkg_cstate_enable) + schedule_delayed_work(&poll_pkg_cstate_work, 0); } - cpus_read_unlock(); - return 0; + return ret; } +/* + * This function is called when user change the cooling device + * state from non zero value zero. + */ static void end_power_clamp(void) { - int i; - - /* - * Block requeuing in all the kthread workers. They will flush and - * stop faster. - */ - clamping = false; - for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { - pr_debug("clamping worker for cpu %d alive, destroy\n", i); - stop_power_clamp_worker(i); + if (powerclamp_data.clamping) { + remove_idle_injection(); + idle_inject_unregister(ii_dev); } } -static int powerclamp_cpu_online(unsigned int cpu) -{ - if (clamping == false) - return 0; - start_power_clamp_worker(cpu); - /* prefer BSP as controlling CPU */ - if (cpu == 0) { - control_cpu = 0; - smp_mb(); - } - return 0; -} - -static int powerclamp_cpu_predown(unsigned int cpu) -{ - if (clamping == false) - return 0; - - stop_power_clamp_worker(cpu); - if (cpu != control_cpu) - return 0; - - control_cpu = cpumask_first(cpu_online_mask); - if (control_cpu == cpu) - control_cpu = cpumask_next(cpu, cpu_online_mask); - smp_mb(); - return 0; -} - static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state) { @@ -590,16 +540,20 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { - if (clamping) { + mutex_lock(&powerclamp_lock); + + if (powerclamp_data.clamping) { if (poll_pkg_cstate_enable) *state = pkg_cstate_ratio_cur; else - *state = set_target_ratio; + *state = powerclamp_data.target_ratio; } else { /* to save power, do not poll idle ratio while not clamping */ *state = -1; /* indicates invalid state */ } + mutex_unlock(&powerclamp_lock); + return 0; } @@ -608,24 +562,32 @@ static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, { int ret = 0; + mutex_lock(&powerclamp_lock); + new_target_ratio = clamp(new_target_ratio, 0UL, - (unsigned long) (MAX_TARGET_RATIO-1)); - if (set_target_ratio == 0 && new_target_ratio > 0) { + (unsigned long) (MAX_TARGET_RATIO - 1)); + if (!powerclamp_data.target_ratio && new_target_ratio > 0) { pr_info("Start idle injection to reduce power\n"); - set_target_ratio = new_target_ratio; + powerclamp_data.target_ratio = new_target_ratio; ret = start_power_clamp(); + if (ret) + powerclamp_data.target_ratio = 0; goto exit_set; - } else if (set_target_ratio > 0 && new_target_ratio == 0) { + } else if (powerclamp_data.target_ratio > 0 && new_target_ratio == 0) { pr_info("Stop forced idle injection\n"); end_power_clamp(); - set_target_ratio = 0; + powerclamp_data.target_ratio = 0; } else /* adjust currently running */ { - set_target_ratio = new_target_ratio; - /* make new set_target_ratio visible to other cpus */ - smp_mb(); + unsigned int runtime; + + powerclamp_data.target_ratio = new_target_ratio; + runtime = get_run_time(); + idle_inject_set_duration(ii_dev, runtime, duration); } exit_set: + mutex_unlock(&powerclamp_lock); + return ret; } @@ -666,7 +628,6 @@ static int powerclamp_debug_show(struct seq_file *m, void *unused) { int i = 0; - seq_printf(m, "controlling cpu: %d\n", control_cpu); seq_printf(m, "pct confidence steady dynamic (compensation)\n"); for (i = 0; i < MAX_TARGET_RATIO; i++) { seq_printf(m, "%d\t%lu\t%lu\t%lu\n", @@ -689,78 +650,50 @@ static inline void powerclamp_create_debug_files(void) &powerclamp_debug_fops); } -static enum cpuhp_state hp_state; - static int __init powerclamp_init(void) { int retval; - cpu_clamping_mask = bitmap_zalloc(num_possible_cpus(), GFP_KERNEL); - if (!cpu_clamping_mask) - return -ENOMEM; - /* probe cpu features and ids here */ retval = powerclamp_probe(); if (retval) - goto exit_free; + return retval; /* set default limit, maybe adjusted during runtime based on feedback */ window_size = 2; - retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "thermal/intel_powerclamp:online", - powerclamp_cpu_online, - powerclamp_cpu_predown); - if (retval < 0) - goto exit_free; - - hp_state = retval; - - worker_data = alloc_percpu(struct powerclamp_worker_data); - if (!worker_data) { - retval = -ENOMEM; - goto exit_unregister; - } if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) poll_pkg_cstate_enable = true; cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, - &powerclamp_cooling_ops); - if (IS_ERR(cooling_dev)) { - retval = -ENODEV; - goto exit_free_thread; - } + &powerclamp_cooling_ops); + if (IS_ERR(cooling_dev)) + return -ENODEV; if (!duration) - duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES); + duration = jiffies_to_usecs(DEFAULT_DURATION_JIFFIES); powerclamp_create_debug_files(); return 0; - -exit_free_thread: - free_percpu(worker_data); -exit_unregister: - cpuhp_remove_state_nocalls(hp_state); -exit_free: - bitmap_free(cpu_clamping_mask); - return retval; } module_init(powerclamp_init); static void __exit powerclamp_exit(void) { + mutex_lock(&powerclamp_lock); end_power_clamp(); - cpuhp_remove_state_nocalls(hp_state); - free_percpu(worker_data); + mutex_unlock(&powerclamp_lock); + thermal_cooling_device_unregister(cooling_dev); - bitmap_free(cpu_clamping_mask); cancel_delayed_work_sync(&poll_pkg_cstate_work); debugfs_remove_recursive(debug_dir); } module_exit(powerclamp_exit); +MODULE_IMPORT_NS(IDLE_INJECT); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arjan van de Ven "); MODULE_AUTHOR("Jacob Pan "); From 72ffc28f2fe8bce4e5b682caedf7a26c4998c756 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 1 Feb 2023 23:36:16 +0100 Subject: [PATCH 18/25] thermal: intel: quark_dts: Use generic trip points Make the intel_quark_dts_thermal driver register an array of generic trip points along with the thermal zone and drop the trip points thermal zone callbacks that are not used any more from it. Signed-off-by: Daniel Lezcano [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- .../thermal/intel/intel_quark_dts_thermal.c | 55 +++++++------------ 1 file changed, 20 insertions(+), 35 deletions(-) diff --git a/drivers/thermal/intel/intel_quark_dts_thermal.c b/drivers/thermal/intel/intel_quark_dts_thermal.c index 3eafc6b0e6c3..97b843fa7568 100644 --- a/drivers/thermal/intel/intel_quark_dts_thermal.c +++ b/drivers/thermal/intel/intel_quark_dts_thermal.c @@ -84,6 +84,7 @@ #define QRK_DTS_MASK_TP_THRES 0xFF #define QRK_DTS_SHIFT_TP 8 #define QRK_DTS_ID_TP_CRITICAL 0 +#define QRK_DTS_ID_TP_HOT 1 #define QRK_DTS_SAFE_TP_THRES 105 /* Thermal Sensor Register Lock */ @@ -104,6 +105,7 @@ struct soc_sensor_entry { u32 store_ptps; u32 store_dts_enable; struct thermal_zone_device *tzone; + struct thermal_trip trips[QRK_MAX_DTS_TRIPS]; }; static struct soc_sensor_entry *soc_dts; @@ -172,9 +174,9 @@ static int soc_dts_disable(struct thermal_zone_device *tzd) return ret; } -static int _get_trip_temp(int trip, int *temp) +static int get_trip_temp(int trip) { - int status; + int status, temp; u32 out; mutex_lock(&dts_update_mutex); @@ -183,7 +185,7 @@ static int _get_trip_temp(int trip, int *temp) mutex_unlock(&dts_update_mutex); if (status) - return status; + return THERMAL_TEMP_INVALID; /* * Thermal Sensor Programmable Trip Point Register has 8-bit @@ -191,21 +193,10 @@ static int _get_trip_temp(int trip, int *temp) * thresholds. The threshold value is always offset by its * temperature base (50 degree Celsius). */ - *temp = (out >> (trip * QRK_DTS_SHIFT_TP)) & QRK_DTS_MASK_TP_THRES; - *temp -= QRK_DTS_TEMP_BASE; + temp = (out >> (trip * QRK_DTS_SHIFT_TP)) & QRK_DTS_MASK_TP_THRES; + temp -= QRK_DTS_TEMP_BASE; - return 0; -} - -static inline int sys_get_trip_temp(struct thermal_zone_device *tzd, - int trip, int *temp) -{ - return _get_trip_temp(trip, temp); -} - -static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, int *temp) -{ - return _get_trip_temp(QRK_DTS_ID_TP_CRITICAL, temp); + return temp; } static int update_trip_temp(struct soc_sensor_entry *aux_entry, @@ -262,17 +253,6 @@ static inline int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, return update_trip_temp(tzd->devdata, trip, temp); } -static int sys_get_trip_type(struct thermal_zone_device *thermal, - int trip, enum thermal_trip_type *type) -{ - if (trip) - *type = THERMAL_TRIP_HOT; - else - *type = THERMAL_TRIP_CRITICAL; - - return 0; -} - static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp) { @@ -315,10 +295,7 @@ static int sys_change_mode(struct thermal_zone_device *tzd, static struct thermal_zone_device_ops tzone_ops = { .get_temp = sys_get_curr_temp, - .get_trip_temp = sys_get_trip_temp, - .get_trip_type = sys_get_trip_type, .set_trip_temp = sys_set_trip_temp, - .get_crit_temp = sys_get_crit_temp, .change_mode = sys_change_mode, }; @@ -385,10 +362,18 @@ static struct soc_sensor_entry *alloc_soc_dts(void) goto err_ret; } - aux_entry->tzone = thermal_zone_device_register("quark_dts", - QRK_MAX_DTS_TRIPS, - wr_mask, - aux_entry, &tzone_ops, NULL, 0, polling_delay); + aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = get_trip_temp(QRK_DTS_ID_TP_CRITICAL); + aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL; + + aux_entry->trips[QRK_DTS_ID_TP_HOT].temperature = get_trip_temp(QRK_DTS_ID_TP_HOT); + aux_entry->trips[QRK_DTS_ID_TP_HOT].type = THERMAL_TRIP_HOT; + + aux_entry->tzone = thermal_zone_device_register_with_trips("quark_dts", + aux_entry->trips, + QRK_MAX_DTS_TRIPS, + wr_mask, + aux_entry, &tzone_ops, + NULL, 0, polling_delay); if (IS_ERR(aux_entry->tzone)) { err = PTR_ERR(aux_entry->tzone); goto err_ret; From 621084965459d2b9e2713844aac4d803d5bb6d67 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 3 Feb 2023 20:29:02 -0800 Subject: [PATCH 19/25] thermal: intel: powerclamp: Return last requested state as cur_state When the user is reading cur_state from the thermal cooling device for Intel powerclamp device: - It returns the idle ratio from Package C-state counters when there is active idle injection session. - -1, when there is no active idle injection session. This information is not very useful as the package C-state counters vary a lot from read to read. Instead just return the last requested cur_state. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 209bb5f5ae17..1390748706a6 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -541,17 +541,7 @@ static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { mutex_lock(&powerclamp_lock); - - if (powerclamp_data.clamping) { - if (poll_pkg_cstate_enable) - *state = pkg_cstate_ratio_cur; - else - *state = powerclamp_data.target_ratio; - } else { - /* to save power, do not poll idle ratio while not clamping */ - *state = -1; /* indicates invalid state */ - } - + *state = powerclamp_data.target_ratio; mutex_unlock(&powerclamp_lock); return 0; From 966d0ab67350c6206f8053a3e6ed0b892bdc42a5 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Feb 2023 23:09:08 -0800 Subject: [PATCH 20/25] thermal: intel: powerclamp: Fix duration module parameter After the switch to use the powercap/idle-inject framework in the Intel powerclamp driver, the idle duration unit is microsecond. However, the module parameter for idle duration is in milliseconds, so convert it to microseconds in the "set" callback and back to milliseconds in a new "get" callback. While here, also use mutex protection for setting and getting "duration". The other uses of "duration" are already protected by the mutex. Fixes: 8526eb7fc75a ("thermal: intel: powerclamp: Use powercap idle-inject feature") Signed-off-by: Srinivas Pandruvada [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 1390748706a6..5995d10b5699 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -74,6 +74,7 @@ static struct thermal_cooling_device *cooling_dev; static DEFINE_MUTEX(powerclamp_lock); +/* This duration is in microseconds */ static unsigned int duration; static unsigned int pkg_cstate_ratio_cur; static unsigned int window_size; @@ -90,23 +91,34 @@ static int duration_set(const char *arg, const struct kernel_param *kp) pr_err("Out of recommended range %lu, between 6-25ms\n", new_duration); ret = -EINVAL; + goto exit; } - duration = clamp(new_duration, 6ul, 25ul); - smp_mb(); - + mutex_lock(&powerclamp_lock); + duration = clamp(new_duration, 6ul, 25ul) * 1000; + mutex_unlock(&powerclamp_lock); exit: return ret; } +static int duration_get(char *buf, const struct kernel_param *kp) +{ + int ret; + + mutex_lock(&powerclamp_lock); + ret = sysfs_emit(buf, "%d\n", duration / 1000); + mutex_unlock(&powerclamp_lock); + + return ret; +} + static const struct kernel_param_ops duration_ops = { .set = duration_set, - .get = param_get_int, + .get = duration_get, }; - -module_param_cb(duration, &duration_ops, &duration, 0644); +module_param_cb(duration, &duration_ops, NULL, 0644); MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec."); struct powerclamp_calibration_data { From 707bf8e1dfd51dd0cafe92da24f5276702edebe5 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Feb 2023 09:32:18 -0800 Subject: [PATCH 21/25] Documentation: admin-guide: Move intel_powerclamp documentation Create a folder "thermal" under Documentation/admin-guide and move intel_powerclamp documentation to this folder. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/index.rst | 1 + .../{driver-api => admin-guide}/thermal/intel_powerclamp.rst | 0 Documentation/driver-api/thermal/index.rst | 1 - MAINTAINERS | 1 + 4 files changed, 2 insertions(+), 1 deletion(-) rename Documentation/{driver-api => admin-guide}/thermal/intel_powerclamp.rst (100%) diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 5bfafcbb9562..c872a8a1ddfa 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -116,6 +116,7 @@ configure specific aspects of kernel behavior to your liking. svga syscall-user-dispatch sysrq + thermal thunderbolt ufs unicode diff --git a/Documentation/driver-api/thermal/intel_powerclamp.rst b/Documentation/admin-guide/thermal/intel_powerclamp.rst similarity index 100% rename from Documentation/driver-api/thermal/intel_powerclamp.rst rename to Documentation/admin-guide/thermal/intel_powerclamp.rst diff --git a/Documentation/driver-api/thermal/index.rst b/Documentation/driver-api/thermal/index.rst index 030306ffa408..a886028014ab 100644 --- a/Documentation/driver-api/thermal/index.rst +++ b/Documentation/driver-api/thermal/index.rst @@ -14,7 +14,6 @@ Thermal exynos_thermal exynos_thermal_emulation - intel_powerclamp nouveau_thermal x86_pkg_temperature_thermal intel_dptf diff --git a/MAINTAINERS b/MAINTAINERS index f781f936ae35..ad8d7819f483 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20707,6 +20707,7 @@ S: Supported Q: https://patchwork.kernel.org/project/linux-pm/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git thermal F: Documentation/ABI/testing/sysfs-class-thermal +F: Documentation/admin-guide/thermal/ F: Documentation/devicetree/bindings/thermal/ F: Documentation/driver-api/thermal/ F: drivers/thermal/ From ebf519710218814cf827adbf9111af081344c969 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Feb 2023 09:35:34 -0800 Subject: [PATCH 22/25] thermal: intel: powerclamp: Add two module parameters In some use cases, it is desirable to only inject idle on certain set of CPUs. For example on Alder Lake systems, it is possible that we force idle only on P-Cores for thermal reasons. Also the idle percent can be more than 50% if we only choose partial set of CPUs in the system. Introduce 2 new module parameters for this purpose. They can be only changed when the cooling device is inactive. cpumask (Read/Write): A bit mask of CPUs to inject idle. The format of this bitmask is same as used in other subsystems like in /proc/irq/*/smp_affinity. The mask is comma separated 32 bit groups. Each CPU is one bit. For example for 256 CPU system the full mask is: ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff The rightmost mask is for CPU 0-32. max_idle (Read/Write): Maximum injected idle time to the total CPU time ratio in percent range from 1 to 100. Even if the cooling device max_state is always 100 (100%), this parameter allows to add a max idle percent limit. The default is 50, to match the current implementation of powerclamp driver. Also doesn't allow value more than 75, if the cpumask includes every CPU present in the system. Also when the cpumask doesn't include every CPU, there is no use of compensation using package C-state idle counters. Hence don't start package C-state polling thread even for a single package or a single die system in this case. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- .../admin-guide/thermal/intel_powerclamp.rst | 22 +++ drivers/thermal/intel/intel_powerclamp.c | 176 ++++++++++++++++-- 2 files changed, 178 insertions(+), 20 deletions(-) diff --git a/Documentation/admin-guide/thermal/intel_powerclamp.rst b/Documentation/admin-guide/thermal/intel_powerclamp.rst index 3f6dfb0b3ea6..2d9d2d739f02 100644 --- a/Documentation/admin-guide/thermal/intel_powerclamp.rst +++ b/Documentation/admin-guide/thermal/intel_powerclamp.rst @@ -26,6 +26,8 @@ By: - Generic Thermal Layer (sysfs) - Kernel APIs (TBD) + (*) Module Parameters + INTRODUCTION ============ @@ -318,3 +320,23 @@ device, a PID based userspace thermal controller can manage to control CPU temperature effectively, when no other thermal influence is added. For example, a UltraBook user can compile the kernel under certain temperature (below most active trip points). + +Module Parameters +================= + +``cpumask`` (RW) + A bit mask of CPUs to inject idle. The format of the bitmask is same as + used in other subsystems like in /proc/irq/*/smp_affinity. The mask is + comma separated 32 bit groups. Each CPU is one bit. For example for a 256 + CPU system the full mask is: + ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff + + The rightmost mask is for CPU 0-32. + +``max_idle`` (RW) + Maximum injected idle time to the total CPU time ratio in percent range + from 1 to 100. Even if the cooling device max_state is always 100 (100%), + this parameter allows to add a max idle percent limit. The default is 50, + to match the current implementation of powerclamp driver. Also doesn't + allow value more than 75, if the cpumask includes every CPU present in + the system. diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 5995d10b5699..c7ba5680cd48 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -37,7 +37,7 @@ #include #include -#define MAX_TARGET_RATIO (50U) +#define MAX_TARGET_RATIO (100U) /* For each undisturbed clamping period (no extra wake ups during idle time), * we increment the confidence counter for the given target ratio. * CONFIDENCE_OK defines the level where runtime calibration results are @@ -121,6 +121,141 @@ static const struct kernel_param_ops duration_ops = { module_param_cb(duration, &duration_ops, NULL, 0644); MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec."); +#define DEFAULT_MAX_IDLE 50 +#define MAX_ALL_CPU_IDLE 75 + +static u8 max_idle = DEFAULT_MAX_IDLE; + +static cpumask_var_t idle_injection_cpu_mask; + +static int allocate_copy_idle_injection_mask(const struct cpumask *copy_mask) +{ + if (cpumask_available(idle_injection_cpu_mask)) + goto copy_mask; + + /* This mask is allocated only one time and freed during module exit */ + if (!alloc_cpumask_var(&idle_injection_cpu_mask, GFP_KERNEL)) + return -ENOMEM; + +copy_mask: + cpumask_copy(idle_injection_cpu_mask, copy_mask); + + return 0; +} + +/* Return true if the cpumask and idle percent combination is invalid */ +static bool check_invalid(cpumask_var_t mask, u8 idle) +{ + if (cpumask_equal(cpu_present_mask, mask) && idle > MAX_ALL_CPU_IDLE) + return true; + + return false; +} + +static int cpumask_set(const char *arg, const struct kernel_param *kp) +{ + cpumask_var_t new_mask; + int ret; + + mutex_lock(&powerclamp_lock); + + /* Can't set mask when cooling device is in use */ + if (powerclamp_data.clamping) { + ret = -EAGAIN; + goto skip_cpumask_set; + } + + ret = alloc_cpumask_var(&new_mask, GFP_KERNEL); + if (!ret) + goto skip_cpumask_set; + + ret = bitmap_parse(arg, strlen(arg), cpumask_bits(new_mask), + nr_cpumask_bits); + if (ret) + goto free_cpumask_set; + + if (cpumask_empty(new_mask) || check_invalid(new_mask, max_idle)) { + ret = -EINVAL; + goto free_cpumask_set; + } + + /* + * When module parameters are passed from kernel command line + * during insmod, the module parameter callback is called + * before powerclamp_init(), so we can't assume that some + * cpumask can be allocated and copied before here. Also + * in this case this cpumask is used as the default mask. + */ + ret = allocate_copy_idle_injection_mask(new_mask); + +free_cpumask_set: + free_cpumask_var(new_mask); +skip_cpumask_set: + mutex_unlock(&powerclamp_lock); + + return ret; +} + +static int cpumask_get(char *buf, const struct kernel_param *kp) +{ + if (!cpumask_available(idle_injection_cpu_mask)) + return -ENODEV; + + return bitmap_print_to_pagebuf(false, buf, cpumask_bits(idle_injection_cpu_mask), + nr_cpumask_bits); +} + +static const struct kernel_param_ops cpumask_ops = { + .set = cpumask_set, + .get = cpumask_get, +}; + +module_param_cb(cpumask, &cpumask_ops, NULL, 0644); +MODULE_PARM_DESC(cpumask, "Mask of CPUs to use for idle injection."); + +static int max_idle_set(const char *arg, const struct kernel_param *kp) +{ + u8 new_max_idle; + int ret = 0; + + mutex_lock(&powerclamp_lock); + + /* Can't set mask when cooling device is in use */ + if (powerclamp_data.clamping) { + ret = -EAGAIN; + goto skip_limit_set; + } + + ret = kstrtou8(arg, 10, &new_max_idle); + if (ret) + goto skip_limit_set; + + if (new_max_idle > MAX_TARGET_RATIO) { + ret = -EINVAL; + goto skip_limit_set; + } + + if (check_invalid(idle_injection_cpu_mask, new_max_idle)) { + ret = -EINVAL; + goto skip_limit_set; + } + + max_idle = new_max_idle; + +skip_limit_set: + mutex_unlock(&powerclamp_lock); + + return ret; +} + +static const struct kernel_param_ops max_idle_ops = { + .set = max_idle_set, + .get = param_get_int, +}; + +module_param_cb(max_idle, &max_idle_ops, &max_idle, 0644); +MODULE_PARM_DESC(max_idle, "maximum injected idle time to the total CPU time ratio in percent range:1-100"); + struct powerclamp_calibration_data { unsigned long confidence; /* used for calibration, basically a counter * gets incremented each time a clamping @@ -472,21 +607,15 @@ static void trigger_idle_injection(void) */ static int powerclamp_idle_injection_register(void) { - /* - * The idle inject core will only inject for online CPUs, - * So we can register for all present CPUs. In this way - * if some CPU goes online/offline while idle inject - * is registered, nothing additional calls are required. - * The same runtime and idle time is applicable for - * newly onlined CPUs if any. - * - * Here cpu_present_mask can be used as is. - * cast to (struct cpumask *) is required as the - * cpu_present_mask is const struct cpumask *, otherwise - * there will be compiler warnings. - */ - ii_dev = idle_inject_register_full((struct cpumask *)cpu_present_mask, - idle_inject_update); + poll_pkg_cstate_enable = false; + if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) { + ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update); + if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) + poll_pkg_cstate_enable = true; + } else { + ii_dev = idle_inject_register(idle_injection_cpu_mask); + } + if (!ii_dev) { pr_err("powerclamp: idle_inject_register failed\n"); return -EAGAIN; @@ -567,7 +696,7 @@ static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, mutex_lock(&powerclamp_lock); new_target_ratio = clamp(new_target_ratio, 0UL, - (unsigned long) (MAX_TARGET_RATIO - 1)); + (unsigned long) (max_idle - 1)); if (!powerclamp_data.target_ratio && new_target_ratio > 0) { pr_info("Start idle injection to reduce power\n"); powerclamp_data.target_ratio = new_target_ratio; @@ -658,15 +787,19 @@ static int __init powerclamp_init(void) /* probe cpu features and ids here */ retval = powerclamp_probe(); + if (retval) + return retval; + + mutex_lock(&powerclamp_lock); + retval = allocate_copy_idle_injection_mask(cpu_present_mask); + mutex_unlock(&powerclamp_lock); + if (retval) return retval; /* set default limit, maybe adjusted during runtime based on feedback */ window_size = 2; - if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) - poll_pkg_cstate_enable = true; - cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, &powerclamp_cooling_ops); if (IS_ERR(cooling_dev)) @@ -691,6 +824,9 @@ static void __exit powerclamp_exit(void) cancel_delayed_work_sync(&poll_pkg_cstate_work); debugfs_remove_recursive(debug_dir); + + if (cpumask_available(idle_injection_cpu_mask)) + free_cpumask_var(idle_injection_cpu_mask); } module_exit(powerclamp_exit); From e63273c1bc637fad711488afe9c2f1ab58a7ae86 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 13 Feb 2023 17:07:59 +0700 Subject: [PATCH 23/25] Documentation: admin-guide: Add toctree entry for thermal docs kernel test robot reported htmldocs warnings: Documentation/admin-guide/index.rst:62: WARNING: toctree contains reference to nonexisting document 'admin-guide/thermal' Documentation/admin-guide/thermal/intel_powerclamp.rst: WARNING: document isn't included in any toctree Add toctree entry for thermal/ docs to fix these warnings. Link: https://lore.kernel.org/linux-doc/202302121759.MmJgDTxc-lkp@intel.com/ Fixes: 707bf8e1dfd51d ("Documentation: admin-guide: Move intel_powerclamp documentation") Reported-by: kernel test robot Signed-off-by: Bagas Sanjaya Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/index.rst | 2 +- Documentation/admin-guide/thermal/index.rst | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 Documentation/admin-guide/thermal/index.rst diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index c872a8a1ddfa..0571938ecdc8 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -116,7 +116,7 @@ configure specific aspects of kernel behavior to your liking. svga syscall-user-dispatch sysrq - thermal + thermal/index thunderbolt ufs unicode diff --git a/Documentation/admin-guide/thermal/index.rst b/Documentation/admin-guide/thermal/index.rst new file mode 100644 index 000000000000..193b7b01a87d --- /dev/null +++ b/Documentation/admin-guide/thermal/index.rst @@ -0,0 +1,8 @@ +================= +Thermal Subsystem +================= + +.. toctree:: + :maxdepth: 1 + + intel_powerclamp From e8b703ed75ef859d9b8c77f0ff92558011907b7f Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 13 Feb 2023 17:08:00 +0700 Subject: [PATCH 24/25] Documentation: powerclamp: Escape wildcard in cpumask description kernel test robot reported htmldocs warning: Documentation/admin-guide/thermal/intel_powerclamp.rst:328: WARNING: Inline emphasis start-string without end-string. The mistaken asterisk in /proc/irq/*/smp_affinity is rendered as hyperlink as the result. Escape the asterisk to fix above warning. Link: https://lore.kernel.org/linux-doc/202302122247.N4S791c4-lkp@intel.com/ Fixes: ebf51971021881 ("thermal: intel: powerclamp: Add two module parameters") Reported-by: kernel test robot Signed-off-by: Bagas Sanjaya Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/thermal/intel_powerclamp.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/thermal/intel_powerclamp.rst b/Documentation/admin-guide/thermal/intel_powerclamp.rst index 2d9d2d739f02..f919fbe1cdd9 100644 --- a/Documentation/admin-guide/thermal/intel_powerclamp.rst +++ b/Documentation/admin-guide/thermal/intel_powerclamp.rst @@ -326,7 +326,7 @@ Module Parameters ``cpumask`` (RW) A bit mask of CPUs to inject idle. The format of the bitmask is same as - used in other subsystems like in /proc/irq/*/smp_affinity. The mask is + used in other subsystems like in /proc/irq/\*/smp_affinity. The mask is comma separated 32 bit groups. Each CPU is one bit. For example for a 256 CPU system the full mask is: ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff From fef1f0be10c61dd16fd370964c316c399483448f Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 13 Feb 2023 17:08:01 +0700 Subject: [PATCH 25/25] Documentation: powerclamp: Fix numbered lists formatting Texts in numbered lists are rendered as continous paragraph when there should have been breaks between first line text in the beginning of list item and the description. Fix this by adding appropriate line breaks and indent the rest of lines to match the first line of numbered list item. Fixes: d6d71ee4a14ae6 ("PM: Introduce Intel PowerClamp Driver") Fixes: 6bbe6f5732faea ("docs: thermal: convert to ReST") Signed-off-by: Bagas Sanjaya Signed-off-by: Rafael J. Wysocki --- .../admin-guide/thermal/intel_powerclamp.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/thermal/intel_powerclamp.rst b/Documentation/admin-guide/thermal/intel_powerclamp.rst index f919fbe1cdd9..3ce96043af17 100644 --- a/Documentation/admin-guide/thermal/intel_powerclamp.rst +++ b/Documentation/admin-guide/thermal/intel_powerclamp.rst @@ -155,13 +155,15 @@ b) determine the amount of compensation needed at each target ratio Compensation to each target ratio consists of two parts: a) steady state error compensation - This is to offset the error occurring when the system can - enter idle without extra wakeups (such as external interrupts). + + This is to offset the error occurring when the system can + enter idle without extra wakeups (such as external interrupts). b) dynamic error compensation - When an excessive amount of wakeups occurs during idle, an - additional idle ratio can be added to quiet interrupts, by - slowing down CPU activities. + + When an excessive amount of wakeups occurs during idle, an + additional idle ratio can be added to quiet interrupts, by + slowing down CPU activities. A debugfs file is provided for the user to examine compensation progress and results, such as on a Westmere system:: @@ -283,6 +285,7 @@ cur_state returns value -1 instead of 0 which is to avoid confusing 100% busy state with the disabled state. Example usage: + - To inject 25% idle time:: $ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state