linux/drivers/thermal/thermal_trip.c
Nícolas F. R. A. Prado cf3986f8c0 thermal: core: Don't update trip points inside the hysteresis range
When searching for the trip points that need to be set, the nearest
higher trip point's temperature is used for the high trip, while the
nearest lower trip point's temperature minus the hysteresis is used for
the low trip. The issue with this logic is that when the current
temperature is inside a trip point's hysteresis range, both high and low
trips will come from the same trip point. As a consequence instability
can still occur like this:
* the temperature rises slightly and enters the hysteresis range of a
  trip point
* polling happens and updates the trip points to the hysteresis range
* the temperature falls slightly, exiting the hysteresis range, crossing
  the trip point and triggering an IRQ, the trip points are updated
* repeat

So even though the current hysteresis implementation prevents
instability caused by IRQs triggering in both directions on the same
temperature value, it doesn't prevent instability caused by an IRQ in
one direction and polling in the other.

To properly implement a hysteresis behavior, when inside the hysteresis
range, don't update the trip points. This way, the previously set trip
points will stay in effect, which will in a way remember the previous
state (if the temperature signal came from above or below the range) and
therefore have the right trip point already set.

The exception is if there was no previous trip point set, in which case
a previous state doesn't exist, and so it's sensible to allow the
hysteresis range as trip points.

The following logs show the current behavior when running on a real
machine:

[  202.524658] thermal thermal_zone0: new temperature boundaries: -2147483647 < x < 40000
   203.562817: thermal_temperature: thermal_zone=vpu0-thermal id=0 temp_prev=36986 temp=37979
[  203.562845] thermal thermal_zone0: new temperature boundaries: 37000 < x < 40000
   204.176059: thermal_temperature: thermal_zone=vpu0-thermal id=0 temp_prev=37979 temp=40028
[  204.176089] thermal thermal_zone0: new temperature boundaries: 37000 < x < 100000
   205.226813: thermal_temperature: thermal_zone=vpu0-thermal id=0 temp_prev=40028 temp=38652
[  205.226842] thermal thermal_zone0: new temperature boundaries: 37000 < x < 40000

And with this patch applied:

[  184.933415] thermal thermal_zone0: new temperature boundaries: -2147483647 < x < 40000
   185.981182: thermal_temperature: thermal_zone=vpu0-thermal id=0 temp_prev=36986 temp=37872
   186.744685: thermal_temperature: thermal_zone=vpu0-thermal id=0 temp_prev=37872 temp=40058
[  186.744716] thermal thermal_zone0: new temperature boundaries: 37000 < x < 100000
   187.773284: thermal_temperature: thermal_zone=vpu0-thermal id=0 temp_prev=40058 temp=38698

Fixes: 060c034a97 ("thermal: Add support for hardware-tracked trip points")
Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Co-developed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2023-10-20 19:26:37 +02:00

198 lines
4.7 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2008 Intel Corp
* Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
* Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
* Copyright 2022 Linaro Limited
*
* Thermal trips handling
*/
#include "thermal_core.h"
/**
 * for_each_thermal_trip - Invoke a callback on every trip of a thermal zone
 * @tz: thermal zone whose trips are walked
 * @cb: function called once per trip, receiving the trip and @data
 * @data: opaque pointer handed to @cb unchanged
 *
 * The walk stops at the first invocation of @cb that returns a non-zero
 * value. No locking is done here; thermal_zone_for_each_trip() wraps this
 * function with tz->lock held.
 *
 * Return: the first non-zero value returned by @cb, or 0 when every
 * invocation returned 0 (including when no trip was visited).
 */
int for_each_thermal_trip(struct thermal_zone_device *tz,
			  int (*cb)(struct thermal_trip *, void *),
			  void *data)
{
	struct thermal_trip *pos;
	int rc;

	for_each_trip(tz, pos) {
		rc = cb(pos, data);
		/* A non-zero result aborts the walk and is propagated as-is. */
		if (rc)
			return rc;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(for_each_thermal_trip);
/**
 * thermal_zone_for_each_trip - Walk the trips of a zone under tz->lock
 * @tz: thermal zone whose trips are walked
 * @cb: function called once per trip, receiving the trip and @data
 * @data: opaque pointer handed to @cb unchanged
 *
 * Takes tz->lock, delegates to for_each_thermal_trip() and releases the
 * lock again, so @cb runs with tz->lock held.
 *
 * Return: whatever for_each_thermal_trip() returned.
 */
int thermal_zone_for_each_trip(struct thermal_zone_device *tz,
			       int (*cb)(struct thermal_trip *, void *),
			       void *data)
{
	int rc;

	mutex_lock(&tz->lock);
	rc = for_each_thermal_trip(tz, cb, data);
	mutex_unlock(&tz->lock);

	return rc;
}
EXPORT_SYMBOL_GPL(thermal_zone_for_each_trip);
/**
 * thermal_zone_get_num_trips - Report how many trips a thermal zone has
 * @tz: thermal zone to query; dereferenced without a NULL check
 *
 * Return: the value of tz->num_trips.
 */
int thermal_zone_get_num_trips(struct thermal_zone_device *tz)
{
	const int count = tz->num_trips;

	return count;
}
EXPORT_SYMBOL_GPL(thermal_zone_get_num_trips);
/**
* __thermal_zone_set_trips - Computes the next trip points for the driver
* @tz: a pointer to a thermal zone device structure
*
* The function computes the next temperature boundaries by browsing
* the trip points. The result is the closer low and high trip points
* to the current temperature. These values are passed to the backend
* driver to let it set its own notification mechanism (usually an
* interrupt).
*
* This function must be called with tz->lock held. Both tz and tz->ops
* must be valid pointers.
*
* It does not return a value
*/
void __thermal_zone_set_trips(struct thermal_zone_device *tz)
{
struct thermal_trip trip;
int low = -INT_MAX, high = INT_MAX;
bool same_trip = false;
int i, ret;
lockdep_assert_held(&tz->lock);
if (!tz->ops->set_trips)
return;
for (i = 0; i < tz->num_trips; i++) {
bool low_set = false;
int trip_low;
ret = __thermal_zone_get_trip(tz, i , &trip);
if (ret)
return;
trip_low = trip.temperature - trip.hysteresis;
if (trip_low < tz->temperature && trip_low > low) {
low = trip_low;
low_set = true;
same_trip = false;
}
if (trip.temperature > tz->temperature &&
trip.temperature < high) {
high = trip.temperature;
same_trip = low_set;
}
}
/* No need to change trip points */
if (tz->prev_low_trip == low && tz->prev_high_trip == high)
return;
/*
* If "high" and "low" are the same, skip the change unless this is the
* first time.
*/
if (same_trip && (tz->prev_low_trip != -INT_MAX ||
tz->prev_high_trip != INT_MAX))
return;
tz->prev_low_trip = low;
tz->prev_high_trip = high;
dev_dbg(&tz->device,
"new temperature boundaries: %d < x < %d\n", low, high);
/*
* Set a temperature window. When this window is left the driver
* must inform the thermal core via thermal_zone_device_update.
*/
ret = tz->ops->set_trips(tz, low, high);
if (ret)
dev_err(&tz->device, "Failed to set trips: %d\n", ret);
}
/**
 * __thermal_zone_get_trip - Copy one trip out of a zone's trips[] table
 * @tz: thermal zone to read from
 * @trip_id: index of the trip within tz->trips
 * @trip: destination that receives a copy of the trip
 *
 * No locking is done here; thermal_zone_get_trip() is the variant that
 * takes tz->lock around this call.
 *
 * Return: 0 on success, -EINVAL if @tz or @trip is NULL, if the zone has
 * no trips[] table, or if @trip_id is outside [0, tz->num_trips).
 */
int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
			    struct thermal_trip *trip)
{
	/* Both the zone and the output buffer must be supplied. */
	if (!tz || !trip)
		return -EINVAL;

	/* Without a trips[] table there is nothing to copy from. */
	if (!tz->trips)
		return -EINVAL;

	if (trip_id < 0 || trip_id >= tz->num_trips)
		return -EINVAL;

	*trip = tz->trips[trip_id];

	return 0;
}
EXPORT_SYMBOL_GPL(__thermal_zone_get_trip);
/**
 * thermal_zone_get_trip - Copy one trip out of a zone under tz->lock
 * @tz: thermal zone to read from; its lock is taken before any validation,
 *      so it is dereferenced unconditionally
 * @trip_id: index of the trip within the zone
 * @trip: destination that receives a copy of the trip
 *
 * Return: whatever __thermal_zone_get_trip() returned.
 */
int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
			  struct thermal_trip *trip)
{
	int rc;

	mutex_lock(&tz->lock);
	rc = __thermal_zone_get_trip(tz, trip_id, trip);
	mutex_unlock(&tz->lock);

	return rc;
}
EXPORT_SYMBOL_GPL(thermal_zone_get_trip);
int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,
const struct thermal_trip *trip)
{
struct thermal_trip t;
int ret;
if (!tz->ops->set_trip_temp && !tz->ops->set_trip_hyst && !tz->trips)
return -EINVAL;
ret = __thermal_zone_get_trip(tz, trip_id, &t);
if (ret)
return ret;
if (t.type != trip->type)
return -EINVAL;
if (t.temperature != trip->temperature && tz->ops->set_trip_temp) {
ret = tz->ops->set_trip_temp(tz, trip_id, trip->temperature);
if (ret)
return ret;
}
if (t.hysteresis != trip->hysteresis && tz->ops->set_trip_hyst) {
ret = tz->ops->set_trip_hyst(tz, trip_id, trip->hysteresis);
if (ret)
return ret;
}
if (tz->trips && (t.temperature != trip->temperature || t.hysteresis != trip->hysteresis))
tz->trips[trip_id] = *trip;
thermal_notify_tz_trip_change(tz->id, trip_id, trip->type,
trip->temperature, trip->hysteresis);
__thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
return 0;
}
int thermal_zone_trip_id(struct thermal_zone_device *tz,
const struct thermal_trip *trip)
{
/*
* Assume the trip to be located within the bounds of the thermal
* zone's trips[] table.
*/
return trip - tz->trips;
}