1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * A power allocator to manage temperature 4 * 5 * Copyright (C) 2014 ARM Ltd. 6 * 7 */ 8 9 #define pr_fmt(fmt) "Power allocator: " fmt 10 11 #include <linux/rculist.h> 12 #include <linux/slab.h> 13 #include <linux/thermal.h> 14 15 #define CREATE_TRACE_POINTS 16 #include <trace/events/thermal_power_allocator.h> 17 18 #include "thermal_core.h" 19 20 #define INVALID_TRIP -1 21 22 #define FRAC_BITS 10 23 #define int_to_frac(x) ((x) << FRAC_BITS) 24 #define frac_to_int(x) ((x) >> FRAC_BITS) 25 26 /** 27 * mul_frac() - multiply two fixed-point numbers 28 * @x: first multiplicand 29 * @y: second multiplicand 30 * 31 * Return: the result of multiplying two fixed-point numbers. The 32 * result is also a fixed-point number. 33 */ 34 static inline s64 mul_frac(s64 x, s64 y) 35 { 36 return (x * y) >> FRAC_BITS; 37 } 38 39 /** 40 * div_frac() - divide two fixed-point numbers 41 * @x: the dividend 42 * @y: the divisor 43 * 44 * Return: the result of dividing two fixed-point numbers. The 45 * result is also a fixed-point number. 46 */ 47 static inline s64 div_frac(s64 x, s64 y) 48 { 49 return div_s64(x << FRAC_BITS, y); 50 } 51 52 /** 53 * struct power_allocator_params - parameters for the power allocator governor 54 * @allocated_tzp: whether we have allocated tzp for this thermal zone and 55 * it needs to be freed on unbind 56 * @err_integral: accumulated error in the PID controller. 57 * @prev_err: error in the previous iteration of the PID controller. 58 * Used to calculate the derivative term. 59 * @trip_switch_on: first passive trip point of the thermal zone. The 60 * governor switches on when this trip point is crossed. 61 * If the thermal zone only has one passive trip point, 62 * @trip_switch_on should be INVALID_TRIP. 63 * @trip_max_desired_temperature: last passive trip point of the thermal 64 * zone. The temperature we are 65 * controlling for. 66 */ 67 struct power_allocator_params { 68 bool allocated_tzp; 69 s64 err_integral; 70 s32 prev_err; 71 int trip_switch_on; 72 int trip_max_desired_temperature; 73 }; 74 75 /** 76 * estimate_sustainable_power() - Estimate the sustainable power of a thermal zone 77 * @tz: thermal zone we are operating in 78 * 79 * For thermal zones that don't provide a sustainable_power in their 80 * thermal_zone_params, estimate one. Calculate it using the minimum 81 * power of all the cooling devices as that gives a valid value that 82 * can give some degree of functionality. For optimal performance of 83 * this governor, provide a sustainable_power in the thermal zone's 84 * thermal_zone_params. 85 */ 86 static u32 estimate_sustainable_power(struct thermal_zone_device *tz) 87 { 88 u32 sustainable_power = 0; 89 struct thermal_instance *instance; 90 struct power_allocator_params *params = tz->governor_data; 91 92 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 93 struct thermal_cooling_device *cdev = instance->cdev; 94 u32 min_power; 95 96 if (instance->trip != params->trip_max_desired_temperature) 97 continue; 98 99 if (power_actor_get_min_power(cdev, tz, &min_power)) 100 continue; 101 102 sustainable_power += min_power; 103 } 104 105 return sustainable_power; 106 } 107 108 /** 109 * estimate_pid_constants() - Estimate the constants for the PID controller 110 * @tz: thermal zone for which to estimate the constants 111 * @sustainable_power: sustainable power for the thermal zone 112 * @trip_switch_on: trip point number for the switch on temperature 113 * @control_temp: target temperature for the power allocator governor 114 * @force: whether to force the update of the constants 115 * 116 * This function is used to update the estimation of the PID 117 * controller constants in struct thermal_zone_parameters. 118 * Sustainable power is provided in case it was estimated. The 119 * estimated sustainable_power should not be stored in the 120 * thermal_zone_parameters so it has to be passed explicitly to this 121 * function. 122 * 123 * If @force is not set, the values in the thermal zone's parameters 124 * are preserved if they are not zero. If @force is set, the values 125 * in thermal zone's parameters are overwritten. 126 */ 127 static void estimate_pid_constants(struct thermal_zone_device *tz, 128 u32 sustainable_power, int trip_switch_on, 129 int control_temp, bool force) 130 { 131 int ret; 132 int switch_on_temp; 133 u32 temperature_threshold; 134 135 ret = tz->ops->get_trip_temp(tz, trip_switch_on, &switch_on_temp); 136 if (ret) 137 switch_on_temp = 0; 138 139 temperature_threshold = control_temp - switch_on_temp; 140 /* 141 * estimate_pid_constants() tries to find appropriate default 142 * values for thermal zones that don't provide them. If a 143 * system integrator has configured a thermal zone with two 144 * passive trip points at the same temperature, that person 145 * hasn't put any effort to set up the thermal zone properly 146 * so just give up. 147 */ 148 if (!temperature_threshold) 149 return; 150 151 if (!tz->tzp->k_po || force) 152 tz->tzp->k_po = int_to_frac(sustainable_power) / 153 temperature_threshold; 154 155 if (!tz->tzp->k_pu || force) 156 tz->tzp->k_pu = int_to_frac(2 * sustainable_power) / 157 temperature_threshold; 158 159 if (!tz->tzp->k_i || force) 160 tz->tzp->k_i = int_to_frac(10) / 1000; 161 /* 162 * The default for k_d and integral_cutoff is 0, so we can 163 * leave them as they are. 164 */ 165 } 166 167 /** 168 * pid_controller() - PID controller 169 * @tz: thermal zone we are operating in 170 * @control_temp: the target temperature in millicelsius 171 * @max_allocatable_power: maximum allocatable power for this thermal zone 172 * 173 * This PID controller increases the available power budget so that the 174 * temperature of the thermal zone gets as close as possible to 175 * @control_temp and limits the power if it exceeds it. k_po is the 176 * proportional term when we are overshooting, k_pu is the 177 * proportional term when we are undershooting. integral_cutoff is a 178 * threshold below which we stop accumulating the error. The 179 * accumulated error is only valid if the requested power will make 180 * the system warmer. If the system is mostly idle, there's no point 181 * in accumulating positive error. 182 * 183 * Return: The power budget for the next period. 184 */ 185 static u32 pid_controller(struct thermal_zone_device *tz, 186 int control_temp, 187 u32 max_allocatable_power) 188 { 189 s64 p, i, d, power_range; 190 s32 err, max_power_frac; 191 u32 sustainable_power; 192 struct power_allocator_params *params = tz->governor_data; 193 194 max_power_frac = int_to_frac(max_allocatable_power); 195 196 if (tz->tzp->sustainable_power) { 197 sustainable_power = tz->tzp->sustainable_power; 198 } else { 199 sustainable_power = estimate_sustainable_power(tz); 200 estimate_pid_constants(tz, sustainable_power, 201 params->trip_switch_on, control_temp, 202 true); 203 } 204 205 err = control_temp - tz->temperature; 206 err = int_to_frac(err); 207 208 /* Calculate the proportional term */ 209 p = mul_frac(err < 0 ? tz->tzp->k_po : tz->tzp->k_pu, err); 210 211 /* 212 * Calculate the integral term 213 * 214 * if the error is less than cut off allow integration (but 215 * the integral is limited to max power) 216 */ 217 i = mul_frac(tz->tzp->k_i, params->err_integral); 218 219 if (err < int_to_frac(tz->tzp->integral_cutoff)) { 220 s64 i_next = i + mul_frac(tz->tzp->k_i, err); 221 222 if (abs(i_next) < max_power_frac) { 223 i = i_next; 224 params->err_integral += err; 225 } 226 } 227 228 /* 229 * Calculate the derivative term 230 * 231 * We do err - prev_err, so with a positive k_d, a decreasing 232 * error (i.e. driving closer to the line) results in less 233 * power being applied, slowing down the controller) 234 */ 235 d = mul_frac(tz->tzp->k_d, err - params->prev_err); 236 d = div_frac(d, tz->passive_delay); 237 params->prev_err = err; 238 239 power_range = p + i + d; 240 241 /* feed-forward the known sustainable dissipatable power */ 242 power_range = sustainable_power + frac_to_int(power_range); 243 244 power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power); 245 246 trace_thermal_power_allocator_pid(tz, frac_to_int(err), 247 frac_to_int(params->err_integral), 248 frac_to_int(p), frac_to_int(i), 249 frac_to_int(d), power_range); 250 251 return power_range; 252 } 253 254 /** 255 * divvy_up_power() - divvy the allocated power between the actors 256 * @req_power: each actor's requested power 257 * @max_power: each actor's maximum available power 258 * @num_actors: size of the @req_power, @max_power and @granted_power's array 259 * @total_req_power: sum of @req_power 260 * @power_range: total allocated power 261 * @granted_power: output array: each actor's granted power 262 * @extra_actor_power: an appropriately sized array to be used in the 263 * function as temporary storage of the extra power given 264 * to the actors 265 * 266 * This function divides the total allocated power (@power_range) 267 * fairly between the actors. It first tries to give each actor a 268 * share of the @power_range according to how much power it requested 269 * compared to the rest of the actors. For example, if only one actor 270 * requests power, then it receives all the @power_range. If 271 * three actors each requests 1mW, each receives a third of the 272 * @power_range. 273 * 274 * If any actor received more than their maximum power, then that 275 * surplus is re-divvied among the actors based on how far they are 276 * from their respective maximums. 277 * 278 * Granted power for each actor is written to @granted_power, which 279 * should've been allocated by the calling function. 280 */ 281 static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors, 282 u32 total_req_power, u32 power_range, 283 u32 *granted_power, u32 *extra_actor_power) 284 { 285 u32 extra_power, capped_extra_power; 286 int i; 287 288 /* 289 * Prevent division by 0 if none of the actors request power. 290 */ 291 if (!total_req_power) 292 total_req_power = 1; 293 294 capped_extra_power = 0; 295 extra_power = 0; 296 for (i = 0; i < num_actors; i++) { 297 u64 req_range = (u64)req_power[i] * power_range; 298 299 granted_power[i] = DIV_ROUND_CLOSEST_ULL(req_range, 300 total_req_power); 301 302 if (granted_power[i] > max_power[i]) { 303 extra_power += granted_power[i] - max_power[i]; 304 granted_power[i] = max_power[i]; 305 } 306 307 extra_actor_power[i] = max_power[i] - granted_power[i]; 308 capped_extra_power += extra_actor_power[i]; 309 } 310 311 if (!extra_power) 312 return; 313 314 /* 315 * Re-divvy the reclaimed extra among actors based on 316 * how far they are from the max 317 */ 318 extra_power = min(extra_power, capped_extra_power); 319 if (capped_extra_power > 0) 320 for (i = 0; i < num_actors; i++) 321 granted_power[i] += (extra_actor_power[i] * 322 extra_power) / capped_extra_power; 323 } 324 325 static int allocate_power(struct thermal_zone_device *tz, 326 int control_temp) 327 { 328 struct thermal_instance *instance; 329 struct power_allocator_params *params = tz->governor_data; 330 u32 *req_power, *max_power, *granted_power, *extra_actor_power; 331 u32 *weighted_req_power; 332 u32 total_req_power, max_allocatable_power, total_weighted_req_power; 333 u32 total_granted_power, power_range; 334 int i, num_actors, total_weight, ret = 0; 335 int trip_max_desired_temperature = params->trip_max_desired_temperature; 336 337 mutex_lock(&tz->lock); 338 339 num_actors = 0; 340 total_weight = 0; 341 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 342 if ((instance->trip == trip_max_desired_temperature) && 343 cdev_is_power_actor(instance->cdev)) { 344 num_actors++; 345 total_weight += instance->weight; 346 } 347 } 348 349 if (!num_actors) { 350 ret = -ENODEV; 351 goto unlock; 352 } 353 354 /* 355 * We need to allocate five arrays of the same size: 356 * req_power, max_power, granted_power, extra_actor_power and 357 * weighted_req_power. They are going to be needed until this 358 * function returns. Allocate them all in one go to simplify 359 * the allocation and deallocation logic. 360 */ 361 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power)); 362 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power)); 363 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power)); 364 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*weighted_req_power)); 365 req_power = kcalloc(num_actors * 5, sizeof(*req_power), GFP_KERNEL); 366 if (!req_power) { 367 ret = -ENOMEM; 368 goto unlock; 369 } 370 371 max_power = &req_power[num_actors]; 372 granted_power = &req_power[2 * num_actors]; 373 extra_actor_power = &req_power[3 * num_actors]; 374 weighted_req_power = &req_power[4 * num_actors]; 375 376 i = 0; 377 total_weighted_req_power = 0; 378 total_req_power = 0; 379 max_allocatable_power = 0; 380 381 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 382 int weight; 383 struct thermal_cooling_device *cdev = instance->cdev; 384 385 if (instance->trip != trip_max_desired_temperature) 386 continue; 387 388 if (!cdev_is_power_actor(cdev)) 389 continue; 390 391 if (cdev->ops->get_requested_power(cdev, tz, &req_power[i])) 392 continue; 393 394 if (!total_weight) 395 weight = 1 << FRAC_BITS; 396 else 397 weight = instance->weight; 398 399 weighted_req_power[i] = frac_to_int(weight * req_power[i]); 400 401 if (power_actor_get_max_power(cdev, tz, &max_power[i])) 402 continue; 403 404 total_req_power += req_power[i]; 405 max_allocatable_power += max_power[i]; 406 total_weighted_req_power += weighted_req_power[i]; 407 408 i++; 409 } 410 411 power_range = pid_controller(tz, control_temp, max_allocatable_power); 412 413 divvy_up_power(weighted_req_power, max_power, num_actors, 414 total_weighted_req_power, power_range, granted_power, 415 extra_actor_power); 416 417 total_granted_power = 0; 418 i = 0; 419 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 420 if (instance->trip != trip_max_desired_temperature) 421 continue; 422 423 if (!cdev_is_power_actor(instance->cdev)) 424 continue; 425 426 power_actor_set_power(instance->cdev, instance, 427 granted_power[i]); 428 total_granted_power += granted_power[i]; 429 430 i++; 431 } 432 433 trace_thermal_power_allocator(tz, req_power, total_req_power, 434 granted_power, total_granted_power, 435 num_actors, power_range, 436 max_allocatable_power, tz->temperature, 437 control_temp - tz->temperature); 438 439 kfree(req_power); 440 unlock: 441 mutex_unlock(&tz->lock); 442 443 return ret; 444 } 445 446 /** 447 * get_governor_trips() - get the number of the two trip points that are key for this governor 448 * @tz: thermal zone to operate on 449 * @params: pointer to private data for this governor 450 * 451 * The power allocator governor works optimally with two trips points: 452 * a "switch on" trip point and a "maximum desired temperature". These 453 * are defined as the first and last passive trip points. 454 * 455 * If there is only one trip point, then that's considered to be the 456 * "maximum desired temperature" trip point and the governor is always 457 * on. If there are no passive or active trip points, then the 458 * governor won't do anything. In fact, its throttle function 459 * won't be called at all. 460 */ 461 static void get_governor_trips(struct thermal_zone_device *tz, 462 struct power_allocator_params *params) 463 { 464 int i, last_active, last_passive; 465 bool found_first_passive; 466 467 found_first_passive = false; 468 last_active = INVALID_TRIP; 469 last_passive = INVALID_TRIP; 470 471 for (i = 0; i < tz->trips; i++) { 472 enum thermal_trip_type type; 473 int ret; 474 475 ret = tz->ops->get_trip_type(tz, i, &type); 476 if (ret) { 477 dev_warn(&tz->device, 478 "Failed to get trip point %d type: %d\n", i, 479 ret); 480 continue; 481 } 482 483 if (type == THERMAL_TRIP_PASSIVE) { 484 if (!found_first_passive) { 485 params->trip_switch_on = i; 486 found_first_passive = true; 487 } else { 488 last_passive = i; 489 } 490 } else if (type == THERMAL_TRIP_ACTIVE) { 491 last_active = i; 492 } else { 493 break; 494 } 495 } 496 497 if (last_passive != INVALID_TRIP) { 498 params->trip_max_desired_temperature = last_passive; 499 } else if (found_first_passive) { 500 params->trip_max_desired_temperature = params->trip_switch_on; 501 params->trip_switch_on = INVALID_TRIP; 502 } else { 503 params->trip_switch_on = INVALID_TRIP; 504 params->trip_max_desired_temperature = last_active; 505 } 506 } 507 508 static void reset_pid_controller(struct power_allocator_params *params) 509 { 510 params->err_integral = 0; 511 params->prev_err = 0; 512 } 513 514 static void allow_maximum_power(struct thermal_zone_device *tz) 515 { 516 struct thermal_instance *instance; 517 struct power_allocator_params *params = tz->governor_data; 518 519 mutex_lock(&tz->lock); 520 list_for_each_entry(instance, &tz->thermal_instances, tz_node) { 521 if ((instance->trip != params->trip_max_desired_temperature) || 522 (!cdev_is_power_actor(instance->cdev))) 523 continue; 524 525 instance->target = 0; 526 mutex_lock(&instance->cdev->lock); 527 instance->cdev->updated = false; 528 mutex_unlock(&instance->cdev->lock); 529 thermal_cdev_update(instance->cdev); 530 } 531 mutex_unlock(&tz->lock); 532 } 533 534 /** 535 * power_allocator_bind() - bind the power_allocator governor to a thermal zone 536 * @tz: thermal zone to bind it to 537 * 538 * Initialize the PID controller parameters and bind it to the thermal 539 * zone. 540 * 541 * Return: 0 on success, or -ENOMEM if we ran out of memory. 542 */ 543 static int power_allocator_bind(struct thermal_zone_device *tz) 544 { 545 int ret; 546 struct power_allocator_params *params; 547 int control_temp; 548 549 params = kzalloc(sizeof(*params), GFP_KERNEL); 550 if (!params) 551 return -ENOMEM; 552 553 if (!tz->tzp) { 554 tz->tzp = kzalloc(sizeof(*tz->tzp), GFP_KERNEL); 555 if (!tz->tzp) { 556 ret = -ENOMEM; 557 goto free_params; 558 } 559 560 params->allocated_tzp = true; 561 } 562 563 if (!tz->tzp->sustainable_power) 564 dev_warn(&tz->device, "power_allocator: sustainable_power will be estimated\n"); 565 566 get_governor_trips(tz, params); 567 568 if (tz->trips > 0) { 569 ret = tz->ops->get_trip_temp(tz, 570 params->trip_max_desired_temperature, 571 &control_temp); 572 if (!ret) 573 estimate_pid_constants(tz, tz->tzp->sustainable_power, 574 params->trip_switch_on, 575 control_temp, false); 576 } 577 578 reset_pid_controller(params); 579 580 tz->governor_data = params; 581 582 return 0; 583 584 free_params: 585 kfree(params); 586 587 return ret; 588 } 589 590 static void power_allocator_unbind(struct thermal_zone_device *tz) 591 { 592 struct power_allocator_params *params = tz->governor_data; 593 594 dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id); 595 596 if (params->allocated_tzp) { 597 kfree(tz->tzp); 598 tz->tzp = NULL; 599 } 600 601 kfree(tz->governor_data); 602 tz->governor_data = NULL; 603 } 604 605 static int power_allocator_throttle(struct thermal_zone_device *tz, int trip) 606 { 607 int ret; 608 int switch_on_temp, control_temp; 609 struct power_allocator_params *params = tz->governor_data; 610 611 /* 612 * We get called for every trip point but we only need to do 613 * our calculations once 614 */ 615 if (trip != params->trip_max_desired_temperature) 616 return 0; 617 618 ret = tz->ops->get_trip_temp(tz, params->trip_switch_on, 619 &switch_on_temp); 620 if (!ret && (tz->temperature < switch_on_temp)) { 621 tz->passive = 0; 622 reset_pid_controller(params); 623 allow_maximum_power(tz); 624 return 0; 625 } 626 627 tz->passive = 1; 628 629 ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature, 630 &control_temp); 631 if (ret) { 632 dev_warn(&tz->device, 633 "Failed to get the maximum desired temperature: %d\n", 634 ret); 635 return ret; 636 } 637 638 return allocate_power(tz, control_temp); 639 } 640 641 static struct thermal_governor thermal_gov_power_allocator = { 642 .name = "power_allocator", 643 .bind_to_tz = power_allocator_bind, 644 .unbind_from_tz = power_allocator_unbind, 645 .throttle = power_allocator_throttle, 646 }; 647 THERMAL_GOVERNOR_DECLARE(thermal_gov_power_allocator); 648