1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * devfreq_cooling: Thermal cooling device implementation for devices using 4 * devfreq 5 * 6 * Copyright (C) 2014-2015 ARM Limited 7 * 8 * TODO: 9 * - If OPPs are added or removed after devfreq cooling has 10 * registered, the devfreq cooling won't react to it. 11 */ 12 13 #include <linux/devfreq.h> 14 #include <linux/devfreq_cooling.h> 15 #include <linux/export.h> 16 #include <linux/idr.h> 17 #include <linux/slab.h> 18 #include <linux/pm_opp.h> 19 #include <linux/pm_qos.h> 20 #include <linux/thermal.h> 21 22 #include <trace/events/thermal.h> 23 24 #define HZ_PER_KHZ 1000 25 #define SCALE_ERROR_MITIGATION 100 26 27 static DEFINE_IDA(devfreq_ida); 28 29 /** 30 * struct devfreq_cooling_device - Devfreq cooling device 31 * @id: unique integer value corresponding to each 32 * devfreq_cooling_device registered. 33 * @cdev: Pointer to associated thermal cooling device. 34 * @devfreq: Pointer to associated devfreq device. 35 * @cooling_state: Current cooling state. 36 * @power_table: Pointer to table with maximum power draw for each 37 * cooling state. State is the index into the table, and 38 * the power is in mW. 39 * @freq_table: Pointer to a table with the frequencies sorted in descending 40 * order. You can index the table by cooling device state 41 * @freq_table_size: Size of the @freq_table and @power_table 42 * @power_ops: Pointer to devfreq_cooling_power, used to generate the 43 * @power_table. 44 * @res_util: Resource utilization scaling factor for the power. 45 * It is multiplied by 100 to minimize the error. It is used 46 * for estimation of the power budget instead of using 47 * 'utilization' (which is 'busy_time / 'total_time'). 48 * The 'res_util' range is from 100 to (power_table[state] * 100) 49 * for the corresponding 'state'. 50 * @capped_state: index to cooling state with in dynamic power budget 51 * @req_max_freq: PM QoS request for limiting the maximum frequency 52 * of the devfreq device. 53 */ 54 struct devfreq_cooling_device { 55 int id; 56 struct thermal_cooling_device *cdev; 57 struct devfreq *devfreq; 58 unsigned long cooling_state; 59 u32 *power_table; 60 u32 *freq_table; 61 size_t freq_table_size; 62 struct devfreq_cooling_power *power_ops; 63 u32 res_util; 64 int capped_state; 65 struct dev_pm_qos_request req_max_freq; 66 }; 67 68 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, 69 unsigned long *state) 70 { 71 struct devfreq_cooling_device *dfc = cdev->devdata; 72 73 *state = dfc->freq_table_size - 1; 74 75 return 0; 76 } 77 78 static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev, 79 unsigned long *state) 80 { 81 struct devfreq_cooling_device *dfc = cdev->devdata; 82 83 *state = dfc->cooling_state; 84 85 return 0; 86 } 87 88 static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, 89 unsigned long state) 90 { 91 struct devfreq_cooling_device *dfc = cdev->devdata; 92 struct devfreq *df = dfc->devfreq; 93 struct device *dev = df->dev.parent; 94 unsigned long freq; 95 96 if (state == dfc->cooling_state) 97 return 0; 98 99 dev_dbg(dev, "Setting cooling state %lu\n", state); 100 101 if (state >= dfc->freq_table_size) 102 return -EINVAL; 103 104 freq = dfc->freq_table[state]; 105 106 dev_pm_qos_update_request(&dfc->req_max_freq, 107 DIV_ROUND_UP(freq, HZ_PER_KHZ)); 108 109 dfc->cooling_state = state; 110 111 return 0; 112 } 113 114 /** 115 * freq_get_state() - get the cooling state corresponding to a frequency 116 * @dfc: Pointer to devfreq cooling device 117 * @freq: frequency in Hz 118 * 119 * Return: the cooling state associated with the @freq, or 120 * THERMAL_CSTATE_INVALID if it wasn't found. 121 */ 122 static unsigned long 123 freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq) 124 { 125 int i; 126 127 for (i = 0; i < dfc->freq_table_size; i++) { 128 if (dfc->freq_table[i] == freq) 129 return i; 130 } 131 132 return THERMAL_CSTATE_INVALID; 133 } 134 135 static unsigned long get_voltage(struct devfreq *df, unsigned long freq) 136 { 137 struct device *dev = df->dev.parent; 138 unsigned long voltage; 139 struct dev_pm_opp *opp; 140 141 opp = dev_pm_opp_find_freq_exact(dev, freq, true); 142 if (PTR_ERR(opp) == -ERANGE) 143 opp = dev_pm_opp_find_freq_exact(dev, freq, false); 144 145 if (IS_ERR(opp)) { 146 dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n", 147 freq, PTR_ERR(opp)); 148 return 0; 149 } 150 151 voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ 152 dev_pm_opp_put(opp); 153 154 if (voltage == 0) { 155 dev_err_ratelimited(dev, 156 "Failed to get voltage for frequency %lu\n", 157 freq); 158 } 159 160 return voltage; 161 } 162 163 /** 164 * get_static_power() - calculate the static power 165 * @dfc: Pointer to devfreq cooling device 166 * @freq: Frequency in Hz 167 * 168 * Calculate the static power in milliwatts using the supplied 169 * get_static_power(). The current voltage is calculated using the 170 * OPP library. If no get_static_power() was supplied, assume the 171 * static power is negligible. 172 */ 173 static unsigned long 174 get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) 175 { 176 struct devfreq *df = dfc->devfreq; 177 unsigned long voltage; 178 179 if (!dfc->power_ops->get_static_power) 180 return 0; 181 182 voltage = get_voltage(df, freq); 183 184 if (voltage == 0) 185 return 0; 186 187 return dfc->power_ops->get_static_power(df, voltage); 188 } 189 190 /** 191 * get_dynamic_power - calculate the dynamic power 192 * @dfc: Pointer to devfreq cooling device 193 * @freq: Frequency in Hz 194 * @voltage: Voltage in millivolts 195 * 196 * Calculate the dynamic power in milliwatts consumed by the device at 197 * frequency @freq and voltage @voltage. If the get_dynamic_power() 198 * was supplied as part of the devfreq_cooling_power struct, then that 199 * function is used. Otherwise, a simple power model (Pdyn = Coeff * 200 * Voltage^2 * Frequency) is used. 201 */ 202 static unsigned long 203 get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq, 204 unsigned long voltage) 205 { 206 u64 power; 207 u32 freq_mhz; 208 struct devfreq_cooling_power *dfc_power = dfc->power_ops; 209 210 if (dfc_power->get_dynamic_power) 211 return dfc_power->get_dynamic_power(dfc->devfreq, freq, 212 voltage); 213 214 freq_mhz = freq / 1000000; 215 power = (u64)dfc_power->dyn_power_coeff * freq_mhz * voltage * voltage; 216 do_div(power, 1000000000); 217 218 return power; 219 } 220 221 222 static inline unsigned long get_total_power(struct devfreq_cooling_device *dfc, 223 unsigned long freq, 224 unsigned long voltage) 225 { 226 return get_static_power(dfc, freq) + get_dynamic_power(dfc, freq, 227 voltage); 228 } 229 230 231 static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, 232 struct thermal_zone_device *tz, 233 u32 *power) 234 { 235 struct devfreq_cooling_device *dfc = cdev->devdata; 236 struct devfreq *df = dfc->devfreq; 237 struct devfreq_dev_status *status = &df->last_status; 238 unsigned long state; 239 unsigned long freq = status->current_frequency; 240 unsigned long voltage; 241 u32 dyn_power = 0; 242 u32 static_power = 0; 243 int res; 244 245 state = freq_get_state(dfc, freq); 246 if (state == THERMAL_CSTATE_INVALID) { 247 res = -EAGAIN; 248 goto fail; 249 } 250 251 if (dfc->power_ops->get_real_power) { 252 voltage = get_voltage(df, freq); 253 if (voltage == 0) { 254 res = -EINVAL; 255 goto fail; 256 } 257 258 res = dfc->power_ops->get_real_power(df, power, freq, voltage); 259 if (!res) { 260 state = dfc->capped_state; 261 dfc->res_util = dfc->power_table[state]; 262 dfc->res_util *= SCALE_ERROR_MITIGATION; 263 264 if (*power > 1) 265 dfc->res_util /= *power; 266 } else { 267 goto fail; 268 } 269 } else { 270 dyn_power = dfc->power_table[state]; 271 272 /* Scale dynamic power for utilization */ 273 dyn_power *= status->busy_time; 274 dyn_power /= status->total_time; 275 /* Get static power */ 276 static_power = get_static_power(dfc, freq); 277 278 *power = dyn_power + static_power; 279 } 280 281 trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power, 282 static_power, *power); 283 284 return 0; 285 fail: 286 /* It is safe to set max in this case */ 287 dfc->res_util = SCALE_ERROR_MITIGATION; 288 return res; 289 } 290 291 static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, 292 struct thermal_zone_device *tz, 293 unsigned long state, 294 u32 *power) 295 { 296 struct devfreq_cooling_device *dfc = cdev->devdata; 297 unsigned long freq; 298 u32 static_power; 299 300 if (state >= dfc->freq_table_size) 301 return -EINVAL; 302 303 freq = dfc->freq_table[state]; 304 static_power = get_static_power(dfc, freq); 305 306 *power = dfc->power_table[state] + static_power; 307 return 0; 308 } 309 310 static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, 311 struct thermal_zone_device *tz, 312 u32 power, unsigned long *state) 313 { 314 struct devfreq_cooling_device *dfc = cdev->devdata; 315 struct devfreq *df = dfc->devfreq; 316 struct devfreq_dev_status *status = &df->last_status; 317 unsigned long freq = status->current_frequency; 318 unsigned long busy_time; 319 s32 dyn_power; 320 u32 static_power; 321 s32 est_power; 322 int i; 323 324 if (dfc->power_ops->get_real_power) { 325 /* Scale for resource utilization */ 326 est_power = power * dfc->res_util; 327 est_power /= SCALE_ERROR_MITIGATION; 328 } else { 329 static_power = get_static_power(dfc, freq); 330 331 dyn_power = power - static_power; 332 dyn_power = dyn_power > 0 ? dyn_power : 0; 333 334 /* Scale dynamic power for utilization */ 335 busy_time = status->busy_time ?: 1; 336 est_power = (dyn_power * status->total_time) / busy_time; 337 } 338 339 /* 340 * Find the first cooling state that is within the power 341 * budget for dynamic power. 342 */ 343 for (i = 0; i < dfc->freq_table_size - 1; i++) 344 if (est_power >= dfc->power_table[i]) 345 break; 346 347 *state = i; 348 dfc->capped_state = i; 349 trace_thermal_power_devfreq_limit(cdev, freq, *state, power); 350 return 0; 351 } 352 353 static struct thermal_cooling_device_ops devfreq_cooling_ops = { 354 .get_max_state = devfreq_cooling_get_max_state, 355 .get_cur_state = devfreq_cooling_get_cur_state, 356 .set_cur_state = devfreq_cooling_set_cur_state, 357 }; 358 359 /** 360 * devfreq_cooling_gen_tables() - Generate power and freq tables. 361 * @dfc: Pointer to devfreq cooling device. 362 * 363 * Generate power and frequency tables: the power table hold the 364 * device's maximum power usage at each cooling state (OPP). The 365 * static and dynamic power using the appropriate voltage and 366 * frequency for the state, is acquired from the struct 367 * devfreq_cooling_power, and summed to make the maximum power draw. 368 * 369 * The frequency table holds the frequencies in descending order. 370 * That way its indexed by cooling device state. 371 * 372 * The tables are malloced, and pointers put in dfc. They must be 373 * freed when unregistering the devfreq cooling device. 374 * 375 * Return: 0 on success, negative error code on failure. 376 */ 377 static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc) 378 { 379 struct devfreq *df = dfc->devfreq; 380 struct device *dev = df->dev.parent; 381 int ret, num_opps; 382 unsigned long freq; 383 u32 *power_table = NULL; 384 u32 *freq_table; 385 int i; 386 387 num_opps = dev_pm_opp_get_opp_count(dev); 388 389 if (dfc->power_ops) { 390 power_table = kcalloc(num_opps, sizeof(*power_table), 391 GFP_KERNEL); 392 if (!power_table) 393 return -ENOMEM; 394 } 395 396 freq_table = kcalloc(num_opps, sizeof(*freq_table), 397 GFP_KERNEL); 398 if (!freq_table) { 399 ret = -ENOMEM; 400 goto free_power_table; 401 } 402 403 for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) { 404 unsigned long power, voltage; 405 struct dev_pm_opp *opp; 406 407 opp = dev_pm_opp_find_freq_floor(dev, &freq); 408 if (IS_ERR(opp)) { 409 ret = PTR_ERR(opp); 410 goto free_tables; 411 } 412 413 voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ 414 dev_pm_opp_put(opp); 415 416 if (dfc->power_ops) { 417 if (dfc->power_ops->get_real_power) 418 power = get_total_power(dfc, freq, voltage); 419 else 420 power = get_dynamic_power(dfc, freq, voltage); 421 422 dev_dbg(dev, "Power table: %lu MHz @ %lu mV: %lu = %lu mW\n", 423 freq / 1000000, voltage, power, power); 424 425 power_table[i] = power; 426 } 427 428 freq_table[i] = freq; 429 } 430 431 if (dfc->power_ops) 432 dfc->power_table = power_table; 433 434 dfc->freq_table = freq_table; 435 dfc->freq_table_size = num_opps; 436 437 return 0; 438 439 free_tables: 440 kfree(freq_table); 441 free_power_table: 442 kfree(power_table); 443 444 return ret; 445 } 446 447 /** 448 * of_devfreq_cooling_register_power() - Register devfreq cooling device, 449 * with OF and power information. 450 * @np: Pointer to OF device_node. 451 * @df: Pointer to devfreq device. 452 * @dfc_power: Pointer to devfreq_cooling_power. 453 * 454 * Register a devfreq cooling device. The available OPPs must be 455 * registered on the device. 456 * 457 * If @dfc_power is provided, the cooling device is registered with the 458 * power extensions. For the power extensions to work correctly, 459 * devfreq should use the simple_ondemand governor, other governors 460 * are not currently supported. 461 */ 462 struct thermal_cooling_device * 463 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, 464 struct devfreq_cooling_power *dfc_power) 465 { 466 struct thermal_cooling_device *cdev; 467 struct devfreq_cooling_device *dfc; 468 char dev_name[THERMAL_NAME_LENGTH]; 469 int err; 470 471 dfc = kzalloc(sizeof(*dfc), GFP_KERNEL); 472 if (!dfc) 473 return ERR_PTR(-ENOMEM); 474 475 dfc->devfreq = df; 476 477 if (dfc_power) { 478 dfc->power_ops = dfc_power; 479 480 devfreq_cooling_ops.get_requested_power = 481 devfreq_cooling_get_requested_power; 482 devfreq_cooling_ops.state2power = devfreq_cooling_state2power; 483 devfreq_cooling_ops.power2state = devfreq_cooling_power2state; 484 } 485 486 err = devfreq_cooling_gen_tables(dfc); 487 if (err) 488 goto free_dfc; 489 490 err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq, 491 DEV_PM_QOS_MAX_FREQUENCY, 492 PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); 493 if (err < 0) 494 goto free_tables; 495 496 err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); 497 if (err < 0) 498 goto remove_qos_req; 499 dfc->id = err; 500 501 snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); 502 503 cdev = thermal_of_cooling_device_register(np, dev_name, dfc, 504 &devfreq_cooling_ops); 505 if (IS_ERR(cdev)) { 506 err = PTR_ERR(cdev); 507 dev_err(df->dev.parent, 508 "Failed to register devfreq cooling device (%d)\n", 509 err); 510 goto release_ida; 511 } 512 513 dfc->cdev = cdev; 514 515 return cdev; 516 517 release_ida: 518 ida_simple_remove(&devfreq_ida, dfc->id); 519 520 remove_qos_req: 521 dev_pm_qos_remove_request(&dfc->req_max_freq); 522 523 free_tables: 524 kfree(dfc->power_table); 525 kfree(dfc->freq_table); 526 free_dfc: 527 kfree(dfc); 528 529 return ERR_PTR(err); 530 } 531 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power); 532 533 /** 534 * of_devfreq_cooling_register() - Register devfreq cooling device, 535 * with OF information. 536 * @np: Pointer to OF device_node. 537 * @df: Pointer to devfreq device. 538 */ 539 struct thermal_cooling_device * 540 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) 541 { 542 return of_devfreq_cooling_register_power(np, df, NULL); 543 } 544 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register); 545 546 /** 547 * devfreq_cooling_register() - Register devfreq cooling device. 548 * @df: Pointer to devfreq device. 549 */ 550 struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df) 551 { 552 return of_devfreq_cooling_register(NULL, df); 553 } 554 EXPORT_SYMBOL_GPL(devfreq_cooling_register); 555 556 /** 557 * devfreq_cooling_unregister() - Unregister devfreq cooling device. 558 * @cdev: Pointer to devfreq cooling device to unregister. 559 */ 560 void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) 561 { 562 struct devfreq_cooling_device *dfc; 563 564 if (!cdev) 565 return; 566 567 dfc = cdev->devdata; 568 569 thermal_cooling_device_unregister(dfc->cdev); 570 ida_simple_remove(&devfreq_ida, dfc->id); 571 dev_pm_qos_remove_request(&dfc->req_max_freq); 572 kfree(dfc->power_table); 573 kfree(dfc->freq_table); 574 575 kfree(dfc); 576 } 577 EXPORT_SYMBOL_GPL(devfreq_cooling_unregister); 578