1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * devfreq_cooling: Thermal cooling device implementation for devices using 4 * devfreq 5 * 6 * Copyright (C) 2014-2015 ARM Limited 7 * 8 * TODO: 9 * - If OPPs are added or removed after devfreq cooling has 10 * registered, the devfreq cooling won't react to it. 11 */ 12 13 #include <linux/devfreq.h> 14 #include <linux/devfreq_cooling.h> 15 #include <linux/energy_model.h> 16 #include <linux/export.h> 17 #include <linux/slab.h> 18 #include <linux/pm_opp.h> 19 #include <linux/pm_qos.h> 20 #include <linux/thermal.h> 21 #include <linux/units.h> 22 23 #include "thermal_trace.h" 24 25 #define SCALE_ERROR_MITIGATION 100 26 27 /** 28 * struct devfreq_cooling_device - Devfreq cooling device 29 * devfreq_cooling_device registered. 30 * @cdev: Pointer to associated thermal cooling device. 31 * @cooling_ops: devfreq callbacks to thermal cooling device ops 32 * @devfreq: Pointer to associated devfreq device. 33 * @cooling_state: Current cooling state. 34 * @freq_table: Pointer to a table with the frequencies sorted in descending 35 * order. You can index the table by cooling device state 36 * @max_state: It is the last index, that is, one less than the number of the 37 * OPPs 38 * @power_ops: Pointer to devfreq_cooling_power, a more precised model. 39 * @res_util: Resource utilization scaling factor for the power. 40 * It is multiplied by 100 to minimize the error. It is used 41 * for estimation of the power budget instead of using 42 * 'utilization' (which is 'busy_time' / 'total_time'). 43 * The 'res_util' range is from 100 to power * 100 for the 44 * corresponding 'state'. 45 * @capped_state: index to cooling state with in dynamic power budget 46 * @req_max_freq: PM QoS request for limiting the maximum frequency 47 * of the devfreq device. 48 * @em_pd: Energy Model for the associated Devfreq device 49 */ 50 struct devfreq_cooling_device { 51 struct thermal_cooling_device *cdev; 52 struct thermal_cooling_device_ops cooling_ops; 53 struct devfreq *devfreq; 54 unsigned long cooling_state; 55 u32 *freq_table; 56 size_t max_state; 57 struct devfreq_cooling_power *power_ops; 58 u32 res_util; 59 int capped_state; 60 struct dev_pm_qos_request req_max_freq; 61 struct em_perf_domain *em_pd; 62 }; 63 64 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, 65 unsigned long *state) 66 { 67 struct devfreq_cooling_device *dfc = cdev->devdata; 68 69 *state = dfc->max_state; 70 71 return 0; 72 } 73 74 static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev, 75 unsigned long *state) 76 { 77 struct devfreq_cooling_device *dfc = cdev->devdata; 78 79 *state = dfc->cooling_state; 80 81 return 0; 82 } 83 84 static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, 85 unsigned long state) 86 { 87 struct devfreq_cooling_device *dfc = cdev->devdata; 88 struct devfreq *df = dfc->devfreq; 89 struct device *dev = df->dev.parent; 90 unsigned long freq; 91 int perf_idx; 92 93 if (state == dfc->cooling_state) 94 return 0; 95 96 dev_dbg(dev, "Setting cooling state %lu\n", state); 97 98 if (state > dfc->max_state) 99 return -EINVAL; 100 101 if (dfc->em_pd) { 102 perf_idx = dfc->max_state - state; 103 freq = dfc->em_pd->table[perf_idx].frequency * 1000; 104 } else { 105 freq = dfc->freq_table[state]; 106 } 107 108 dev_pm_qos_update_request(&dfc->req_max_freq, 109 DIV_ROUND_UP(freq, HZ_PER_KHZ)); 110 111 dfc->cooling_state = state; 112 113 return 0; 114 } 115 116 /** 117 * get_perf_idx() - get the performance index corresponding to a frequency 118 * @em_pd: Pointer to device's Energy Model 119 * @freq: frequency in kHz 120 * 121 * Return: the performance index associated with the @freq, or 122 * -EINVAL if it wasn't found. 123 */ 124 static int get_perf_idx(struct em_perf_domain *em_pd, unsigned long freq) 125 { 126 int i; 127 128 for (i = 0; i < em_pd->nr_perf_states; i++) { 129 if (em_pd->table[i].frequency == freq) 130 return i; 131 } 132 133 return -EINVAL; 134 } 135 136 static unsigned long get_voltage(struct devfreq *df, unsigned long freq) 137 { 138 struct device *dev = df->dev.parent; 139 unsigned long voltage; 140 struct dev_pm_opp *opp; 141 142 opp = dev_pm_opp_find_freq_exact(dev, freq, true); 143 if (PTR_ERR(opp) == -ERANGE) 144 opp = dev_pm_opp_find_freq_exact(dev, freq, false); 145 146 if (IS_ERR(opp)) { 147 dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n", 148 freq, PTR_ERR(opp)); 149 return 0; 150 } 151 152 voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ 153 dev_pm_opp_put(opp); 154 155 if (voltage == 0) { 156 dev_err_ratelimited(dev, 157 "Failed to get voltage for frequency %lu\n", 158 freq); 159 } 160 161 return voltage; 162 } 163 164 static void _normalize_load(struct devfreq_dev_status *status) 165 { 166 if (status->total_time > 0xfffff) { 167 status->total_time >>= 10; 168 status->busy_time >>= 10; 169 } 170 171 status->busy_time <<= 10; 172 status->busy_time /= status->total_time ? : 1; 173 174 status->busy_time = status->busy_time ? : 1; 175 status->total_time = 1024; 176 } 177 178 static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, 179 u32 *power) 180 { 181 struct devfreq_cooling_device *dfc = cdev->devdata; 182 struct devfreq *df = dfc->devfreq; 183 struct devfreq_dev_status status; 184 unsigned long state; 185 unsigned long freq; 186 unsigned long voltage; 187 int res, perf_idx; 188 189 mutex_lock(&df->lock); 190 status = df->last_status; 191 mutex_unlock(&df->lock); 192 193 freq = status.current_frequency; 194 195 if (dfc->power_ops && dfc->power_ops->get_real_power) { 196 voltage = get_voltage(df, freq); 197 if (voltage == 0) { 198 res = -EINVAL; 199 goto fail; 200 } 201 202 res = dfc->power_ops->get_real_power(df, power, freq, voltage); 203 if (!res) { 204 state = dfc->max_state - dfc->capped_state; 205 206 /* Convert EM power into milli-Watts first */ 207 dfc->res_util = dfc->em_pd->table[state].power; 208 dfc->res_util /= MICROWATT_PER_MILLIWATT; 209 210 dfc->res_util *= SCALE_ERROR_MITIGATION; 211 212 if (*power > 1) 213 dfc->res_util /= *power; 214 } else { 215 goto fail; 216 } 217 } else { 218 /* Energy Model frequencies are in kHz */ 219 perf_idx = get_perf_idx(dfc->em_pd, freq / 1000); 220 if (perf_idx < 0) { 221 res = -EAGAIN; 222 goto fail; 223 } 224 225 _normalize_load(&status); 226 227 /* Convert EM power into milli-Watts first */ 228 *power = dfc->em_pd->table[perf_idx].power; 229 *power /= MICROWATT_PER_MILLIWATT; 230 /* Scale power for utilization */ 231 *power *= status.busy_time; 232 *power >>= 10; 233 } 234 235 trace_thermal_power_devfreq_get_power(cdev, &status, freq, *power); 236 237 return 0; 238 fail: 239 /* It is safe to set max in this case */ 240 dfc->res_util = SCALE_ERROR_MITIGATION; 241 return res; 242 } 243 244 static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, 245 unsigned long state, u32 *power) 246 { 247 struct devfreq_cooling_device *dfc = cdev->devdata; 248 int perf_idx; 249 250 if (state > dfc->max_state) 251 return -EINVAL; 252 253 perf_idx = dfc->max_state - state; 254 *power = dfc->em_pd->table[perf_idx].power; 255 *power /= MICROWATT_PER_MILLIWATT; 256 257 return 0; 258 } 259 260 static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, 261 u32 power, unsigned long *state) 262 { 263 struct devfreq_cooling_device *dfc = cdev->devdata; 264 struct devfreq *df = dfc->devfreq; 265 struct devfreq_dev_status status; 266 unsigned long freq, em_power_mw; 267 s32 est_power; 268 int i; 269 270 mutex_lock(&df->lock); 271 status = df->last_status; 272 mutex_unlock(&df->lock); 273 274 freq = status.current_frequency; 275 276 if (dfc->power_ops && dfc->power_ops->get_real_power) { 277 /* Scale for resource utilization */ 278 est_power = power * dfc->res_util; 279 est_power /= SCALE_ERROR_MITIGATION; 280 } else { 281 /* Scale dynamic power for utilization */ 282 _normalize_load(&status); 283 est_power = power << 10; 284 est_power /= status.busy_time; 285 } 286 287 /* 288 * Find the first cooling state that is within the power 289 * budget. The EM power table is sorted ascending. 290 */ 291 for (i = dfc->max_state; i > 0; i--) { 292 /* Convert EM power to milli-Watts to make safe comparison */ 293 em_power_mw = dfc->em_pd->table[i].power; 294 em_power_mw /= MICROWATT_PER_MILLIWATT; 295 if (est_power >= em_power_mw) 296 break; 297 } 298 299 *state = dfc->max_state - i; 300 dfc->capped_state = *state; 301 302 trace_thermal_power_devfreq_limit(cdev, freq, *state, power); 303 return 0; 304 } 305 306 /** 307 * devfreq_cooling_gen_tables() - Generate frequency table. 308 * @dfc: Pointer to devfreq cooling device. 309 * @num_opps: Number of OPPs 310 * 311 * Generate frequency table which holds the frequencies in descending 312 * order. That way its indexed by cooling device state. This is for 313 * compatibility with drivers which do not register Energy Model. 314 * 315 * Return: 0 on success, negative error code on failure. 316 */ 317 static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc, 318 int num_opps) 319 { 320 struct devfreq *df = dfc->devfreq; 321 struct device *dev = df->dev.parent; 322 unsigned long freq; 323 int i; 324 325 dfc->freq_table = kcalloc(num_opps, sizeof(*dfc->freq_table), 326 GFP_KERNEL); 327 if (!dfc->freq_table) 328 return -ENOMEM; 329 330 for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) { 331 struct dev_pm_opp *opp; 332 333 opp = dev_pm_opp_find_freq_floor(dev, &freq); 334 if (IS_ERR(opp)) { 335 kfree(dfc->freq_table); 336 return PTR_ERR(opp); 337 } 338 339 dev_pm_opp_put(opp); 340 dfc->freq_table[i] = freq; 341 } 342 343 return 0; 344 } 345 346 /** 347 * of_devfreq_cooling_register_power() - Register devfreq cooling device, 348 * with OF and power information. 349 * @np: Pointer to OF device_node. 350 * @df: Pointer to devfreq device. 351 * @dfc_power: Pointer to devfreq_cooling_power. 352 * 353 * Register a devfreq cooling device. The available OPPs must be 354 * registered on the device. 355 * 356 * If @dfc_power is provided, the cooling device is registered with the 357 * power extensions. For the power extensions to work correctly, 358 * devfreq should use the simple_ondemand governor, other governors 359 * are not currently supported. 360 */ 361 struct thermal_cooling_device * 362 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, 363 struct devfreq_cooling_power *dfc_power) 364 { 365 struct thermal_cooling_device *cdev; 366 struct device *dev = df->dev.parent; 367 struct devfreq_cooling_device *dfc; 368 struct em_perf_domain *em; 369 struct thermal_cooling_device_ops *ops; 370 char *name; 371 int err, num_opps; 372 373 374 dfc = kzalloc(sizeof(*dfc), GFP_KERNEL); 375 if (!dfc) 376 return ERR_PTR(-ENOMEM); 377 378 dfc->devfreq = df; 379 380 ops = &dfc->cooling_ops; 381 ops->get_max_state = devfreq_cooling_get_max_state; 382 ops->get_cur_state = devfreq_cooling_get_cur_state; 383 ops->set_cur_state = devfreq_cooling_set_cur_state; 384 385 em = em_pd_get(dev); 386 if (em && !em_is_artificial(em)) { 387 dfc->em_pd = em; 388 ops->get_requested_power = 389 devfreq_cooling_get_requested_power; 390 ops->state2power = devfreq_cooling_state2power; 391 ops->power2state = devfreq_cooling_power2state; 392 393 dfc->power_ops = dfc_power; 394 395 num_opps = em_pd_nr_perf_states(dfc->em_pd); 396 } else { 397 /* Backward compatibility for drivers which do not use IPA */ 398 dev_dbg(dev, "missing proper EM for cooling device\n"); 399 400 num_opps = dev_pm_opp_get_opp_count(dev); 401 402 err = devfreq_cooling_gen_tables(dfc, num_opps); 403 if (err) 404 goto free_dfc; 405 } 406 407 if (num_opps <= 0) { 408 err = -EINVAL; 409 goto free_dfc; 410 } 411 412 /* max_state is an index, not a counter */ 413 dfc->max_state = num_opps - 1; 414 415 err = dev_pm_qos_add_request(dev, &dfc->req_max_freq, 416 DEV_PM_QOS_MAX_FREQUENCY, 417 PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); 418 if (err < 0) 419 goto free_table; 420 421 err = -ENOMEM; 422 name = kasprintf(GFP_KERNEL, "devfreq-%s", dev_name(dev)); 423 if (!name) 424 goto remove_qos_req; 425 426 cdev = thermal_of_cooling_device_register(np, name, dfc, ops); 427 kfree(name); 428 429 if (IS_ERR(cdev)) { 430 err = PTR_ERR(cdev); 431 dev_err(dev, 432 "Failed to register devfreq cooling device (%d)\n", 433 err); 434 goto remove_qos_req; 435 } 436 437 dfc->cdev = cdev; 438 439 return cdev; 440 441 remove_qos_req: 442 dev_pm_qos_remove_request(&dfc->req_max_freq); 443 free_table: 444 kfree(dfc->freq_table); 445 free_dfc: 446 kfree(dfc); 447 448 return ERR_PTR(err); 449 } 450 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power); 451 452 /** 453 * of_devfreq_cooling_register() - Register devfreq cooling device, 454 * with OF information. 455 * @np: Pointer to OF device_node. 456 * @df: Pointer to devfreq device. 457 */ 458 struct thermal_cooling_device * 459 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) 460 { 461 return of_devfreq_cooling_register_power(np, df, NULL); 462 } 463 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register); 464 465 /** 466 * devfreq_cooling_register() - Register devfreq cooling device. 467 * @df: Pointer to devfreq device. 468 */ 469 struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df) 470 { 471 return of_devfreq_cooling_register(NULL, df); 472 } 473 EXPORT_SYMBOL_GPL(devfreq_cooling_register); 474 475 /** 476 * devfreq_cooling_em_register() - Register devfreq cooling device with 477 * power information and automatically register Energy Model (EM) 478 * @df: Pointer to devfreq device. 479 * @dfc_power: Pointer to devfreq_cooling_power. 480 * 481 * Register a devfreq cooling device and automatically register EM. The 482 * available OPPs must be registered for the device. 483 * 484 * If @dfc_power is provided, the cooling device is registered with the 485 * power extensions. It is using the simple Energy Model which requires 486 * "dynamic-power-coefficient" a devicetree property. To not break drivers 487 * which miss that DT property, the function won't bail out when the EM 488 * registration failed. The cooling device will be registered if everything 489 * else is OK. 490 */ 491 struct thermal_cooling_device * 492 devfreq_cooling_em_register(struct devfreq *df, 493 struct devfreq_cooling_power *dfc_power) 494 { 495 struct thermal_cooling_device *cdev; 496 struct device *dev; 497 int ret; 498 499 if (IS_ERR_OR_NULL(df)) 500 return ERR_PTR(-EINVAL); 501 502 dev = df->dev.parent; 503 504 ret = dev_pm_opp_of_register_em(dev, NULL); 505 if (ret) 506 dev_dbg(dev, "Unable to register EM for devfreq cooling device (%d)\n", 507 ret); 508 509 cdev = of_devfreq_cooling_register_power(dev->of_node, df, dfc_power); 510 511 if (IS_ERR_OR_NULL(cdev)) 512 em_dev_unregister_perf_domain(dev); 513 514 return cdev; 515 } 516 EXPORT_SYMBOL_GPL(devfreq_cooling_em_register); 517 518 /** 519 * devfreq_cooling_unregister() - Unregister devfreq cooling device. 520 * @cdev: Pointer to devfreq cooling device to unregister. 521 * 522 * Unregisters devfreq cooling device and related Energy Model if it was 523 * present. 524 */ 525 void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) 526 { 527 struct devfreq_cooling_device *dfc; 528 struct device *dev; 529 530 if (IS_ERR_OR_NULL(cdev)) 531 return; 532 533 dfc = cdev->devdata; 534 dev = dfc->devfreq->dev.parent; 535 536 thermal_cooling_device_unregister(dfc->cdev); 537 dev_pm_qos_remove_request(&dfc->req_max_freq); 538 539 em_dev_unregister_perf_domain(dev); 540 541 kfree(dfc->freq_table); 542 kfree(dfc); 543 } 544 EXPORT_SYMBOL_GPL(devfreq_cooling_unregister); 545