1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * devfreq_cooling: Thermal cooling device implementation for devices using 4 * devfreq 5 * 6 * Copyright (C) 2014-2015 ARM Limited 7 * 8 * TODO: 9 * - If OPPs are added or removed after devfreq cooling has 10 * registered, the devfreq cooling won't react to it. 11 */ 12 13 #include <linux/devfreq.h> 14 #include <linux/devfreq_cooling.h> 15 #include <linux/energy_model.h> 16 #include <linux/export.h> 17 #include <linux/slab.h> 18 #include <linux/pm_opp.h> 19 #include <linux/pm_qos.h> 20 #include <linux/thermal.h> 21 #include <linux/units.h> 22 23 #include <trace/events/thermal.h> 24 25 #define SCALE_ERROR_MITIGATION 100 26 27 /** 28 * struct devfreq_cooling_device - Devfreq cooling device 29 * devfreq_cooling_device registered. 30 * @cdev: Pointer to associated thermal cooling device. 31 * @devfreq: Pointer to associated devfreq device. 32 * @cooling_state: Current cooling state. 33 * @freq_table: Pointer to a table with the frequencies sorted in descending 34 * order. You can index the table by cooling device state 35 * @max_state: It is the last index, that is, one less than the number of the 36 * OPPs 37 * @power_ops: Pointer to devfreq_cooling_power, a more precised model. 38 * @res_util: Resource utilization scaling factor for the power. 39 * It is multiplied by 100 to minimize the error. It is used 40 * for estimation of the power budget instead of using 41 * 'utilization' (which is 'busy_time' / 'total_time'). 42 * The 'res_util' range is from 100 to power * 100 for the 43 * corresponding 'state'. 44 * @capped_state: index to cooling state with in dynamic power budget 45 * @req_max_freq: PM QoS request for limiting the maximum frequency 46 * of the devfreq device. 47 * @em_pd: Energy Model for the associated Devfreq device 48 */ 49 struct devfreq_cooling_device { 50 struct thermal_cooling_device *cdev; 51 struct devfreq *devfreq; 52 unsigned long cooling_state; 53 u32 *freq_table; 54 size_t max_state; 55 struct devfreq_cooling_power *power_ops; 56 u32 res_util; 57 int capped_state; 58 struct dev_pm_qos_request req_max_freq; 59 struct em_perf_domain *em_pd; 60 }; 61 62 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, 63 unsigned long *state) 64 { 65 struct devfreq_cooling_device *dfc = cdev->devdata; 66 67 *state = dfc->max_state; 68 69 return 0; 70 } 71 72 static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev, 73 unsigned long *state) 74 { 75 struct devfreq_cooling_device *dfc = cdev->devdata; 76 77 *state = dfc->cooling_state; 78 79 return 0; 80 } 81 82 static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, 83 unsigned long state) 84 { 85 struct devfreq_cooling_device *dfc = cdev->devdata; 86 struct devfreq *df = dfc->devfreq; 87 struct device *dev = df->dev.parent; 88 unsigned long freq; 89 int perf_idx; 90 91 if (state == dfc->cooling_state) 92 return 0; 93 94 dev_dbg(dev, "Setting cooling state %lu\n", state); 95 96 if (state > dfc->max_state) 97 return -EINVAL; 98 99 if (dfc->em_pd) { 100 perf_idx = dfc->max_state - state; 101 freq = dfc->em_pd->table[perf_idx].frequency * 1000; 102 } else { 103 freq = dfc->freq_table[state]; 104 } 105 106 dev_pm_qos_update_request(&dfc->req_max_freq, 107 DIV_ROUND_UP(freq, HZ_PER_KHZ)); 108 109 dfc->cooling_state = state; 110 111 return 0; 112 } 113 114 /** 115 * get_perf_idx() - get the performance index corresponding to a frequency 116 * @em_pd: Pointer to device's Energy Model 117 * @freq: frequency in kHz 118 * 119 * Return: the performance index associated with the @freq, or 120 * -EINVAL if it wasn't found. 121 */ 122 static int get_perf_idx(struct em_perf_domain *em_pd, unsigned long freq) 123 { 124 int i; 125 126 for (i = 0; i < em_pd->nr_perf_states; i++) { 127 if (em_pd->table[i].frequency == freq) 128 return i; 129 } 130 131 return -EINVAL; 132 } 133 134 static unsigned long get_voltage(struct devfreq *df, unsigned long freq) 135 { 136 struct device *dev = df->dev.parent; 137 unsigned long voltage; 138 struct dev_pm_opp *opp; 139 140 opp = dev_pm_opp_find_freq_exact(dev, freq, true); 141 if (PTR_ERR(opp) == -ERANGE) 142 opp = dev_pm_opp_find_freq_exact(dev, freq, false); 143 144 if (IS_ERR(opp)) { 145 dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n", 146 freq, PTR_ERR(opp)); 147 return 0; 148 } 149 150 voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ 151 dev_pm_opp_put(opp); 152 153 if (voltage == 0) { 154 dev_err_ratelimited(dev, 155 "Failed to get voltage for frequency %lu\n", 156 freq); 157 } 158 159 return voltage; 160 } 161 162 static void _normalize_load(struct devfreq_dev_status *status) 163 { 164 if (status->total_time > 0xfffff) { 165 status->total_time >>= 10; 166 status->busy_time >>= 10; 167 } 168 169 status->busy_time <<= 10; 170 status->busy_time /= status->total_time ? : 1; 171 172 status->busy_time = status->busy_time ? : 1; 173 status->total_time = 1024; 174 } 175 176 static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, 177 u32 *power) 178 { 179 struct devfreq_cooling_device *dfc = cdev->devdata; 180 struct devfreq *df = dfc->devfreq; 181 struct devfreq_dev_status status; 182 unsigned long state; 183 unsigned long freq; 184 unsigned long voltage; 185 int res, perf_idx; 186 187 mutex_lock(&df->lock); 188 status = df->last_status; 189 mutex_unlock(&df->lock); 190 191 freq = status.current_frequency; 192 193 if (dfc->power_ops && dfc->power_ops->get_real_power) { 194 voltage = get_voltage(df, freq); 195 if (voltage == 0) { 196 res = -EINVAL; 197 goto fail; 198 } 199 200 res = dfc->power_ops->get_real_power(df, power, freq, voltage); 201 if (!res) { 202 state = dfc->capped_state; 203 dfc->res_util = dfc->em_pd->table[state].power; 204 dfc->res_util *= SCALE_ERROR_MITIGATION; 205 206 if (*power > 1) 207 dfc->res_util /= *power; 208 } else { 209 goto fail; 210 } 211 } else { 212 /* Energy Model frequencies are in kHz */ 213 perf_idx = get_perf_idx(dfc->em_pd, freq / 1000); 214 if (perf_idx < 0) { 215 res = -EAGAIN; 216 goto fail; 217 } 218 219 _normalize_load(&status); 220 221 /* Scale power for utilization */ 222 *power = dfc->em_pd->table[perf_idx].power; 223 *power *= status.busy_time; 224 *power >>= 10; 225 } 226 227 trace_thermal_power_devfreq_get_power(cdev, &status, freq, *power); 228 229 return 0; 230 fail: 231 /* It is safe to set max in this case */ 232 dfc->res_util = SCALE_ERROR_MITIGATION; 233 return res; 234 } 235 236 static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, 237 unsigned long state, u32 *power) 238 { 239 struct devfreq_cooling_device *dfc = cdev->devdata; 240 int perf_idx; 241 242 if (state > dfc->max_state) 243 return -EINVAL; 244 245 perf_idx = dfc->max_state - state; 246 *power = dfc->em_pd->table[perf_idx].power; 247 248 return 0; 249 } 250 251 static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, 252 u32 power, unsigned long *state) 253 { 254 struct devfreq_cooling_device *dfc = cdev->devdata; 255 struct devfreq *df = dfc->devfreq; 256 struct devfreq_dev_status status; 257 unsigned long freq; 258 s32 est_power; 259 int i; 260 261 mutex_lock(&df->lock); 262 status = df->last_status; 263 mutex_unlock(&df->lock); 264 265 freq = status.current_frequency; 266 267 if (dfc->power_ops && dfc->power_ops->get_real_power) { 268 /* Scale for resource utilization */ 269 est_power = power * dfc->res_util; 270 est_power /= SCALE_ERROR_MITIGATION; 271 } else { 272 /* Scale dynamic power for utilization */ 273 _normalize_load(&status); 274 est_power = power << 10; 275 est_power /= status.busy_time; 276 } 277 278 /* 279 * Find the first cooling state that is within the power 280 * budget. The EM power table is sorted ascending. 281 */ 282 for (i = dfc->max_state; i > 0; i--) 283 if (est_power >= dfc->em_pd->table[i].power) 284 break; 285 286 *state = dfc->max_state - i; 287 dfc->capped_state = *state; 288 289 trace_thermal_power_devfreq_limit(cdev, freq, *state, power); 290 return 0; 291 } 292 293 static struct thermal_cooling_device_ops devfreq_cooling_ops = { 294 .get_max_state = devfreq_cooling_get_max_state, 295 .get_cur_state = devfreq_cooling_get_cur_state, 296 .set_cur_state = devfreq_cooling_set_cur_state, 297 }; 298 299 /** 300 * devfreq_cooling_gen_tables() - Generate frequency table. 301 * @dfc: Pointer to devfreq cooling device. 302 * @num_opps: Number of OPPs 303 * 304 * Generate frequency table which holds the frequencies in descending 305 * order. That way its indexed by cooling device state. This is for 306 * compatibility with drivers which do not register Energy Model. 307 * 308 * Return: 0 on success, negative error code on failure. 309 */ 310 static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc, 311 int num_opps) 312 { 313 struct devfreq *df = dfc->devfreq; 314 struct device *dev = df->dev.parent; 315 unsigned long freq; 316 int i; 317 318 dfc->freq_table = kcalloc(num_opps, sizeof(*dfc->freq_table), 319 GFP_KERNEL); 320 if (!dfc->freq_table) 321 return -ENOMEM; 322 323 for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) { 324 struct dev_pm_opp *opp; 325 326 opp = dev_pm_opp_find_freq_floor(dev, &freq); 327 if (IS_ERR(opp)) { 328 kfree(dfc->freq_table); 329 return PTR_ERR(opp); 330 } 331 332 dev_pm_opp_put(opp); 333 dfc->freq_table[i] = freq; 334 } 335 336 return 0; 337 } 338 339 /** 340 * of_devfreq_cooling_register_power() - Register devfreq cooling device, 341 * with OF and power information. 342 * @np: Pointer to OF device_node. 343 * @df: Pointer to devfreq device. 344 * @dfc_power: Pointer to devfreq_cooling_power. 345 * 346 * Register a devfreq cooling device. The available OPPs must be 347 * registered on the device. 348 * 349 * If @dfc_power is provided, the cooling device is registered with the 350 * power extensions. For the power extensions to work correctly, 351 * devfreq should use the simple_ondemand governor, other governors 352 * are not currently supported. 353 */ 354 struct thermal_cooling_device * 355 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, 356 struct devfreq_cooling_power *dfc_power) 357 { 358 struct thermal_cooling_device *cdev; 359 struct device *dev = df->dev.parent; 360 struct devfreq_cooling_device *dfc; 361 struct em_perf_domain *em; 362 struct thermal_cooling_device_ops *ops; 363 char *name; 364 int err, num_opps; 365 366 ops = kmemdup(&devfreq_cooling_ops, sizeof(*ops), GFP_KERNEL); 367 if (!ops) 368 return ERR_PTR(-ENOMEM); 369 370 dfc = kzalloc(sizeof(*dfc), GFP_KERNEL); 371 if (!dfc) { 372 err = -ENOMEM; 373 goto free_ops; 374 } 375 376 dfc->devfreq = df; 377 378 em = em_pd_get(dev); 379 if (em && !em_is_artificial(em)) { 380 dfc->em_pd = em; 381 ops->get_requested_power = 382 devfreq_cooling_get_requested_power; 383 ops->state2power = devfreq_cooling_state2power; 384 ops->power2state = devfreq_cooling_power2state; 385 386 dfc->power_ops = dfc_power; 387 388 num_opps = em_pd_nr_perf_states(dfc->em_pd); 389 } else { 390 /* Backward compatibility for drivers which do not use IPA */ 391 dev_dbg(dev, "missing proper EM for cooling device\n"); 392 393 num_opps = dev_pm_opp_get_opp_count(dev); 394 395 err = devfreq_cooling_gen_tables(dfc, num_opps); 396 if (err) 397 goto free_dfc; 398 } 399 400 if (num_opps <= 0) { 401 err = -EINVAL; 402 goto free_dfc; 403 } 404 405 /* max_state is an index, not a counter */ 406 dfc->max_state = num_opps - 1; 407 408 err = dev_pm_qos_add_request(dev, &dfc->req_max_freq, 409 DEV_PM_QOS_MAX_FREQUENCY, 410 PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); 411 if (err < 0) 412 goto free_table; 413 414 err = -ENOMEM; 415 name = kasprintf(GFP_KERNEL, "devfreq-%s", dev_name(dev)); 416 if (!name) 417 goto remove_qos_req; 418 419 cdev = thermal_of_cooling_device_register(np, name, dfc, ops); 420 kfree(name); 421 422 if (IS_ERR(cdev)) { 423 err = PTR_ERR(cdev); 424 dev_err(dev, 425 "Failed to register devfreq cooling device (%d)\n", 426 err); 427 goto remove_qos_req; 428 } 429 430 dfc->cdev = cdev; 431 432 return cdev; 433 434 remove_qos_req: 435 dev_pm_qos_remove_request(&dfc->req_max_freq); 436 free_table: 437 kfree(dfc->freq_table); 438 free_dfc: 439 kfree(dfc); 440 free_ops: 441 kfree(ops); 442 443 return ERR_PTR(err); 444 } 445 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power); 446 447 /** 448 * of_devfreq_cooling_register() - Register devfreq cooling device, 449 * with OF information. 450 * @np: Pointer to OF device_node. 451 * @df: Pointer to devfreq device. 452 */ 453 struct thermal_cooling_device * 454 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) 455 { 456 return of_devfreq_cooling_register_power(np, df, NULL); 457 } 458 EXPORT_SYMBOL_GPL(of_devfreq_cooling_register); 459 460 /** 461 * devfreq_cooling_register() - Register devfreq cooling device. 462 * @df: Pointer to devfreq device. 463 */ 464 struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df) 465 { 466 return of_devfreq_cooling_register(NULL, df); 467 } 468 EXPORT_SYMBOL_GPL(devfreq_cooling_register); 469 470 /** 471 * devfreq_cooling_em_register() - Register devfreq cooling device with 472 * power information and automatically register Energy Model (EM) 473 * @df: Pointer to devfreq device. 474 * @dfc_power: Pointer to devfreq_cooling_power. 475 * 476 * Register a devfreq cooling device and automatically register EM. The 477 * available OPPs must be registered for the device. 478 * 479 * If @dfc_power is provided, the cooling device is registered with the 480 * power extensions. It is using the simple Energy Model which requires 481 * "dynamic-power-coefficient" a devicetree property. To not break drivers 482 * which miss that DT property, the function won't bail out when the EM 483 * registration failed. The cooling device will be registered if everything 484 * else is OK. 485 */ 486 struct thermal_cooling_device * 487 devfreq_cooling_em_register(struct devfreq *df, 488 struct devfreq_cooling_power *dfc_power) 489 { 490 struct thermal_cooling_device *cdev; 491 struct device *dev; 492 int ret; 493 494 if (IS_ERR_OR_NULL(df)) 495 return ERR_PTR(-EINVAL); 496 497 dev = df->dev.parent; 498 499 ret = dev_pm_opp_of_register_em(dev, NULL); 500 if (ret) 501 dev_dbg(dev, "Unable to register EM for devfreq cooling device (%d)\n", 502 ret); 503 504 cdev = of_devfreq_cooling_register_power(dev->of_node, df, dfc_power); 505 506 if (IS_ERR_OR_NULL(cdev)) 507 em_dev_unregister_perf_domain(dev); 508 509 return cdev; 510 } 511 EXPORT_SYMBOL_GPL(devfreq_cooling_em_register); 512 513 /** 514 * devfreq_cooling_unregister() - Unregister devfreq cooling device. 515 * @cdev: Pointer to devfreq cooling device to unregister. 516 * 517 * Unregisters devfreq cooling device and related Energy Model if it was 518 * present. 519 */ 520 void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) 521 { 522 struct devfreq_cooling_device *dfc; 523 const struct thermal_cooling_device_ops *ops; 524 struct device *dev; 525 526 if (IS_ERR_OR_NULL(cdev)) 527 return; 528 529 ops = cdev->ops; 530 dfc = cdev->devdata; 531 dev = dfc->devfreq->dev.parent; 532 533 thermal_cooling_device_unregister(dfc->cdev); 534 dev_pm_qos_remove_request(&dfc->req_max_freq); 535 536 em_dev_unregister_perf_domain(dev); 537 538 kfree(dfc->freq_table); 539 kfree(dfc); 540 kfree(ops); 541 } 542 EXPORT_SYMBOL_GPL(devfreq_cooling_unregister); 543