/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cpufreq.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <trace/events/power.h>

#include "sched.h"

#define SUGOV_KTHREAD_PRIORITY	50

struct sugov_tunables {
	struct gov_attr_set attr_set;
	unsigned int rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy *policy;

	struct sugov_tunables *tunables;
	struct list_head tunables_hook;

	raw_spinlock_t update_lock;  /* For shared policies */
	u64 last_freq_update_time;
	s64 freq_update_delay_ns;
	unsigned int next_freq;

	/* The next fields are only needed if fast switch cannot be used. */
	struct irq_work irq_work;
	struct kthread_work work;
	struct mutex work_lock;
	struct kthread_worker worker;
	struct task_struct *thread;
	bool work_in_progress;

	bool need_freq_update;
};

struct sugov_cpu {
	struct update_util_data update_util;
	struct sugov_policy *sg_policy;

	unsigned int cached_raw_freq;
	unsigned long iowait_boost;
	unsigned long iowait_boost_max;
	u64 last_update;

	/* The fields below are only needed when sharing a policy. */
	unsigned long util;
	unsigned long max;
	unsigned int flags;
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	if (sg_policy->work_in_progress)
		return false;

	if (unlikely(sg_policy->need_freq_update)) {
		sg_policy->need_freq_update = false;
		/*
		 * This happens when limits change, so forget the previous
		 * next_freq value and force an update.
		 */
		sg_policy->next_freq = UINT_MAX;
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;
	return delta_ns >= sg_policy->freq_update_delay_ns;
}

static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
				unsigned int next_freq)
{
	struct cpufreq_policy *policy = sg_policy->policy;

	sg_policy->last_freq_update_time = time;

	if (policy->fast_switch_enabled) {
		if (sg_policy->next_freq == next_freq) {
			trace_cpu_frequency(policy->cur, smp_processor_id());
			return;
		}
		sg_policy->next_freq = next_freq;
		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
		if (next_freq == CPUFREQ_ENTRY_INVALID)
			return;

		policy->cur = next_freq;
		trace_cpu_frequency(next_freq, smp_processor_id());
	} else if (sg_policy->next_freq != next_freq) {
		sg_policy->next_freq = next_freq;
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_cpu: schedutil cpu object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 *
 * The lowest driver-supported frequency which is equal or greater than the raw
 * next_freq (as calculated above) is returned, subject to policy min/max and
 * cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
				  unsigned long max)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;

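	/* Apply C = 1.25 from the comment above: freq + (freq >> 2) == 1.25 * freq. */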
	freq = (freq + (freq >> 2)) * util / max;

	if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
		return sg_policy->next_freq;
	sg_cpu->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}

static void sugov_get_util(unsigned long *util, unsigned long *max)
{
	struct rq *rq = this_rq();
	unsigned long cfs_max;

	cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());

	*util = min(rq->cfs.avg.util_avg, cfs_max);
	*max = cfs_max;
}

static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
				   unsigned int flags)
{
	if (flags & SCHED_CPUFREQ_IOWAIT) {
		sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
	} else if (sg_cpu->iowait_boost) {
		s64 delta_ns = time - sg_cpu->last_update;

		/* Clear iowait_boost if the CPU appears to have been idle. */
		if (delta_ns > TICK_NSEC)
			sg_cpu->iowait_boost = 0;
	}
}

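/*
 * Use the iowait boost as a floor for the (util, max) pair passed to
 * get_next_freq() and halve the boost on every update, so that it decays
 * quickly once the I/O wait pressure is gone.
 */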
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
			       unsigned long *max)
{
	unsigned long boost_util = sg_cpu->iowait_boost;
	unsigned long boost_max = sg_cpu->iowait_boost_max;

	if (!boost_util)
		return;

	if (*util * boost_max < *max * boost_util) {
		*util = boost_util;
		*max = boost_max;
	}
	sg_cpu->iowait_boost >>= 1;
}

static void sugov_update_single(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util, max;
	unsigned int next_f;

	sugov_set_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	if (!sugov_should_update_freq(sg_policy, time))
		return;

	if (flags & SCHED_CPUFREQ_RT_DL) {
		next_f = policy->cpuinfo.max_freq;
	} else {
		sugov_get_util(&util, &max);
		sugov_iowait_boost(sg_cpu, &util, &max);
		next_f = get_next_freq(sg_cpu, util, max);
	}
	sugov_update_commit(sg_policy, time, next_f);
}

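/*
 * Pick the frequency for a shared policy: use the highest (util / max) ratio
 * among the CPUs in the policy, return the maximum frequency if any of them
 * runs RT/DL tasks, and skip CPUs that look idle (no utilization update for
 * longer than a tick before the last frequency update).
 */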
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
					   unsigned long util, unsigned long max,
					   unsigned int flags)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int max_f = policy->cpuinfo.max_freq;
	u64 last_freq_update_time = sg_policy->last_freq_update_time;
	unsigned int j;

	if (flags & SCHED_CPUFREQ_RT_DL)
		return max_f;

	sugov_iowait_boost(sg_cpu, &util, &max);

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu;
		unsigned long j_util, j_max;
		s64 delta_ns;

		if (j == smp_processor_id())
			continue;

		j_sg_cpu = &per_cpu(sugov_cpu, j);
		/*
		 * If the CPU utilization was last updated before the previous
		 * frequency update and the time elapsed between the last update
		 * of the CPU utilization and the last frequency update is long
		 * enough, don't take the CPU into account as it probably is
		 * idle now (and clear iowait_boost for it).
		 */
		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
		if (delta_ns > TICK_NSEC) {
			j_sg_cpu->iowait_boost = 0;
			continue;
		}
		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
			return max_f;

		j_util = j_sg_cpu->util;
		j_max = j_sg_cpu->max;
		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}

		sugov_iowait_boost(j_sg_cpu, &util, &max);
	}

	return get_next_freq(sg_cpu, util, max);
}

static void sugov_update_shared(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned long util, max;
	unsigned int next_f;

	sugov_get_util(&util, &max);

	raw_spin_lock(&sg_policy->update_lock);

	sg_cpu->util = util;
	sg_cpu->max = max;
	sg_cpu->flags = flags;

	sugov_set_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
		sugov_update_commit(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);
}

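/* Slow path: apply the frequency change from the schedutil kthread. */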
static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
				CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);

	sg_policy->work_in_progress = false;
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	/*
	 * For RT and deadline tasks, the schedutil governor shoots the
	 * frequency to maximum. Special care must be taken to ensure that this
	 * kthread doesn't result in the same behavior.
	 *
	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
	 * updated only at the end of the sugov_work() function and before that
	 * the schedutil governor rejects all other frequency scaling requests.
	 *
	 * There is a very rare case though, where the RT thread yields right
	 * after the work_in_progress flag is cleared. The effects of that are
	 * neglected for now.
	 */
	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
				   size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attributes[] = {
	&rate_limit_us.attr,
	NULL
};

static struct kobj_type sugov_tunables_ktype = {
	.default_attrs = sugov_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};

/********************** cpufreq governor interface *********************/

static struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}

static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;
	kthread_bind_mask(thread, policy->related_cpus);
	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;

	kfree(tunables);
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	unsigned int lat;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->rate_limit_us = LATENCY_MULTIPLIER;
	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
	if (lat)
		tunables->rate_limit_us *= lat;

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	policy->governor_data = NULL;
	sugov_tunables_free(tunables);

stop_kthread:
	sugov_kthread_stop(sg_policy);
	mutex_unlock(&global_tunables_lock);

free_sg_policy:
	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_tunables_free(tunables);

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);
}

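/*
 * Start the governor for a policy: (re)initialize the per-CPU state and
 * install the scheduler utilization update hooks.
 */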
static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = UINT_MAX;
	sg_policy->work_in_progress = false;
	sg_policy->need_freq_update = false;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		sg_cpu->sg_policy = sg_policy;
		if (policy_is_shared(policy)) {
			sg_cpu->util = 0;
			sg_cpu->max = 0;
			sg_cpu->flags = SCHED_CPUFREQ_RT;
			sg_cpu->last_update = 0;
			sg_cpu->cached_raw_freq = 0;
			sg_cpu->iowait_boost = 0;
			sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
						     sugov_update_shared);
		} else {
			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
						     sugov_update_single);
		}
	}
	return 0;
}

static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_sched();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}

static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->need_freq_update = true;
}

static struct cpufreq_governor schedutil_gov = {
	.name = "schedutil",
	.owner = THIS_MODULE,
	.init = sugov_init,
	.exit = sugov_exit,
	.start = sugov_start,
	.stop = sugov_stop,
	.limits = sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif

static int __init sugov_register(void)
{
	return cpufreq_register_governor(&schedutil_gov);
}
fs_initcall(sugov_register);