1 /* 2 * cpuidle.c - core cpuidle infrastructure 3 * 4 * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> 5 * Shaohua Li <shaohua.li@intel.com> 6 * Adam Belay <abelay@novell.com> 7 * 8 * This code is licenced under the GPL. 9 */ 10 11 #include <linux/clockchips.h> 12 #include <linux/kernel.h> 13 #include <linux/mutex.h> 14 #include <linux/sched.h> 15 #include <linux/sched/clock.h> 16 #include <linux/notifier.h> 17 #include <linux/pm_qos.h> 18 #include <linux/cpu.h> 19 #include <linux/cpuidle.h> 20 #include <linux/ktime.h> 21 #include <linux/hrtimer.h> 22 #include <linux/module.h> 23 #include <linux/suspend.h> 24 #include <linux/tick.h> 25 #include <trace/events/power.h> 26 27 #include "cpuidle.h" 28 29 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices); 30 DEFINE_PER_CPU(struct cpuidle_device, cpuidle_dev); 31 32 DEFINE_MUTEX(cpuidle_lock); 33 LIST_HEAD(cpuidle_detected_devices); 34 35 static int enabled_devices; 36 static int off __read_mostly; 37 static int initialized __read_mostly; 38 39 int cpuidle_disabled(void) 40 { 41 return off; 42 } 43 void disable_cpuidle(void) 44 { 45 off = 1; 46 } 47 48 bool cpuidle_not_available(struct cpuidle_driver *drv, 49 struct cpuidle_device *dev) 50 { 51 return off || !initialized || !drv || !dev || !dev->enabled; 52 } 53 54 /** 55 * cpuidle_play_dead - cpu off-lining 56 * 57 * Returns in case of an error or no driver 58 */ 59 int cpuidle_play_dead(void) 60 { 61 struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); 62 struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); 63 int i; 64 65 if (!drv) 66 return -ENODEV; 67 68 /* Find lowest-power state that supports long-term idle */ 69 for (i = drv->state_count - 1; i >= 0; i--) 70 if (drv->states[i].enter_dead) 71 return drv->states[i].enter_dead(dev, i); 72 73 return -ENODEV; 74 } 75 76 static int find_deepest_state(struct cpuidle_driver *drv, 77 struct cpuidle_device *dev, 78 u64 max_latency_ns, 79 unsigned int forbidden_flags, 80 bool s2idle) 81 { 82 u64 latency_req = 0; 83 int i, ret = 0; 84 85 for (i = 1; i < drv->state_count; i++) { 86 struct cpuidle_state *s = &drv->states[i]; 87 88 if (dev->states_usage[i].disable || 89 s->exit_latency_ns <= latency_req || 90 s->exit_latency_ns > max_latency_ns || 91 (s->flags & forbidden_flags) || 92 (s2idle && !s->enter_s2idle)) 93 continue; 94 95 latency_req = s->exit_latency_ns; 96 ret = i; 97 } 98 return ret; 99 } 100 101 /** 102 * cpuidle_use_deepest_state - Set/unset governor override mode. 103 * @latency_limit_ns: Idle state exit latency limit (or no override if 0). 104 * 105 * If @latency_limit_ns is nonzero, set the current CPU to use the deepest idle 106 * state with exit latency within @latency_limit_ns (override governors going 107 * forward), or do not override governors if it is zero. 108 */ 109 void cpuidle_use_deepest_state(u64 latency_limit_ns) 110 { 111 struct cpuidle_device *dev; 112 113 preempt_disable(); 114 dev = cpuidle_get_device(); 115 if (dev) 116 dev->forced_idle_latency_limit_ns = latency_limit_ns; 117 preempt_enable(); 118 } 119 120 /** 121 * cpuidle_find_deepest_state - Find the deepest available idle state. 122 * @drv: cpuidle driver for the given CPU. 123 * @dev: cpuidle device for the given CPU. 124 * @latency_limit_ns: Idle state exit latency limit 125 * 126 * Return: the index of the deepest available idle state. 127 */ 128 int cpuidle_find_deepest_state(struct cpuidle_driver *drv, 129 struct cpuidle_device *dev, 130 u64 latency_limit_ns) 131 { 132 return find_deepest_state(drv, dev, latency_limit_ns, 0, false); 133 } 134 135 #ifdef CONFIG_SUSPEND 136 static void enter_s2idle_proper(struct cpuidle_driver *drv, 137 struct cpuidle_device *dev, int index) 138 { 139 ktime_t time_start, time_end; 140 141 time_start = ns_to_ktime(local_clock()); 142 143 /* 144 * trace_suspend_resume() called by tick_freeze() for the last CPU 145 * executing it contains RCU usage regarded as invalid in the idle 146 * context, so tell RCU about that. 147 */ 148 RCU_NONIDLE(tick_freeze()); 149 /* 150 * The state used here cannot be a "coupled" one, because the "coupled" 151 * cpuidle mechanism enables interrupts and doing that with timekeeping 152 * suspended is generally unsafe. 153 */ 154 stop_critical_timings(); 155 drv->states[index].enter_s2idle(dev, drv, index); 156 WARN_ON(!irqs_disabled()); 157 /* 158 * timekeeping_resume() that will be called by tick_unfreeze() for the 159 * first CPU executing it calls functions containing RCU read-side 160 * critical sections, so tell RCU about that. 161 */ 162 RCU_NONIDLE(tick_unfreeze()); 163 start_critical_timings(); 164 165 time_end = ns_to_ktime(local_clock()); 166 167 dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start); 168 dev->states_usage[index].s2idle_usage++; 169 } 170 171 /** 172 * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle. 173 * @drv: cpuidle driver for the given CPU. 174 * @dev: cpuidle device for the given CPU. 175 * 176 * If there are states with the ->enter_s2idle callback, find the deepest of 177 * them and enter it with frozen tick. 178 */ 179 int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) 180 { 181 int index; 182 183 /* 184 * Find the deepest state with ->enter_s2idle present, which guarantees 185 * that interrupts won't be enabled when it exits and allows the tick to 186 * be frozen safely. 187 */ 188 index = find_deepest_state(drv, dev, U64_MAX, 0, true); 189 if (index > 0) { 190 enter_s2idle_proper(drv, dev, index); 191 local_irq_enable(); 192 } 193 return index; 194 } 195 #endif /* CONFIG_SUSPEND */ 196 197 /** 198 * cpuidle_enter_state - enter the state and update stats 199 * @dev: cpuidle device for this cpu 200 * @drv: cpuidle driver for this cpu 201 * @index: index into the states table in @drv of the state to enter 202 */ 203 int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, 204 int index) 205 { 206 int entered_state; 207 208 struct cpuidle_state *target_state = &drv->states[index]; 209 bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP); 210 ktime_t time_start, time_end; 211 212 /* 213 * Tell the time framework to switch to a broadcast timer because our 214 * local timer will be shut down. If a local timer is used from another 215 * CPU as a broadcast timer, this call may fail if it is not available. 216 */ 217 if (broadcast && tick_broadcast_enter()) { 218 index = find_deepest_state(drv, dev, target_state->exit_latency_ns, 219 CPUIDLE_FLAG_TIMER_STOP, false); 220 if (index < 0) { 221 default_idle_call(); 222 return -EBUSY; 223 } 224 target_state = &drv->states[index]; 225 broadcast = false; 226 } 227 228 /* Take note of the planned idle state. */ 229 sched_idle_set_state(target_state); 230 231 trace_cpu_idle_rcuidle(index, dev->cpu); 232 time_start = ns_to_ktime(local_clock()); 233 234 stop_critical_timings(); 235 entered_state = target_state->enter(dev, drv, index); 236 start_critical_timings(); 237 238 sched_clock_idle_wakeup_event(); 239 time_end = ns_to_ktime(local_clock()); 240 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); 241 242 /* The cpu is no longer idle or about to enter idle. */ 243 sched_idle_set_state(NULL); 244 245 if (broadcast) { 246 if (WARN_ON_ONCE(!irqs_disabled())) 247 local_irq_disable(); 248 249 tick_broadcast_exit(); 250 } 251 252 if (!cpuidle_state_is_coupled(drv, index)) 253 local_irq_enable(); 254 255 if (entered_state >= 0) { 256 s64 diff, delay = drv->states[entered_state].exit_latency_ns; 257 int i; 258 259 /* 260 * Update cpuidle counters 261 * This can be moved to within driver enter routine, 262 * but that results in multiple copies of same code. 263 */ 264 diff = ktime_sub(time_end, time_start); 265 266 dev->last_residency_ns = diff; 267 dev->states_usage[entered_state].time_ns += diff; 268 dev->states_usage[entered_state].usage++; 269 270 if (diff < drv->states[entered_state].target_residency_ns) { 271 for (i = entered_state - 1; i >= 0; i--) { 272 if (dev->states_usage[i].disable) 273 continue; 274 275 /* Shallower states are enabled, so update. */ 276 dev->states_usage[entered_state].above++; 277 break; 278 } 279 } else if (diff > delay) { 280 for (i = entered_state + 1; i < drv->state_count; i++) { 281 if (dev->states_usage[i].disable) 282 continue; 283 284 /* 285 * Update if a deeper state would have been a 286 * better match for the observed idle duration. 287 */ 288 if (diff - delay >= drv->states[i].target_residency_ns) 289 dev->states_usage[entered_state].below++; 290 291 break; 292 } 293 } 294 } else { 295 dev->last_residency_ns = 0; 296 } 297 298 return entered_state; 299 } 300 301 /** 302 * cpuidle_select - ask the cpuidle framework to choose an idle state 303 * 304 * @drv: the cpuidle driver 305 * @dev: the cpuidle device 306 * @stop_tick: indication on whether or not to stop the tick 307 * 308 * Returns the index of the idle state. The return value must not be negative. 309 * 310 * The memory location pointed to by @stop_tick is expected to be written the 311 * 'false' boolean value if the scheduler tick should not be stopped before 312 * entering the returned state. 313 */ 314 int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, 315 bool *stop_tick) 316 { 317 return cpuidle_curr_governor->select(drv, dev, stop_tick); 318 } 319 320 /** 321 * cpuidle_enter - enter into the specified idle state 322 * 323 * @drv: the cpuidle driver tied with the cpu 324 * @dev: the cpuidle device 325 * @index: the index in the idle state table 326 * 327 * Returns the index in the idle state, < 0 in case of error. 328 * The error code depends on the backend driver 329 */ 330 int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, 331 int index) 332 { 333 int ret = 0; 334 335 /* 336 * Store the next hrtimer, which becomes either next tick or the next 337 * timer event, whatever expires first. Additionally, to make this data 338 * useful for consumers outside cpuidle, we rely on that the governor's 339 * ->select() callback have decided, whether to stop the tick or not. 340 */ 341 WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer()); 342 343 if (cpuidle_state_is_coupled(drv, index)) 344 ret = cpuidle_enter_state_coupled(dev, drv, index); 345 else 346 ret = cpuidle_enter_state(dev, drv, index); 347 348 WRITE_ONCE(dev->next_hrtimer, 0); 349 return ret; 350 } 351 352 /** 353 * cpuidle_reflect - tell the underlying governor what was the state 354 * we were in 355 * 356 * @dev : the cpuidle device 357 * @index: the index in the idle state table 358 * 359 */ 360 void cpuidle_reflect(struct cpuidle_device *dev, int index) 361 { 362 if (cpuidle_curr_governor->reflect && index >= 0) 363 cpuidle_curr_governor->reflect(dev, index); 364 } 365 366 /** 367 * cpuidle_poll_time - return amount of time to poll for, 368 * governors can override dev->poll_limit_ns if necessary 369 * 370 * @drv: the cpuidle driver tied with the cpu 371 * @dev: the cpuidle device 372 * 373 */ 374 u64 cpuidle_poll_time(struct cpuidle_driver *drv, 375 struct cpuidle_device *dev) 376 { 377 int i; 378 u64 limit_ns; 379 380 if (dev->poll_limit_ns) 381 return dev->poll_limit_ns; 382 383 limit_ns = TICK_NSEC; 384 for (i = 1; i < drv->state_count; i++) { 385 if (dev->states_usage[i].disable) 386 continue; 387 388 limit_ns = drv->states[i].target_residency_ns; 389 break; 390 } 391 392 dev->poll_limit_ns = limit_ns; 393 394 return dev->poll_limit_ns; 395 } 396 397 /** 398 * cpuidle_install_idle_handler - installs the cpuidle idle loop handler 399 */ 400 void cpuidle_install_idle_handler(void) 401 { 402 if (enabled_devices) { 403 /* Make sure all changes finished before we switch to new idle */ 404 smp_wmb(); 405 initialized = 1; 406 } 407 } 408 409 /** 410 * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler 411 */ 412 void cpuidle_uninstall_idle_handler(void) 413 { 414 if (enabled_devices) { 415 initialized = 0; 416 wake_up_all_idle_cpus(); 417 } 418 419 /* 420 * Make sure external observers (such as the scheduler) 421 * are done looking at pointed idle states. 422 */ 423 synchronize_rcu(); 424 } 425 426 /** 427 * cpuidle_pause_and_lock - temporarily disables CPUIDLE 428 */ 429 void cpuidle_pause_and_lock(void) 430 { 431 mutex_lock(&cpuidle_lock); 432 cpuidle_uninstall_idle_handler(); 433 } 434 435 EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock); 436 437 /** 438 * cpuidle_resume_and_unlock - resumes CPUIDLE operation 439 */ 440 void cpuidle_resume_and_unlock(void) 441 { 442 cpuidle_install_idle_handler(); 443 mutex_unlock(&cpuidle_lock); 444 } 445 446 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock); 447 448 /* Currently used in suspend/resume path to suspend cpuidle */ 449 void cpuidle_pause(void) 450 { 451 mutex_lock(&cpuidle_lock); 452 cpuidle_uninstall_idle_handler(); 453 mutex_unlock(&cpuidle_lock); 454 } 455 456 /* Currently used in suspend/resume path to resume cpuidle */ 457 void cpuidle_resume(void) 458 { 459 mutex_lock(&cpuidle_lock); 460 cpuidle_install_idle_handler(); 461 mutex_unlock(&cpuidle_lock); 462 } 463 464 /** 465 * cpuidle_enable_device - enables idle PM for a CPU 466 * @dev: the CPU 467 * 468 * This function must be called between cpuidle_pause_and_lock and 469 * cpuidle_resume_and_unlock when used externally. 470 */ 471 int cpuidle_enable_device(struct cpuidle_device *dev) 472 { 473 int ret; 474 struct cpuidle_driver *drv; 475 476 if (!dev) 477 return -EINVAL; 478 479 if (dev->enabled) 480 return 0; 481 482 if (!cpuidle_curr_governor) 483 return -EIO; 484 485 drv = cpuidle_get_cpu_driver(dev); 486 487 if (!drv) 488 return -EIO; 489 490 if (!dev->registered) 491 return -EINVAL; 492 493 ret = cpuidle_add_device_sysfs(dev); 494 if (ret) 495 return ret; 496 497 if (cpuidle_curr_governor->enable) { 498 ret = cpuidle_curr_governor->enable(drv, dev); 499 if (ret) 500 goto fail_sysfs; 501 } 502 503 smp_wmb(); 504 505 dev->enabled = 1; 506 507 enabled_devices++; 508 return 0; 509 510 fail_sysfs: 511 cpuidle_remove_device_sysfs(dev); 512 513 return ret; 514 } 515 516 EXPORT_SYMBOL_GPL(cpuidle_enable_device); 517 518 /** 519 * cpuidle_disable_device - disables idle PM for a CPU 520 * @dev: the CPU 521 * 522 * This function must be called between cpuidle_pause_and_lock and 523 * cpuidle_resume_and_unlock when used externally. 524 */ 525 void cpuidle_disable_device(struct cpuidle_device *dev) 526 { 527 struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); 528 529 if (!dev || !dev->enabled) 530 return; 531 532 if (!drv || !cpuidle_curr_governor) 533 return; 534 535 dev->enabled = 0; 536 537 if (cpuidle_curr_governor->disable) 538 cpuidle_curr_governor->disable(drv, dev); 539 540 cpuidle_remove_device_sysfs(dev); 541 enabled_devices--; 542 } 543 544 EXPORT_SYMBOL_GPL(cpuidle_disable_device); 545 546 static void __cpuidle_unregister_device(struct cpuidle_device *dev) 547 { 548 struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); 549 550 list_del(&dev->device_list); 551 per_cpu(cpuidle_devices, dev->cpu) = NULL; 552 module_put(drv->owner); 553 554 dev->registered = 0; 555 } 556 557 static void __cpuidle_device_init(struct cpuidle_device *dev) 558 { 559 memset(dev->states_usage, 0, sizeof(dev->states_usage)); 560 dev->last_residency_ns = 0; 561 dev->next_hrtimer = 0; 562 } 563 564 /** 565 * __cpuidle_register_device - internal register function called before register 566 * and enable routines 567 * @dev: the cpu 568 * 569 * cpuidle_lock mutex must be held before this is called 570 */ 571 static int __cpuidle_register_device(struct cpuidle_device *dev) 572 { 573 struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); 574 int i, ret; 575 576 if (!try_module_get(drv->owner)) 577 return -EINVAL; 578 579 for (i = 0; i < drv->state_count; i++) { 580 if (drv->states[i].flags & CPUIDLE_FLAG_UNUSABLE) 581 dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER; 582 583 if (drv->states[i].flags & CPUIDLE_FLAG_OFF) 584 dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_USER; 585 } 586 587 per_cpu(cpuidle_devices, dev->cpu) = dev; 588 list_add(&dev->device_list, &cpuidle_detected_devices); 589 590 ret = cpuidle_coupled_register_device(dev); 591 if (ret) 592 __cpuidle_unregister_device(dev); 593 else 594 dev->registered = 1; 595 596 return ret; 597 } 598 599 /** 600 * cpuidle_register_device - registers a CPU's idle PM feature 601 * @dev: the cpu 602 */ 603 int cpuidle_register_device(struct cpuidle_device *dev) 604 { 605 int ret = -EBUSY; 606 607 if (!dev) 608 return -EINVAL; 609 610 mutex_lock(&cpuidle_lock); 611 612 if (dev->registered) 613 goto out_unlock; 614 615 __cpuidle_device_init(dev); 616 617 ret = __cpuidle_register_device(dev); 618 if (ret) 619 goto out_unlock; 620 621 ret = cpuidle_add_sysfs(dev); 622 if (ret) 623 goto out_unregister; 624 625 ret = cpuidle_enable_device(dev); 626 if (ret) 627 goto out_sysfs; 628 629 cpuidle_install_idle_handler(); 630 631 out_unlock: 632 mutex_unlock(&cpuidle_lock); 633 634 return ret; 635 636 out_sysfs: 637 cpuidle_remove_sysfs(dev); 638 out_unregister: 639 __cpuidle_unregister_device(dev); 640 goto out_unlock; 641 } 642 643 EXPORT_SYMBOL_GPL(cpuidle_register_device); 644 645 /** 646 * cpuidle_unregister_device - unregisters a CPU's idle PM feature 647 * @dev: the cpu 648 */ 649 void cpuidle_unregister_device(struct cpuidle_device *dev) 650 { 651 if (!dev || dev->registered == 0) 652 return; 653 654 cpuidle_pause_and_lock(); 655 656 cpuidle_disable_device(dev); 657 658 cpuidle_remove_sysfs(dev); 659 660 __cpuidle_unregister_device(dev); 661 662 cpuidle_coupled_unregister_device(dev); 663 664 cpuidle_resume_and_unlock(); 665 } 666 667 EXPORT_SYMBOL_GPL(cpuidle_unregister_device); 668 669 /** 670 * cpuidle_unregister: unregister a driver and the devices. This function 671 * can be used only if the driver has been previously registered through 672 * the cpuidle_register function. 673 * 674 * @drv: a valid pointer to a struct cpuidle_driver 675 */ 676 void cpuidle_unregister(struct cpuidle_driver *drv) 677 { 678 int cpu; 679 struct cpuidle_device *device; 680 681 for_each_cpu(cpu, drv->cpumask) { 682 device = &per_cpu(cpuidle_dev, cpu); 683 cpuidle_unregister_device(device); 684 } 685 686 cpuidle_unregister_driver(drv); 687 } 688 EXPORT_SYMBOL_GPL(cpuidle_unregister); 689 690 /** 691 * cpuidle_register: registers the driver and the cpu devices with the 692 * coupled_cpus passed as parameter. This function is used for all common 693 * initialization pattern there are in the arch specific drivers. The 694 * devices is globally defined in this file. 695 * 696 * @drv : a valid pointer to a struct cpuidle_driver 697 * @coupled_cpus: a cpumask for the coupled states 698 * 699 * Returns 0 on success, < 0 otherwise 700 */ 701 int cpuidle_register(struct cpuidle_driver *drv, 702 const struct cpumask *const coupled_cpus) 703 { 704 int ret, cpu; 705 struct cpuidle_device *device; 706 707 ret = cpuidle_register_driver(drv); 708 if (ret) { 709 pr_err("failed to register cpuidle driver\n"); 710 return ret; 711 } 712 713 for_each_cpu(cpu, drv->cpumask) { 714 device = &per_cpu(cpuidle_dev, cpu); 715 device->cpu = cpu; 716 717 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED 718 /* 719 * On multiplatform for ARM, the coupled idle states could be 720 * enabled in the kernel even if the cpuidle driver does not 721 * use it. Note, coupled_cpus is a struct copy. 722 */ 723 if (coupled_cpus) 724 device->coupled_cpus = *coupled_cpus; 725 #endif 726 ret = cpuidle_register_device(device); 727 if (!ret) 728 continue; 729 730 pr_err("Failed to register cpuidle device for cpu%d\n", cpu); 731 732 cpuidle_unregister(drv); 733 break; 734 } 735 736 return ret; 737 } 738 EXPORT_SYMBOL_GPL(cpuidle_register); 739 740 /** 741 * cpuidle_init - core initializer 742 */ 743 static int __init cpuidle_init(void) 744 { 745 if (cpuidle_disabled()) 746 return -ENODEV; 747 748 return cpuidle_add_interface(cpu_subsys.dev_root); 749 } 750 751 module_param(off, int, 0444); 752 module_param_string(governor, param_governor, CPUIDLE_NAME_LEN, 0444); 753 core_initcall(cpuidle_init); 754