/*
 * cpuidle.c - core cpuidle infrastructure
 *
 * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *               Shaohua Li <shaohua.li@intel.com>
 *               Adam Belay <abelay@novell.com>
 *
 * This code is licenced under the GPL.
 */

#include <linux/clockchips.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/tick.h>
#include <linux/mmu_context.h>
#include <trace/events/power.h>

#include "cpuidle.h"

DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
DEFINE_PER_CPU(struct cpuidle_device, cpuidle_dev);

DEFINE_MUTEX(cpuidle_lock);
LIST_HEAD(cpuidle_detected_devices);

static int enabled_devices;
static int off __read_mostly;
static int initialized __read_mostly;

int cpuidle_disabled(void)
{
	return off;
}
void disable_cpuidle(void)
{
	off = 1;
}

bool cpuidle_not_available(struct cpuidle_driver *drv,
			   struct cpuidle_device *dev)
{
	return off || !initialized || !drv || !dev || !dev->enabled;
}

/**
 * cpuidle_play_dead - cpu off-lining
 *
 * Returns in case of an error or if no driver or ->enter_dead() callback
 * is available.
 */
int cpuidle_play_dead(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int i;

	if (!drv)
		return -ENODEV;

	/* Find lowest-power state that supports long-term idle */
	for (i = drv->state_count - 1; i >= 0; i--)
		if (drv->states[i].enter_dead)
			return drv->states[i].enter_dead(dev, i);

	return -ENODEV;
}

static int find_deepest_state(struct cpuidle_driver *drv,
			      struct cpuidle_device *dev,
			      u64 max_latency_ns,
			      unsigned int forbidden_flags,
			      bool s2idle)
{
	u64 latency_req = 0;
	int i, ret = 0;

	for (i = 1; i < drv->state_count; i++) {
		struct cpuidle_state *s = &drv->states[i];

		if (dev->states_usage[i].disable ||
		    s->exit_latency_ns <= latency_req ||
		    s->exit_latency_ns > max_latency_ns ||
		    (s->flags & forbidden_flags) ||
		    (s2idle && !s->enter_s2idle))
			continue;

		latency_req = s->exit_latency_ns;
		ret = i;
	}
	return ret;
}

/**
 * cpuidle_use_deepest_state - Set/unset governor override mode.
 * @latency_limit_ns: Idle state exit latency limit (or no override if 0).
 *
 * If @latency_limit_ns is nonzero, set the current CPU to use the deepest idle
 * state with exit latency within @latency_limit_ns (override governors going
 * forward), or do not override governors if it is zero.
 */
void cpuidle_use_deepest_state(u64 latency_limit_ns)
{
	struct cpuidle_device *dev;

	preempt_disable();
	dev = cpuidle_get_device();
	if (dev)
		dev->forced_idle_latency_limit_ns = latency_limit_ns;
	preempt_enable();
}

/**
 * cpuidle_find_deepest_state - Find the deepest available idle state.
 * @drv: cpuidle driver for the given CPU.
 * @dev: cpuidle device for the given CPU.
 * @latency_limit_ns: Idle state exit latency limit
 *
 * Return: the index of the deepest available idle state.
 */
int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
			       struct cpuidle_device *dev,
			       u64 latency_limit_ns)
{
	return find_deepest_state(drv, dev, latency_limit_ns, 0, false);
}
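
/*
 * Illustrative example, not part of the original file: a caller that wants
 * the deepest state within a latency budget (the 500 us value below is an
 * arbitrary placeholder) could do roughly:
 *
 *	struct cpuidle_device *dev = cpuidle_get_device();
 *	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 *	int idx = cpuidle_find_deepest_state(drv, dev, 500 * NSEC_PER_USEC);
 *
 * find_deepest_state() falls back to index 0 when no deeper state satisfies
 * the constraints, so the returned index is always usable.
 */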

#ifdef CONFIG_SUSPEND
static void enter_s2idle_proper(struct cpuidle_driver *drv,
				struct cpuidle_device *dev, int index)
{
	ktime_t time_start, time_end;

	time_start = ns_to_ktime(local_clock());

	/*
	 * trace_suspend_resume() called by tick_freeze() for the last CPU
	 * executing it contains RCU usage regarded as invalid in the idle
	 * context, so tell RCU about that.
	 */
	tick_freeze();
	/*
	 * The state used here cannot be a "coupled" one, because the "coupled"
	 * cpuidle mechanism enables interrupts and doing that with timekeeping
	 * suspended is generally unsafe.
	 */
	stop_critical_timings();
	rcu_idle_enter();
	drv->states[index].enter_s2idle(dev, drv, index);
	if (WARN_ON_ONCE(!irqs_disabled()))
		local_irq_disable();
	/*
	 * timekeeping_resume() that will be called by tick_unfreeze() for the
	 * first CPU executing it calls functions containing RCU read-side
	 * critical sections, so tell RCU about that.
	 */
	rcu_idle_exit();
	tick_unfreeze();
	start_critical_timings();

	time_end = ns_to_ktime(local_clock());

	dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start);
	dev->states_usage[index].s2idle_usage++;
}

/**
 * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle.
 * @drv: cpuidle driver for the given CPU.
 * @dev: cpuidle device for the given CPU.
 *
 * If there are states with the ->enter_s2idle callback, find the deepest of
 * them and enter it with frozen tick.
 */
int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
	int index;

	/*
	 * Find the deepest state with ->enter_s2idle present, which guarantees
	 * that interrupts won't be enabled when it exits and allows the tick to
	 * be frozen safely.
	 */
	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
	if (index > 0) {
		enter_s2idle_proper(drv, dev, index);
		local_irq_enable();
	}
	return index;
}
#endif /* CONFIG_SUSPEND */
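
/*
 * Illustrative note, not from the original file: the idle loop caller is
 * expected to treat a non-positive return value from cpuidle_enter_s2idle()
 * as "no suspend-to-idle state was entered" and fall back to the regular
 * path, roughly:
 *
 *	if (cpuidle_enter_s2idle(drv, dev) <= 0)
 *		index = cpuidle_find_deepest_state(drv, dev, U64_MAX);
 */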

/**
 * cpuidle_enter_state - enter the state and update stats
 * @dev: cpuidle device for this cpu
 * @drv: cpuidle driver for this cpu
 * @index: index into the states table in @drv of the state to enter
 */
int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
			int index)
{
	int entered_state;

	struct cpuidle_state *target_state = &drv->states[index];
	bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
	ktime_t time_start, time_end;

	/*
	 * Tell the time framework to switch to a broadcast timer because our
	 * local timer will be shut down.  If a local timer is used from another
	 * CPU as a broadcast timer, this call may fail if it is not available.
	 */
	if (broadcast && tick_broadcast_enter()) {
		index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
					   CPUIDLE_FLAG_TIMER_STOP, false);
		if (index < 0) {
			default_idle_call();
			return -EBUSY;
		}
		target_state = &drv->states[index];
		broadcast = false;
	}

	if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
		leave_mm(dev->cpu);

	/* Take note of the planned idle state. */
	sched_idle_set_state(target_state);

	trace_cpu_idle(index, dev->cpu);
	time_start = ns_to_ktime(local_clock());

	stop_critical_timings();
	rcu_idle_enter();
	entered_state = target_state->enter(dev, drv, index);
	rcu_idle_exit();
	start_critical_timings();

	sched_clock_idle_wakeup_event();
	time_end = ns_to_ktime(local_clock());
	trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);

	/* The cpu is no longer idle or about to enter idle. */
	sched_idle_set_state(NULL);

	if (broadcast) {
		if (WARN_ON_ONCE(!irqs_disabled()))
			local_irq_disable();

		tick_broadcast_exit();
	}

	if (!cpuidle_state_is_coupled(drv, index))
		local_irq_enable();

	if (entered_state >= 0) {
		s64 diff, delay = drv->states[entered_state].exit_latency_ns;
		int i;

		/*
		 * Update cpuidle counters
		 * This can be moved to within driver enter routine,
		 * but that results in multiple copies of same code.
		 */
		diff = ktime_sub(time_end, time_start);

		dev->last_residency_ns = diff;
		dev->states_usage[entered_state].time_ns += diff;
		dev->states_usage[entered_state].usage++;

		if (diff < drv->states[entered_state].target_residency_ns) {
			for (i = entered_state - 1; i >= 0; i--) {
				if (dev->states_usage[i].disable)
					continue;

				/* Shallower states are enabled, so update. */
				dev->states_usage[entered_state].above++;
				break;
			}
		} else if (diff > delay) {
			for (i = entered_state + 1; i < drv->state_count; i++) {
				if (dev->states_usage[i].disable)
					continue;

				/*
				 * Update if a deeper state would have been a
				 * better match for the observed idle duration.
				 */
				if (diff - delay >= drv->states[i].target_residency_ns)
					dev->states_usage[entered_state].below++;

				break;
			}
		}
	} else {
		dev->last_residency_ns = 0;
	}

	return entered_state;
}
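
/*
 * Illustrative worked example, not part of the original file: suppose the
 * entered state has target_residency_ns = 100 us and exit_latency_ns = 20 us
 * (made-up numbers).  If the measured residency is 30 us and some shallower
 * state is enabled, the state was deeper than needed and "above" is
 * incremented.  If the residency is 500 us and the next enabled deeper state
 * has a target residency no larger than 500 us - 20 us, that deeper state
 * would have been the better fit and "below" is incremented.
 */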

/**
 * cpuidle_select - ask the cpuidle framework to choose an idle state
 *
 * @drv: the cpuidle driver
 * @dev: the cpuidle device
 * @stop_tick: indication on whether or not to stop the tick
 *
 * Returns the index of the idle state.  The return value must not be negative.
 *
 * The memory location pointed to by @stop_tick is expected to be written the
 * 'false' boolean value if the scheduler tick should not be stopped before
 * entering the returned state.
 */
int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
		   bool *stop_tick)
{
	return cpuidle_curr_governor->select(drv, dev, stop_tick);
}

/**
 * cpuidle_enter - enter into the specified idle state
 *
 * @drv:   the cpuidle driver tied with the cpu
 * @dev:   the cpuidle device
 * @index: the index in the idle state table
 *
 * Returns the index in the idle state, < 0 in case of error.
 * The error code depends on the backend driver
 */
int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
		  int index)
{
	int ret = 0;

	/*
	 * Store the next hrtimer, which becomes either next tick or the next
	 * timer event, whatever expires first.  Additionally, to make this data
	 * useful for consumers outside cpuidle, we rely on the governor's
	 * ->select() callback having already decided whether or not to stop
	 * the tick.
	 */
	WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer());

	if (cpuidle_state_is_coupled(drv, index))
		ret = cpuidle_enter_state_coupled(dev, drv, index);
	else
		ret = cpuidle_enter_state(dev, drv, index);

	WRITE_ONCE(dev->next_hrtimer, 0);
	return ret;
}

/**
 * cpuidle_reflect - tell the underlying governor what was the state
 * we were in
 *
 * @dev  : the cpuidle device
 * @index: the index in the idle state table
 *
 */
void cpuidle_reflect(struct cpuidle_device *dev, int index)
{
	if (cpuidle_curr_governor->reflect && index >= 0)
		cpuidle_curr_governor->reflect(dev, index);
}

/**
 * cpuidle_poll_time - return amount of time to poll for,
 * governors can override dev->poll_limit_ns if necessary
 *
 * @drv:   the cpuidle driver tied with the cpu
 * @dev:   the cpuidle device
 *
 */
u64 cpuidle_poll_time(struct cpuidle_driver *drv,
		      struct cpuidle_device *dev)
{
	int i;
	u64 limit_ns;

	if (dev->poll_limit_ns)
		return dev->poll_limit_ns;

	limit_ns = TICK_NSEC;
	for (i = 1; i < drv->state_count; i++) {
		if (dev->states_usage[i].disable)
			continue;

		limit_ns = drv->states[i].target_residency_ns;
		break;
	}

	dev->poll_limit_ns = limit_ns;

	return dev->poll_limit_ns;
}
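
/*
 * Illustrative sketch, not from the original file: the idle loop is expected
 * to drive the three calls above in sequence, roughly:
 *
 *	bool stop_tick = true;
 *	int index = cpuidle_select(drv, dev, &stop_tick);
 *	int entered = cpuidle_enter(drv, dev, index);
 *	cpuidle_reflect(dev, entered);
 *
 * with the tick stopped between select and enter when stop_tick remained true.
 */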

/**
 * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
 */
void cpuidle_install_idle_handler(void)
{
	if (enabled_devices) {
		/* Make sure all changes finished before we switch to new idle */
		smp_wmb();
		initialized = 1;
	}
}

/**
 * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
 */
void cpuidle_uninstall_idle_handler(void)
{
	if (enabled_devices) {
		initialized = 0;
		wake_up_all_idle_cpus();
	}

	/*
	 * Make sure external observers (such as the scheduler)
	 * are done looking at pointed idle states.
	 */
	synchronize_rcu();
}

/**
 * cpuidle_pause_and_lock - temporarily disables CPUIDLE
 */
void cpuidle_pause_and_lock(void)
{
	mutex_lock(&cpuidle_lock);
	cpuidle_uninstall_idle_handler();
}

EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);

/**
 * cpuidle_resume_and_unlock - resumes CPUIDLE operation
 */
void cpuidle_resume_and_unlock(void)
{
	cpuidle_install_idle_handler();
	mutex_unlock(&cpuidle_lock);
}

EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);

/* Currently used in suspend/resume path to suspend cpuidle */
void cpuidle_pause(void)
{
	mutex_lock(&cpuidle_lock);
	cpuidle_uninstall_idle_handler();
	mutex_unlock(&cpuidle_lock);
}

/* Currently used in suspend/resume path to resume cpuidle */
void cpuidle_resume(void)
{
	mutex_lock(&cpuidle_lock);
	cpuidle_install_idle_handler();
	mutex_unlock(&cpuidle_lock);
}

/**
 * cpuidle_enable_device - enables idle PM for a CPU
 * @dev: the CPU
 *
 * This function must be called between cpuidle_pause_and_lock and
 * cpuidle_resume_and_unlock when used externally.
 */
int cpuidle_enable_device(struct cpuidle_device *dev)
{
	int ret;
	struct cpuidle_driver *drv;

	if (!dev)
		return -EINVAL;

	if (dev->enabled)
		return 0;

	if (!cpuidle_curr_governor)
		return -EIO;

	drv = cpuidle_get_cpu_driver(dev);

	if (!drv)
		return -EIO;

	if (!dev->registered)
		return -EINVAL;

	ret = cpuidle_add_device_sysfs(dev);
	if (ret)
		return ret;

	if (cpuidle_curr_governor->enable) {
		ret = cpuidle_curr_governor->enable(drv, dev);
		if (ret)
			goto fail_sysfs;
	}

	smp_wmb();

	dev->enabled = 1;

	enabled_devices++;
	return 0;

fail_sysfs:
	cpuidle_remove_device_sysfs(dev);

	return ret;
}

EXPORT_SYMBOL_GPL(cpuidle_enable_device);

/**
 * cpuidle_disable_device - disables idle PM for a CPU
 * @dev: the CPU
 *
 * This function must be called between cpuidle_pause_and_lock and
 * cpuidle_resume_and_unlock when used externally.
 */
void cpuidle_disable_device(struct cpuidle_device *dev)
{
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);

	if (!dev || !dev->enabled)
		return;

	if (!drv || !cpuidle_curr_governor)
		return;

	dev->enabled = 0;

	if (cpuidle_curr_governor->disable)
		cpuidle_curr_governor->disable(drv, dev);

	cpuidle_remove_device_sysfs(dev);
	enabled_devices--;
}

EXPORT_SYMBOL_GPL(cpuidle_disable_device);
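
/*
 * Illustrative sketch, not part of the original file: as the kerneldoc above
 * says, an external caller that reconfigures a device brackets the calls
 * with the pause/resume helpers, roughly:
 *
 *	cpuidle_pause_and_lock();
 *	cpuidle_disable_device(dev);
 *	... reconfigure the idle states ...
 *	cpuidle_enable_device(dev);
 *	cpuidle_resume_and_unlock();
 */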

static void __cpuidle_unregister_device(struct cpuidle_device *dev)
{
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);

	list_del(&dev->device_list);
	per_cpu(cpuidle_devices, dev->cpu) = NULL;
	module_put(drv->owner);

	dev->registered = 0;
}

static void __cpuidle_device_init(struct cpuidle_device *dev)
{
	memset(dev->states_usage, 0, sizeof(dev->states_usage));
	dev->last_residency_ns = 0;
	dev->next_hrtimer = 0;
}

/**
 * __cpuidle_register_device - internal register function called before register
 * and enable routines
 * @dev: the cpu
 *
 * cpuidle_lock mutex must be held before this is called
 */
static int __cpuidle_register_device(struct cpuidle_device *dev)
{
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int i, ret;

	if (!try_module_get(drv->owner))
		return -EINVAL;

	for (i = 0; i < drv->state_count; i++) {
		if (drv->states[i].flags & CPUIDLE_FLAG_UNUSABLE)
			dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER;

		if (drv->states[i].flags & CPUIDLE_FLAG_OFF)
			dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_USER;
	}

	per_cpu(cpuidle_devices, dev->cpu) = dev;
	list_add(&dev->device_list, &cpuidle_detected_devices);

	ret = cpuidle_coupled_register_device(dev);
	if (ret)
		__cpuidle_unregister_device(dev);
	else
		dev->registered = 1;

	return ret;
}

/**
 * cpuidle_register_device - registers a CPU's idle PM feature
 * @dev: the cpu
 */
int cpuidle_register_device(struct cpuidle_device *dev)
{
	int ret = -EBUSY;

	if (!dev)
		return -EINVAL;

	mutex_lock(&cpuidle_lock);

	if (dev->registered)
		goto out_unlock;

	__cpuidle_device_init(dev);

	ret = __cpuidle_register_device(dev);
	if (ret)
		goto out_unlock;

	ret = cpuidle_add_sysfs(dev);
	if (ret)
		goto out_unregister;

	ret = cpuidle_enable_device(dev);
	if (ret)
		goto out_sysfs;

	cpuidle_install_idle_handler();

out_unlock:
	mutex_unlock(&cpuidle_lock);

	return ret;

out_sysfs:
	cpuidle_remove_sysfs(dev);
out_unregister:
	__cpuidle_unregister_device(dev);
	goto out_unlock;
}

EXPORT_SYMBOL_GPL(cpuidle_register_device);

/**
 * cpuidle_unregister_device - unregisters a CPU's idle PM feature
 * @dev: the cpu
 */
void cpuidle_unregister_device(struct cpuidle_device *dev)
{
	if (!dev || dev->registered == 0)
		return;

	cpuidle_pause_and_lock();

	cpuidle_disable_device(dev);

	cpuidle_remove_sysfs(dev);

	__cpuidle_unregister_device(dev);

	cpuidle_coupled_unregister_device(dev);

	cpuidle_resume_and_unlock();
}

EXPORT_SYMBOL_GPL(cpuidle_unregister_device);

/**
 * cpuidle_unregister: unregister a driver and the devices. This function
 * can be used only if the driver has been previously registered through
 * the cpuidle_register function.
 *
 * @drv: a valid pointer to a struct cpuidle_driver
 */
void cpuidle_unregister(struct cpuidle_driver *drv)
{
	int cpu;
	struct cpuidle_device *device;

	for_each_cpu(cpu, drv->cpumask) {
		device = &per_cpu(cpuidle_dev, cpu);
		cpuidle_unregister_device(device);
	}

	cpuidle_unregister_driver(drv);
}
EXPORT_SYMBOL_GPL(cpuidle_unregister);

/**
 * cpuidle_register: registers the driver and the cpu devices with the
 * coupled_cpus passed as parameter. This function covers the common
 * initialization pattern shared by the arch-specific drivers. The devices
 * are globally defined in this file.
 *
 * @drv         : a valid pointer to a struct cpuidle_driver
 * @coupled_cpus: a cpumask for the coupled states
 *
 * Returns 0 on success, < 0 otherwise
 */
int cpuidle_register(struct cpuidle_driver *drv,
		     const struct cpumask *const coupled_cpus)
{
	int ret, cpu;
	struct cpuidle_device *device;

	ret = cpuidle_register_driver(drv);
	if (ret) {
		pr_err("failed to register cpuidle driver\n");
		return ret;
	}

	for_each_cpu(cpu, drv->cpumask) {
		device = &per_cpu(cpuidle_dev, cpu);
		device->cpu = cpu;

#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
		/*
		 * On multiplatform for ARM, the coupled idle states could be
		 * enabled in the kernel even if the cpuidle driver does not
		 * use it. Note, coupled_cpus is a struct copy.
		 */
		if (coupled_cpus)
			device->coupled_cpus = *coupled_cpus;
#endif
		ret = cpuidle_register_device(device);
		if (!ret)
			continue;

		pr_err("Failed to register cpuidle device for cpu%d\n", cpu);

		cpuidle_unregister(drv);
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(cpuidle_register);

/**
 * cpuidle_init - core initializer
 */
static int __init cpuidle_init(void)
{
	if (cpuidle_disabled())
		return -ENODEV;

	return cpuidle_add_interface(cpu_subsys.dev_root);
}

module_param(off, int, 0444);
module_param_string(governor, param_governor, CPUIDLE_NAME_LEN, 0444);
core_initcall(cpuidle_init);
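
/*
 * Illustrative sketch, not part of the original file: a minimal driver using
 * the cpuidle_register() helper above might look roughly like this (the state
 * values and the my_idle_enter() callback are made-up placeholders):
 *
 *	static struct cpuidle_driver my_idle_driver = {
 *		.name        = "my_idle",
 *		.owner       = THIS_MODULE,
 *		.states[0]   = {
 *			.name             = "WFI",
 *			.desc             = "Wait for interrupt",
 *			.exit_latency     = 1,
 *			.target_residency = 1,
 *			.enter            = my_idle_enter,
 *		},
 *		.state_count = 1,
 *	};
 *
 *	static int __init my_idle_init(void)
 *	{
 *		return cpuidle_register(&my_idle_driver, NULL);
 *	}
 */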