/*
 *  linux/kernel/hrtimer.c
 *
 *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
 *
 *  High-resolution kernel timers
 *
 *  In contrast to the low-resolution timeout API implemented in
 *  kernel/timer.c, hrtimers provide finer resolution and accuracy
 *  depending on system configuration and capabilities.
 *
 *  These timers are currently used for:
 *   - itimers
 *   - POSIX timers
 *   - nanosleep
 *   - precise in-kernel timing
 *
 *  Started by: Thomas Gleixner and Ingo Molnar
 *
 *  Credits:
 *	based on kernel/timer.c
 *
 *	Help, testing, suggestions, bugfixes, improvements were
 *	provided by:
 *
 *	George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
 *	et al.
 *
 *  For licencing details see kernel-base/COPYING
 */

#include <linux/cpu.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/notifier.h>
#include <linux/syscalls.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
#include <linux/tick.h>
#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>
#include <linux/freezer.h>

#include <asm/uaccess.h>

#include <trace/events/timer.h>

#include "tick-internal.h"

/*
 * The timer bases:
 *
 * There are more clockids than hrtimer bases. Thus, we index
 * into the timer bases by the hrtimer_base_type enum. When trying
 * to reach a base using a clockid, hrtimer_clockid_to_base()
 * is used to convert from clockid to the proper hrtimer_base_type.
 */
DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
{
	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
	.clock_base =
	{
		{
			.index = HRTIMER_BASE_MONOTONIC,
			.clockid = CLOCK_MONOTONIC,
			.get_time = &ktime_get,
		},
		{
			.index = HRTIMER_BASE_REALTIME,
			.clockid = CLOCK_REALTIME,
			.get_time = &ktime_get_real,
		},
		{
			.index = HRTIMER_BASE_BOOTTIME,
			.clockid = CLOCK_BOOTTIME,
			.get_time = &ktime_get_boottime,
		},
		{
			.index = HRTIMER_BASE_TAI,
			.clockid = CLOCK_TAI,
			.get_time = &ktime_get_clocktai,
		},
	}
};

static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,
	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC,
	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,
	[CLOCK_TAI]		= HRTIMER_BASE_TAI,
};

static inline int hrtimer_clockid_to_base(clockid_t clock_id)
{
	return hrtimer_clock_to_base_table[clock_id];
}

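/*
 * Illustrative sketch (not part of the original file): how a clockid is
 * resolved to a clock base and its time accessor, using the table and
 * helper defined above. The snippet itself is only an example.
 *
 *	int idx = hrtimer_clockid_to_base(CLOCK_BOOTTIME);
 *	struct hrtimer_clock_base *cb =
 *		&this_cpu_ptr(&hrtimer_bases)->clock_base[idx];
 *
 *	// cb->index == HRTIMER_BASE_BOOTTIME,
 *	// cb->get_time == &ktime_get_boottime
 *	ktime_t now = cb->get_time();
 */
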
/*
 * Functions and macros which are different for UP/SMP systems are kept in a
 * single place
 */
#ifdef CONFIG_SMP

/*
 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
 * means that all timers which are tied to this base via timer->base are
 * locked, and the base itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found on the lists/queues.
 *
 * When the timer's base is locked, and the timer removed from list, it is
 * possible to set timer->base = NULL and drop the lock: the timer remains
 * locked.
 */
static
struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
					     unsigned long *flags)
{
	struct hrtimer_clock_base *base;

	for (;;) {
		base = timer->base;
		if (likely(base != NULL)) {
			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
			if (likely(base == timer->base))
				return base;
			/* The timer has migrated to another CPU: */
			raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
		}
		cpu_relax();
	}
}

/*
 * With HIGHRES=y we do not migrate the timer when it is expiring
 * before the next event on the target cpu because we cannot reprogram
 * the target cpu hardware and we would cause it to fire late.
 *
 * Called with cpu_base->lock of target cpu held.
 */
static int
hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
{
#ifdef CONFIG_HIGH_RES_TIMERS
	ktime_t expires;

	if (!new_base->cpu_base->hres_active)
		return 0;

	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
	return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
#else
	return 0;
#endif
}

/*
 * Switch the timer base to the current CPU when possible.
 */
static inline struct hrtimer_clock_base *
switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
		    int pinned)
{
	struct hrtimer_clock_base *new_base;
	struct hrtimer_cpu_base *new_cpu_base;
	int this_cpu = smp_processor_id();
	int cpu = get_nohz_timer_target(pinned);
	int basenum = base->index;

again:
	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
	new_base = &new_cpu_base->clock_base[basenum];

	if (base != new_base) {
		/*
		 * We are trying to move timer to new_base.
		 * However we can't change timer's base while it is running,
		 * so we keep it on the same CPU. No hassle vs. reprogramming
		 * the event source in the high resolution case. The softirq
		 * code will take care of this when the timer function has
		 * completed. There is no conflict as we hold the lock until
		 * the timer is enqueued.
		 */
		if (unlikely(hrtimer_callback_running(timer)))
			return base;

		/* See the comment in lock_timer_base() */
		timer->base = NULL;
		raw_spin_unlock(&base->cpu_base->lock);
		raw_spin_lock(&new_base->cpu_base->lock);

		if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
			cpu = this_cpu;
			raw_spin_unlock(&new_base->cpu_base->lock);
			raw_spin_lock(&base->cpu_base->lock);
			timer->base = base;
			goto again;
		}
		timer->base = new_base;
	} else {
		if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
			cpu = this_cpu;
			goto again;
		}
	}
	return new_base;
}

#else /* CONFIG_SMP */

static inline struct hrtimer_clock_base *
lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
{
	struct hrtimer_clock_base *base = timer->base;

	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);

	return base;
}

# define switch_hrtimer_base(t, b, p)	(b)

#endif	/* !CONFIG_SMP */

/*
 * Functions for the union type storage format of ktime_t which are
 * too large for inlining:
 */
#if BITS_PER_LONG < 64
/*
 * Divide a ktime value by a nanosecond value
 */
u64 __ktime_divns(const ktime_t kt, s64 div)
{
	u64 dclc;
	int sft = 0;

	dclc = ktime_to_ns(kt);
	/* Make sure the divisor is less than 2^32: */
	while (div >> 32) {
		sft++;
		div >>= 1;
	}
	dclc >>= sft;
	do_div(dclc, (unsigned long) div);

	return dclc;
}
EXPORT_SYMBOL_GPL(__ktime_divns);
#endif /* BITS_PER_LONG < 64 */

/*
 * Add two ktime values and do a safety check for overflow:
 */
ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
{
	ktime_t res = ktime_add(lhs, rhs);

	/*
	 * We use KTIME_SEC_MAX here, the maximum timeout which we can
	 * return to user space in a timespec:
	 */
	if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
		res = ktime_set(KTIME_SEC_MAX, 0);

	return res;
}

EXPORT_SYMBOL_GPL(ktime_add_safe);

#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static struct debug_obj_descr hrtimer_debug_descr;

static void *hrtimer_debug_hint(void *addr)
{
	return ((struct hrtimer *) addr)->function;
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct hrtimer *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		hrtimer_cancel(timer);
		debug_object_init(timer, &hrtimer_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 */
static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
{
	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		WARN_ON_ONCE(1);
		return 0;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return 0;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct hrtimer *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		hrtimer_cancel(timer);
		debug_object_free(timer, &hrtimer_debug_descr);
		return 1;
	default:
		return 0;
	}
}

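/*
 * Illustrative sketch (not part of the original file): the overflow
 * clamping done by ktime_add_safe() above. The values are only an
 * example.
 *
 *	ktime_t a = ktime_set(KTIME_SEC_MAX - 1, 0);
 *	ktime_t b = ktime_set(10, 0);
 *	ktime_t sum = ktime_add_safe(a, b);
 *
 *	// A plain ktime_add() would wrap negative here; ktime_add_safe()
 *	// clamps the result to ktime_set(KTIME_SEC_MAX, 0) instead, so a
 *	// huge timeout saturates rather than becoming an expiry in the past.
 */
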
static struct debug_obj_descr hrtimer_debug_descr = {
	.name		= "hrtimer",
	.debug_hint	= hrtimer_debug_hint,
	.fixup_init	= hrtimer_fixup_init,
	.fixup_activate	= hrtimer_fixup_activate,
	.fixup_free	= hrtimer_fixup_free,
};

static inline void debug_hrtimer_init(struct hrtimer *timer)
{
	debug_object_init(timer, &hrtimer_debug_descr);
}

static inline void debug_hrtimer_activate(struct hrtimer *timer)
{
	debug_object_activate(timer, &hrtimer_debug_descr);
}

static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
{
	debug_object_deactivate(timer, &hrtimer_debug_descr);
}

static inline void debug_hrtimer_free(struct hrtimer *timer)
{
	debug_object_free(timer, &hrtimer_debug_descr);
}

static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
			   enum hrtimer_mode mode);

void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
			   enum hrtimer_mode mode)
{
	debug_object_init_on_stack(timer, &hrtimer_debug_descr);
	__hrtimer_init(timer, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);

void destroy_hrtimer_on_stack(struct hrtimer *timer)
{
	debug_object_free(timer, &hrtimer_debug_descr);
}

#else
static inline void debug_hrtimer_init(struct hrtimer *timer) { }
static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
#endif

static inline void
debug_init(struct hrtimer *timer, clockid_t clockid,
	   enum hrtimer_mode mode)
{
	debug_hrtimer_init(timer);
	trace_hrtimer_init(timer, clockid, mode);
}

static inline void debug_activate(struct hrtimer *timer)
{
	debug_hrtimer_activate(timer);
	trace_hrtimer_start(timer);
}

static inline void debug_deactivate(struct hrtimer *timer)
{
	debug_hrtimer_deactivate(timer);
	trace_hrtimer_cancel(timer);
}

#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
					     struct hrtimer *timer)
{
#ifdef CONFIG_HIGH_RES_TIMERS
	cpu_base->next_timer = timer;
#endif
}

static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
{
	struct hrtimer_clock_base *base = cpu_base->clock_base;
	ktime_t expires, expires_next = { .tv64 = KTIME_MAX };
	unsigned int active = cpu_base->active_bases;

	hrtimer_update_next_timer(cpu_base, NULL);
	for (; active; base++, active >>= 1) {
		struct timerqueue_node *next;
		struct hrtimer *timer;

		if (!(active & 0x01))
			continue;

		next = timerqueue_getnext(&base->active);
		timer = container_of(next, struct hrtimer, node);
		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
		if (expires.tv64 < expires_next.tv64) {
			expires_next = expires;
			hrtimer_update_next_timer(cpu_base, timer);
		}
	}
	/*
	 * clock_was_set() might have changed base->offset of any of
	 * the clock bases so the result might be negative. Fix it up
	 * to prevent a false positive in clockevents_program_event().
	 */
	if (expires_next.tv64 < 0)
		expires_next.tv64 = 0;
	return expires_next;
}
#endif

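/*
 * Illustrative note (not part of the original file): how the
 * cpu_base->active_bases bitmask walked by __hrtimer_get_next_event()
 * above encodes which clock bases hold enqueued timers, using the base
 * ordering defined at the top of this file. The value is only an example.
 *
 *	// active_bases == 0x05 == 0b0101:
 *	//   bit 0 (HRTIMER_BASE_MONOTONIC) set -> monotonic base non-empty
 *	//   bit 2 (HRTIMER_BASE_BOOTTIME)  set -> boottime base non-empty
 *	// Only those two timerqueues are inspected; the others are skipped
 *	// without touching their rbtrees.
 */
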
static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
{
	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;

	return ktime_get_update_offsets_now(&base->clock_was_set_seq,
					    offs_real, offs_boot, offs_tai);
}

/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS

/*
 * High resolution timer enabled ?
 */
static int hrtimer_hres_enabled __read_mostly  = 1;
unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
EXPORT_SYMBOL_GPL(hrtimer_resolution);

/*
 * Enable / Disable high resolution mode
 */
static int __init setup_hrtimer_hres(char *str)
{
	if (!strcmp(str, "off"))
		hrtimer_hres_enabled = 0;
	else if (!strcmp(str, "on"))
		hrtimer_hres_enabled = 1;
	else
		return 0;
	return 1;
}

__setup("highres=", setup_hrtimer_hres);

/*
 * hrtimer_is_hres_enabled - query, if the highres mode is enabled
 */
static inline int hrtimer_is_hres_enabled(void)
{
	return hrtimer_hres_enabled;
}

/*
 * Is the high resolution mode active ?
 */
static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
{
	return cpu_base->hres_active;
}

static inline int hrtimer_hres_active(void)
{
	return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
}

/*
 * Reprogram the event source with checking both queues for the
 * next event
 * Called with interrupts disabled and base->lock held
 */
static void
hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
{
	ktime_t expires_next;

	if (!cpu_base->hres_active)
		return;

	expires_next = __hrtimer_get_next_event(cpu_base);

	if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
		return;

	cpu_base->expires_next.tv64 = expires_next.tv64;

	/*
	 * If a hang was detected in the last timer interrupt then we
	 * leave the hang delay active in the hardware. We want the
	 * system to make progress. That also prevents the following
	 * scenario:
	 * T1 expires 50ms from now
	 * T2 expires 5s from now
	 *
	 * T1 is removed, so this code is called and would reprogram
	 * the hardware to 5s from now. Any hrtimer_start after that
	 * will not reprogram the hardware due to hang_detected being
	 * set. So we'd effectively block all timers until the T2 event
	 * fires.
	 */
	if (cpu_base->hang_detected)
		return;

	tick_program_event(cpu_base->expires_next, 1);
}

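/*
 * Illustrative note (not part of the original file): the "highres="
 * early parameter parsed by setup_hrtimer_hres() above. Example kernel
 * command lines:
 *
 *	highres=off	-> hrtimer_hres_enabled = 0, stay in low-res mode
 *	highres=on	-> hrtimer_hres_enabled = 1 (the default)
 *
 * Any other value is rejected: the handler returns 0, i.e. the option is
 * treated as not handled.
 */
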
/*
 * When a timer is enqueued and expires earlier than the already enqueued
 * timers, we have to check, whether it expires earlier than the timer for
 * which the clock event device was armed.
 *
 * Called with interrupts disabled and base->cpu_base.lock held
 */
static void hrtimer_reprogram(struct hrtimer *timer,
			      struct hrtimer_clock_base *base)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);

	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);

	/*
	 * If the timer is not on the current cpu, we cannot reprogram
	 * the other cpus clock event device.
	 */
	if (base->cpu_base != cpu_base)
		return;

	/*
	 * If the hrtimer interrupt is running, then it will
	 * reevaluate the clock bases and reprogram the clock event
	 * device. The callbacks are always executed in hard interrupt
	 * context so we don't need an extra check for a running
	 * callback.
	 */
	if (cpu_base->in_hrtirq)
		return;

	/*
	 * CLOCK_REALTIME timer might be requested with an absolute
	 * expiry time which is less than base->offset. Set it to 0.
	 */
	if (expires.tv64 < 0)
		expires.tv64 = 0;

	if (expires.tv64 >= cpu_base->expires_next.tv64)
		return;

	/* Update the pointer to the next expiring timer */
	cpu_base->next_timer = timer;

	/*
	 * If a hang was detected in the last timer interrupt then we
	 * do not schedule a timer which is earlier than the expiry
	 * which we enforced in the hang detection. We want the system
	 * to make progress.
	 */
	if (cpu_base->hang_detected)
		return;

	/*
	 * Program the timer hardware. We enforce the expiry for
	 * events which are already in the past.
	 */
	cpu_base->expires_next = expires;
	tick_program_event(expires, 1);
}

/*
 * Initialize the high resolution related parts of cpu_base
 */
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
{
	base->expires_next.tv64 = KTIME_MAX;
	base->hres_active = 0;
}

/*
 * Retrigger next event is called after clock was set
 *
 * Called with interrupts disabled via on_each_cpu()
 */
static void retrigger_next_event(void *arg)
{
	struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);

	if (!base->hres_active)
		return;

	raw_spin_lock(&base->lock);
	hrtimer_update_base(base);
	hrtimer_force_reprogram(base, 0);
	raw_spin_unlock(&base->lock);
}

/*
 * Switch to high resolution mode
 */
static int hrtimer_switch_to_hres(void)
{
	struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);

	if (tick_init_highres()) {
		printk(KERN_WARNING "Could not switch to high resolution "
				    "mode on CPU %d\n", base->cpu);
		return 0;
	}
	base->hres_active = 1;
	hrtimer_resolution = HIGH_RES_NSEC;

	tick_setup_sched_timer();
	/* "Retrigger" the interrupt to get things going */
	retrigger_next_event(NULL);
	return 1;
}

static void clock_was_set_work(struct work_struct *work)
{
	clock_was_set();
}

static DECLARE_WORK(hrtimer_work, clock_was_set_work);

/*
 * Called from timekeeping and resume code to reprogram the hrtimer
 * interrupt device on all cpus.
 */
void clock_was_set_delayed(void)
{
	schedule_work(&hrtimer_work);
}

#else

static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; }
static inline int hrtimer_hres_active(void) { return 0; }
static inline int hrtimer_is_hres_enabled(void) { return 0; }
static inline int hrtimer_switch_to_hres(void) { return 0; }
static inline void
hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
static inline int hrtimer_reprogram(struct hrtimer *timer,
				    struct hrtimer_clock_base *base)
{
	return 0;
}
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void retrigger_next_event(void *arg) { }

#endif /* CONFIG_HIGH_RES_TIMERS */

/*
 * Clock realtime was set
 *
 * Change the offset of the realtime clock vs. the monotonic
 * clock.
 *
 * We might have to reprogram the high resolution timer interrupt. On
 * SMP we call the architecture specific code to retrigger _all_ high
 * resolution timer interrupts. On UP we just disable interrupts and
 * call the high resolution interrupt code.
 */
void clock_was_set(void)
{
#ifdef CONFIG_HIGH_RES_TIMERS
	/* Retrigger the CPU local events everywhere */
	on_each_cpu(retrigger_next_event, NULL, 1);
#endif
	timerfd_clock_was_set();
}

/*
 * During resume we might have to reprogram the high resolution timer
 * interrupt on all online CPUs. However, all other CPUs will be
 * stopped with interrupts disabled so the clock_was_set() call
 * must be deferred.
 */
void hrtimers_resume(void)
{
	WARN_ONCE(!irqs_disabled(),
		  KERN_INFO "hrtimers_resume() called with IRQs enabled!");

	/* Retrigger on the local CPU */
	retrigger_next_event(NULL);
	/* And schedule a retrigger for all others */
	clock_was_set_delayed();
}

static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
{
#ifdef CONFIG_TIMER_STATS
	if (timer->start_site)
		return;
	timer->start_site = __builtin_return_address(0);
	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
	timer->start_pid = current->pid;
#endif
}

static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
{
#ifdef CONFIG_TIMER_STATS
	timer->start_site = NULL;
#endif
}

static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
{
#ifdef CONFIG_TIMER_STATS
	if (likely(!timer_stats_active))
		return;
	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
				 timer->function, timer->start_comm, 0);
#endif
}

/*
 * Counterpart to lock_hrtimer_base above:
 */
static inline
void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
{
	raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
}

/**
 * hrtimer_forward - forward the timer expiry
 * @timer:	hrtimer to forward
 * @now:	forward past this time
 * @interval:	the interval to forward
 *
 * Forward the timer expiry so it will expire in the future.
 * Returns the number of overruns.
 *
 * Can be safely called from the callback function of @timer. If
 * called from other contexts @timer must neither be enqueued nor
 * running the callback and the caller needs to take care of
 * serialization.
 *
 * Note: This only updates the timer expiry value and does not requeue
 * the timer.
 */
u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
{
	u64 orun = 1;
	ktime_t delta;

	delta = ktime_sub(now, hrtimer_get_expires(timer));

	if (delta.tv64 < 0)
		return 0;

	if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
		return 0;

	if (interval.tv64 < hrtimer_resolution)
		interval.tv64 = hrtimer_resolution;

	if (unlikely(delta.tv64 >= interval.tv64)) {
		s64 incr = ktime_to_ns(interval);

		orun = ktime_divns(delta, incr);
		hrtimer_add_expires_ns(timer, incr * orun);
		if (hrtimer_get_expires_tv64(timer) > now.tv64)
			return orun;
		/*
		 * This (and the ktime_add() below) is the
		 * correction for exact:
		 */
		orun++;
	}
	hrtimer_add_expires(timer, interval);

	return orun;
}
EXPORT_SYMBOL_GPL(hrtimer_forward);

/*
 * enqueue_hrtimer - internal function to (re)start a timer
 *
 * The timer is inserted in expiry order. Insertion into the
 * red black tree is O(log(n)). Must hold the base lock.
 *
 * Returns 1 when the new timer is the leftmost timer in the tree.
 */
static int enqueue_hrtimer(struct hrtimer *timer,
			   struct hrtimer_clock_base *base)
{
	debug_activate(timer);

	base->cpu_base->active_bases |= 1 << base->index;

	/*
	 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
	 * state of a possibly running callback.
	 */
	timer->state |= HRTIMER_STATE_ENQUEUED;

	return timerqueue_add(&base->active, &timer->node);
}

/*
 * __remove_hrtimer - internal function to remove a timer
 *
 * Caller must hold the base lock.
 *
 * High resolution timer mode reprograms the clock event device when the
 * timer is the one which expires next. The caller can disable this by setting
 * reprogram to zero. This is useful, when the context does a reprogramming
 * anyway (e.g. timer interrupt)
 */
static void __remove_hrtimer(struct hrtimer *timer,
			     struct hrtimer_clock_base *base,
			     unsigned long newstate, int reprogram)
{
	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
	unsigned int state = timer->state;

	timer->state = newstate;
	if (!(state & HRTIMER_STATE_ENQUEUED))
		return;

	if (!timerqueue_del(&base->active, &timer->node))
		cpu_base->active_bases &= ~(1 << base->index);

#ifdef CONFIG_HIGH_RES_TIMERS
	/*
	 * Note: If reprogram is false we do not update
	 * cpu_base->next_timer. This happens when we remove the first
	 * timer on a remote cpu. No harm as we never dereference
	 * cpu_base->next_timer. So the worst thing that can happen is
	 * a superfluous call to hrtimer_force_reprogram() on the
	 * remote cpu later on if the same timer gets enqueued again.
	 */
	if (reprogram && timer == cpu_base->next_timer)
		hrtimer_force_reprogram(cpu_base, 1);
#endif
}

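/*
 * Illustrative sketch (not part of the original file): the usual way a
 * periodic callback uses hrtimer_forward() above. The function name and
 * interval below are made up for the example.
 *
 *	static enum hrtimer_restart example_tick(struct hrtimer *t)
 *	{
 *		// Push the expiry forward in whole 10ms periods past "now",
 *		// then ask the core to requeue us.
 *		hrtimer_forward(t, t->base->get_time(), ms_to_ktime(10));
 *		return HRTIMER_RESTART;
 *	}
 *
 * hrtimer_forward_now(timer, interval) from <linux/hrtimer.h> is the
 * common shorthand for the hrtimer_forward(..., get_time(), ...) call.
 */
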
/*
 * remove hrtimer, called with base lock held
 */
static inline int
remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
{
	if (hrtimer_is_queued(timer)) {
		unsigned long state = timer->state;
		int reprogram;

		/*
		 * Remove the timer and force reprogramming when high
		 * resolution mode is active and the timer is on the current
		 * CPU. If we remove a timer on another CPU, reprogramming is
		 * skipped. The interrupt event on this CPU is fired and
		 * reprogramming happens in the interrupt handler. This is a
		 * rare case and less expensive than a smp call.
		 */
		debug_deactivate(timer);
		timer_stats_hrtimer_clear_start_info(timer);
		reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);

		if (!restart) {
			/*
			 * We must preserve the CALLBACK state flag here,
			 * otherwise we could move the timer base in
			 * switch_hrtimer_base.
			 */
			state &= HRTIMER_STATE_CALLBACK;
		}
		__remove_hrtimer(timer, base, state, reprogram);
		return 1;
	}
	return 0;
}

/**
 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
 * @timer:	the timer to be added
 * @tim:	expiry time
 * @delta_ns:	"slack" range for the timer
 * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
 *		relative (HRTIMER_MODE_REL)
 */
void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
			    unsigned long delta_ns, const enum hrtimer_mode mode)
{
	struct hrtimer_clock_base *base, *new_base;
	unsigned long flags;
	int leftmost;

	base = lock_hrtimer_base(timer, &flags);

	/* Remove an active timer from the queue: */
	remove_hrtimer(timer, base, true);

	if (mode & HRTIMER_MODE_REL) {
		tim = ktime_add_safe(tim, base->get_time());
		/*
		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
		 * to signal that they simply return xtime in
		 * do_gettimeoffset(). In this case we want to round up by
		 * resolution when starting a relative timer, to avoid short
		 * timeouts. This will go away with the GTOD framework.
		 */
#ifdef CONFIG_TIME_LOW_RES
		tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
#endif
	}

	hrtimer_set_expires_range_ns(timer, tim, delta_ns);

	/* Switch the timer base, if necessary: */
	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);

	timer_stats_hrtimer_set_start_info(timer);

	leftmost = enqueue_hrtimer(timer, new_base);
	if (!leftmost)
		goto unlock;

	if (!hrtimer_is_hres_active(timer)) {
		/*
		 * Kick to reschedule the next tick to handle the new timer
		 * on dynticks target.
		 */
		wake_up_nohz_cpu(new_base->cpu_base->cpu);
	} else {
		hrtimer_reprogram(timer, new_base);
	}
unlock:
	unlock_hrtimer_base(timer, &flags);
}
EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);

/**
 * hrtimer_try_to_cancel - try to deactivate a timer
 * @timer:	hrtimer to stop
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 * -1 when the timer is currently executing the callback function and
 *    cannot be stopped
 */
int hrtimer_try_to_cancel(struct hrtimer *timer)
{
	struct hrtimer_clock_base *base;
	unsigned long flags;
	int ret = -1;

	/*
	 * Check lockless first. If the timer is not active (neither
	 * enqueued nor running the callback), nothing to do here. The
	 * base lock does not serialize against a concurrent enqueue,
	 * so we can avoid taking it.
	 */
	if (!hrtimer_active(timer))
		return 0;

	base = lock_hrtimer_base(timer, &flags);

	if (!hrtimer_callback_running(timer))
		ret = remove_hrtimer(timer, base, false);

	unlock_hrtimer_base(timer, &flags);

	return ret;

}
EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);

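/*
 * Illustrative sketch (not part of the original file): a minimal user of
 * the start/cancel API implemented above. All names here are made up;
 * only the hrtimer_*() calls are real.
 *
 *	static struct hrtimer example_timer;
 *
 *	static enum hrtimer_restart example_fn(struct hrtimer *t)
 *	{
 *		return HRTIMER_NORESTART;	// one-shot
 *	}
 *
 *	static void example_setup(void)
 *	{
 *		hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *		example_timer.function = example_fn;
 *		// Expire 100us from now, with 10us of slack for wakeup batching:
 *		hrtimer_start_range_ns(&example_timer,
 *				       ns_to_ktime(100 * NSEC_PER_USEC),
 *				       10 * NSEC_PER_USEC, HRTIMER_MODE_REL);
 *	}
 *
 *	static void example_teardown(void)
 *	{
 *		// Waits for a running callback; must not be called while
 *		// holding a lock the callback itself takes.
 *		hrtimer_cancel(&example_timer);
 *	}
 */
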
/**
 * hrtimer_cancel - cancel a timer and wait for the handler to finish.
 * @timer:	the timer to be cancelled
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 */
int hrtimer_cancel(struct hrtimer *timer)
{
	for (;;) {
		int ret = hrtimer_try_to_cancel(timer);

		if (ret >= 0)
			return ret;
		cpu_relax();
	}
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);

/**
 * hrtimer_get_remaining - get remaining time for the timer
 * @timer:	the timer to read
 */
ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
{
	unsigned long flags;
	ktime_t rem;

	lock_hrtimer_base(timer, &flags);
	rem = hrtimer_expires_remaining(timer);
	unlock_hrtimer_base(timer, &flags);

	return rem;
}
EXPORT_SYMBOL_GPL(hrtimer_get_remaining);

#ifdef CONFIG_NO_HZ_COMMON
/**
 * hrtimer_get_next_event - get the time until next expiry event
 *
 * Returns the next expiry time or KTIME_MAX if no timer is pending.
 */
u64 hrtimer_get_next_event(void)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	u64 expires = KTIME_MAX;
	unsigned long flags;

	raw_spin_lock_irqsave(&cpu_base->lock, flags);

	if (!__hrtimer_hres_active(cpu_base))
		expires = __hrtimer_get_next_event(cpu_base).tv64;

	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);

	return expires;
}
#endif

static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
			   enum hrtimer_mode mode)
{
	struct hrtimer_cpu_base *cpu_base;
	int base;

	memset(timer, 0, sizeof(struct hrtimer));

	cpu_base = raw_cpu_ptr(&hrtimer_bases);

	if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
		clock_id = CLOCK_MONOTONIC;

	base = hrtimer_clockid_to_base(clock_id);
	timer->base = &cpu_base->clock_base[base];
	timerqueue_init(&timer->node);

#ifdef CONFIG_TIMER_STATS
	timer->start_site = NULL;
	timer->start_pid = -1;
	memset(timer->start_comm, 0, TASK_COMM_LEN);
#endif
}

/**
 * hrtimer_init - initialize a timer to the given clock
 * @timer:	the timer to be initialized
 * @clock_id:	the clock to be used
 * @mode:	timer mode abs/rel
 */
void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
		  enum hrtimer_mode mode)
{
	debug_init(timer, clock_id, mode);
	__hrtimer_init(timer, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_init);

static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
			  struct hrtimer_clock_base *base,
			  struct hrtimer *timer, ktime_t *now)
{
	enum hrtimer_restart (*fn)(struct hrtimer *);
	int restart;

	WARN_ON(!irqs_disabled());

	debug_deactivate(timer);
	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
	timer_stats_account_hrtimer(timer);
	fn = timer->function;

	/*
	 * Because we run timers from hardirq context, there is no chance
	 * they get migrated to another cpu, therefore it's safe to unlock
	 * the timer base.
	 */
	raw_spin_unlock(&cpu_base->lock);
	trace_hrtimer_expire_entry(timer, now);
	restart = fn(timer);
	trace_hrtimer_expire_exit(timer);
	raw_spin_lock(&cpu_base->lock);

	/*
	 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
	 * we do not reprogram the event hardware. Happens either in
	 * hrtimer_start_range_ns() or in hrtimer_interrupt()
	 *
	 * Note: Because we dropped the cpu_base->lock above,
	 * hrtimer_start_range_ns() can have popped in and enqueued the timer
	 * for us already.
	 */
	if (restart != HRTIMER_NORESTART &&
	    !(timer->state & HRTIMER_STATE_ENQUEUED))
		enqueue_hrtimer(timer, base);

	WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));

	timer->state &= ~HRTIMER_STATE_CALLBACK;
}

static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
{
	struct hrtimer_clock_base *base = cpu_base->clock_base;
	unsigned int active = cpu_base->active_bases;

	for (; active; base++, active >>= 1) {
		struct timerqueue_node *node;
		ktime_t basenow;

		if (!(active & 0x01))
			continue;

		basenow = ktime_add(now, base->offset);

		while ((node = timerqueue_getnext(&base->active))) {
			struct hrtimer *timer;

			timer = container_of(node, struct hrtimer, node);

			/*
			 * The immediate goal for using the softexpires is
			 * minimizing wakeups, not running timers at the
			 * earliest interrupt after their soft expiration.
			 * This allows us to avoid using a Priority Search
			 * Tree, which can answer a stabbing query for
			 * overlapping intervals and instead use the simple
			 * BST we already have.
			 * We don't add extra wakeups by delaying timers that
			 * are right-of a not yet expired timer, because that
			 * timer will have to trigger a wakeup anyway.
			 */
			if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
				break;

			__run_hrtimer(cpu_base, base, timer, &basenow);
		}
	}
}

#ifdef CONFIG_HIGH_RES_TIMERS

/*
 * High resolution timer interrupt
 * Called with interrupts disabled
 */
void hrtimer_interrupt(struct clock_event_device *dev)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	ktime_t expires_next, now, entry_time, delta;
	int retries = 0;

	BUG_ON(!cpu_base->hres_active);
	cpu_base->nr_events++;
	dev->next_event.tv64 = KTIME_MAX;

	raw_spin_lock(&cpu_base->lock);
	entry_time = now = hrtimer_update_base(cpu_base);
retry:
	cpu_base->in_hrtirq = 1;
	/*
	 * We set expires_next to KTIME_MAX here with cpu_base->lock
	 * held to prevent that a timer is enqueued in our queue via
	 * the migration code. This does not affect enqueueing of
	 * timers which run their callback and need to be requeued on
	 * this CPU.
	 */
	cpu_base->expires_next.tv64 = KTIME_MAX;

	__hrtimer_run_queues(cpu_base, now);

	/* Reevaluate the clock bases for the next expiry */
	expires_next = __hrtimer_get_next_event(cpu_base);
	/*
	 * Store the new expiry value so the migration code can verify
	 * against it.
	 */
	cpu_base->expires_next = expires_next;
	cpu_base->in_hrtirq = 0;
	raw_spin_unlock(&cpu_base->lock);

	/* Reprogramming necessary ? */
	if (!tick_program_event(expires_next, 0)) {
		cpu_base->hang_detected = 0;
		return;
	}

	/*
	 * The next timer was already expired due to:
	 * - tracing
	 * - long lasting callbacks
	 * - being scheduled away when running in a VM
	 *
	 * We need to prevent that we loop forever in the hrtimer
	 * interrupt routine. We give it 3 attempts to avoid
	 * overreacting on some spurious event.
	 *
	 * Acquire base lock for updating the offsets and retrieving
	 * the current time.
	 */
	raw_spin_lock(&cpu_base->lock);
	now = hrtimer_update_base(cpu_base);
	cpu_base->nr_retries++;
	if (++retries < 3)
		goto retry;
	/*
	 * Give the system a chance to do something else than looping
	 * here. We stored the entry time, so we know exactly how long
	 * we spent here. We schedule the next event this amount of
	 * time away.
	 */
	cpu_base->nr_hangs++;
	cpu_base->hang_detected = 1;
	raw_spin_unlock(&cpu_base->lock);
	delta = ktime_sub(now, entry_time);
	if ((unsigned int)delta.tv64 > cpu_base->max_hang_time)
		cpu_base->max_hang_time = (unsigned int) delta.tv64;
	/*
	 * Limit it to a sensible value as we enforce a longer
	 * delay. Give the CPU at least 100ms to catch up.
	 */
	if (delta.tv64 > 100 * NSEC_PER_MSEC)
		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
	else
		expires_next = ktime_add(now, delta);
	tick_program_event(expires_next, 1);
	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
		    ktime_to_ns(delta));
}

/*
 * local version of hrtimer_peek_ahead_timers() called with interrupts
 * disabled.
 */
static inline void __hrtimer_peek_ahead_timers(void)
{
	struct tick_device *td;

	if (!hrtimer_hres_active())
		return;

	td = this_cpu_ptr(&tick_cpu_device);
	if (td && td->evtdev)
		hrtimer_interrupt(td->evtdev);
}

#else /* CONFIG_HIGH_RES_TIMERS */

static inline void __hrtimer_peek_ahead_timers(void) { }

#endif /* !CONFIG_HIGH_RES_TIMERS */

/*
 * Called from run_local_timers in hardirq context every jiffy
 */
void hrtimer_run_queues(void)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	ktime_t now;

	if (__hrtimer_hres_active(cpu_base))
		return;

	/*
	 * This _is_ ugly: We have to check periodically, whether we
	 * can switch to highres and / or nohz mode. The clocksource
	 * switch happens with xtime_lock held. Notification from
	 * there only sets the check bit in the tick_oneshot code,
	 * otherwise we might deadlock vs. xtime_lock.
	 */
	if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) {
		hrtimer_switch_to_hres();
		return;
	}

	raw_spin_lock(&cpu_base->lock);
	now = hrtimer_update_base(cpu_base);
	__hrtimer_run_queues(cpu_base, now);
	raw_spin_unlock(&cpu_base->lock);
}

/*
 * Sleep related functions:
 */
static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
{
	struct hrtimer_sleeper *t =
		container_of(timer, struct hrtimer_sleeper, timer);
	struct task_struct *task = t->task;

	t->task = NULL;
	if (task)
		wake_up_process(task);

	return HRTIMER_NORESTART;
}

void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
{
	sl->timer.function = hrtimer_wakeup;
	sl->task = task;
}
EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);

static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
{
	hrtimer_init_sleeper(t, current);

	do {
		set_current_state(TASK_INTERRUPTIBLE);
		hrtimer_start_expires(&t->timer, mode);

		if (likely(t->task))
			freezable_schedule();

		hrtimer_cancel(&t->timer);
		mode = HRTIMER_MODE_ABS;

	} while (t->task && !signal_pending(current));

	__set_current_state(TASK_RUNNING);

	return t->task == NULL;
}

static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
{
	struct timespec rmt;
	ktime_t rem;

	rem = hrtimer_expires_remaining(timer);
	if (rem.tv64 <= 0)
		return 0;
	rmt = ktime_to_timespec(rem);

	if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
		return -EFAULT;

	return 1;
}

long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
{
	struct hrtimer_sleeper t;
	struct timespec __user *rmtp;
	int ret = 0;

	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
			      HRTIMER_MODE_ABS);
	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);

	if (do_nanosleep(&t, HRTIMER_MODE_ABS))
		goto out;

	rmtp = restart->nanosleep.rmtp;
	if (rmtp) {
		ret = update_rmtp(&t.timer, rmtp);
		if (ret <= 0)
			goto out;
	}

	/* The other values in restart are already filled in */
	ret = -ERESTART_RESTARTBLOCK;
out:
	destroy_hrtimer_on_stack(&t.timer);
	return ret;
}

long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
		       const enum hrtimer_mode mode, const clockid_t clockid)
{
	struct restart_block *restart;
	struct hrtimer_sleeper t;
	int ret = 0;
	unsigned long slack;

	slack = current->timer_slack_ns;
	if (dl_task(current) || rt_task(current))
		slack = 0;

	hrtimer_init_on_stack(&t.timer, clockid, mode);
	hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
	if (do_nanosleep(&t, mode))
		goto out;

	/* Absolute timers do not update the rmtp value and restart: */
	if (mode == HRTIMER_MODE_ABS) {
		ret = -ERESTARTNOHAND;
		goto out;
	}

	if (rmtp) {
		ret = update_rmtp(&t.timer, rmtp);
		if (ret <= 0)
			goto out;
	}

	restart = &current->restart_block;
	restart->fn = hrtimer_nanosleep_restart;
	restart->nanosleep.clockid = t.timer.base->clockid;
	restart->nanosleep.rmtp = rmtp;
	restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);

	ret = -ERESTART_RESTARTBLOCK;
out:
	destroy_hrtimer_on_stack(&t.timer);
	return ret;
}

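/*
 * Illustrative sketch (not part of the original file): what the
 * nanosleep(2) entry point below ends up servicing. Plain userspace C,
 * shown only to make the rqtp/rmtp roles concrete.
 *
 *	struct timespec req = { .tv_sec = 0, .tv_nsec = 500 * 1000 * 1000 };
 *	struct timespec rem;
 *
 *	if (nanosleep(&req, &rem) == -1 && errno == EINTR) {
 *		// A signal interrupted the sleep; "rem" now holds the time
 *		// that was still left, written back via update_rmtp().
 *	}
 */
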
SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
		struct timespec __user *, rmtp)
{
	struct timespec tu;

	if (copy_from_user(&tu, rqtp, sizeof(tu)))
		return -EFAULT;

	if (!timespec_valid(&tu))
		return -EINVAL;

	return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
}

/*
 * Functions related to boot-time initialization:
 */
static void init_hrtimers_cpu(int cpu)
{
	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
	int i;

	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
		cpu_base->clock_base[i].cpu_base = cpu_base;
		timerqueue_init_head(&cpu_base->clock_base[i].active);
	}

	cpu_base->cpu = cpu;
	hrtimer_init_hres(cpu_base);
}

#ifdef CONFIG_HOTPLUG_CPU

static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
				 struct hrtimer_clock_base *new_base)
{
	struct hrtimer *timer;
	struct timerqueue_node *node;

	while ((node = timerqueue_getnext(&old_base->active))) {
		timer = container_of(node, struct hrtimer, node);
		BUG_ON(hrtimer_callback_running(timer));
		debug_deactivate(timer);

		/*
		 * Mark it as ENQUEUED not INACTIVE otherwise the
		 * timer could be seen as !active and just vanish away
		 * under us on another CPU
		 */
		__remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
		timer->base = new_base;
		/*
		 * Enqueue the timers on the new cpu. This does not
		 * reprogram the event device in case the timer
		 * expires before the earliest on this CPU, but we run
		 * hrtimer_interrupt after we migrated everything to
		 * sort out already expired timers and reprogram the
		 * event device.
		 */
		enqueue_hrtimer(timer, new_base);
	}
}

static void migrate_hrtimers(int scpu)
{
	struct hrtimer_cpu_base *old_base, *new_base;
	int i;

	BUG_ON(cpu_online(scpu));
	tick_cancel_sched_timer(scpu);

	local_irq_disable();
	old_base = &per_cpu(hrtimer_bases, scpu);
	new_base = this_cpu_ptr(&hrtimer_bases);
	/*
	 * The caller is globally serialized and nobody else
	 * takes two locks at once, so deadlock is not possible.
	 */
	raw_spin_lock(&new_base->lock);
	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
		migrate_hrtimer_list(&old_base->clock_base[i],
				     &new_base->clock_base[i]);
	}

	raw_spin_unlock(&old_base->lock);
	raw_spin_unlock(&new_base->lock);

	/* Check, if we got expired work to do */
	__hrtimer_peek_ahead_timers();
	local_irq_enable();
}

#endif /* CONFIG_HOTPLUG_CPU */

static int hrtimer_cpu_notify(struct notifier_block *self,
			      unsigned long action, void *hcpu)
{
	int scpu = (long)hcpu;

	switch (action) {

	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		init_hrtimers_cpu(scpu);
		break;

#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		migrate_hrtimers(scpu);
		break;
#endif

	default:
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block hrtimers_nb = {
	.notifier_call = hrtimer_cpu_notify,
};

void __init hrtimers_init(void)
{
	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
			   (void *)(long)smp_processor_id());
	register_cpu_notifier(&hrtimers_nb);
}

/**
 * schedule_hrtimeout_range_clock - sleep until timeout
 * @expires:	timeout value (ktime_t)
 * @delta:	slack in expires timeout (ktime_t)
 * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
 * @clock:	timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
 */
int __sched
schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
			       const enum hrtimer_mode mode, int clock)
{
	struct hrtimer_sleeper t;

	/*
	 * Optimize when a zero timeout value is given. It does not
	 * matter whether this is an absolute or a relative time.
	 */
	if (expires && !expires->tv64) {
		__set_current_state(TASK_RUNNING);
		return 0;
	}

	/*
	 * A NULL parameter means "infinite"
	 */
	if (!expires) {
		schedule();
		return -EINTR;
	}

	hrtimer_init_on_stack(&t.timer, clock, mode);
	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);

	hrtimer_init_sleeper(&t, current);

	hrtimer_start_expires(&t.timer, mode);

	if (likely(t.task))
		schedule();

	hrtimer_cancel(&t.timer);
	destroy_hrtimer_on_stack(&t.timer);

	__set_current_state(TASK_RUNNING);

	return !t.task ? 0 : -EINTR;
}

/**
 * schedule_hrtimeout_range - sleep until timeout
 * @expires:	timeout value (ktime_t)
 * @delta:	slack in expires timeout (ktime_t)
 * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
 *
 * Make the current task sleep until the given expiry time has
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * The @delta argument gives the kernel the freedom to schedule the
 * actual wakeup to a time that is both power and performance friendly.
 * The kernel gives the normal best effort behavior for "@expires+@delta",
 * but may decide to fire the timer earlier, but no earlier than @expires.
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
 * pass before the routine returns.
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task.
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Returns 0 when the timer has expired otherwise -EINTR
 */
int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
				     const enum hrtimer_mode mode)
{
	return schedule_hrtimeout_range_clock(expires, delta, mode,
					      CLOCK_MONOTONIC);
}
EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);

/**
 * schedule_hrtimeout - sleep until timeout
 * @expires:	timeout value (ktime_t)
 * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
 *
 * Make the current task sleep until the given expiry time has
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
 * pass before the routine returns.
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task.
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Returns 0 when the timer has expired otherwise -EINTR
 */
int __sched schedule_hrtimeout(ktime_t *expires,
			       const enum hrtimer_mode mode)
{
	return schedule_hrtimeout_range(expires, 0, mode);
}
EXPORT_SYMBOL_GPL(schedule_hrtimeout);
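
/*
 * Illustrative sketch (not part of the original file): typical use of
 * schedule_hrtimeout() above. As the kerneldoc points out, forgetting
 * set_current_state() makes the call return without sleeping.
 *
 *	ktime_t timeout = ms_to_ktime(5);
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	if (schedule_hrtimeout(&timeout, HRTIMER_MODE_REL) == 0) {
 *		// Timer expired; -EINTR would mean a signal woke us first.
 *	}
 */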