/*
 * linux/kernel/time/clocksource.c
 *
 * This file contains the functions which manage clocksource drivers.
 *
 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * TODO WishList:
 *   o Allow clocksource drivers to be unregistered
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"

/**
 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
 * @mult:	pointer to mult variable
 * @shift:	pointer to shift variable
 * @from:	frequency to convert from
 * @to:		frequency to convert to
 * @maxsec:	guaranteed runtime conversion range in seconds
 *
 * The function evaluates the shift/mult pair for the scaled math
 * operations of clocksources and clockevents.
 *
 * @to and @from are frequency values in HZ. For clock sources @to is
 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
 * event @to is the counter frequency and @from is NSEC_PER_SEC.
 *
 * The @maxsec conversion range argument controls the time frame in
 * seconds which must be covered by the runtime conversion with the
 * calculated mult and shift factors. This guarantees that no 64bit
 * overflow happens when the input value of the conversion is
 * multiplied with the calculated mult factor. Larger ranges may
 * reduce the conversion accuracy by choosing smaller mult and shift
 * factors.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
	u64 tmp;
	u32 sft, sftacc = 32;

	/*
	 * Calculate the shift factor which is limiting the conversion
	 * range:
	 */
	tmp = ((u64)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/*
	 * Find the conversion shift/mult pair which has the best
	 * accuracy and fits the maxsec conversion range:
	 */
	for (sft = 32; sft > 0; sft--) {
		tmp = (u64) to << sft;
		tmp += from / 2;
		do_div(tmp, from);
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = tmp;
	*shift = sft;
}
EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
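/*
 * Worked example (illustrative values only): converting a hypothetical
 * 10 MHz counter to nanoseconds (from = 10000000, to = NSEC_PER_SEC)
 * with maxsec = 600 leaves sftacc at 31, and the loop above settles on
 * shift = 24, mult = 1677721600, so (cycles * mult) >> shift equals
 * cycles * 100, i.e. exactly 100 ns per cycle, while cycles * mult stays
 * below 2^64 for deltas up to the requested 600 seconds.
 */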
/*[Clocksource internal variables]---------
 * curr_clocksource:
 *	currently selected clocksource.
 * suspend_clocksource:
 *	used to calculate the suspend time.
 * clocksource_list:
 *	linked list with the registered clocksources
 * clocksource_mutex:
 *	protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *	Name of the user-specified clocksource.
 */
static struct clocksource *curr_clocksource;
static struct clocksource *suspend_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;
static u64 suspend_start;

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;

static inline void clocksource_watchdog_lock(unsigned long *flags)
{
	spin_lock_irqsave(&watchdog_lock, *flags);
}

static inline void clocksource_watchdog_unlock(unsigned long *flags)
{
	spin_unlock_irqrestore(&watchdog_lock, *flags);
}

static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);

/*
 * Interval: 0.5sec Threshold: 0.0625s
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)

static void clocksource_watchdog_work(struct work_struct *work)
{
	/*
	 * We cannot directly run clocksource_watchdog_kthread() here, because
	 * clocksource_select() calls timekeeping_notify() which uses
	 * stop_machine(). One cannot use stop_machine() from a workqueue() due
	 * to lock inversions wrt CPU hotplug.
	 *
	 * Also, we only ever run this work once or twice during the lifetime
	 * of the kernel, so there is no point in creating a more permanent
	 * kthread for this.
	 *
	 * If kthread_run fails the next watchdog scan over the
	 * watchdog_list will find the unstable clock again.
	 */
	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}

static void __clocksource_unstable(struct clocksource *cs)
{
	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
	cs->flags |= CLOCK_SOURCE_UNSTABLE;

	/*
	 * If the clocksource is registered clocksource_watchdog_kthread() will
	 * re-rate and re-select.
	 */
	if (list_empty(&cs->list)) {
		cs->rating = 0;
		return;
	}

	if (cs->mark_unstable)
		cs->mark_unstable(cs);

	/* kick clocksource_watchdog_kthread() */
	if (finished_booting)
		schedule_work(&watchdog_work);
}

/**
 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 * @cs:		clocksource to be marked unstable
 *
 * This function is called by the x86 TSC code to mark clocksources as unstable;
 * it defers demotion and re-selection to a kthread.
 */
void clocksource_mark_unstable(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
		if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
			list_add(&cs->wd_list, &watchdog_list);
		__clocksource_unstable(cs);
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_watchdog(struct timer_list *unused)
{
	struct clocksource *cs;
	u64 csnow, wdnow, cslast, wdlast, delta;
	int64_t wd_nsec, cs_nsec;
	int next_cpu, reset_pending;

	spin_lock(&watchdog_lock);
	if (!watchdog_running)
		goto out;

	reset_pending = atomic_read(&watchdog_reset_pending);

	list_for_each_entry(cs, &watchdog_list, wd_list) {

		/* Clocksource already marked unstable? */
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			if (finished_booting)
				schedule_work(&watchdog_work);
			continue;
		}

		local_irq_disable();
		csnow = cs->read(cs);
		wdnow = watchdog->read(watchdog);
		local_irq_enable();

		/* Clocksource initialized ? */
		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
		    atomic_read(&watchdog_reset_pending)) {
			cs->flags |= CLOCK_SOURCE_WATCHDOG;
			cs->wd_last = wdnow;
			cs->cs_last = csnow;
			continue;
		}

		delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
		wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
					     watchdog->shift);

		delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
		wdlast = cs->wd_last; /* save these in case we print them */
		cslast = cs->cs_last;
		cs->cs_last = csnow;
		cs->wd_last = wdnow;

		if (atomic_read(&watchdog_reset_pending))
			continue;

		/* Check the deviation from the watchdog clocksource. */
		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
				smp_processor_id(), cs->name);
			pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
				watchdog->name, wdnow, wdlast, watchdog->mask);
			pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
				cs->name, csnow, cslast, cs->mask);
			__clocksource_unstable(cs);
			continue;
		}

		if (cs == curr_clocksource && cs->tick_stable)
			cs->tick_stable(cs);

		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
			/* Mark it valid for high-res. */
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;

			/*
			 * clocksource_done_booting() will sort it if
			 * finished_booting is not set yet.
			 */
			if (!finished_booting)
				continue;

			/*
			 * If this is not the current clocksource let
			 * the watchdog thread reselect it. Due to the
			 * change to high res this clocksource might
			 * be preferred now. If it is the current
			 * clocksource let the tick code know about
			 * that change.
			 */
			if (cs != curr_clocksource) {
				cs->flags |= CLOCK_SOURCE_RESELECT;
				schedule_work(&watchdog_work);
			} else {
				tick_clock_notify();
			}
		}
	}

	/*
	 * We only clear watchdog_reset_pending when we did a full cycle
	 * through all clocksources.
	 */
	if (reset_pending)
		atomic_dec(&watchdog_reset_pending);

	/*
	 * Cycle through CPUs to check if the CPUs stay synchronized
	 * to each other.
	 */
	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(cpu_online_mask);
	watchdog_timer.expires += WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, next_cpu);
out:
	spin_unlock(&watchdog_lock);
}
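/*
 * With the defines above, each entry on watchdog_list is compared against
 * the watchdog roughly every 0.5s (WATCHDOG_INTERVAL = HZ >> 1) and is
 * declared unstable once the two measured intervals disagree by more than
 * WATCHDOG_THRESHOLD = NSEC_PER_SEC >> 4 = 62.5ms, i.e. a skew of about
 * 12.5% across one watchdog period.
 */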
static inline void clocksource_start_watchdog(void)
{
	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
		return;
	timer_setup(&watchdog_timer, clocksource_watchdog, 0);
	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
	watchdog_running = 1;
}

static inline void clocksource_stop_watchdog(void)
{
	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
		return;
	del_timer(&watchdog_timer);
	watchdog_running = 0;
}

static inline void clocksource_reset_watchdog(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &watchdog_list, wd_list)
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}

static void clocksource_resume_watchdog(void)
{
	atomic_inc(&watchdog_reset_pending);
}

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	INIT_LIST_HEAD(&cs->wd_list);

	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
		/* cs is a clocksource to be watched. */
		list_add(&cs->wd_list, &watchdog_list);
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
	} else {
		/* cs is a watchdog. */
		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
	}
}

static void clocksource_select_watchdog(bool fallback)
{
	struct clocksource *cs, *old_wd;
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	/* save current watchdog */
	old_wd = watchdog;
	if (fallback)
		watchdog = NULL;

	list_for_each_entry(cs, &clocksource_list, list) {
		/* cs is a clocksource to be watched. */
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
			continue;

		/* Skip current if we were requested for a fallback. */
		if (fallback && cs == old_wd)
			continue;

		/* Pick the best watchdog. */
		if (!watchdog || cs->rating > watchdog->rating)
			watchdog = cs;
	}
	/* If we failed to find a fallback restore the old one. */
	if (!watchdog)
		watchdog = old_wd;

	/* If we changed the watchdog we need to reset cycles. */
	if (watchdog != old_wd)
		clocksource_reset_watchdog();

	/* Check if the watchdog timer needs to be started. */
	clocksource_start_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
	if (cs != watchdog) {
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
			/* cs is a watched clocksource. */
			list_del_init(&cs->wd_list);
			/* Check if the watchdog timer needs to be stopped. */
			clocksource_stop_watchdog();
		}
	}
}
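/*
 * Typical pairing (for illustration): a clocksource registered with
 * CLOCK_SOURCE_MUST_VERIFY (e.g. the x86 TSC) ends up on watchdog_list
 * and is cross-checked, while clocksource_select_watchdog() picks the
 * highest-rated clocksource that does not need verification (such as
 * hpet or acpi_pm) to act as the watchdog.
 */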
static int __clocksource_watchdog_kthread(void)
{
	struct clocksource *cs, *tmp;
	unsigned long flags;
	int select = 0;

	spin_lock_irqsave(&watchdog_lock, flags);
	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			list_del_init(&cs->wd_list);
			__clocksource_change_rating(cs, 0);
			select = 1;
		}
		if (cs->flags & CLOCK_SOURCE_RESELECT) {
			cs->flags &= ~CLOCK_SOURCE_RESELECT;
			select = 1;
		}
	}
	/* Check if the watchdog timer needs to be stopped. */
	clocksource_stop_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);

	return select;
}

static int clocksource_watchdog_kthread(void *data)
{
	mutex_lock(&clocksource_mutex);
	if (__clocksource_watchdog_kthread())
		clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}

static bool clocksource_is_watchdog(struct clocksource *cs)
{
	return cs == watchdog;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

static void clocksource_select_watchdog(bool fallback) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }

static inline void clocksource_watchdog_lock(unsigned long *flags) { }
static inline void clocksource_watchdog_unlock(unsigned long *flags) { }

#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */

static bool clocksource_is_suspend(struct clocksource *cs)
{
	return cs == suspend_clocksource;
}

static void __clocksource_suspend_select(struct clocksource *cs)
{
	/*
	 * Skip the clocksource which will be stopped in suspend state.
	 */
	if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
		return;

	/*
	 * The nonstop clocksource can be selected as the suspend clocksource to
	 * calculate the suspend time, so it should not supply suspend/resume
	 * interfaces that would stop it when the system suspends.
	 */
	if (cs->suspend || cs->resume) {
		pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
			cs->name);
	}

	/* Pick the best rating. */
	if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
		suspend_clocksource = cs;
}

/**
 * clocksource_suspend_select - Select the best clocksource for suspend timing
 * @fallback:	if true, select a fallback clocksource
 */
static void clocksource_suspend_select(bool fallback)
{
	struct clocksource *cs, *old_suspend;

	old_suspend = suspend_clocksource;
	if (fallback)
		suspend_clocksource = NULL;

	list_for_each_entry(cs, &clocksource_list, list) {
		/* Skip current if we were requested for a fallback. */
		if (fallback && cs == old_suspend)
			continue;

		__clocksource_suspend_select(cs);
	}
}

/**
 * clocksource_start_suspend_timing - Start measuring the suspend timing
 * @cs:			current clocksource from timekeeping
 * @start_cycles:	current cycles from timekeeping
 *
 * This function saves the start cycle value of the suspend timer so that the
 * suspend time can be calculated when the system resumes.
 *
 * This function is called late in the suspend process from timekeeping_suspend(),
 * which means processes are frozen, and non-boot CPUs and interrupts are
 * disabled now. It is therefore possible to start the suspend timer without
 * taking the clocksource mutex.
 */
void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
{
	if (!suspend_clocksource)
		return;

	/*
	 * If current clocksource is the suspend timer, we should use the
	 * tkr_mono.cycle_last value as suspend_start to avoid same reading
	 * from suspend timer.
	 */
	if (clocksource_is_suspend(cs)) {
		suspend_start = start_cycles;
		return;
	}

	if (suspend_clocksource->enable &&
	    suspend_clocksource->enable(suspend_clocksource)) {
		pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
		return;
	}

	suspend_start = suspend_clocksource->read(suspend_clocksource);
}

/**
 * clocksource_stop_suspend_timing - Stop measuring the suspend timing
 * @cs:		current clocksource from timekeeping
 * @cycle_now:	current cycles from timekeeping
 *
 * This function calculates the suspend time from the suspend timer.
 *
 * Returns nanoseconds since suspend started, 0 if no usable suspend clocksource.
 *
 * This function is called early in the resume process from timekeeping_resume(),
 * which means there is only one CPU, no processes are running and interrupts
 * are disabled. It is therefore possible to stop the suspend timer without
 * taking the clocksource mutex.
 */
u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
{
	u64 now, delta, nsec = 0;

	if (!suspend_clocksource)
		return 0;

	/*
	 * If current clocksource is the suspend timer, we should use the
	 * tkr_mono.cycle_last value from timekeeping as current cycle to
	 * avoid same reading from suspend timer.
	 */
	if (clocksource_is_suspend(cs))
		now = cycle_now;
	else
		now = suspend_clocksource->read(suspend_clocksource);

	if (now > suspend_start) {
		delta = clocksource_delta(now, suspend_start,
					  suspend_clocksource->mask);
		nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
				       suspend_clocksource->shift);
	}

	/*
	 * Disable the suspend timer to save power if current clocksource is
	 * not the suspend timer.
	 */
	if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
		suspend_clocksource->disable(suspend_clocksource);

	return nsec;
}
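/*
 * Illustrative example (hypothetical numbers): with a 32768 Hz always-on
 * counter as suspend_clocksource, the mult/shift pair set up at
 * registration time converts the cycle delta to nanoseconds, so 98304
 * elapsed cycles (98304 / 32768 Hz = 3 s) are returned as roughly three
 * seconds of sleep time for the timekeeping core to account for on resume.
 */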
/**
 * clocksource_suspend - suspend the clocksource(s)
 */
void clocksource_suspend(void)
{
	struct clocksource *cs;

	list_for_each_entry_reverse(cs, &clocksource_list, list)
		if (cs->suspend)
			cs->suspend(cs);
}

/**
 * clocksource_resume - resume the clocksource(s)
 */
void clocksource_resume(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &clocksource_list, list)
		if (cs->resume)
			cs->resume(cs);

	clocksource_resume_watchdog();
}

/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This might fail when the kernel
 * was stopped in code which holds watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
	clocksource_resume_watchdog();
}

/**
 * clocksource_max_adjustment - Returns max adjustment amount
 * @cs:		Pointer to clocksource
 *
 */
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
	u64 ret;
	/*
	 * We won't try to correct for more than 11% adjustments (110,000 ppm).
	 */
	ret = (u64)cs->mult * 11;
	do_div(ret, 100);
	return (u32)ret;
}

/**
 * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
 * @mult:	cycle to nanosecond multiplier
 * @shift:	cycle to nanosecond divisor (power of two)
 * @maxadj:	maximum adjustment value to mult (~11%)
 * @mask:	bitmask for two's complement subtraction of non 64 bit counters
 * @max_cyc:	maximum cycle value before potential overflow (does not include
 *		any safety margin)
 *
 * NOTE: This function includes a safety margin of 50%, in other words, we
 * return half the number of nanoseconds the hardware counter can technically
 * cover. This is done so that we can potentially detect problems caused by
 * delayed timers or bad hardware, which might result in time intervals that
 * are larger than what the math used can handle without overflows.
 */
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
{
	u64 max_nsecs, max_cycles;

	/*
	 * Calculate the maximum number of cycles that we can pass to the
	 * cyc2ns() function without overflowing a 64-bit result.
	 */
	max_cycles = ULLONG_MAX;
	do_div(max_cycles, mult + maxadj);

	/*
	 * The actual maximum number of cycles we can defer the clocksource is
	 * determined by the minimum of max_cycles and mask.
	 * Note: Here we subtract the maxadj to make sure we don't sleep for
	 * too long if there's a large negative adjustment.
	 */
	max_cycles = min(max_cycles, mask);
	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);

	/* return the max_cycles value as well if requested */
	if (max_cyc)
		*max_cyc = max_cycles;

	/* Return 50% of the actual maximum, so we can detect bad values */
	max_nsecs >>= 1;

	return max_nsecs;
}
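/*
 * Worked example (illustrative values, continuing the 10 MHz case above):
 * mult = 1677721600, shift = 24, maxadj = 11% of mult = 184549376 and a
 * 32-bit mask give max_cycles = min(ULLONG_MAX / (mult + maxadj), mask) =
 * 0xffffffff; converting that with (mult - maxadj) and halving the result
 * yields a max_idle_ns of roughly 191 seconds for such a counter.
 */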
/**
 * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
 * @cs:		Pointer to clocksource to be updated
 *
 */
static inline void clocksource_update_max_deferment(struct clocksource *cs)
{
	cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
						cs->maxadj, cs->mask,
						&cs->max_cycles);
}

#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET

static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
{
	struct clocksource *cs;

	if (!finished_booting || list_empty(&clocksource_list))
		return NULL;

	/*
	 * We pick the clocksource with the highest rating. If oneshot
	 * mode is active, we pick the highres valid clocksource with
	 * the best rating.
	 */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			continue;
		return cs;
	}
	return NULL;
}

static void __clocksource_select(bool skipcur)
{
	bool oneshot = tick_oneshot_mode_active();
	struct clocksource *best, *cs;

	/* Find the best suitable clocksource */
	best = clocksource_find_best(oneshot, skipcur);
	if (!best)
		return;

	if (!strlen(override_name))
		goto found;

	/* Check for the override clocksource. */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (strcmp(cs->name, override_name) != 0)
			continue;
		/*
		 * Check to make sure we don't switch to a non-highres
		 * capable clocksource if the tick code is in oneshot
		 * mode (highres or nohz)
		 */
		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
			/* Override clocksource cannot be used. */
			if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
				pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
					cs->name);
				override_name[0] = 0;
			} else {
				/*
				 * The override cannot be currently verified.
				 * Deferring to let the watchdog check.
				 */
				pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
					cs->name);
			}
		} else
			/* Override clocksource can be used. */
			best = cs;
		break;
	}

found:
	if (curr_clocksource != best && !timekeeping_notify(best)) {
		pr_info("Switched to clocksource %s\n", best->name);
		curr_clocksource = best;
	}
}
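/*
 * The override name consulted above comes either from the clocksource=
 * boot parameter (see boot_override_clocksource() below) or from a write
 * to /sys/devices/system/clocksource/clocksource0/current_clocksource;
 * when it is empty, plain rating order decides.
 */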
/**
 * clocksource_select - Select the best clocksource available
 *
 * Private function. Must hold clocksource_mutex when called.
 *
 * Select the clocksource with the best rating, or the clocksource
 * which is selected by the userspace override.
 */
static void clocksource_select(void)
{
	__clocksource_select(false);
}

static void clocksource_select_fallback(void)
{
	__clocksource_select(true);
}

#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
static inline void clocksource_select(void) { }
static inline void clocksource_select_fallback(void) { }

#endif

/*
 * clocksource_done_booting - Called near the end of core bootup
 *
 * Hack to avoid lots of clocksource churn at boot time.
 * We use fs_initcall because we want this to start before
 * device_initcall but after subsys_initcall.
 */
static int __init clocksource_done_booting(void)
{
	mutex_lock(&clocksource_mutex);
	curr_clocksource = clocksource_default_clock();
	finished_booting = 1;
	/*
	 * Run the watchdog first to eliminate unstable clock sources
	 */
	__clocksource_watchdog_kthread();
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}
fs_initcall(clocksource_done_booting);

/*
 * Enqueue the clocksource sorted by rating
 */
static void clocksource_enqueue(struct clocksource *cs)
{
	struct list_head *entry = &clocksource_list;
	struct clocksource *tmp;

	list_for_each_entry(tmp, &clocksource_list, list) {
		/* Keep track of the place where to insert */
		if (tmp->rating < cs->rating)
			break;
		entry = &tmp->list;
	}
	list_add(&cs->list, entry);
}

/**
 * __clocksource_update_freq_scale - Used to update clocksource with new freq
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * This should only be called from the clocksource->enable() method.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
 * functions.
 */
void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
	u64 sec;

	/*
	 * Default clocksources are *special* and self-define their mult/shift.
	 * But, you're not special, so you should specify a freq value.
	 */
	if (freq) {
		/*
		 * Calc the maximum number of seconds which we can run before
		 * wrapping around. For clocksources which have a mask > 32-bit
		 * we need to limit the max sleep time to have a good
		 * conversion precision. 10 minutes is still a reasonable
		 * amount. That results in a shift value of 24 for a
		 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
		 * ~ 0.06ppm granularity for NTP.
		 */
		sec = cs->mask;
		do_div(sec, freq);
		do_div(sec, scale);
		if (!sec)
			sec = 1;
		else if (sec > 600 && cs->mask > UINT_MAX)
			sec = 600;

		clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
				       NSEC_PER_SEC / scale, sec * scale);
	}
	/*
	 * Ensure clocksources that have large 'mult' values don't overflow
	 * when adjusted.
	 */
	cs->maxadj = clocksource_max_adjustment(cs);
	while (freq && ((cs->mult + cs->maxadj < cs->mult)
		|| (cs->mult - cs->maxadj > cs->mult))) {
		cs->mult >>= 1;
		cs->shift--;
		cs->maxadj = clocksource_max_adjustment(cs);
	}

	/*
	 * Only warn for *special* clocksources that self-define
	 * their mult/shift values and don't specify a freq.
	 */
	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
		"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
		cs->name);

	clocksource_update_max_deferment(cs);

	pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
		cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
}
EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
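/*
 * Registration sketch (hypothetical driver, for illustration only): a
 * typical clocksource driver fills in a struct clocksource and lets the
 * core derive mult/shift from the counter frequency through the
 * clocksource_register_hz() helper, which ends up in
 * __clocksource_register_scale() below:
 *
 *	static u64 example_read(struct clocksource *cs)
 *	{
 *		return example_read_counter();	// hypothetical hardware read
 *	}
 *
 *	static struct clocksource example_cs = {
 *		.name	= "example",
 *		.rating	= 200,
 *		.read	= example_read,
 *		.mask	= CLOCKSOURCE_MASK(32),
 *		.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 *	};
 *
 *	clocksource_register_hz(&example_cs, 10000000);
 */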
/**
 * __clocksource_register_scale - Used to install new clocksources
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_register_hz() or clocksource_register_khz() helper functions.
 */
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
	unsigned long flags;

	clocksource_arch_init(cs);

	/* Initialize mult/shift and max_idle_ns */
	__clocksource_update_freq_scale(cs, scale, freq);

	/* Add clocksource to the clocksource list */
	mutex_lock(&clocksource_mutex);

	clocksource_watchdog_lock(&flags);
	clocksource_enqueue(cs);
	clocksource_enqueue_watchdog(cs);
	clocksource_watchdog_unlock(&flags);

	clocksource_select();
	clocksource_select_watchdog(false);
	__clocksource_suspend_select(cs);
	mutex_unlock(&clocksource_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);

static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
	list_del(&cs->list);
	cs->rating = rating;
	clocksource_enqueue(cs);
}

/**
 * clocksource_change_rating - Change the rating of a registered clocksource
 * @cs:		clocksource to be changed
 * @rating:	new rating
 */
void clocksource_change_rating(struct clocksource *cs, int rating)
{
	unsigned long flags;

	mutex_lock(&clocksource_mutex);
	clocksource_watchdog_lock(&flags);
	__clocksource_change_rating(cs, rating);
	clocksource_watchdog_unlock(&flags);

	clocksource_select();
	clocksource_select_watchdog(false);
	clocksource_suspend_select(false);
	mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);

/*
 * Unbind clocksource @cs. Called with clocksource_mutex held
 */
static int clocksource_unbind(struct clocksource *cs)
{
	unsigned long flags;

	if (clocksource_is_watchdog(cs)) {
		/* Select and try to install a replacement watchdog. */
		clocksource_select_watchdog(true);
		if (clocksource_is_watchdog(cs))
			return -EBUSY;
	}

	if (cs == curr_clocksource) {
		/* Select and try to install a replacement clock source */
		clocksource_select_fallback();
		if (curr_clocksource == cs)
			return -EBUSY;
	}

	if (clocksource_is_suspend(cs)) {
		/*
		 * Select and try to install a replacement suspend clocksource.
		 * If no replacement suspend clocksource is found, we will just
		 * let the clocksource go and have no suspend clocksource.
		 */
		clocksource_suspend_select(true);
	}

	clocksource_watchdog_lock(&flags);
	clocksource_dequeue_watchdog(cs);
	list_del_init(&cs->list);
	clocksource_watchdog_unlock(&flags);

	return 0;
}

/**
 * clocksource_unregister - remove a registered clocksource
 * @cs:	clocksource to be unregistered
 */
int clocksource_unregister(struct clocksource *cs)
{
	int ret = 0;

	mutex_lock(&clocksource_mutex);
	if (!list_empty(&cs->list))
		ret = clocksource_unbind(cs);
	mutex_unlock(&clocksource_mutex);
	return ret;
}
EXPORT_SYMBOL(clocksource_unregister);

#ifdef CONFIG_SYSFS
/**
 * current_clocksource_show - sysfs interface for current clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	char buffer to be filled with the current clocksource name
 *
 * Provides sysfs interface for listing current clocksource.
 */
static ssize_t current_clocksource_show(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
	mutex_unlock(&clocksource_mutex);

	return count;
}

ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
{
	size_t ret = cnt;

	/* strings from sysfs write are not 0 terminated! */
	if (!cnt || cnt >= CS_NAME_LEN)
		return -EINVAL;

	/* strip off \n: */
	if (buf[cnt-1] == '\n')
		cnt--;
	if (cnt > 0)
		memcpy(dst, buf, cnt);
	dst[cnt] = 0;
	return ret;
}

/**
 * current_clocksource_store - interface for manually overriding clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	name of override clocksource
 * @count:	length of buffer
 *
 * Takes input from sysfs interface for manually overriding the default
 * clocksource selection.
 */
static ssize_t current_clocksource_store(struct device *dev,
					 struct device_attribute *attr,
					 const char *buf, size_t count)
{
	ssize_t ret;

	mutex_lock(&clocksource_mutex);

	ret = sysfs_get_uname(buf, override_name, count);
	if (ret >= 0)
		clocksource_select();

	mutex_unlock(&clocksource_mutex);

	return ret;
}
static DEVICE_ATTR_RW(current_clocksource);

/**
 * unbind_clocksource_store - interface for manually unbinding clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	unused
 * @count:	length of buffer
 *
 * Takes input from sysfs interface for manually unbinding a clocksource.
 */
static ssize_t unbind_clocksource_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	struct clocksource *cs;
	char name[CS_NAME_LEN];
	ssize_t ret;

	ret = sysfs_get_uname(buf, name, count);
	if (ret < 0)
		return ret;

	ret = -ENODEV;
	mutex_lock(&clocksource_mutex);
	list_for_each_entry(cs, &clocksource_list, list) {
		if (strcmp(cs->name, name))
			continue;
		ret = clocksource_unbind(cs);
		break;
	}
	mutex_unlock(&clocksource_mutex);

	return ret ? ret : count;
}
static DEVICE_ATTR_WO(unbind_clocksource);
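/*
 * Together with available_clocksource below, these attributes form the
 * admin interface under /sys/devices/system/clocksource/clocksource0/:
 * current_clocksource reads or overrides the active clocksource,
 * unbind_clocksource unregisters one by name (e.g.
 * "echo hpet > unbind_clocksource"), and available_clocksource lists the
 * registered (and, in oneshot mode, highres-capable) clocksources.
 */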
/**
 * available_clocksource_show - sysfs interface for listing clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing registered clocksources
 */
static ssize_t available_clocksource_show(struct device *dev,
					  struct device_attribute *attr,
					  char *buf)
{
	struct clocksource *src;
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	list_for_each_entry(src, &clocksource_list, list) {
		/*
		 * Don't show non-HRES clocksource if the tick code is
		 * in one shot mode (highres=on or nohz=on)
		 */
		if (!tick_oneshot_mode_active() ||
		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			count += snprintf(buf + count,
				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
				  "%s ", src->name);
	}
	mutex_unlock(&clocksource_mutex);

	count += snprintf(buf + count,
			  max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");

	return count;
}
static DEVICE_ATTR_RO(available_clocksource);

static struct attribute *clocksource_attrs[] = {
	&dev_attr_current_clocksource.attr,
	&dev_attr_unbind_clocksource.attr,
	&dev_attr_available_clocksource.attr,
	NULL
};
ATTRIBUTE_GROUPS(clocksource);

static struct bus_type clocksource_subsys = {
	.name = "clocksource",
	.dev_name = "clocksource",
};

static struct device device_clocksource = {
	.id	= 0,
	.bus	= &clocksource_subsys,
	.groups	= clocksource_groups,
};

static int __init init_clocksource_sysfs(void)
{
	int error = subsys_system_register(&clocksource_subsys, NULL);

	if (!error)
		error = device_register(&device_clocksource);

	return error;
}

device_initcall(init_clocksource_sysfs);
#endif /* CONFIG_SYSFS */

/**
 * boot_override_clocksource - boot clock override
 * @str:	override name
 *
 * Takes a clocksource= boot argument and uses it
 * as the clocksource override name.
 */
static int __init boot_override_clocksource(char *str)
{
	mutex_lock(&clocksource_mutex);
	if (str)
		strlcpy(override_name, str, sizeof(override_name));
	mutex_unlock(&clocksource_mutex);
	return 1;
}

__setup("clocksource=", boot_override_clocksource);

/**
 * boot_override_clock - Compatibility layer for deprecated boot option
 * @str:	override name
 *
 * DEPRECATED! Takes a clock= boot argument and uses it
 * as the clocksource override name
 */
static int __init boot_override_clock(char *str)
{
	if (!strcmp(str, "pmtmr")) {
		pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
		return boot_override_clocksource("acpi_pm");
	}
	pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
	return boot_override_clocksource(str);
}

__setup("clock=", boot_override_clock);