/*
 * linux/kernel/time/clocksource.c
 *
 * This file contains the functions which manage clocksource drivers.
 *
 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * TODO WishList:
 *   o Allow clocksource drivers to be unregistered
 */

#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>

#include "tick-internal.h"

void timecounter_init(struct timecounter *tc,
		      const struct cyclecounter *cc,
		      u64 start_tstamp)
{
	tc->cc = cc;
	tc->cycle_last = cc->read(cc);
	tc->nsec = start_tstamp;
}
EXPORT_SYMBOL_GPL(timecounter_init);

/**
 * timecounter_read_delta - get nanoseconds since last call of this function
 * @tc:		Pointer to time counter
 *
 * When the underlying cycle counter runs over, this will be handled
 * correctly as long as it does not run over more than once between
 * calls.
 *
 * The first call to this function for a new time counter initializes
 * the time tracking and returns an undefined result.
 */
static u64 timecounter_read_delta(struct timecounter *tc)
{
	cycle_t cycle_now, cycle_delta;
	u64 ns_offset;

	/* read cycle counter: */
	cycle_now = tc->cc->read(tc->cc);

	/* calculate the delta since the last timecounter_read_delta(): */
	cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;

	/* convert to nanoseconds: */
	ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);

	/* update time stamp of timecounter_read_delta() call: */
	tc->cycle_last = cycle_now;

	return ns_offset;
}

u64 timecounter_read(struct timecounter *tc)
{
	u64 nsec;

	/* increment time by nanoseconds since last call */
	nsec = timecounter_read_delta(tc);
	nsec += tc->nsec;
	tc->nsec = nsec;

	return nsec;
}
EXPORT_SYMBOL_GPL(timecounter_read);
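
/*
 * Illustrative use of the timecounter/cyclecounter API by a driver with a
 * free-running 32 bit hardware counter. The names my_cc_read(),
 * my_hw_read_counter(), my_cc and my_tc below are hypothetical and only
 * sketch the expected calling sequence; they are not part of this file:
 *
 *	static cycle_t my_cc_read(const struct cyclecounter *cc)
 *	{
 *		return my_hw_read_counter();	// hypothetical register read
 *	}
 *
 *	static struct cyclecounter my_cc = {
 *		.read	= my_cc_read,
 *		.mask	= CLOCKSOURCE_MASK(32),
 *		.mult	= 0,	// filled in via clocks_calc_mult_shift()
 *		.shift	= 0,
 *	};
 *	static struct timecounter my_tc;
 *
 *	timecounter_init(&my_tc, &my_cc, ktime_to_ns(ktime_get_real()));
 *	...
 *	ns = timecounter_read(&my_tc);	// ns elapsed since start_tstamp
 *
 * timecounter_read() must be called often enough that the hardware counter
 * wraps at most once between calls, see timecounter_read_delta() above.
 */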

u64 timecounter_cyc2time(struct timecounter *tc,
			 cycle_t cycle_tstamp)
{
	u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
	u64 nsec;

	/*
	 * Instead of always treating cycle_tstamp as more recent
	 * than tc->cycle_last, detect when it is too far in the
	 * future and treat it as old time stamp instead.
	 */
	if (cycle_delta > tc->cc->mask / 2) {
		cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
		nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
	} else {
		nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
	}

	return nsec;
}
EXPORT_SYMBOL_GPL(timecounter_cyc2time);

/**
 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
 * @mult:	pointer to mult variable
 * @shift:	pointer to shift variable
 * @from:	frequency to convert from
 * @to:		frequency to convert to
 * @maxsec:	guaranteed runtime conversion range in seconds
 *
 * The function evaluates the shift/mult pair for the scaled math
 * operations of clocksources and clockevents.
 *
 * @to and @from are frequency values in HZ. For clock sources @to is
 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
 * events @to is the counter frequency and @from is NSEC_PER_SEC.
 *
 * The @maxsec conversion range argument controls the time frame in
 * seconds which must be covered by the runtime conversion with the
 * calculated mult and shift factors. This guarantees that no 64bit
 * overflow happens when the input value of the conversion is
 * multiplied with the calculated mult factor. Larger ranges may
 * reduce the conversion accuracy by choosing smaller mult and shift
 * factors.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
	u64 tmp;
	u32 sft, sftacc = 32;

	/*
	 * Calculate the shift factor which is limiting the conversion
	 * range:
	 */
	tmp = ((u64)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/*
	 * Find the conversion shift/mult pair which has the best
	 * accuracy and fits the maxsec conversion range:
	 */
	for (sft = 32; sft > 0; sft--) {
		tmp = (u64) to << sft;
		tmp += from / 2;
		do_div(tmp, from);
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = tmp;
	*shift = sft;
}
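
/*
 * Worked example (illustrative): for a 1 MHz counter used as a clocksource,
 * clocks_calc_mult_shift(&mult, &shift, 1000000, NSEC_PER_SEC, 600) finds
 * the largest shift whose scaled factor still fits the requested range:
 * maxsec * from = 6e8 < 2^32, so sftacc stays 32, and the loop stops at
 * sft = 22 because 1000 << 22 is the first candidate below 2^32. The result
 * is mult = 1000 << 22 and shift = 22, so the runtime conversion
 *
 *	ns = (cycles * mult) >> shift = cycles * 1000
 *
 * is exact in this case; for frequencies that do not divide NSEC_PER_SEC
 * the pair is only an approximation whose error shrinks as shift grows.
 */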

/*[Clocksource internal variables]---------
 * curr_clocksource:
 *	currently selected clocksource.
 * clocksource_list:
 *	linked list with the registered clocksources
 * clocksource_mutex:
 *	protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *	Name of the user-specified clocksource.
 */
static struct clocksource *curr_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;

static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);

/*
 * Interval: 0.5sec Threshold: 0.0625s
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)

static void clocksource_watchdog_work(struct work_struct *work)
{
	/*
	 * If kthread_run fails the next watchdog scan over the
	 * watchdog_list will find the unstable clock again.
	 */
	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}

static void __clocksource_unstable(struct clocksource *cs)
{
	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
	cs->flags |= CLOCK_SOURCE_UNSTABLE;
	if (finished_booting)
		schedule_work(&watchdog_work);
}

static void clocksource_unstable(struct clocksource *cs, int64_t delta)
{
	printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
	       cs->name, delta);
	__clocksource_unstable(cs);
}

/**
 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 * @cs:		clocksource to be marked unstable
 *
 * This function is called instead of clocksource_change_rating from
 * cpu hotplug code to avoid a deadlock between the clocksource mutex
 * and the cpu hotplug mutex. It defers the update of the clocksource
 * to the watchdog thread.
 */
void clocksource_mark_unstable(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
		if (list_empty(&cs->wd_list))
			list_add(&cs->wd_list, &watchdog_list);
		__clocksource_unstable(cs);
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}
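
/*
 * Illustrative numbers for the deviation check below: the watchdog timer
 * fires every WATCHDOG_INTERVAL (0.5s). If over one interval the watchdog
 * clocksource advances by 500,000,000 ns while a watched clocksource only
 * advances by 430,000,000 ns, the deviation is 70 ms, which exceeds
 * WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4 = 62.5 ms), so the watched
 * clocksource is marked unstable and later rerated to 0 by the watchdog
 * kthread.
 */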

static void clocksource_watchdog(unsigned long data)
{
	struct clocksource *cs;
	cycle_t csnow, wdnow;
	int64_t wd_nsec, cs_nsec;
	int next_cpu, reset_pending;

	spin_lock(&watchdog_lock);
	if (!watchdog_running)
		goto out;

	reset_pending = atomic_read(&watchdog_reset_pending);

	list_for_each_entry(cs, &watchdog_list, wd_list) {

		/* Clocksource already marked unstable? */
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			if (finished_booting)
				schedule_work(&watchdog_work);
			continue;
		}

		local_irq_disable();
		csnow = cs->read(cs);
		wdnow = watchdog->read(watchdog);
		local_irq_enable();

		/* Clocksource initialized ? */
		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
		    atomic_read(&watchdog_reset_pending)) {
			cs->flags |= CLOCK_SOURCE_WATCHDOG;
			cs->wd_last = wdnow;
			cs->cs_last = csnow;
			continue;
		}

		wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask,
					     watchdog->mult, watchdog->shift);

		cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) &
					     cs->mask, cs->mult, cs->shift);
		cs->cs_last = csnow;
		cs->wd_last = wdnow;

		if (atomic_read(&watchdog_reset_pending))
			continue;

		/* Check the deviation from the watchdog clocksource. */
		if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
			clocksource_unstable(cs, cs_nsec - wd_nsec);
			continue;
		}

		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
			/* Mark it valid for high-res. */
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;

			/*
			 * clocksource_done_booting() will sort it if
			 * finished_booting is not set yet.
			 */
			if (!finished_booting)
				continue;

			/*
			 * If this is not the current clocksource let
			 * the watchdog thread reselect it. Due to the
			 * change to high res this clocksource might
			 * be preferred now. If it is the current
			 * clocksource let the tick code know about
			 * that change.
			 */
			if (cs != curr_clocksource) {
				cs->flags |= CLOCK_SOURCE_RESELECT;
				schedule_work(&watchdog_work);
			} else {
				tick_clock_notify();
			}
		}
	}

	/*
	 * We only clear watchdog_reset_pending when we have done a
	 * full cycle through all clocksources.
	 */
	if (reset_pending)
		atomic_dec(&watchdog_reset_pending);

	/*
	 * Cycle through CPUs to check if the CPUs stay synchronized
	 * to each other.
	 */
	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(cpu_online_mask);
	watchdog_timer.expires += WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, next_cpu);
out:
	spin_unlock(&watchdog_lock);
}

static inline void clocksource_start_watchdog(void)
{
	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
		return;
	init_timer(&watchdog_timer);
	watchdog_timer.function = clocksource_watchdog;
	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
	watchdog_running = 1;
}

static inline void clocksource_stop_watchdog(void)
{
	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
		return;
	del_timer(&watchdog_timer);
	watchdog_running = 0;
}

static inline void clocksource_reset_watchdog(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &watchdog_list, wd_list)
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}

static void clocksource_resume_watchdog(void)
{
	atomic_inc(&watchdog_reset_pending);
}
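
/*
 * Note on the masked deltas used in clocksource_watchdog() above:
 * (now - last) & mask relies on unsigned two's complement arithmetic to
 * survive one counter wrap. For example, with a 32 bit counter,
 * last = 0xffffff00 and now = 0x000000ff yield
 * (now - last) & 0xffffffff = 0x1ff, i.e. 511 cycles, rather than a huge
 * bogus delta. This only works if at most one wrap occurs between two
 * reads, which the 0.5s watchdog interval is meant to guarantee.
 */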

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
		/* cs is a clocksource to be watched. */
		list_add(&cs->wd_list, &watchdog_list);
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
	} else {
		/* cs is a watchdog. */
		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
		/* Pick the best watchdog. */
		if (!watchdog || cs->rating > watchdog->rating) {
			watchdog = cs;
			/* Reset watchdog cycles */
			clocksource_reset_watchdog();
		}
	}
	/* Check if the watchdog timer needs to be started. */
	clocksource_start_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (cs != watchdog) {
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
			/* cs is a watched clocksource. */
			list_del_init(&cs->wd_list);
			/* Check if the watchdog timer needs to be stopped. */
			clocksource_stop_watchdog();
		}
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static int __clocksource_watchdog_kthread(void)
{
	struct clocksource *cs, *tmp;
	unsigned long flags;
	LIST_HEAD(unstable);
	int select = 0;

	spin_lock_irqsave(&watchdog_lock, flags);
	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			list_del_init(&cs->wd_list);
			list_add(&cs->wd_list, &unstable);
			select = 1;
		}
		if (cs->flags & CLOCK_SOURCE_RESELECT) {
			cs->flags &= ~CLOCK_SOURCE_RESELECT;
			select = 1;
		}
	}
	/* Check if the watchdog timer needs to be stopped. */
	clocksource_stop_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);

	/* Needs to be done outside of watchdog lock */
	list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
		list_del_init(&cs->wd_list);
		__clocksource_change_rating(cs, 0);
	}
	return select;
}

static int clocksource_watchdog_kthread(void *data)
{
	mutex_lock(&clocksource_mutex);
	if (__clocksource_watchdog_kthread())
		clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}

static bool clocksource_is_watchdog(struct clocksource *cs)
{
	return cs == watchdog;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }

#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */

/**
 * clocksource_suspend - suspend the clocksource(s)
 */
void clocksource_suspend(void)
{
	struct clocksource *cs;

	list_for_each_entry_reverse(cs, &clocksource_list, list)
		if (cs->suspend)
			cs->suspend(cs);
}

/**
 * clocksource_resume - resume the clocksource(s)
 */
void clocksource_resume(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &clocksource_list, list)
		if (cs->resume)
			cs->resume(cs);

	clocksource_resume_watchdog();
}

/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This might fail when the kernel
 * was stopped in code which holds watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
	clocksource_resume_watchdog();
}

/**
 * clocksource_max_adjustment - Returns max adjustment amount
 * @cs:		Pointer to clocksource
 *
 */
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
	u64 ret;
	/*
	 * We won't try to correct for more than 11% adjustments (110,000 ppm).
	 */
	ret = (u64)cs->mult * 11;
	do_div(ret, 100);
	return (u32)ret;
}

/**
 * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
 * @mult:	cycle to nanosecond multiplier
 * @shift:	cycle to nanosecond divisor (power of two)
 * @maxadj:	maximum adjustment value to mult (~11%)
 * @mask:	bitmask for two's complement subtraction of non 64 bit counters
 */
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
{
	u64 max_nsecs, max_cycles;

	/*
	 * Calculate the maximum number of cycles that we can pass to the
	 * cyc2ns function without overflowing a 64-bit signed result. The
	 * maximum number of cycles is 2^63/(mult + maxadj), which is
	 * equivalent to the below.
	 * max_cycles < 2^63/(mult + maxadj)
	 * max_cycles < 2^(log2(2^63/(mult + maxadj)))
	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
	 * max_cycles < 2^(63 - log2(mult + maxadj))
	 * max_cycles < 1 << (63 - log2(mult + maxadj))
	 * Please note that we add 1 to the result of the log2 to account for
	 * any rounding errors, ensure the above inequality is satisfied and
	 * no overflow will occur.
	 */
	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));

	/*
	 * The actual maximum number of cycles we can defer the clocksource is
	 * determined by the minimum of max_cycles and mask.
	 * Note: Here we subtract the maxadj to make sure we don't sleep for
	 * too long if there's a large negative adjustment.
	 */
	max_cycles = min(max_cycles, mask);
	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);

	return max_nsecs;
}
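
/*
 * Illustrative check of the bound above: if mult + maxadj is just below
 * 2^32 (say 4,000,000,000), then ilog2(mult + maxadj) = 31 and
 * max_cycles = 1 << (63 - 32) = 2^31, so the product
 * max_cycles * (mult + maxadj) stays below 2^31 * 2^32 = 2^63 and the
 * 64-bit signed intermediate result cannot overflow.
 */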

/**
 * clocksource_max_deferment - Returns max time the clocksource can be deferred
 * @cs:		Pointer to clocksource
 *
 */
static u64 clocksource_max_deferment(struct clocksource *cs)
{
	u64 max_nsecs;

	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
					  cs->mask);
	/*
	 * To ensure that the clocksource does not wrap whilst we are idle,
	 * limit the time the clocksource can be deferred by 12.5%. Please
	 * note a margin of 12.5% is used because this can be computed with
	 * a shift, versus say 10% which would require division.
	 */
	return max_nsecs - (max_nsecs >> 3);
}

#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET

static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
{
	struct clocksource *cs;

	if (!finished_booting || list_empty(&clocksource_list))
		return NULL;

	/*
	 * We pick the clocksource with the highest rating. If oneshot
	 * mode is active, we pick the highres valid clocksource with
	 * the best rating.
	 */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			continue;
		return cs;
	}
	return NULL;
}

static void __clocksource_select(bool skipcur)
{
	bool oneshot = tick_oneshot_mode_active();
	struct clocksource *best, *cs;

	/* Find the best suitable clocksource */
	best = clocksource_find_best(oneshot, skipcur);
	if (!best)
		return;

	/* Check for the override clocksource. */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (strcmp(cs->name, override_name) != 0)
			continue;
		/*
		 * Check to make sure we don't switch to a non-highres
		 * capable clocksource if the tick code is in oneshot
		 * mode (highres or nohz)
		 */
		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
			/* Override clocksource cannot be used. */
			printk(KERN_WARNING "Override clocksource %s is not "
			       "HRT compatible. Cannot switch while in "
			       "HRT/NOHZ mode\n", cs->name);
			override_name[0] = 0;
		} else
			/* Override clocksource can be used. */
			best = cs;
		break;
	}

	if (curr_clocksource != best && !timekeeping_notify(best)) {
		pr_info("Switched to clocksource %s\n", best->name);
		curr_clocksource = best;
	}
}

/**
 * clocksource_select - Select the best clocksource available
 *
 * Private function. Must hold clocksource_mutex when called.
 *
 * Select the clocksource with the best rating, or the clocksource
 * which is selected by userspace override.
 */
static void clocksource_select(void)
{
	return __clocksource_select(false);
}

static void clocksource_select_fallback(void)
{
	return __clocksource_select(true);
}

#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */

static inline void clocksource_select(void) { }
static inline void clocksource_select_fallback(void) { }

#endif

/*
 * clocksource_done_booting - Called near the end of core bootup
 *
 * Hack to avoid lots of clocksource churn at boot time.
 * We use fs_initcall because we want this to start before
 * device_initcall but after subsys_initcall.
 */
static int __init clocksource_done_booting(void)
{
	mutex_lock(&clocksource_mutex);
	curr_clocksource = clocksource_default_clock();
	finished_booting = 1;
	/*
	 * Run the watchdog first to eliminate unstable clock sources
	 */
	__clocksource_watchdog_kthread();
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}
fs_initcall(clocksource_done_booting);

/*
 * Enqueue the clocksource sorted by rating
 */
static void clocksource_enqueue(struct clocksource *cs)
{
	struct list_head *entry = &clocksource_list;
	struct clocksource *tmp;

	list_for_each_entry(tmp, &clocksource_list, list)
		/* Keep track of the place where to insert */
		if (tmp->rating >= cs->rating)
			entry = &tmp->list;
	list_add(&cs->list, entry);
}
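
/*
 * Example of the resulting ordering (typical x86 ratings, illustrative):
 * with tsc (rating 300), hpet (rating 250) and acpi_pm (rating 200)
 * registered, clocksource_enqueue() keeps the list sorted as
 * tsc -> hpet -> acpi_pm, and clocksource_select() picks tsc unless the
 * user overrode it via clocksource= or sysfs, or oneshot mode requires a
 * CLOCK_SOURCE_VALID_FOR_HRES entry further down the list.
 */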

/**
 * __clocksource_updatefreq_scale - Used to update clocksource with new freq
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * This should only be called from the clocksource->enable() method.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_updatefreq_hz() or clocksource_updatefreq_khz() helper functions.
 */
void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
	u64 sec;
	/*
	 * Calc the maximum number of seconds which we can run before
	 * wrapping around. For clocksources which have a mask > 32bit
	 * we need to limit the max sleep time to have a good
	 * conversion precision. 10 minutes is still a reasonable
	 * amount. That results in a shift value of 24 for a
	 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
	 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
	 * margin as we do in clocksource_max_deferment()
	 */
	sec = (cs->mask - (cs->mask >> 3));
	do_div(sec, freq);
	do_div(sec, scale);
	if (!sec)
		sec = 1;
	else if (sec > 600 && cs->mask > UINT_MAX)
		sec = 600;

	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
			       NSEC_PER_SEC / scale, sec * scale);

	/*
	 * Since mult may be adjusted by ntp, add a safety margin here to
	 * avoid an overflow for clocksources that have a large mult.
	 */
	cs->maxadj = clocksource_max_adjustment(cs);
	while ((cs->mult + cs->maxadj < cs->mult)
		|| (cs->mult - cs->maxadj > cs->mult)) {
		cs->mult >>= 1;
		cs->shift--;
		cs->maxadj = clocksource_max_adjustment(cs);
	}

	cs->max_idle_ns = clocksource_max_deferment(cs);
}
EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);

/**
 * __clocksource_register_scale - Used to install new clocksources
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_register_hz() or clocksource_register_khz() helper functions.
 */
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{

	/* Initialize mult/shift and max_idle_ns */
	__clocksource_updatefreq_scale(cs, scale, freq);

	/* Add clocksource to the clocksource list */
	mutex_lock(&clocksource_mutex);
	clocksource_enqueue(cs);
	clocksource_enqueue_watchdog(cs);
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
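
/*
 * Sketch of how a driver typically registers a clocksource through the
 * helpers above; my_cs_read(), my_hw_read_counter() and my_cs are
 * hypothetical and not part of this file:
 *
 *	static cycle_t my_cs_read(struct clocksource *cs)
 *	{
 *		return my_hw_read_counter();	// hypothetical register read
 *	}
 *
 *	static struct clocksource my_cs = {
 *		.name	= "my_timer",
 *		.rating	= 200,
 *		.read	= my_cs_read,
 *		.mask	= CLOCKSOURCE_MASK(32),
 *		.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 *	};
 *
 *	clocksource_register_hz(&my_cs, 24000000);	// 24 MHz counter
 *
 * clocksource_register_hz() ends up in __clocksource_register_scale(),
 * which computes mult/shift and max_idle_ns before enqueueing and
 * selection.
 */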

/**
 * clocksource_register - Used to install new clocksources
 * @cs:		clocksource to be registered
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 */
int clocksource_register(struct clocksource *cs)
{
	/* calculate max adjustment for given mult/shift */
	cs->maxadj = clocksource_max_adjustment(cs);
	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
		"Clocksource %s might overflow on 11%% adjustment\n",
		cs->name);

	/* calculate max idle time permitted for this clocksource */
	cs->max_idle_ns = clocksource_max_deferment(cs);

	mutex_lock(&clocksource_mutex);
	clocksource_enqueue(cs);
	clocksource_enqueue_watchdog(cs);
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}
EXPORT_SYMBOL(clocksource_register);

static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
	list_del(&cs->list);
	cs->rating = rating;
	clocksource_enqueue(cs);
}

/**
 * clocksource_change_rating - Change the rating of a registered clocksource
 * @cs:		clocksource to be changed
 * @rating:	new rating
 */
void clocksource_change_rating(struct clocksource *cs, int rating)
{
	mutex_lock(&clocksource_mutex);
	__clocksource_change_rating(cs, rating);
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);

/*
 * Unbind clocksource @cs. Called with clocksource_mutex held
 */
static int clocksource_unbind(struct clocksource *cs)
{
	/*
	 * I really can't convince myself to support this on hardware
	 * designed by lobotomized monkeys.
	 */
	if (clocksource_is_watchdog(cs))
		return -EBUSY;

	if (cs == curr_clocksource) {
		/* Select and try to install a replacement clock source */
		clocksource_select_fallback();
		if (curr_clocksource == cs)
			return -EBUSY;
	}
	clocksource_dequeue_watchdog(cs);
	list_del_init(&cs->list);
	return 0;
}

/**
 * clocksource_unregister - remove a registered clocksource
 * @cs:	clocksource to be unregistered
 */
int clocksource_unregister(struct clocksource *cs)
{
	int ret = 0;

	mutex_lock(&clocksource_mutex);
	if (!list_empty(&cs->list))
		ret = clocksource_unbind(cs);
	mutex_unlock(&clocksource_mutex);
	return ret;
}
EXPORT_SYMBOL(clocksource_unregister);

#ifdef CONFIG_SYSFS
/**
 * sysfs_show_current_clocksources - sysfs interface for current clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing current clocksource.
 */
static ssize_t
sysfs_show_current_clocksources(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
	mutex_unlock(&clocksource_mutex);

	return count;
}
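
/*
 * The attributes defined below are exposed under
 * /sys/devices/system/clocksource/clocksource0/. Typical interaction from
 * userspace (example output, will differ per system):
 *
 *	# cat available_clocksource
 *	tsc hpet acpi_pm
 *	# cat current_clocksource
 *	tsc
 *	# echo hpet > current_clocksource	(override the selection)
 *	# echo hpet > unbind_clocksource	(unregister it, if possible)
 */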

ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
{
	size_t ret = cnt;

	/* strings from sysfs write are not 0 terminated! */
	if (!cnt || cnt >= CS_NAME_LEN)
		return -EINVAL;

	/* strip off \n: */
	if (buf[cnt-1] == '\n')
		cnt--;
	if (cnt > 0)
		memcpy(dst, buf, cnt);
	dst[cnt] = 0;
	return ret;
}

/**
 * sysfs_override_clocksource - interface for manually overriding clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	name of override clocksource
 * @count:	length of buffer
 *
 * Takes input from sysfs interface for manually overriding the default
 * clocksource selection.
 */
static ssize_t sysfs_override_clocksource(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t count)
{
	ssize_t ret;

	mutex_lock(&clocksource_mutex);

	ret = sysfs_get_uname(buf, override_name, count);
	if (ret >= 0)
		clocksource_select();

	mutex_unlock(&clocksource_mutex);

	return ret;
}

/**
 * sysfs_unbind_clocksource - interface for manually unbinding clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	name of clocksource to unbind
 * @count:	length of buffer
 *
 * Takes input from sysfs interface for manually unbinding a clocksource.
 */
static ssize_t sysfs_unbind_clocksource(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	struct clocksource *cs;
	char name[CS_NAME_LEN];
	ssize_t ret;

	ret = sysfs_get_uname(buf, name, count);
	if (ret < 0)
		return ret;

	ret = -ENODEV;
	mutex_lock(&clocksource_mutex);
	list_for_each_entry(cs, &clocksource_list, list) {
		if (strcmp(cs->name, name))
			continue;
		ret = clocksource_unbind(cs);
		break;
	}
	mutex_unlock(&clocksource_mutex);

	return ret ? ret : count;
}

/**
 * sysfs_show_available_clocksources - sysfs interface for listing clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing registered clocksources
 */
static ssize_t
sysfs_show_available_clocksources(struct device *dev,
				  struct device_attribute *attr,
				  char *buf)
{
	struct clocksource *src;
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	list_for_each_entry(src, &clocksource_list, list) {
		/*
		 * Don't show non-HRES clocksource if the tick code is
		 * in one shot mode (highres=on or nohz=on)
		 */
		if (!tick_oneshot_mode_active() ||
		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			count += snprintf(buf + count,
				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
				  "%s ", src->name);
	}
	mutex_unlock(&clocksource_mutex);

	count += snprintf(buf + count,
			  max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");

	return count;
}

/*
 * Sysfs setup bits:
 */
static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
		   sysfs_override_clocksource);

static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);

static DEVICE_ATTR(available_clocksource, 0444,
		   sysfs_show_available_clocksources, NULL);

static struct bus_type clocksource_subsys = {
	.name = "clocksource",
	.dev_name = "clocksource",
};

static struct device device_clocksource = {
	.id	= 0,
	.bus	= &clocksource_subsys,
};

static int __init init_clocksource_sysfs(void)
{
	int error = subsys_system_register(&clocksource_subsys, NULL);

	if (!error)
		error = device_register(&device_clocksource);
	if (!error)
		error = device_create_file(
				&device_clocksource,
				&dev_attr_current_clocksource);
	if (!error)
		error = device_create_file(&device_clocksource,
					   &dev_attr_unbind_clocksource);
	if (!error)
		error = device_create_file(
				&device_clocksource,
				&dev_attr_available_clocksource);
	return error;
}

device_initcall(init_clocksource_sysfs);
#endif /* CONFIG_SYSFS */

/**
 * boot_override_clocksource - boot clock override
 * @str:	override name
 *
 * Takes a clocksource= boot argument and uses it
 * as the clocksource override name.
 */
static int __init boot_override_clocksource(char* str)
{
	mutex_lock(&clocksource_mutex);
	if (str)
		strlcpy(override_name, str, sizeof(override_name));
	mutex_unlock(&clocksource_mutex);
	return 1;
}

__setup("clocksource=", boot_override_clocksource);

/**
 * boot_override_clock - Compatibility layer for deprecated boot option
 * @str:	override name
 *
 * DEPRECATED! Takes a clock= boot argument and uses it
 * as the clocksource override name
 */
static int __init boot_override_clock(char* str)
{
	if (!strcmp(str, "pmtmr")) {
		printk("Warning: clock=pmtmr is deprecated. "
			"Use clocksource=acpi_pm.\n");
		return boot_override_clocksource("acpi_pm");
	}
	printk("Warning! clock= boot option is deprecated. "
		"Use clocksource=xyz\n");
	return boot_override_clocksource(str);
}

__setup("clock=", boot_override_clock);
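
/*
 * Example: booting with "clocksource=acpi_pm" on the kernel command line
 * sets override_name early, so acpi_pm is chosen over higher rated
 * clocksources once it registers; the deprecated "clock=pmtmr" form is
 * translated to the same thing by boot_override_clock() above.
 */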