/*
 * linux/kernel/time/clocksource.c
 *
 * This file contains the functions which manage clocksource drivers.
 *
 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * TODO WishList:
 *   o Allow clocksource drivers to be unregistered
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"

/**
 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
 * @mult: pointer to mult variable
 * @shift: pointer to shift variable
 * @from: frequency to convert from
 * @to: frequency to convert to
 * @maxsec: guaranteed runtime conversion range in seconds
 *
 * The function evaluates the shift/mult pair for the scaled math
 * operations of clocksources and clockevents.
 *
 * @to and @from are frequency values in HZ. For clock sources @to is
 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
 * event @to is the counter frequency and @from is NSEC_PER_SEC.
 *
 * The @maxsec conversion range argument controls the time frame in
 * seconds which must be covered by the runtime conversion with the
 * calculated mult and shift factors. This guarantees that no 64bit
 * overflow happens when the input value of the conversion is
 * multiplied with the calculated mult factor. Larger ranges may
 * reduce the conversion accuracy by choosing smaller mult and shift
 * factors.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
	u64 tmp;
	u32 sft, sftacc = 32;

	/*
	 * Calculate the shift factor which is limiting the conversion
	 * range:
	 */
	tmp = ((u64)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/*
	 * Find the conversion shift/mult pair which has the best
	 * accuracy and fits the maxsec conversion range:
	 */
	for (sft = 32; sft > 0; sft--) {
		tmp = (u64) to << sft;
		tmp += from / 2;
		do_div(tmp, from);
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = tmp;
	*shift = sft;
}
EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
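
/*
 * Illustrative sketch: how a caller might derive a mult/shift pair for a
 * hypothetical 19.2 MHz counter that must stay overflow-free for at least
 * 600 seconds. The names example_freq_hz, example_mult, example_shift and
 * example_cycles_to_ns() are made up for this example only.
 */
#if 0	/* example only */
static u32 example_mult, example_shift;

static void example_setup_scaled_math(void)
{
	const u32 example_freq_hz = 19200000;	/* hypothetical counter rate */

	/* For clock sources: convert counter cycles to nanoseconds */
	clocks_calc_mult_shift(&example_mult, &example_shift,
			       example_freq_hz, NSEC_PER_SEC, 600);
}

static inline u64 example_cycles_to_ns(u64 cycles)
{
	/* Same scaled math as clocksource_cyc2ns() */
	return (cycles * example_mult) >> example_shift;
}
#endif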
/*[Clocksource internal variables]---------
 * curr_clocksource:
 *	currently selected clocksource.
 * clocksource_list:
 *	linked list with the registered clocksources
 * clocksource_mutex:
 *	protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *	Name of the user-specified clocksource.
 */
static struct clocksource *curr_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;

static inline void clocksource_watchdog_lock(unsigned long *flags)
{
	spin_lock_irqsave(&watchdog_lock, *flags);
}

static inline void clocksource_watchdog_unlock(unsigned long *flags)
{
	spin_unlock_irqrestore(&watchdog_lock, *flags);
}

/*
 * Interval: 0.5sec Threshold: 0.0625s
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)

static void __clocksource_unstable(struct clocksource *cs)
{
	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
	cs->flags |= CLOCK_SOURCE_UNSTABLE;

	/*
	 * If the clocksource is registered clocksource_watchdog_work() will
	 * re-rate and re-select.
	 */
	if (list_empty(&cs->list)) {
		cs->rating = 0;
		return;
	}

	if (cs->mark_unstable)
		cs->mark_unstable(cs);

	/* kick clocksource_watchdog_work() */
	if (finished_booting)
		schedule_work(&watchdog_work);
}

/**
 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 * @cs: clocksource to be marked unstable
 *
 * This function is called by the x86 TSC code to mark clocksources as
 * unstable; it defers demotion and re-selection to the watchdog work item.
 */
void clocksource_mark_unstable(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
		if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
			list_add(&cs->wd_list, &watchdog_list);
		__clocksource_unstable(cs);
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}
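
/*
 * Worked example of the threshold check in clocksource_watchdog() below:
 * assuming HZ == 250, the watchdog timer fires every WATCHDOG_INTERVAL =
 * 125 jiffies = 0.5 s. Both the watched clocksource and the watchdog delta
 * over that interval are converted to nanoseconds; if they disagree by more
 * than WATCHDOG_THRESHOLD = NSEC_PER_SEC >> 4 = 62500000 ns (62.5 ms, i.e.
 * roughly 12.5% of one interval), the clocksource is declared unstable.
 */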
static void clocksource_watchdog(struct timer_list *unused)
{
	struct clocksource *cs;
	u64 csnow, wdnow, cslast, wdlast, delta;
	int64_t wd_nsec, cs_nsec;
	int next_cpu, reset_pending;

	spin_lock(&watchdog_lock);
	if (!watchdog_running)
		goto out;

	reset_pending = atomic_read(&watchdog_reset_pending);

	list_for_each_entry(cs, &watchdog_list, wd_list) {

		/* Clocksource already marked unstable? */
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			if (finished_booting)
				schedule_work(&watchdog_work);
			continue;
		}

		local_irq_disable();
		csnow = cs->read(cs);
		wdnow = watchdog->read(watchdog);
		local_irq_enable();

		/* Clocksource initialized? */
		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
		    atomic_read(&watchdog_reset_pending)) {
			cs->flags |= CLOCK_SOURCE_WATCHDOG;
			cs->wd_last = wdnow;
			cs->cs_last = csnow;
			continue;
		}

		delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
		wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
					     watchdog->shift);

		delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
		wdlast = cs->wd_last; /* save these in case we print them */
		cslast = cs->cs_last;
		cs->cs_last = csnow;
		cs->wd_last = wdnow;

		if (atomic_read(&watchdog_reset_pending))
			continue;

		/* Check the deviation from the watchdog clocksource. */
		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
				smp_processor_id(), cs->name);
			pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
				watchdog->name, wdnow, wdlast, watchdog->mask);
			pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
				cs->name, csnow, cslast, cs->mask);
			__clocksource_unstable(cs);
			continue;
		}

		if (cs == curr_clocksource && cs->tick_stable)
			cs->tick_stable(cs);

		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
			/* Mark it valid for high-res. */
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;

			/*
			 * clocksource_done_booting() will sort it if
			 * finished_booting is not set yet.
			 */
			if (!finished_booting)
				continue;

			/*
			 * If this is not the current clocksource let
			 * the watchdog thread reselect it. Due to the
			 * change to high res this clocksource might
			 * be preferred now. If it is the current
			 * clocksource let the tick code know about
			 * that change.
			 */
			if (cs != curr_clocksource) {
				cs->flags |= CLOCK_SOURCE_RESELECT;
				schedule_work(&watchdog_work);
			} else {
				tick_clock_notify();
			}
		}
	}

	/*
	 * We only clear the watchdog_reset_pending, when we did a
	 * full cycle through all clocksources.
	 */
	if (reset_pending)
		atomic_dec(&watchdog_reset_pending);
	/*
	 * Cycle through CPUs to check if the CPUs stay synchronized
	 * to each other.
	 */
	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(cpu_online_mask);
	watchdog_timer.expires += WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, next_cpu);
out:
	spin_unlock(&watchdog_lock);
}

static inline void clocksource_start_watchdog(void)
{
	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
		return;
	timer_setup(&watchdog_timer, clocksource_watchdog, 0);
	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
	watchdog_running = 1;
}

static inline void clocksource_stop_watchdog(void)
{
	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
		return;
	del_timer(&watchdog_timer);
	watchdog_running = 0;
}

static inline void clocksource_reset_watchdog(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &watchdog_list, wd_list)
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}

static void clocksource_resume_watchdog(void)
{
	atomic_inc(&watchdog_reset_pending);
}

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	INIT_LIST_HEAD(&cs->wd_list);

	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
		/* cs is a clocksource to be watched. */
		list_add(&cs->wd_list, &watchdog_list);
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
	} else {
		/* cs is a watchdog. */
		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
	}
}

static void clocksource_select_watchdog(bool fallback)
{
	struct clocksource *cs, *old_wd;
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	/* save current watchdog */
	old_wd = watchdog;
	if (fallback)
		watchdog = NULL;

	list_for_each_entry(cs, &clocksource_list, list) {
		/* cs is a clocksource to be watched. */
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
			continue;

		/* Skip current if we were requested for a fallback. */
		if (fallback && cs == old_wd)
			continue;

		/* Pick the best watchdog. */
		if (!watchdog || cs->rating > watchdog->rating)
			watchdog = cs;
	}
	/* If we failed to find a fallback restore the old one. */
	if (!watchdog)
		watchdog = old_wd;

	/* If we changed the watchdog we need to reset cycles. */
	if (watchdog != old_wd)
		clocksource_reset_watchdog();

	/* Check if the watchdog timer needs to be started. */
	clocksource_start_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);
}
static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
	if (cs != watchdog) {
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
			/* cs is a watched clocksource. */
			list_del_init(&cs->wd_list);
			/* Check if the watchdog timer needs to be stopped. */
			clocksource_stop_watchdog();
		}
	}
}

static void __clocksource_change_rating(struct clocksource *cs, int rating);

static int __clocksource_watchdog_work(void)
{
	struct clocksource *cs, *tmp;
	unsigned long flags;
	int select = 0;

	spin_lock_irqsave(&watchdog_lock, flags);
	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			list_del_init(&cs->wd_list);
			__clocksource_change_rating(cs, 0);
			select = 1;
		}
		if (cs->flags & CLOCK_SOURCE_RESELECT) {
			cs->flags &= ~CLOCK_SOURCE_RESELECT;
			select = 1;
		}
	}
	/* Check if the watchdog timer needs to be stopped. */
	clocksource_stop_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);

	return select;
}

static void clocksource_watchdog_work(struct work_struct *work)
{
	mutex_lock(&clocksource_mutex);
	if (__clocksource_watchdog_work())
		clocksource_select();
	mutex_unlock(&clocksource_mutex);
}

static bool clocksource_is_watchdog(struct clocksource *cs)
{
	return cs == watchdog;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

static void clocksource_select_watchdog(bool fallback) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_work(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }

static inline void clocksource_watchdog_lock(unsigned long *flags) { }
static inline void clocksource_watchdog_unlock(unsigned long *flags) { }

#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */

/**
 * clocksource_suspend - suspend the clocksource(s)
 */
void clocksource_suspend(void)
{
	struct clocksource *cs;

	list_for_each_entry_reverse(cs, &clocksource_list, list)
		if (cs->suspend)
			cs->suspend(cs);
}

/**
 * clocksource_resume - resume the clocksource(s)
 */
void clocksource_resume(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &clocksource_list, list)
		if (cs->resume)
			cs->resume(cs);

	clocksource_resume_watchdog();
}
/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This might fail when the kernel
 * was stopped in code which holds watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
	clocksource_resume_watchdog();
}

/**
 * clocksource_max_adjustment - Returns max adjustment amount
 * @cs: Pointer to clocksource
 */
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
	u64 ret;
	/*
	 * We won't try to correct for more than 11% adjustments (110,000 ppm).
	 */
	ret = (u64)cs->mult * 11;
	do_div(ret, 100);
	return (u32)ret;
}

/**
 * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
 * @mult: cycle to nanosecond multiplier
 * @shift: cycle to nanosecond divisor (power of two)
 * @maxadj: maximum adjustment value to mult (~11%)
 * @mask: bitmask for two's complement subtraction of non-64-bit counters
 * @max_cyc: maximum cycle value before potential overflow (does not include
 *	any safety margin)
 *
 * NOTE: This function includes a safety margin of 50%, in other words, we
 * return half the number of nanoseconds the hardware counter can technically
 * cover. This is done so that we can potentially detect problems caused by
 * delayed timers or bad hardware, which might result in time intervals that
 * are larger than what the math used can handle without overflows.
 */
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
{
	u64 max_nsecs, max_cycles;

	/*
	 * Calculate the maximum number of cycles that we can pass to the
	 * cyc2ns() function without overflowing a 64-bit result.
	 */
	max_cycles = ULLONG_MAX;
	do_div(max_cycles, mult + maxadj);

	/*
	 * The actual maximum number of cycles we can defer the clocksource is
	 * determined by the minimum of max_cycles and mask.
	 * Note: Here we subtract the maxadj to make sure we don't sleep for
	 * too long if there's a large negative adjustment.
	 */
	max_cycles = min(max_cycles, mask);
	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);

	/* return the max_cycles value as well if requested */
	if (max_cyc)
		*max_cyc = max_cycles;

	/* Return 50% of the actual maximum, so we can detect bad values */
	max_nsecs >>= 1;

	return max_nsecs;
}

/**
 * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
 * @cs: Pointer to clocksource to be updated
 */
static inline void clocksource_update_max_deferment(struct clocksource *cs)
{
	cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
						cs->maxadj, cs->mask,
						&cs->max_cycles);
}
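
/*
 * Worked example (hypothetical numbers): for a 1 MHz clocksource with a
 * 32-bit counter and shift = 20 (so mult = 1000 << 20 = 1048576000), the
 * 11% maxadj is 115343360. ULLONG_MAX / (mult + maxadj) is roughly 1.5e10
 * cycles, so the 32-bit mask is the limiting factor: 0xffffffff cycles at
 * an effective (mult - maxadj) >> shift = 890 ns/cycle gives ~3822 seconds,
 * and the 50% safety margin leaves about 1911 seconds of maximum deferment.
 */
#if 0	/* example only */
	u64 example_max_cycles;
	u64 example_max_ns = clocks_calc_max_nsecs(1048576000, 20, 115343360,
						   0xffffffffULL,
						   &example_max_cycles);
#endif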
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET

static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
{
	struct clocksource *cs;

	if (!finished_booting || list_empty(&clocksource_list))
		return NULL;

	/*
	 * We pick the clocksource with the highest rating. If oneshot
	 * mode is active, we pick the highres valid clocksource with
	 * the best rating.
	 */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			continue;
		return cs;
	}
	return NULL;
}

static void __clocksource_select(bool skipcur)
{
	bool oneshot = tick_oneshot_mode_active();
	struct clocksource *best, *cs;

	/* Find the best suitable clocksource */
	best = clocksource_find_best(oneshot, skipcur);
	if (!best)
		return;

	if (!strlen(override_name))
		goto found;

	/* Check for the override clocksource. */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (strcmp(cs->name, override_name) != 0)
			continue;
		/*
		 * Check to make sure we don't switch to a non-highres
		 * capable clocksource if the tick code is in oneshot
		 * mode (highres or nohz)
		 */
		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
			/* Override clocksource cannot be used. */
			if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
				pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
					cs->name);
				override_name[0] = 0;
			} else {
				/*
				 * The override cannot currently be verified.
				 * Defer to let the watchdog check it.
				 */
				pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
					cs->name);
			}
		} else
			/* Override clocksource can be used. */
			best = cs;
		break;
	}

found:
	if (curr_clocksource != best && !timekeeping_notify(best)) {
		pr_info("Switched to clocksource %s\n", best->name);
		curr_clocksource = best;
	}
}

/**
 * clocksource_select - Select the best clocksource available
 *
 * Private function. Must hold clocksource_mutex when called.
 *
 * Select the clocksource with the best rating, or the clocksource
 * which is selected by userspace override.
 */
static void clocksource_select(void)
{
	__clocksource_select(false);
}

static void clocksource_select_fallback(void)
{
	__clocksource_select(true);
}

#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
static inline void clocksource_select(void) { }
static inline void clocksource_select_fallback(void) { }

#endif
/*
 * clocksource_done_booting - Called near the end of core bootup
 *
 * Hack to avoid lots of clocksource churn at boot time.
 * We use fs_initcall because we want this to start before
 * device_initcall but after subsys_initcall.
 */
static int __init clocksource_done_booting(void)
{
	mutex_lock(&clocksource_mutex);
	curr_clocksource = clocksource_default_clock();
	finished_booting = 1;
	/*
	 * Run the watchdog first to eliminate unstable clock sources
	 */
	__clocksource_watchdog_work();
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}
fs_initcall(clocksource_done_booting);

/*
 * Enqueue the clocksource sorted by rating
 */
static void clocksource_enqueue(struct clocksource *cs)
{
	struct list_head *entry = &clocksource_list;
	struct clocksource *tmp;

	list_for_each_entry(tmp, &clocksource_list, list) {
		/* Keep track of the place, where to insert */
		if (tmp->rating < cs->rating)
			break;
		entry = &tmp->list;
	}
	list_add(&cs->list, entry);
}

/**
 * __clocksource_update_freq_scale - Used to update a clocksource with a new frequency
 * @cs: clocksource to be registered
 * @scale: Scale factor multiplied against freq to get clocksource hz
 * @freq: clocksource frequency (cycles per second) divided by scale
 *
 * This should only be called from the clocksource->enable() method.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
 * functions.
 */
void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
	u64 sec;

	/*
	 * Default clocksources are *special* and self-define their mult/shift.
	 * But, you're not special, so you should specify a freq value.
	 */
	if (freq) {
		/*
		 * Calc the maximum number of seconds which we can run before
		 * wrapping around. For clocksources which have a mask > 32-bit
		 * we need to limit the max sleep time to have a good
		 * conversion precision. 10 minutes is still a reasonable
		 * amount. That results in a shift value of 24 for a
		 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
		 * ~ 0.06ppm granularity for NTP.
		 */
		sec = cs->mask;
		do_div(sec, freq);
		do_div(sec, scale);
		if (!sec)
			sec = 1;
		else if (sec > 600 && cs->mask > UINT_MAX)
			sec = 600;

		clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
				       NSEC_PER_SEC / scale, sec * scale);
	}
	/*
	 * Ensure clocksources that have large 'mult' values don't overflow
	 * when adjusted.
	 */
	cs->maxadj = clocksource_max_adjustment(cs);
	while (freq && ((cs->mult + cs->maxadj < cs->mult)
		|| (cs->mult - cs->maxadj > cs->mult))) {
		cs->mult >>= 1;
		cs->shift--;
		cs->maxadj = clocksource_max_adjustment(cs);
	}

	/*
	 * Only warn for *special* clocksources that self-define
	 * their mult/shift values and don't specify a freq.
	 */
	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
		"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
		cs->name);

	clocksource_update_max_deferment(cs);

	pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
		cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
}
EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
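
/*
 * Illustrative sketch: a minimal, hypothetical driver registering a 32-bit,
 * 1 MHz free-running counter through the clocksource_register_hz() helper,
 * which ends up in __clocksource_register_scale() below. The names
 * example_counter_base, example_counter_read() and the 1000000 Hz rate are
 * made up for this example; it assumes an already ioremapped counter.
 */
#if 0	/* example only */
static void __iomem *example_counter_base;	/* hypothetical MMIO base */

static u64 example_counter_read(struct clocksource *cs)
{
	return (u64)readl_relaxed(example_counter_base);
}

static struct clocksource example_clocksource = {
	.name	= "example-counter",
	.rating	= 200,
	.read	= example_counter_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int example_counter_init(void)
{
	/* mult/shift and max_idle_ns are derived by the core from the rate */
	return clocksource_register_hz(&example_clocksource, 1000000);
}
#endif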
/**
 * __clocksource_register_scale - Used to install new clocksources
 * @cs: clocksource to be registered
 * @scale: Scale factor multiplied against freq to get clocksource hz
 * @freq: clocksource frequency (cycles per second) divided by scale
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_register_hz() or clocksource_register_khz() helper functions.
 */
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
	unsigned long flags;

	/* Initialize mult/shift and max_idle_ns */
	__clocksource_update_freq_scale(cs, scale, freq);

	/* Add clocksource to the clocksource list */
	mutex_lock(&clocksource_mutex);

	clocksource_watchdog_lock(&flags);
	clocksource_enqueue(cs);
	clocksource_enqueue_watchdog(cs);
	clocksource_watchdog_unlock(&flags);

	clocksource_select();
	clocksource_select_watchdog(false);
	mutex_unlock(&clocksource_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);

static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
	list_del(&cs->list);
	cs->rating = rating;
	clocksource_enqueue(cs);
}

/**
 * clocksource_change_rating - Change the rating of a registered clocksource
 * @cs: clocksource to be changed
 * @rating: new rating
 */
void clocksource_change_rating(struct clocksource *cs, int rating)
{
	unsigned long flags;

	mutex_lock(&clocksource_mutex);
	clocksource_watchdog_lock(&flags);
	__clocksource_change_rating(cs, rating);
	clocksource_watchdog_unlock(&flags);

	clocksource_select();
	clocksource_select_watchdog(false);
	mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);

/*
 * Unbind clocksource @cs. Called with clocksource_mutex held
 */
static int clocksource_unbind(struct clocksource *cs)
{
	unsigned long flags;

	if (clocksource_is_watchdog(cs)) {
		/* Select and try to install a replacement watchdog. */
		clocksource_select_watchdog(true);
		if (clocksource_is_watchdog(cs))
			return -EBUSY;
	}

	if (cs == curr_clocksource) {
		/* Select and try to install a replacement clock source */
		clocksource_select_fallback();
		if (curr_clocksource == cs)
			return -EBUSY;
	}

	clocksource_watchdog_lock(&flags);
	clocksource_dequeue_watchdog(cs);
	list_del_init(&cs->list);
	clocksource_watchdog_unlock(&flags);

	return 0;
}

/**
 * clocksource_unregister - remove a registered clocksource
 * @cs: clocksource to be unregistered
 */
int clocksource_unregister(struct clocksource *cs)
{
	int ret = 0;

	mutex_lock(&clocksource_mutex);
	if (!list_empty(&cs->list))
		ret = clocksource_unbind(cs);
	mutex_unlock(&clocksource_mutex);
	return ret;
}
EXPORT_SYMBOL(clocksource_unregister);
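
/*
 * Illustrative sketch: how the hypothetical driver from the example above
 * might demote its clocksource after detecting a hardware problem, or
 * remove it entirely when the device goes away.
 */
#if 0	/* example only */
static void example_counter_handle_fault(void)
{
	/* Rating 0 makes the core prefer any other registered clocksource */
	clocksource_change_rating(&example_clocksource, 0);
}

static void example_counter_exit(void)
{
	/* Fails with -EBUSY only if no replacement could be installed */
	clocksource_unregister(&example_clocksource);
}
#endif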
#ifdef CONFIG_SYSFS
/**
 * current_clocksource_show - sysfs interface for current clocksource
 * @dev: unused
 * @attr: unused
 * @buf: char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing current clocksource.
 */
static ssize_t current_clocksource_show(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
	mutex_unlock(&clocksource_mutex);

	return count;
}

ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
{
	size_t ret = cnt;

	/* strings from sysfs write are not 0 terminated! */
	if (!cnt || cnt >= CS_NAME_LEN)
		return -EINVAL;

	/* strip off \n: */
	if (buf[cnt-1] == '\n')
		cnt--;
	if (cnt > 0)
		memcpy(dst, buf, cnt);
	dst[cnt] = 0;
	return ret;
}

/**
 * current_clocksource_store - interface for manually overriding clocksource
 * @dev: unused
 * @attr: unused
 * @buf: name of override clocksource
 * @count: length of buffer
 *
 * Takes input from sysfs interface for manually overriding the default
 * clocksource selection.
 */
static ssize_t current_clocksource_store(struct device *dev,
					 struct device_attribute *attr,
					 const char *buf, size_t count)
{
	ssize_t ret;

	mutex_lock(&clocksource_mutex);

	ret = sysfs_get_uname(buf, override_name, count);
	if (ret >= 0)
		clocksource_select();

	mutex_unlock(&clocksource_mutex);

	return ret;
}
static DEVICE_ATTR_RW(current_clocksource);

/**
 * unbind_clocksource_store - interface for manually unbinding clocksource
 * @dev: unused
 * @attr: unused
 * @buf: unused
 * @count: length of buffer
 *
 * Takes input from sysfs interface for manually unbinding a clocksource.
 */
static ssize_t unbind_clocksource_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	struct clocksource *cs;
	char name[CS_NAME_LEN];
	ssize_t ret;

	ret = sysfs_get_uname(buf, name, count);
	if (ret < 0)
		return ret;

	ret = -ENODEV;
	mutex_lock(&clocksource_mutex);
	list_for_each_entry(cs, &clocksource_list, list) {
		if (strcmp(cs->name, name))
			continue;
		ret = clocksource_unbind(cs);
		break;
	}
	mutex_unlock(&clocksource_mutex);
	return ret ? ret : count;
}
static DEVICE_ATTR_WO(unbind_clocksource);

/**
 * available_clocksource_show - sysfs interface for listing clocksource
 * @dev: unused
 * @attr: unused
 * @buf: char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing registered clocksources
 */
static ssize_t available_clocksource_show(struct device *dev,
					  struct device_attribute *attr,
					  char *buf)
{
	struct clocksource *src;
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	list_for_each_entry(src, &clocksource_list, list) {
		/*
		 * Don't show non-HRES clocksource if the tick code is
		 * in one shot mode (highres=on or nohz=on)
		 */
		if (!tick_oneshot_mode_active() ||
		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			count += snprintf(buf + count,
				max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
				"%s ", src->name);
	}
	mutex_unlock(&clocksource_mutex);

	count += snprintf(buf + count,
			  max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");

	return count;
}
static DEVICE_ATTR_RO(available_clocksource);

static struct attribute *clocksource_attrs[] = {
	&dev_attr_current_clocksource.attr,
	&dev_attr_unbind_clocksource.attr,
	&dev_attr_available_clocksource.attr,
	NULL
};
ATTRIBUTE_GROUPS(clocksource);

static struct bus_type clocksource_subsys = {
	.name = "clocksource",
	.dev_name = "clocksource",
};

static struct device device_clocksource = {
	.id	= 0,
	.bus	= &clocksource_subsys,
	.groups	= clocksource_groups,
};

static int __init init_clocksource_sysfs(void)
{
	int error = subsys_system_register(&clocksource_subsys, NULL);

	if (!error)
		error = device_register(&device_clocksource);

	return error;
}

device_initcall(init_clocksource_sysfs);
#endif /* CONFIG_SYSFS */

/**
 * boot_override_clocksource - boot clock override
 * @str: override name
 *
 * Takes a clocksource= boot argument and uses it
 * as the clocksource override name.
 */
static int __init boot_override_clocksource(char *str)
{
	mutex_lock(&clocksource_mutex);
	if (str)
		strlcpy(override_name, str, sizeof(override_name));
	mutex_unlock(&clocksource_mutex);
	return 1;
}

__setup("clocksource=", boot_override_clocksource);

/**
 * boot_override_clock - Compatibility layer for deprecated boot option
 * @str: override name
 *
 * DEPRECATED! Takes a clock= boot argument and uses it
 * as the clocksource override name
 */
static int __init boot_override_clock(char *str)
{
	if (!strcmp(str, "pmtmr")) {
		pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
		return boot_override_clocksource("acpi_pm");
	}
	pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
	return boot_override_clocksource(str);
}

__setup("clock=", boot_override_clock);