/*
 * linux/kernel/time/clocksource.c
 *
 * This file contains the functions which manage clocksource drivers.
 *
 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * TODO WishList:
 *   o Allow clocksource drivers to be unregistered
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"

/**
 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
 * @mult:	pointer to mult variable
 * @shift:	pointer to shift variable
 * @from:	frequency to convert from
 * @to:		frequency to convert to
 * @maxsec:	guaranteed runtime conversion range in seconds
 *
 * The function evaluates the shift/mult pair for the scaled math
 * operations of clocksources and clockevents.
 *
 * @to and @from are frequency values in HZ. For clock sources @to is
 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
 * events @to is the counter frequency and @from is NSEC_PER_SEC.
 *
 * The @maxsec conversion range argument controls the time frame in
 * seconds which must be covered by the runtime conversion with the
 * calculated mult and shift factors. This guarantees that no 64bit
 * overflow happens when the input value of the conversion is
 * multiplied with the calculated mult factor. Larger ranges may
 * reduce the conversion accuracy by choosing smaller mult and shift
 * factors.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
	u64 tmp;
	u32 sft, sftacc = 32;

	/*
	 * Calculate the shift factor which is limiting the conversion
	 * range:
	 */
	tmp = ((u64)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/*
	 * Find the conversion shift/mult pair which has the best
	 * accuracy and fits the maxsec conversion range:
	 */
	for (sft = 32; sft > 0; sft--) {
		tmp = (u64) to << sft;
		tmp += from / 2;
		do_div(tmp, from);
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = tmp;
	*shift = sft;
}
EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
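/*
 * Illustrative example (not derived from this file): for a hypothetical
 * free-running counter ticking at 1 MHz that is converted to nanoseconds,
 * clocks_calc_mult_shift(&mult, &shift, 1000000, NSEC_PER_SEC, 600)
 * yields mult = 1000 << 22 = 0xFA000000 and shift = 22, so
 *
 *	ns = (cycles * 0xFA000000) >> 22 = cycles * 1000
 *
 * which is exact for a 1 MHz clock (one cycle per microsecond). With
 * maxsec = 600 the intermediate 64-bit product cannot overflow for
 * deltas of up to ten minutes worth of cycles.
 */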
/*[Clocksource internal variables]---------
 * curr_clocksource:
 *	currently selected clocksource.
 * clocksource_list:
 *	linked list with the registered clocksources
 * clocksource_mutex:
 *	protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *	Name of the user-specified clocksource.
 */
static struct clocksource *curr_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;

static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);

/*
 * Interval: 0.5sec Threshold: 0.0625s
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
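/*
 * Illustrative note (not part of the original file): WATCHDOG_INTERVAL is
 * HZ >> 1, i.e. half a second worth of jiffies, and WATCHDOG_THRESHOLD is
 * NSEC_PER_SEC >> 4 = 62500000 ns = 62.5 ms. A watched clocksource is
 * therefore declared unstable when it drifts from the watchdog clocksource
 * by more than roughly 12.5% over one ~0.5 s sampling interval.
 */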
static void clocksource_watchdog_work(struct work_struct *work)
{
	/*
	 * If kthread_run fails the next watchdog scan over the
	 * watchdog_list will find the unstable clock again.
	 */
	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}

static void __clocksource_unstable(struct clocksource *cs)
{
	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
	cs->flags |= CLOCK_SOURCE_UNSTABLE;

	if (cs->mark_unstable)
		cs->mark_unstable(cs);

	if (finished_booting)
		schedule_work(&watchdog_work);
}

/**
 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 * @cs:		clocksource to be marked unstable
 *
 * This function is called instead of clocksource_change_rating from
 * cpu hotplug code to avoid a deadlock between the clocksource mutex
 * and the cpu hotplug mutex. It defers the update of the clocksource
 * to the watchdog thread.
 */
void clocksource_mark_unstable(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
		if (list_empty(&cs->wd_list))
			list_add(&cs->wd_list, &watchdog_list);
		__clocksource_unstable(cs);
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_watchdog(unsigned long data)
{
	struct clocksource *cs;
	u64 csnow, wdnow, cslast, wdlast, delta;
	int64_t wd_nsec, cs_nsec;
	int next_cpu, reset_pending;

	spin_lock(&watchdog_lock);
	if (!watchdog_running)
		goto out;

	reset_pending = atomic_read(&watchdog_reset_pending);

	list_for_each_entry(cs, &watchdog_list, wd_list) {

		/* Clocksource already marked unstable? */
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			if (finished_booting)
				schedule_work(&watchdog_work);
			continue;
		}

		local_irq_disable();
		csnow = cs->read(cs);
		wdnow = watchdog->read(watchdog);
		local_irq_enable();

		/* Clocksource initialized ? */
		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
		    atomic_read(&watchdog_reset_pending)) {
			cs->flags |= CLOCK_SOURCE_WATCHDOG;
			cs->wd_last = wdnow;
			cs->cs_last = csnow;
			continue;
		}

		delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
		wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
					     watchdog->shift);

		delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
		wdlast = cs->wd_last; /* save these in case we print them */
		cslast = cs->cs_last;
		cs->cs_last = csnow;
		cs->wd_last = wdnow;

		if (atomic_read(&watchdog_reset_pending))
			continue;

		/* Check the deviation from the watchdog clocksource. */
		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
				smp_processor_id(), cs->name);
			pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
				watchdog->name, wdnow, wdlast, watchdog->mask);
			pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
				cs->name, csnow, cslast, cs->mask);
			__clocksource_unstable(cs);
			continue;
		}

		if (cs == curr_clocksource && cs->tick_stable)
			cs->tick_stable(cs);

		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
			/* Mark it valid for high-res. */
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;

			/*
			 * clocksource_done_booting() will sort it if
			 * finished_booting is not set yet.
			 */
			if (!finished_booting)
				continue;

			/*
			 * If this is not the current clocksource let
			 * the watchdog thread reselect it. Due to the
			 * change to high res this clocksource might
			 * be preferred now. If it is the current
			 * clocksource let the tick code know about
			 * that change.
			 */
			if (cs != curr_clocksource) {
				cs->flags |= CLOCK_SOURCE_RESELECT;
				schedule_work(&watchdog_work);
			} else {
				tick_clock_notify();
			}
		}
	}

	/*
	 * We only clear the watchdog_reset_pending, when we did a
	 * full cycle through all clocksources.
	 */
	if (reset_pending)
		atomic_dec(&watchdog_reset_pending);

	/*
	 * Cycle through CPUs to check if the CPUs stay synchronized
	 * to each other.
	 */
	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(cpu_online_mask);
	watchdog_timer.expires += WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, next_cpu);
out:
	spin_unlock(&watchdog_lock);
}
static inline void clocksource_start_watchdog(void)
{
	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
		return;
	init_timer(&watchdog_timer);
	watchdog_timer.function = clocksource_watchdog;
	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
	watchdog_running = 1;
}

static inline void clocksource_stop_watchdog(void)
{
	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
		return;
	del_timer(&watchdog_timer);
	watchdog_running = 0;
}

static inline void clocksource_reset_watchdog(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &watchdog_list, wd_list)
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}

static void clocksource_resume_watchdog(void)
{
	atomic_inc(&watchdog_reset_pending);
}

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
		/* cs is a clocksource to be watched. */
		list_add(&cs->wd_list, &watchdog_list);
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
	} else {
		/* cs is a watchdog. */
		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_select_watchdog(bool fallback)
{
	struct clocksource *cs, *old_wd;
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	/* save current watchdog */
	old_wd = watchdog;
	if (fallback)
		watchdog = NULL;

	list_for_each_entry(cs, &clocksource_list, list) {
		/* cs is a clocksource to be watched. */
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
			continue;

		/* Skip current if we were requested for a fallback. */
		if (fallback && cs == old_wd)
			continue;

		/* Pick the best watchdog. */
		if (!watchdog || cs->rating > watchdog->rating)
			watchdog = cs;
	}
	/* If we failed to find a fallback restore the old one. */
	if (!watchdog)
		watchdog = old_wd;

	/* If we changed the watchdog we need to reset cycles. */
	if (watchdog != old_wd)
		clocksource_reset_watchdog();

	/* Check if the watchdog timer needs to be started. */
	clocksource_start_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (cs != watchdog) {
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
			/* cs is a watched clocksource. */
			list_del_init(&cs->wd_list);
			/* Check if the watchdog timer needs to be stopped. */
			clocksource_stop_watchdog();
		}
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}
static int __clocksource_watchdog_kthread(void)
{
	struct clocksource *cs, *tmp;
	unsigned long flags;
	LIST_HEAD(unstable);
	int select = 0;

	spin_lock_irqsave(&watchdog_lock, flags);
	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			list_del_init(&cs->wd_list);
			list_add(&cs->wd_list, &unstable);
			select = 1;
		}
		if (cs->flags & CLOCK_SOURCE_RESELECT) {
			cs->flags &= ~CLOCK_SOURCE_RESELECT;
			select = 1;
		}
	}
	/* Check if the watchdog timer needs to be stopped. */
	clocksource_stop_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);

	/* Needs to be done outside of watchdog lock */
	list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
		list_del_init(&cs->wd_list);
		__clocksource_change_rating(cs, 0);
	}
	return select;
}

static int clocksource_watchdog_kthread(void *data)
{
	mutex_lock(&clocksource_mutex);
	if (__clocksource_watchdog_kthread())
		clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}

static bool clocksource_is_watchdog(struct clocksource *cs)
{
	return cs == watchdog;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

static void clocksource_select_watchdog(bool fallback) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }

#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */

/**
 * clocksource_suspend - suspend the clocksource(s)
 */
void clocksource_suspend(void)
{
	struct clocksource *cs;

	list_for_each_entry_reverse(cs, &clocksource_list, list)
		if (cs->suspend)
			cs->suspend(cs);
}

/**
 * clocksource_resume - resume the clocksource(s)
 */
void clocksource_resume(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &clocksource_list, list)
		if (cs->resume)
			cs->resume(cs);

	clocksource_resume_watchdog();
}

/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This might fail when the kernel
 * was stopped in code which holds watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
	clocksource_resume_watchdog();
}
/**
 * clocksource_max_adjustment - Returns max adjustment amount
 * @cs:		Pointer to clocksource
 *
 */
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
	u64 ret;
	/*
	 * We won't try to correct for more than 11% adjustments (110,000 ppm),
	 */
	ret = (u64)cs->mult * 11;
	do_div(ret, 100);
	return (u32)ret;
}

/**
 * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
 * @mult:	cycle to nanosecond multiplier
 * @shift:	cycle to nanosecond divisor (power of two)
 * @maxadj:	maximum adjustment value to mult (~11%)
 * @mask:	bitmask for two's complement subtraction of non 64 bit counters
 * @max_cyc:	maximum cycle value before potential overflow (does not include
 *		any safety margin)
 *
 * NOTE: This function includes a safety margin of 50%, in other words, we
 * return half the number of nanoseconds the hardware counter can technically
 * cover. This is done so that we can potentially detect problems caused by
 * delayed timers or bad hardware, which might result in time intervals that
 * are larger than what the math used can handle without overflows.
 */
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
{
	u64 max_nsecs, max_cycles;

	/*
	 * Calculate the maximum number of cycles that we can pass to the
	 * cyc2ns() function without overflowing a 64-bit result.
	 */
	max_cycles = ULLONG_MAX;
	do_div(max_cycles, mult+maxadj);

	/*
	 * The actual maximum number of cycles we can defer the clocksource is
	 * determined by the minimum of max_cycles and mask.
	 * Note: Here we subtract the maxadj to make sure we don't sleep for
	 * too long if there's a large negative adjustment.
	 */
	max_cycles = min(max_cycles, mask);
	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);

	/* return the max_cycles value as well if requested */
	if (max_cyc)
		*max_cyc = max_cycles;

	/* Return 50% of the actual maximum, so we can detect bad values */
	max_nsecs >>= 1;

	return max_nsecs;
}

/**
 * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
 * @cs:		Pointer to clocksource to be updated
 *
 */
static inline void clocksource_update_max_deferment(struct clocksource *cs)
{
	cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
						cs->maxadj, cs->mask,
						&cs->max_cycles);
}
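/*
 * Illustrative example (not derived from this file): continuing the
 * hypothetical 1 MHz counter with a 32-bit mask, mult = 0xFA000000 and
 * shift = 22 from above, clocksource_max_adjustment() yields
 * maxadj = mult * 11 / 100 ~= 461373440. clocks_calc_max_nsecs() then
 * limits max_cycles to ULLONG_MAX / (mult + maxadj) ~= 3.96e9 (just below
 * the 32-bit mask), converts that with the reduced multiplier
 * (mult - maxadj) to roughly 3526 seconds and halves it, so max_idle_ns
 * ends up at about 29 minutes for such a clocksource.
 */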
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET

static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
{
	struct clocksource *cs;

	if (!finished_booting || list_empty(&clocksource_list))
		return NULL;

	/*
	 * We pick the clocksource with the highest rating. If oneshot
	 * mode is active, we pick the highres valid clocksource with
	 * the best rating.
	 */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			continue;
		return cs;
	}
	return NULL;
}

static void __clocksource_select(bool skipcur)
{
	bool oneshot = tick_oneshot_mode_active();
	struct clocksource *best, *cs;

	/* Find the best suitable clocksource */
	best = clocksource_find_best(oneshot, skipcur);
	if (!best)
		return;

	/* Check for the override clocksource. */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (strcmp(cs->name, override_name) != 0)
			continue;
		/*
		 * Check to make sure we don't switch to a non-highres
		 * capable clocksource if the tick code is in oneshot
		 * mode (highres or nohz)
		 */
		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
			/* Override clocksource cannot be used. */
			if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
				pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
					cs->name);
				override_name[0] = 0;
			} else {
				/*
				 * The override cannot be currently verified.
				 * Deferring to let the watchdog check.
				 */
				pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
					cs->name);
			}
		} else
			/* Override clocksource can be used. */
			best = cs;
		break;
	}

	if (curr_clocksource != best && !timekeeping_notify(best)) {
		pr_info("Switched to clocksource %s\n", best->name);
		curr_clocksource = best;
	}
}

/**
 * clocksource_select - Select the best clocksource available
 *
 * Private function. Must hold clocksource_mutex when called.
 *
 * Select the clocksource with the best rating, or the clocksource,
 * which is selected by userspace override.
 */
static void clocksource_select(void)
{
	__clocksource_select(false);
}

static void clocksource_select_fallback(void)
{
	__clocksource_select(true);
}

#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
static inline void clocksource_select(void) { }
static inline void clocksource_select_fallback(void) { }

#endif

/*
 * clocksource_done_booting - Called near the end of core bootup
 *
 * Hack to avoid lots of clocksource churn at boot time.
 * We use fs_initcall because we want this to start before
 * device_initcall but after subsys_initcall.
 */
static int __init clocksource_done_booting(void)
{
	mutex_lock(&clocksource_mutex);
	curr_clocksource = clocksource_default_clock();
	finished_booting = 1;
	/*
	 * Run the watchdog first to eliminate unstable clock sources
	 */
	__clocksource_watchdog_kthread();
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}
fs_initcall(clocksource_done_booting);
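/*
 * Illustrative note (not part of the original file): the rating used by
 * clocksource_enqueue() and clocksource_find_best() is the driver-supplied
 * quality estimate documented in <linux/clocksource.h>; roughly, 1-99 is
 * only fit for boot-up and testing, 100-199 is functional but not desired,
 * 200-299 is correct and usable, 300-399 is reasonably fast and accurate,
 * and 400-499 is the ideal, must-use clocksource.
 */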
/*
 * Enqueue the clocksource sorted by rating
 */
static void clocksource_enqueue(struct clocksource *cs)
{
	struct list_head *entry = &clocksource_list;
	struct clocksource *tmp;

	list_for_each_entry(tmp, &clocksource_list, list) {
		/* Keep track of the place, where to insert */
		if (tmp->rating < cs->rating)
			break;
		entry = &tmp->list;
	}
	list_add(&cs->list, entry);
}

/**
 * __clocksource_update_freq_scale - Used to update a clocksource with a new freq
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * This should only be called from the clocksource->enable() method.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
 * functions.
 */
void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
	u64 sec;

	/*
	 * Default clocksources are *special* and self-define their mult/shift.
	 * But, you're not special, so you should specify a freq value.
	 */
	if (freq) {
		/*
		 * Calc the maximum number of seconds which we can run before
		 * wrapping around. For clocksources which have a mask > 32-bit
		 * we need to limit the max sleep time to have a good
		 * conversion precision. 10 minutes is still a reasonable
		 * amount. That results in a shift value of 24 for a
		 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
		 * ~ 0.06ppm granularity for NTP.
		 */
		sec = cs->mask;
		do_div(sec, freq);
		do_div(sec, scale);
		if (!sec)
			sec = 1;
		else if (sec > 600 && cs->mask > UINT_MAX)
			sec = 600;

		clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
				       NSEC_PER_SEC / scale, sec * scale);
	}
	/*
	 * Ensure clocksources that have large 'mult' values don't overflow
	 * when adjusted.
	 */
	cs->maxadj = clocksource_max_adjustment(cs);
	while (freq && ((cs->mult + cs->maxadj < cs->mult)
		|| (cs->mult - cs->maxadj > cs->mult))) {
		cs->mult >>= 1;
		cs->shift--;
		cs->maxadj = clocksource_max_adjustment(cs);
	}

	/*
	 * Only warn for *special* clocksources that self-define
	 * their mult/shift values and don't specify a freq.
	 */
	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
		"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
		cs->name);

	clocksource_update_max_deferment(cs);

	pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
		cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
}
EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);

/**
 * __clocksource_register_scale - Used to install new clocksources
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_register_hz() or clocksource_register_khz() helper functions.
 */
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{

	/* Initialize mult/shift and max_idle_ns */
	__clocksource_update_freq_scale(cs, scale, freq);

	/* Add clocksource to the clocksource list */
	mutex_lock(&clocksource_mutex);
	clocksource_enqueue(cs);
	clocksource_enqueue_watchdog(cs);
	clocksource_select();
	clocksource_select_watchdog(false);
	mutex_unlock(&clocksource_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
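/*
 * Illustrative example (hypothetical, not part of this file): a driver
 * with a free-running 32-bit counter ticking at 1 MHz would typically
 * fill in a struct clocksource and register it through the
 * clocksource_register_hz() wrapper, which ends up in
 * __clocksource_register_scale() above:
 *
 *	static u64 foo_clocksource_read(struct clocksource *cs)
 *	{
 *		return (u64)readl_relaxed(foo_counter_base);
 *	}
 *
 *	static struct clocksource foo_clocksource = {
 *		.name	= "foo",
 *		.rating	= 300,
 *		.read	= foo_clocksource_read,
 *		.mask	= CLOCKSOURCE_MASK(32),
 *		.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 *	};
 *
 *	ret = clocksource_register_hz(&foo_clocksource, 1000000);
 *
 * "foo" and foo_counter_base are made-up names; mult/shift, maxadj and
 * max_idle_ns are then derived for it as in the examples above.
 */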
static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
	list_del(&cs->list);
	cs->rating = rating;
	clocksource_enqueue(cs);
}

/**
 * clocksource_change_rating - Change the rating of a registered clocksource
 * @cs:		clocksource to be changed
 * @rating:	new rating
 */
void clocksource_change_rating(struct clocksource *cs, int rating)
{
	mutex_lock(&clocksource_mutex);
	__clocksource_change_rating(cs, rating);
	clocksource_select();
	clocksource_select_watchdog(false);
	mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);

/*
 * Unbind clocksource @cs. Called with clocksource_mutex held
 */
static int clocksource_unbind(struct clocksource *cs)
{
	if (clocksource_is_watchdog(cs)) {
		/* Select and try to install a replacement watchdog. */
		clocksource_select_watchdog(true);
		if (clocksource_is_watchdog(cs))
			return -EBUSY;
	}

	if (cs == curr_clocksource) {
		/* Select and try to install a replacement clock source */
		clocksource_select_fallback();
		if (curr_clocksource == cs)
			return -EBUSY;
	}
	clocksource_dequeue_watchdog(cs);
	list_del_init(&cs->list);
	return 0;
}

/**
 * clocksource_unregister - remove a registered clocksource
 * @cs:	clocksource to be unregistered
 */
int clocksource_unregister(struct clocksource *cs)
{
	int ret = 0;

	mutex_lock(&clocksource_mutex);
	if (!list_empty(&cs->list))
		ret = clocksource_unbind(cs);
	mutex_unlock(&clocksource_mutex);
	return ret;
}
EXPORT_SYMBOL(clocksource_unregister);
#ifdef CONFIG_SYSFS
/**
 * sysfs_show_current_clocksources - sysfs interface for current clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing current clocksource.
 */
static ssize_t
sysfs_show_current_clocksources(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
	mutex_unlock(&clocksource_mutex);

	return count;
}

ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
{
	size_t ret = cnt;

	/* strings from sysfs write are not 0 terminated! */
	if (!cnt || cnt >= CS_NAME_LEN)
		return -EINVAL;

	/* strip off \n: */
	if (buf[cnt-1] == '\n')
		cnt--;
	if (cnt > 0)
		memcpy(dst, buf, cnt);
	dst[cnt] = 0;
	return ret;
}

/**
 * sysfs_override_clocksource - interface for manually overriding clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	name of override clocksource
 * @count:	length of buffer
 *
 * Takes input from sysfs interface for manually overriding the default
 * clocksource selection.
 */
static ssize_t sysfs_override_clocksource(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t count)
{
	ssize_t ret;

	mutex_lock(&clocksource_mutex);

	ret = sysfs_get_uname(buf, override_name, count);
	if (ret >= 0)
		clocksource_select();

	mutex_unlock(&clocksource_mutex);

	return ret;
}

/**
 * sysfs_unbind_clocksource - interface for manually unbinding clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	unused
 * @count:	length of buffer
 *
 * Takes input from sysfs interface for manually unbinding a clocksource.
 */
static ssize_t sysfs_unbind_clocksource(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	struct clocksource *cs;
	char name[CS_NAME_LEN];
	ssize_t ret;

	ret = sysfs_get_uname(buf, name, count);
	if (ret < 0)
		return ret;

	ret = -ENODEV;
	mutex_lock(&clocksource_mutex);
	list_for_each_entry(cs, &clocksource_list, list) {
		if (strcmp(cs->name, name))
			continue;
		ret = clocksource_unbind(cs);
		break;
	}
	mutex_unlock(&clocksource_mutex);

	return ret ? ret : count;
}

/**
 * sysfs_show_available_clocksources - sysfs interface for listing clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing registered clocksources
 */
static ssize_t
sysfs_show_available_clocksources(struct device *dev,
				  struct device_attribute *attr,
				  char *buf)
{
	struct clocksource *src;
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	list_for_each_entry(src, &clocksource_list, list) {
		/*
		 * Don't show non-HRES clocksource if the tick code is
		 * in one shot mode (highres=on or nohz=on)
		 */
		if (!tick_oneshot_mode_active() ||
		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			count += snprintf(buf + count,
				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
				  "%s ", src->name);
	}
	mutex_unlock(&clocksource_mutex);

	count += snprintf(buf + count,
			  max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");

	return count;
}

/*
 * Sysfs setup bits:
 */
static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
		   sysfs_override_clocksource);

static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);

static DEVICE_ATTR(available_clocksource, 0444,
		   sysfs_show_available_clocksources, NULL);

static struct bus_type clocksource_subsys = {
	.name = "clocksource",
	.dev_name = "clocksource",
};

static struct device device_clocksource = {
	.id	= 0,
	.bus	= &clocksource_subsys,
};
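/*
 * Illustrative note (not part of the original file): with CONFIG_SYSFS the
 * attributes above appear under /sys/devices/system/clocksource/clocksource0/,
 * so the registered and current clocksources can be inspected and overridden
 * from userspace, for example:
 *
 *	# cat /sys/devices/system/clocksource/clocksource0/available_clocksource
 *	tsc hpet acpi_pm
 *	# echo hpet > /sys/devices/system/clocksource/clocksource0/current_clocksource
 *
 * The clocksource names shown are only placeholders for whatever is
 * registered on a given system.
 */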
static int __init init_clocksource_sysfs(void)
{
	int error = subsys_system_register(&clocksource_subsys, NULL);

	if (!error)
		error = device_register(&device_clocksource);
	if (!error)
		error = device_create_file(
				&device_clocksource,
				&dev_attr_current_clocksource);
	if (!error)
		error = device_create_file(&device_clocksource,
					   &dev_attr_unbind_clocksource);
	if (!error)
		error = device_create_file(
				&device_clocksource,
				&dev_attr_available_clocksource);
	return error;
}

device_initcall(init_clocksource_sysfs);
#endif /* CONFIG_SYSFS */

/**
 * boot_override_clocksource - boot clock override
 * @str:	override name
 *
 * Takes a clocksource= boot argument and uses it
 * as the clocksource override name.
 */
static int __init boot_override_clocksource(char *str)
{
	mutex_lock(&clocksource_mutex);
	if (str)
		strlcpy(override_name, str, sizeof(override_name));
	mutex_unlock(&clocksource_mutex);
	return 1;
}

__setup("clocksource=", boot_override_clocksource);

/**
 * boot_override_clock - Compatibility layer for deprecated boot option
 * @str:	override name
 *
 * DEPRECATED! Takes a clock= boot argument and uses it
 * as the clocksource override name
 */
static int __init boot_override_clock(char *str)
{
	if (!strcmp(str, "pmtmr")) {
		pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
		return boot_override_clocksource("acpi_pm");
	}
	pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
	return boot_override_clocksource(str);
}

__setup("clock=", boot_override_clock);