xref: /openbmc/linux/kernel/time/clocksource.c (revision b34e08d5)
1 /*
2  * linux/kernel/time/clocksource.c
3  *
4  * This file contains the functions which manage clocksource drivers.
5  *
6  * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  *
22  * TODO WishList:
23  *   o Allow clocksource drivers to be unregistered
24  */
25 
26 #include <linux/device.h>
27 #include <linux/clocksource.h>
28 #include <linux/init.h>
29 #include <linux/module.h>
30 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
31 #include <linux/tick.h>
32 #include <linux/kthread.h>
33 
34 #include "tick-internal.h"
35 
36 void timecounter_init(struct timecounter *tc,
37 		      const struct cyclecounter *cc,
38 		      u64 start_tstamp)
39 {
40 	tc->cc = cc;
41 	tc->cycle_last = cc->read(cc);
42 	tc->nsec = start_tstamp;
43 }
44 EXPORT_SYMBOL_GPL(timecounter_init);
45 
46 /**
47  * timecounter_read_delta - get nanoseconds since last call of this function
48  * @tc:         Pointer to time counter
49  *
50  * When the underlying cycle counter runs over, this will be handled
51  * correctly as long as it does not run over more than once between
52  * calls.
53  *
54  * The first call to this function for a new time counter initializes
55  * the time tracking and returns an undefined result.
56  */
57 static u64 timecounter_read_delta(struct timecounter *tc)
58 {
59 	cycle_t cycle_now, cycle_delta;
60 	u64 ns_offset;
61 
62 	/* read cycle counter: */
63 	cycle_now = tc->cc->read(tc->cc);
64 
65 	/* calculate the delta since the last timecounter_read_delta(): */
66 	cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;
67 
68 	/* convert to nanoseconds: */
69 	ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);
70 
71 	/* update time stamp of timecounter_read_delta() call: */
72 	tc->cycle_last = cycle_now;
73 
74 	return ns_offset;
75 }
76 
77 u64 timecounter_read(struct timecounter *tc)
78 {
79 	u64 nsec;
80 
81 	/* increment time by nanoseconds since last call */
82 	nsec = timecounter_read_delta(tc);
83 	nsec += tc->nsec;
84 	tc->nsec = nsec;
85 
86 	return nsec;
87 }
88 EXPORT_SYMBOL_GPL(timecounter_read);
89 
90 u64 timecounter_cyc2time(struct timecounter *tc,
91 			 cycle_t cycle_tstamp)
92 {
93 	u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
94 	u64 nsec;
95 
96 	/*
97 	 * Instead of always treating cycle_tstamp as more recent
98 	 * than tc->cycle_last, detect when it is too far in the
99 	 * future and treat it as old time stamp instead.
100 	 */
101 	if (cycle_delta > tc->cc->mask / 2) {
102 		cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
103 		nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
104 	} else {
105 		nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
106 	}
107 
108 	return nsec;
109 }
110 EXPORT_SYMBOL_GPL(timecounter_cyc2time);
111 
112 /**
113  * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
114  * @mult:	pointer to mult variable
115  * @shift:	pointer to shift variable
116  * @from:	frequency to convert from
117  * @to:		frequency to convert to
118  * @maxsec:	guaranteed runtime conversion range in seconds
119  *
120  * The function evaluates the shift/mult pair for the scaled math
121  * operations of clocksources and clockevents.
122  *
123  * @to and @from are frequency values in HZ. For clock sources @to is
124  * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
125  * event @to is the counter frequency and @from is NSEC_PER_SEC.
126  *
127  * The @maxsec conversion range argument controls the time frame in
128  * seconds which must be covered by the runtime conversion with the
129  * calculated mult and shift factors. This guarantees that no 64bit
130  * overflow happens when the input value of the conversion is
131  * multiplied with the calculated mult factor. Larger ranges may
132  * reduce the conversion accuracy by chosing smaller mult and shift
133  * factors.
134  */
135 void
136 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
137 {
138 	u64 tmp;
139 	u32 sft, sftacc= 32;
140 
141 	/*
142 	 * Calculate the shift factor which is limiting the conversion
143 	 * range:
144 	 */
145 	tmp = ((u64)maxsec * from) >> 32;
146 	while (tmp) {
147 		tmp >>=1;
148 		sftacc--;
149 	}
150 
151 	/*
152 	 * Find the conversion shift/mult pair which has the best
153 	 * accuracy and fits the maxsec conversion range:
154 	 */
155 	for (sft = 32; sft > 0; sft--) {
156 		tmp = (u64) to << sft;
157 		tmp += from / 2;
158 		do_div(tmp, from);
159 		if ((tmp >> sftacc) == 0)
160 			break;
161 	}
162 	*mult = tmp;
163 	*shift = sft;
164 }
165 
/*[Clocksource internal variables]---------
 * curr_clocksource:
 *	currently selected clocksource.
 * clocksource_list:
 *	linked list with the registered clocksources
 * clocksource_mutex:
 *	protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *	Name of the user-specified clocksource.
 * finished_booting:
 *	set to 1 by clocksource_done_booting(). While it is 0,
 *	clocksource_find_best() returns NULL and no watchdog work is
 *	scheduled for unstable clocksources.
 */
static struct clocksource *curr_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;
181 
182 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
/* Forward declarations needed by the watchdog code below. */
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

/* Clocksources that are compared against the watchdog, linked via wd_list. */
static LIST_HEAD(watchdog_list);
/* Clocksource used as the reference for the comparison (may be NULL). */
static struct clocksource *watchdog;
/* Periodic timer that runs clocksource_watchdog(). */
static struct timer_list watchdog_timer;
/* Work item that starts the watchdog kthread from process context. */
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
/* Held around accesses to watchdog_list, watchdog and watchdog_running below. */
static DEFINE_SPINLOCK(watchdog_lock);
/* Non-zero while watchdog_timer is armed. */
static int watchdog_running;
/* Incremented to make the next watchdog run re-sample instead of compare. */
static atomic_t watchdog_reset_pending;

static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);

/*
 * Interval: 0.5sec Threshold: 0.0625s
 *
 * WATCHDOG_INTERVAL is in jiffies, WATCHDOG_THRESHOLD in nanoseconds.
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
202 
203 static void clocksource_watchdog_work(struct work_struct *work)
204 {
205 	/*
206 	 * If kthread_run fails the next watchdog scan over the
207 	 * watchdog_list will find the unstable clock again.
208 	 */
209 	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
210 }
211 
212 static void __clocksource_unstable(struct clocksource *cs)
213 {
214 	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
215 	cs->flags |= CLOCK_SOURCE_UNSTABLE;
216 	if (finished_booting)
217 		schedule_work(&watchdog_work);
218 }
219 
220 static void clocksource_unstable(struct clocksource *cs, int64_t delta)
221 {
222 	printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
223 	       cs->name, delta);
224 	__clocksource_unstable(cs);
225 }
226 
227 /**
228  * clocksource_mark_unstable - mark clocksource unstable via watchdog
229  * @cs:		clocksource to be marked unstable
230  *
231  * This function is called instead of clocksource_change_rating from
232  * cpu hotplug code to avoid a deadlock between the clocksource mutex
233  * and the cpu hotplug mutex. It defers the update of the clocksource
234  * to the watchdog thread.
235  */
236 void clocksource_mark_unstable(struct clocksource *cs)
237 {
238 	unsigned long flags;
239 
240 	spin_lock_irqsave(&watchdog_lock, flags);
241 	if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
242 		if (list_empty(&cs->wd_list))
243 			list_add(&cs->wd_list, &watchdog_list);
244 		__clocksource_unstable(cs);
245 	}
246 	spin_unlock_irqrestore(&watchdog_lock, flags);
247 }
248 
249 static void clocksource_watchdog(unsigned long data)
250 {
251 	struct clocksource *cs;
252 	cycle_t csnow, wdnow;
253 	int64_t wd_nsec, cs_nsec;
254 	int next_cpu, reset_pending;
255 
256 	spin_lock(&watchdog_lock);
257 	if (!watchdog_running)
258 		goto out;
259 
260 	reset_pending = atomic_read(&watchdog_reset_pending);
261 
262 	list_for_each_entry(cs, &watchdog_list, wd_list) {
263 
264 		/* Clocksource already marked unstable? */
265 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
266 			if (finished_booting)
267 				schedule_work(&watchdog_work);
268 			continue;
269 		}
270 
271 		local_irq_disable();
272 		csnow = cs->read(cs);
273 		wdnow = watchdog->read(watchdog);
274 		local_irq_enable();
275 
276 		/* Clocksource initialized ? */
277 		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
278 		    atomic_read(&watchdog_reset_pending)) {
279 			cs->flags |= CLOCK_SOURCE_WATCHDOG;
280 			cs->wd_last = wdnow;
281 			cs->cs_last = csnow;
282 			continue;
283 		}
284 
285 		wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask,
286 					     watchdog->mult, watchdog->shift);
287 
288 		cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) &
289 					     cs->mask, cs->mult, cs->shift);
290 		cs->cs_last = csnow;
291 		cs->wd_last = wdnow;
292 
293 		if (atomic_read(&watchdog_reset_pending))
294 			continue;
295 
296 		/* Check the deviation from the watchdog clocksource. */
297 		if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
298 			clocksource_unstable(cs, cs_nsec - wd_nsec);
299 			continue;
300 		}
301 
302 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
303 		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
304 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
305 			/* Mark it valid for high-res. */
306 			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
307 
308 			/*
309 			 * clocksource_done_booting() will sort it if
310 			 * finished_booting is not set yet.
311 			 */
312 			if (!finished_booting)
313 				continue;
314 
315 			/*
316 			 * If this is not the current clocksource let
317 			 * the watchdog thread reselect it. Due to the
318 			 * change to high res this clocksource might
319 			 * be preferred now. If it is the current
320 			 * clocksource let the tick code know about
321 			 * that change.
322 			 */
323 			if (cs != curr_clocksource) {
324 				cs->flags |= CLOCK_SOURCE_RESELECT;
325 				schedule_work(&watchdog_work);
326 			} else {
327 				tick_clock_notify();
328 			}
329 		}
330 	}
331 
332 	/*
333 	 * We only clear the watchdog_reset_pending, when we did a
334 	 * full cycle through all clocksources.
335 	 */
336 	if (reset_pending)
337 		atomic_dec(&watchdog_reset_pending);
338 
339 	/*
340 	 * Cycle through CPUs to check if the CPUs stay synchronized
341 	 * to each other.
342 	 */
343 	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
344 	if (next_cpu >= nr_cpu_ids)
345 		next_cpu = cpumask_first(cpu_online_mask);
346 	watchdog_timer.expires += WATCHDOG_INTERVAL;
347 	add_timer_on(&watchdog_timer, next_cpu);
348 out:
349 	spin_unlock(&watchdog_lock);
350 }
351 
352 static inline void clocksource_start_watchdog(void)
353 {
354 	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
355 		return;
356 	init_timer(&watchdog_timer);
357 	watchdog_timer.function = clocksource_watchdog;
358 	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
359 	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
360 	watchdog_running = 1;
361 }
362 
363 static inline void clocksource_stop_watchdog(void)
364 {
365 	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
366 		return;
367 	del_timer(&watchdog_timer);
368 	watchdog_running = 0;
369 }
370 
371 static inline void clocksource_reset_watchdog(void)
372 {
373 	struct clocksource *cs;
374 
375 	list_for_each_entry(cs, &watchdog_list, wd_list)
376 		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
377 }
378 
379 static void clocksource_resume_watchdog(void)
380 {
381 	atomic_inc(&watchdog_reset_pending);
382 }
383 
384 static void clocksource_enqueue_watchdog(struct clocksource *cs)
385 {
386 	unsigned long flags;
387 
388 	spin_lock_irqsave(&watchdog_lock, flags);
389 	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
390 		/* cs is a clocksource to be watched. */
391 		list_add(&cs->wd_list, &watchdog_list);
392 		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
393 	} else {
394 		/* cs is a watchdog. */
395 		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
396 			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
397 		/* Pick the best watchdog. */
398 		if (!watchdog || cs->rating > watchdog->rating) {
399 			watchdog = cs;
400 			/* Reset watchdog cycles */
401 			clocksource_reset_watchdog();
402 		}
403 	}
404 	/* Check if the watchdog timer needs to be started. */
405 	clocksource_start_watchdog();
406 	spin_unlock_irqrestore(&watchdog_lock, flags);
407 }
408 
409 static void clocksource_dequeue_watchdog(struct clocksource *cs)
410 {
411 	unsigned long flags;
412 
413 	spin_lock_irqsave(&watchdog_lock, flags);
414 	if (cs != watchdog) {
415 		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
416 			/* cs is a watched clocksource. */
417 			list_del_init(&cs->wd_list);
418 			/* Check if the watchdog timer needs to be stopped. */
419 			clocksource_stop_watchdog();
420 		}
421 	}
422 	spin_unlock_irqrestore(&watchdog_lock, flags);
423 }
424 
425 static int __clocksource_watchdog_kthread(void)
426 {
427 	struct clocksource *cs, *tmp;
428 	unsigned long flags;
429 	LIST_HEAD(unstable);
430 	int select = 0;
431 
432 	spin_lock_irqsave(&watchdog_lock, flags);
433 	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
434 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
435 			list_del_init(&cs->wd_list);
436 			list_add(&cs->wd_list, &unstable);
437 			select = 1;
438 		}
439 		if (cs->flags & CLOCK_SOURCE_RESELECT) {
440 			cs->flags &= ~CLOCK_SOURCE_RESELECT;
441 			select = 1;
442 		}
443 	}
444 	/* Check if the watchdog timer needs to be stopped. */
445 	clocksource_stop_watchdog();
446 	spin_unlock_irqrestore(&watchdog_lock, flags);
447 
448 	/* Needs to be done outside of watchdog lock */
449 	list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
450 		list_del_init(&cs->wd_list);
451 		__clocksource_change_rating(cs, 0);
452 	}
453 	return select;
454 }
455 
456 static int clocksource_watchdog_kthread(void *data)
457 {
458 	mutex_lock(&clocksource_mutex);
459 	if (__clocksource_watchdog_kthread())
460 		clocksource_select();
461 	mutex_unlock(&clocksource_mutex);
462 	return 0;
463 }
464 
465 static bool clocksource_is_watchdog(struct clocksource *cs)
466 {
467 	return cs == watchdog;
468 }
469 
470 #else /* CONFIG_CLOCKSOURCE_WATCHDOG */
471 
472 static void clocksource_enqueue_watchdog(struct clocksource *cs)
473 {
474 	if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
475 		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
476 }
477 
/*
 * Stand-ins for the watchdog entry points when
 * CONFIG_CLOCKSOURCE_WATCHDOG is not set: nothing is ever watched, no
 * reselect is ever requested and no clocksource is the watchdog.
 */
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }
483 
484 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
485 
486 /**
487  * clocksource_suspend - suspend the clocksource(s)
488  */
489 void clocksource_suspend(void)
490 {
491 	struct clocksource *cs;
492 
493 	list_for_each_entry_reverse(cs, &clocksource_list, list)
494 		if (cs->suspend)
495 			cs->suspend(cs);
496 }
497 
498 /**
499  * clocksource_resume - resume the clocksource(s)
500  */
501 void clocksource_resume(void)
502 {
503 	struct clocksource *cs;
504 
505 	list_for_each_entry(cs, &clocksource_list, list)
506 		if (cs->resume)
507 			cs->resume(cs);
508 
509 	clocksource_resume_watchdog();
510 }
511 
/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This only requests a watchdog
 * reset via clocksource_resume_watchdog().
 */
void clocksource_touch_watchdog(void)
{
	clocksource_resume_watchdog();
}
523 
524 /**
525  * clocksource_max_adjustment- Returns max adjustment amount
526  * @cs:         Pointer to clocksource
527  *
528  */
529 static u32 clocksource_max_adjustment(struct clocksource *cs)
530 {
531 	u64 ret;
532 	/*
533 	 * We won't try to correct for more than 11% adjustments (110,000 ppm),
534 	 */
535 	ret = (u64)cs->mult * 11;
536 	do_div(ret,100);
537 	return (u32)ret;
538 }
539 
540 /**
541  * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
542  * @mult:	cycle to nanosecond multiplier
543  * @shift:	cycle to nanosecond divisor (power of two)
544  * @maxadj:	maximum adjustment value to mult (~11%)
545  * @mask:	bitmask for two's complement subtraction of non 64 bit counters
546  */
547 u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
548 {
549 	u64 max_nsecs, max_cycles;
550 
551 	/*
552 	 * Calculate the maximum number of cycles that we can pass to the
553 	 * cyc2ns function without overflowing a 64-bit signed result. The
554 	 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
555 	 * which is equivalent to the below.
556 	 * max_cycles < (2^63)/(mult + maxadj)
557 	 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
558 	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
559 	 * max_cycles < 2^(63 - log2(mult + maxadj))
560 	 * max_cycles < 1 << (63 - log2(mult + maxadj))
561 	 * Please note that we add 1 to the result of the log2 to account for
562 	 * any rounding errors, ensure the above inequality is satisfied and
563 	 * no overflow will occur.
564 	 */
565 	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
566 
567 	/*
568 	 * The actual maximum number of cycles we can defer the clocksource is
569 	 * determined by the minimum of max_cycles and mask.
570 	 * Note: Here we subtract the maxadj to make sure we don't sleep for
571 	 * too long if there's a large negative adjustment.
572 	 */
573 	max_cycles = min(max_cycles, mask);
574 	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
575 
576 	return max_nsecs;
577 }
578 
579 /**
580  * clocksource_max_deferment - Returns max time the clocksource can be deferred
581  * @cs:         Pointer to clocksource
582  *
583  */
584 static u64 clocksource_max_deferment(struct clocksource *cs)
585 {
586 	u64 max_nsecs;
587 
588 	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
589 					  cs->mask);
590 	/*
591 	 * To ensure that the clocksource does not wrap whilst we are idle,
592 	 * limit the time the clocksource can be deferred by 12.5%. Please
593 	 * note a margin of 12.5% is used because this can be computed with
594 	 * a shift, versus say 10% which would require division.
595 	 */
596 	return max_nsecs - (max_nsecs >> 3);
597 }
598 
599 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
600 
601 static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
602 {
603 	struct clocksource *cs;
604 
605 	if (!finished_booting || list_empty(&clocksource_list))
606 		return NULL;
607 
608 	/*
609 	 * We pick the clocksource with the highest rating. If oneshot
610 	 * mode is active, we pick the highres valid clocksource with
611 	 * the best rating.
612 	 */
613 	list_for_each_entry(cs, &clocksource_list, list) {
614 		if (skipcur && cs == curr_clocksource)
615 			continue;
616 		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
617 			continue;
618 		return cs;
619 	}
620 	return NULL;
621 }
622 
623 static void __clocksource_select(bool skipcur)
624 {
625 	bool oneshot = tick_oneshot_mode_active();
626 	struct clocksource *best, *cs;
627 
628 	/* Find the best suitable clocksource */
629 	best = clocksource_find_best(oneshot, skipcur);
630 	if (!best)
631 		return;
632 
633 	/* Check for the override clocksource. */
634 	list_for_each_entry(cs, &clocksource_list, list) {
635 		if (skipcur && cs == curr_clocksource)
636 			continue;
637 		if (strcmp(cs->name, override_name) != 0)
638 			continue;
639 		/*
640 		 * Check to make sure we don't switch to a non-highres
641 		 * capable clocksource if the tick code is in oneshot
642 		 * mode (highres or nohz)
643 		 */
644 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
645 			/* Override clocksource cannot be used. */
646 			printk(KERN_WARNING "Override clocksource %s is not "
647 			       "HRT compatible. Cannot switch while in "
648 			       "HRT/NOHZ mode\n", cs->name);
649 			override_name[0] = 0;
650 		} else
651 			/* Override clocksource can be used. */
652 			best = cs;
653 		break;
654 	}
655 
656 	if (curr_clocksource != best && !timekeeping_notify(best)) {
657 		pr_info("Switched to clocksource %s\n", best->name);
658 		curr_clocksource = best;
659 	}
660 }
661 
662 /**
663  * clocksource_select - Select the best clocksource available
664  *
665  * Private function. Must hold clocksource_mutex when called.
666  *
667  * Select the clocksource with the best rating, or the clocksource,
668  * which is selected by userspace override.
669  */
670 static void clocksource_select(void)
671 {
672 	return __clocksource_select(false);
673 }
674 
675 static void clocksource_select_fallback(void)
676 {
677 	return __clocksource_select(true);
678 }
679 
680 #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
681 
/* With CONFIG_ARCH_USES_GETTIMEOFFSET set, clocksource selection is a no-op. */
static inline void clocksource_select(void) { }
static inline void clocksource_select_fallback(void) { }
684 
685 #endif
686 
687 /*
688  * clocksource_done_booting - Called near the end of core bootup
689  *
690  * Hack to avoid lots of clocksource churn at boot time.
691  * We use fs_initcall because we want this to start before
692  * device_initcall but after subsys_initcall.
693  */
694 static int __init clocksource_done_booting(void)
695 {
696 	mutex_lock(&clocksource_mutex);
697 	curr_clocksource = clocksource_default_clock();
698 	finished_booting = 1;
699 	/*
700 	 * Run the watchdog first to eliminate unstable clock sources
701 	 */
702 	__clocksource_watchdog_kthread();
703 	clocksource_select();
704 	mutex_unlock(&clocksource_mutex);
705 	return 0;
706 }
707 fs_initcall(clocksource_done_booting);
708 
709 /*
710  * Enqueue the clocksource sorted by rating
711  */
712 static void clocksource_enqueue(struct clocksource *cs)
713 {
714 	struct list_head *entry = &clocksource_list;
715 	struct clocksource *tmp;
716 
717 	list_for_each_entry(tmp, &clocksource_list, list)
718 		/* Keep track of the place, where to insert */
719 		if (tmp->rating >= cs->rating)
720 			entry = &tmp->list;
721 	list_add(&cs->list, entry);
722 }
723 
724 /**
725  * __clocksource_updatefreq_scale - Used update clocksource with new freq
726  * @cs:		clocksource to be registered
727  * @scale:	Scale factor multiplied against freq to get clocksource hz
728  * @freq:	clocksource frequency (cycles per second) divided by scale
729  *
730  * This should only be called from the clocksource->enable() method.
731  *
732  * This *SHOULD NOT* be called directly! Please use the
733  * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
734  */
735 void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
736 {
737 	u64 sec;
738 	/*
739 	 * Calc the maximum number of seconds which we can run before
740 	 * wrapping around. For clocksources which have a mask > 32bit
741 	 * we need to limit the max sleep time to have a good
742 	 * conversion precision. 10 minutes is still a reasonable
743 	 * amount. That results in a shift value of 24 for a
744 	 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
745 	 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
746 	 * margin as we do in clocksource_max_deferment()
747 	 */
748 	sec = (cs->mask - (cs->mask >> 3));
749 	do_div(sec, freq);
750 	do_div(sec, scale);
751 	if (!sec)
752 		sec = 1;
753 	else if (sec > 600 && cs->mask > UINT_MAX)
754 		sec = 600;
755 
756 	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
757 			       NSEC_PER_SEC / scale, sec * scale);
758 
759 	/*
760 	 * for clocksources that have large mults, to avoid overflow.
761 	 * Since mult may be adjusted by ntp, add an safety extra margin
762 	 *
763 	 */
764 	cs->maxadj = clocksource_max_adjustment(cs);
765 	while ((cs->mult + cs->maxadj < cs->mult)
766 		|| (cs->mult - cs->maxadj > cs->mult)) {
767 		cs->mult >>= 1;
768 		cs->shift--;
769 		cs->maxadj = clocksource_max_adjustment(cs);
770 	}
771 
772 	cs->max_idle_ns = clocksource_max_deferment(cs);
773 }
774 EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
775 
776 /**
777  * __clocksource_register_scale - Used to install new clocksources
778  * @cs:		clocksource to be registered
779  * @scale:	Scale factor multiplied against freq to get clocksource hz
780  * @freq:	clocksource frequency (cycles per second) divided by scale
781  *
782  * Returns -EBUSY if registration fails, zero otherwise.
783  *
784  * This *SHOULD NOT* be called directly! Please use the
785  * clocksource_register_hz() or clocksource_register_khz helper functions.
786  */
787 int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
788 {
789 
790 	/* Initialize mult/shift and max_idle_ns */
791 	__clocksource_updatefreq_scale(cs, scale, freq);
792 
793 	/* Add clocksource to the clcoksource list */
794 	mutex_lock(&clocksource_mutex);
795 	clocksource_enqueue(cs);
796 	clocksource_enqueue_watchdog(cs);
797 	clocksource_select();
798 	mutex_unlock(&clocksource_mutex);
799 	return 0;
800 }
801 EXPORT_SYMBOL_GPL(__clocksource_register_scale);
802 
803 
804 /**
805  * clocksource_register - Used to install new clocksources
806  * @cs:		clocksource to be registered
807  *
808  * Returns -EBUSY if registration fails, zero otherwise.
809  */
810 int clocksource_register(struct clocksource *cs)
811 {
812 	/* calculate max adjustment for given mult/shift */
813 	cs->maxadj = clocksource_max_adjustment(cs);
814 	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
815 		"Clocksource %s might overflow on 11%% adjustment\n",
816 		cs->name);
817 
818 	/* calculate max idle time permitted for this clocksource */
819 	cs->max_idle_ns = clocksource_max_deferment(cs);
820 
821 	mutex_lock(&clocksource_mutex);
822 	clocksource_enqueue(cs);
823 	clocksource_enqueue_watchdog(cs);
824 	clocksource_select();
825 	mutex_unlock(&clocksource_mutex);
826 	return 0;
827 }
828 EXPORT_SYMBOL(clocksource_register);
829 
830 static void __clocksource_change_rating(struct clocksource *cs, int rating)
831 {
832 	list_del(&cs->list);
833 	cs->rating = rating;
834 	clocksource_enqueue(cs);
835 }
836 
837 /**
838  * clocksource_change_rating - Change the rating of a registered clocksource
839  * @cs:		clocksource to be changed
840  * @rating:	new rating
841  */
842 void clocksource_change_rating(struct clocksource *cs, int rating)
843 {
844 	mutex_lock(&clocksource_mutex);
845 	__clocksource_change_rating(cs, rating);
846 	clocksource_select();
847 	mutex_unlock(&clocksource_mutex);
848 }
849 EXPORT_SYMBOL(clocksource_change_rating);
850 
851 /*
852  * Unbind clocksource @cs. Called with clocksource_mutex held
853  */
854 static int clocksource_unbind(struct clocksource *cs)
855 {
856 	/*
857 	 * I really can't convince myself to support this on hardware
858 	 * designed by lobotomized monkeys.
859 	 */
860 	if (clocksource_is_watchdog(cs))
861 		return -EBUSY;
862 
863 	if (cs == curr_clocksource) {
864 		/* Select and try to install a replacement clock source */
865 		clocksource_select_fallback();
866 		if (curr_clocksource == cs)
867 			return -EBUSY;
868 	}
869 	clocksource_dequeue_watchdog(cs);
870 	list_del_init(&cs->list);
871 	return 0;
872 }
873 
874 /**
875  * clocksource_unregister - remove a registered clocksource
876  * @cs:	clocksource to be unregistered
877  */
878 int clocksource_unregister(struct clocksource *cs)
879 {
880 	int ret = 0;
881 
882 	mutex_lock(&clocksource_mutex);
883 	if (!list_empty(&cs->list))
884 		ret = clocksource_unbind(cs);
885 	mutex_unlock(&clocksource_mutex);
886 	return ret;
887 }
888 EXPORT_SYMBOL(clocksource_unregister);
889 
890 #ifdef CONFIG_SYSFS
891 /**
892  * sysfs_show_current_clocksources - sysfs interface for current clocksource
893  * @dev:	unused
894  * @attr:	unused
895  * @buf:	char buffer to be filled with clocksource list
896  *
897  * Provides sysfs interface for listing current clocksource.
898  */
899 static ssize_t
900 sysfs_show_current_clocksources(struct device *dev,
901 				struct device_attribute *attr, char *buf)
902 {
903 	ssize_t count = 0;
904 
905 	mutex_lock(&clocksource_mutex);
906 	count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
907 	mutex_unlock(&clocksource_mutex);
908 
909 	return count;
910 }
911 
912 ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
913 {
914 	size_t ret = cnt;
915 
916 	/* strings from sysfs write are not 0 terminated! */
917 	if (!cnt || cnt >= CS_NAME_LEN)
918 		return -EINVAL;
919 
920 	/* strip of \n: */
921 	if (buf[cnt-1] == '\n')
922 		cnt--;
923 	if (cnt > 0)
924 		memcpy(dst, buf, cnt);
925 	dst[cnt] = 0;
926 	return ret;
927 }
928 
929 /**
930  * sysfs_override_clocksource - interface for manually overriding clocksource
931  * @dev:	unused
932  * @attr:	unused
933  * @buf:	name of override clocksource
934  * @count:	length of buffer
935  *
936  * Takes input from sysfs interface for manually overriding the default
937  * clocksource selection.
938  */
939 static ssize_t sysfs_override_clocksource(struct device *dev,
940 					  struct device_attribute *attr,
941 					  const char *buf, size_t count)
942 {
943 	ssize_t ret;
944 
945 	mutex_lock(&clocksource_mutex);
946 
947 	ret = sysfs_get_uname(buf, override_name, count);
948 	if (ret >= 0)
949 		clocksource_select();
950 
951 	mutex_unlock(&clocksource_mutex);
952 
953 	return ret;
954 }
955 
956 /**
957  * sysfs_unbind_current_clocksource - interface for manually unbinding clocksource
958  * @dev:	unused
959  * @attr:	unused
960  * @buf:	unused
961  * @count:	length of buffer
962  *
963  * Takes input from sysfs interface for manually unbinding a clocksource.
964  */
965 static ssize_t sysfs_unbind_clocksource(struct device *dev,
966 					struct device_attribute *attr,
967 					const char *buf, size_t count)
968 {
969 	struct clocksource *cs;
970 	char name[CS_NAME_LEN];
971 	ssize_t ret;
972 
973 	ret = sysfs_get_uname(buf, name, count);
974 	if (ret < 0)
975 		return ret;
976 
977 	ret = -ENODEV;
978 	mutex_lock(&clocksource_mutex);
979 	list_for_each_entry(cs, &clocksource_list, list) {
980 		if (strcmp(cs->name, name))
981 			continue;
982 		ret = clocksource_unbind(cs);
983 		break;
984 	}
985 	mutex_unlock(&clocksource_mutex);
986 
987 	return ret ? ret : count;
988 }
989 
990 /**
991  * sysfs_show_available_clocksources - sysfs interface for listing clocksource
992  * @dev:	unused
993  * @attr:	unused
994  * @buf:	char buffer to be filled with clocksource list
995  *
996  * Provides sysfs interface for listing registered clocksources
997  */
998 static ssize_t
999 sysfs_show_available_clocksources(struct device *dev,
1000 				  struct device_attribute *attr,
1001 				  char *buf)
1002 {
1003 	struct clocksource *src;
1004 	ssize_t count = 0;
1005 
1006 	mutex_lock(&clocksource_mutex);
1007 	list_for_each_entry(src, &clocksource_list, list) {
1008 		/*
1009 		 * Don't show non-HRES clocksource if the tick code is
1010 		 * in one shot mode (highres=on or nohz=on)
1011 		 */
1012 		if (!tick_oneshot_mode_active() ||
1013 		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
1014 			count += snprintf(buf + count,
1015 				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
1016 				  "%s ", src->name);
1017 	}
1018 	mutex_unlock(&clocksource_mutex);
1019 
1020 	count += snprintf(buf + count,
1021 			  max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
1022 
1023 	return count;
1024 }
1025 
/*
 * Sysfs setup bits:
 */

/* current_clocksource: read shows the current one, write sets the override */
static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
		   sysfs_override_clocksource);

/* unbind_clocksource: write-only, unbinds the clocksource named in the write */
static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);

/* available_clocksource: read-only list of registered clocksources */
static DEVICE_ATTR(available_clocksource, 0444,
		   sysfs_show_available_clocksources, NULL);

/* Subsystem the clocksource device below is registered on. */
static struct bus_type clocksource_subsys = {
	.name = "clocksource",
	.dev_name = "clocksource",
};

/* Device that carries the three attribute files above. */
static struct device device_clocksource = {
	.id	= 0,
	.bus	= &clocksource_subsys,
};
1046 
1047 static int __init init_clocksource_sysfs(void)
1048 {
1049 	int error = subsys_system_register(&clocksource_subsys, NULL);
1050 
1051 	if (!error)
1052 		error = device_register(&device_clocksource);
1053 	if (!error)
1054 		error = device_create_file(
1055 				&device_clocksource,
1056 				&dev_attr_current_clocksource);
1057 	if (!error)
1058 		error = device_create_file(&device_clocksource,
1059 					   &dev_attr_unbind_clocksource);
1060 	if (!error)
1061 		error = device_create_file(
1062 				&device_clocksource,
1063 				&dev_attr_available_clocksource);
1064 	return error;
1065 }
1066 
1067 device_initcall(init_clocksource_sysfs);
1068 #endif /* CONFIG_SYSFS */
1069 
1070 /**
1071  * boot_override_clocksource - boot clock override
1072  * @str:	override name
1073  *
1074  * Takes a clocksource= boot argument and uses it
1075  * as the clocksource override name.
1076  */
1077 static int __init boot_override_clocksource(char* str)
1078 {
1079 	mutex_lock(&clocksource_mutex);
1080 	if (str)
1081 		strlcpy(override_name, str, sizeof(override_name));
1082 	mutex_unlock(&clocksource_mutex);
1083 	return 1;
1084 }
1085 
1086 __setup("clocksource=", boot_override_clocksource);
1087 
1088 /**
1089  * boot_override_clock - Compatibility layer for deprecated boot option
1090  * @str:	override name
1091  *
1092  * DEPRECATED! Takes a clock= boot argument and uses it
1093  * as the clocksource override name
1094  */
1095 static int __init boot_override_clock(char* str)
1096 {
1097 	if (!strcmp(str, "pmtmr")) {
1098 		printk("Warning: clock=pmtmr is deprecated. "
1099 			"Use clocksource=acpi_pm.\n");
1100 		return boot_override_clocksource("acpi_pm");
1101 	}
1102 	printk("Warning! clock= boot option is deprecated. "
1103 		"Use clocksource=xyz\n");
1104 	return boot_override_clocksource(str);
1105 }
1106 
1107 __setup("clock=", boot_override_clock);
1108