// SPDX-License-Identifier: GPL-2.0-only
/*
 * sched_clock() for unstable CPU clocks
 *
 *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra
 *
 *  Updates and enhancements:
 *    Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
 *
 * Based on code by:
 *   Ingo Molnar <mingo@redhat.com>
 *   Guillaume Chazarain <guichaz@gmail.com>
 *
 *
 * What this file implements:
 *
 * cpu_clock(i) provides a fast (execution time) high resolution
 * clock with bounded drift between CPUs. The value of cpu_clock(i)
 * is monotonic for constant i. The timestamp returned is in nanoseconds.
 *
 * ######################### BIG FAT WARNING ##########################
 * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
 * # go backwards !!                                                  #
 * ####################################################################
 *
 * There is no strict promise about the base, although it tends to start
 * at 0 on boot (but people really shouldn't rely on that).
 *
 * cpu_clock(i)       -- can be used from any context, including NMI.
 * local_clock()      -- is cpu_clock() on the current CPU.
 *
 * sched_clock_cpu(i)
 *
 * How it is implemented:
 *
 * When !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, the implementation simply uses
 * sched_clock(), which is then assumed to provide these properties (mostly
 * it means the architecture provides a globally synchronized highres time
 * source).
 *
 * Otherwise it tries to create a semi-stable clock from a mixture of other
 * clocks, including:
 *
 *  - GTOD (clock monotonic)
 *  - sched_clock()
 *  - explicit idle events
 *
 * We use GTOD as base and use sched_clock() deltas to improve resolution. The
 * deltas are filtered to provide monotonicity and to keep the clock within an
 * expected window.
 *
 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
 * that is otherwise invisible (TSC gets stopped).
 *
 */
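/*
 * Illustrative usage (not mandated by this file): timestamps taken on the
 * same CPU can be subtracted to get a duration in nanoseconds:
 *
 *	u64 t0 = local_clock();
 *	...
 *	u64 delta_ns = local_clock() - t0;
 *
 * Per the BIG FAT WARNING above, differences taken across CPUs via
 * cpu_clock(i) are only loosely bounded.
 */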

/*
 * Scheduler clock - returns current time in nanosec units.
 * This is the default implementation.
 * Architectures and sub-architectures can override this.
 */
notrace unsigned long long __weak sched_clock(void)
{
	return (unsigned long long)(jiffies - INITIAL_JIFFIES)
					* (NSEC_PER_SEC / HZ);
}
EXPORT_SYMBOL_GPL(sched_clock);

static DEFINE_STATIC_KEY_FALSE(sched_clock_running);

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * We must start with !__sched_clock_stable because the unstable -> stable
 * transition is accurate, while the stable -> unstable transition is not.
 *
 * Similarly we start with __sched_clock_stable_early, thereby assuming we
 * will become stable, such that there's only a single 1 -> 0 transition.
 */
static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
static int __sched_clock_stable_early = 1;

/*
 * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset
 */
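/*
 * That is: both offsets map their respective time base onto one unified
 * sched_clock timeline; __sched_clock_offset is added to raw sched_clock()
 * values, __gtod_offset to GTOD (ktime) values.
 */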
__read_mostly u64 __sched_clock_offset;
static __read_mostly u64 __gtod_offset;

struct sched_clock_data {
	u64			tick_raw;
	u64			tick_gtod;
	u64			clock;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);

static __always_inline struct sched_clock_data *this_scd(void)
{
	return this_cpu_ptr(&sched_clock_data);
}

notrace static inline struct sched_clock_data *cpu_sdc(int cpu)
{
	return &per_cpu(sched_clock_data, cpu);
}

notrace int sched_clock_stable(void)
{
	return static_branch_likely(&__sched_clock_stable);
}

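/*
 * Snapshot this CPU's timestamp pair: the GTOD/ktime clock and the raw
 * sched_clock() value (callers disable preemption or IRQs as needed).
 */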
notrace static void __scd_stamp(struct sched_clock_data *scd)
{
	scd->tick_gtod = ktime_get_ns();
	scd->tick_raw = sched_clock();
}

notrace static void __set_sched_clock_stable(void)
{
	struct sched_clock_data *scd;

	/*
	 * Since we're still unstable and the tick is already running, we have
	 * to disable IRQs in order to get a consistent scd->tick* reading.
	 */
	local_irq_disable();
	scd = this_scd();
	/*
	 * Attempt to make the (initial) unstable->stable transition continuous.
	 */
	__sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
	local_irq_enable();

	printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
			scd->tick_gtod, __gtod_offset,
			scd->tick_raw,  __sched_clock_offset);

	static_branch_enable(&__sched_clock_stable);
	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
}

/*
 * If we ever get here, we're screwed, because we found out -- typically after
 * the fact -- that TSC wasn't good. This means all our clocksources (including
 * ktime) could have reported wrong values.
 *
 * What we do here is an attempt to fix up and continue sort of where we left
 * off in a coherent manner.
 *
 * The only way to fully avoid random clock jumps is to boot with:
 * "tsc=unstable".
 */
notrace static void __sched_clock_work(struct work_struct *work)
{
	struct sched_clock_data *scd;
	int cpu;

	/* take a current timestamp and set 'now' */
	preempt_disable();
	scd = this_scd();
	__scd_stamp(scd);
	scd->clock = scd->tick_gtod + __gtod_offset;
	preempt_enable();

	/* clone to all CPUs */
	for_each_possible_cpu(cpu)
		per_cpu(sched_clock_data, cpu) = *scd;

	printk(KERN_WARNING "TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.\n");
	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
			scd->tick_gtod, __gtod_offset,
			scd->tick_raw,  __sched_clock_offset);

	static_branch_disable(&__sched_clock_stable);
}

static DECLARE_WORK(sched_clock_work, __sched_clock_work);

notrace static void __clear_sched_clock_stable(void)
{
	if (!sched_clock_stable())
		return;

	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
	schedule_work(&sched_clock_work);
}

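/*
 * Mark the clock unstable; typically invoked by architecture code once the
 * TSC is found unreliable. Before sched_clock_init_late() this only clears
 * __sched_clock_stable_early; afterwards the switch is done via the
 * workqueue above.
 */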
notrace void clear_sched_clock_stable(void)
{
	__sched_clock_stable_early = 0;

	smp_mb(); /* matches sched_clock_init_late() */

	if (static_key_count(&sched_clock_running.key) == 2)
		__clear_sched_clock_stable();
}

notrace static void __sched_clock_gtod_offset(void)
{
	struct sched_clock_data *scd = this_scd();

	__scd_stamp(scd);
	__gtod_offset = (scd->tick_raw + __sched_clock_offset) - scd->tick_gtod;
}

void __init sched_clock_init(void)
{
	/*
	 * Set __gtod_offset such that once we mark sched_clock_running,
	 * sched_clock_tick() continues where sched_clock() left off.
	 *
	 * Even if TSC is buggered, we're still UP at this point so it
	 * can't really be out of sync.
	 */
	local_irq_disable();
	__sched_clock_gtod_offset();
	local_irq_enable();

	static_branch_inc(&sched_clock_running);
}
/*
 * We run this as late_initcall() such that it runs after all built-in drivers,
 * notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
 */
static int __init sched_clock_init_late(void)
{
	static_branch_inc(&sched_clock_running);
	/*
	 * Ensure that it is impossible to not do a static_key update.
	 *
	 * Either {set,clear}_sched_clock_stable() must see sched_clock_running
	 * and do the update, or we must see their __sched_clock_stable_early
	 * and do the update, or both.
	 */
	smp_mb(); /* matches {set,clear}_sched_clock_stable() */

	if (__sched_clock_stable_early)
		__set_sched_clock_stable();

	return 0;
}
late_initcall(sched_clock_init_late);

/*
 * min, max except they take wrapping into account
 */

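/*
 * Example (illustrative): with x = 1 and y = ULLONG_MAX, the u64
 * subtraction x - y wraps to 2, so (s64)(x - y) > 0 and wrap_max()
 * returns x -- a value that has just wrapped past 0 counts as the
 * later one.
 */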
static __always_inline u64 wrap_min(u64 x, u64 y)
{
	return (s64)(x - y) < 0 ? x : y;
}

static __always_inline u64 wrap_max(u64 x, u64 y)
{
	return (s64)(x - y) > 0 ? x : y;
}

/*
 * update the percpu scd from the raw @now value
 *
 *  - filter out backward motion
 *  - use the GTOD tick value to create a window to filter crazy TSC values
 */
static __always_inline u64 sched_clock_local(struct sched_clock_data *scd)
{
	u64 now, clock, old_clock, min_clock, max_clock, gtod;
	s64 delta;

again:
	now = sched_clock_noinstr();
	delta = now - scd->tick_raw;
	if (unlikely(delta < 0))
		delta = 0;

	old_clock = scd->clock;

	/*
	 * scd->clock = clamp(scd->tick_gtod + delta,
	 *		      max(scd->tick_gtod, scd->clock),
	 *		      scd->tick_gtod + TICK_NSEC);
	 */

	gtod = scd->tick_gtod + __gtod_offset;
	clock = gtod + delta;
	min_clock = wrap_max(gtod, old_clock);
	max_clock = wrap_max(old_clock, gtod + TICK_NSEC);

	clock = wrap_max(clock, min_clock);
	clock = wrap_min(clock, max_clock);

	if (!raw_try_cmpxchg64(&scd->clock, &old_clock, clock))
		goto again;

	return clock;
}

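/*
 * Instrumentation-free variant of local_clock(); the caller is expected to
 * be in a non-migratable context (local_clock() below provides that via
 * preempt_disable_notrace()).
 */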
noinstr u64 local_clock_noinstr(void)
{
	u64 clock;

	if (static_branch_likely(&__sched_clock_stable))
		return sched_clock_noinstr() + __sched_clock_offset;

	if (!static_branch_likely(&sched_clock_running))
		return sched_clock_noinstr();

	clock = sched_clock_local(this_scd());

	return clock;
}

u64 local_clock(void)
{
	u64 now;
	preempt_disable_notrace();
	now = local_clock_noinstr();
	preempt_enable_notrace();
	return now;
}
EXPORT_SYMBOL_GPL(local_clock);

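/*
 * Read a remote CPU's clock (@scd) and couple it with the local one:
 * whichever of the two is further ahead becomes the new value for both,
 * keeping observed time moving forward across the pair.
 */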
static notrace u64 sched_clock_remote(struct sched_clock_data *scd)
{
	struct sched_clock_data *my_scd = this_scd();
	u64 this_clock, remote_clock;
	u64 *ptr, old_val, val;

#if BITS_PER_LONG != 64
again:
	/*
	 * Careful here: The local and the remote clock values need to
	 * be read out atomically as we need to compare the values and
	 * then update either the local or the remote side. So the
	 * cmpxchg64 below only protects one readout.
	 *
	 * We must reread via sched_clock_local() in the retry case on
	 * 32-bit kernels as an NMI could use sched_clock_local() via the
	 * tracer and hit between the readout of
	 * the low 32-bit and the high 32-bit portion.
	 */
	this_clock = sched_clock_local(my_scd);
	/*
	 * We must enforce atomic readout on 32-bit, otherwise the
	 * update on the remote CPU can hit in between the readout of
	 * the low 32-bit and the high 32-bit portion.
	 */
	remote_clock = cmpxchg64(&scd->clock, 0, 0);
#else
	/*
	 * On 64-bit kernels the read of [my]scd->clock is atomic versus the
	 * update, so we can avoid the above 32-bit dance.
	 */
	sched_clock_local(my_scd);
again:
	this_clock = my_scd->clock;
	remote_clock = scd->clock;
#endif

	/*
	 * Use the opportunity that we have both locks
	 * taken to couple the two clocks: we take the
	 * larger time as the latest time for both
	 * runqueues. (this creates monotonic movement)
	 */
	if (likely((s64)(remote_clock - this_clock) < 0)) {
		ptr = &scd->clock;
		old_val = remote_clock;
		val = this_clock;
	} else {
		/*
		 * Should be rare, but possible:
		 */
		ptr = &my_scd->clock;
		old_val = this_clock;
		val = remote_clock;
	}

	if (!try_cmpxchg64(ptr, &old_val, val))
		goto again;

	return val;
}

/*
 * Similar to cpu_clock(), but requires local IRQs to be disabled.
 *
 * See cpu_clock().
 */
notrace u64 sched_clock_cpu(int cpu)
{
	struct sched_clock_data *scd;
	u64 clock;

	if (sched_clock_stable())
		return sched_clock() + __sched_clock_offset;

	if (!static_branch_likely(&sched_clock_running))
		return sched_clock();

	preempt_disable_notrace();
	scd = cpu_sdc(cpu);

	if (cpu != smp_processor_id())
		clock = sched_clock_remote(scd);
	else
		clock = sched_clock_local(scd);
	preempt_enable_notrace();

	return clock;
}
EXPORT_SYMBOL_GPL(sched_clock_cpu);

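/*
 * Per-tick hook, called with IRQs disabled: refresh this CPU's raw/GTOD
 * timestamp pair and fold it into the filtered clock.
 */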
notrace void sched_clock_tick(void)
{
	struct sched_clock_data *scd;

	if (sched_clock_stable())
		return;

	if (!static_branch_likely(&sched_clock_running))
		return;

	lockdep_assert_irqs_disabled();

	scd = this_scd();
	__scd_stamp(scd);
	sched_clock_local(scd);
}

notrace void sched_clock_tick_stable(void)
{
	if (!sched_clock_stable())
		return;

	/*
	 * Called under watchdog_lock.
	 *
	 * The watchdog just found this TSC to (still) be stable, so now is a
	 * good moment to update our __gtod_offset. Because once we find the
	 * TSC to be unstable, any computation will be computing crap.
	 */
	local_irq_disable();
	__sched_clock_gtod_offset();
	local_irq_enable();
}

/*
 * We are going deep-idle (irqs are disabled):
 */
notrace void sched_clock_idle_sleep_event(void)
{
	sched_clock_cpu(smp_processor_id());
}
EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);

/*
 * We just idled; resync with ktime.
 */
notrace void sched_clock_idle_wakeup_event(void)
{
	unsigned long flags;

	if (sched_clock_stable())
		return;

	if (unlikely(timekeeping_suspended))
		return;

	local_irq_save(flags);
	sched_clock_tick();
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);

#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */

void __init sched_clock_init(void)
{
	static_branch_inc(&sched_clock_running);
	local_irq_disable();
	generic_sched_clock_init();
	local_irq_enable();
}

notrace u64 sched_clock_cpu(int cpu)
{
	if (!static_branch_likely(&sched_clock_running))
		return 0;

	return sched_clock();
}

#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */

/*
 * Running clock - returns the time that has elapsed while a guest has been
 * running.
 * On a guest this value should be local_clock() minus the time the guest was
 * suspended by the hypervisor (for any reason).
 * On bare metal this function should return the same as local_clock().
 * Architectures and sub-architectures can override this.
 */
notrace u64 __weak running_clock(void)
{
	return local_clock();
}