xref: /openbmc/linux/kernel/time/tick-sched.c (revision 01b4c39901e087ceebae2733857248de81476bd8)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
4  *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
5  *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
6  *
7  *  No idle tick implementation for low and high resolution timers
8  *
9  *  Started by: Thomas Gleixner and Ingo Molnar
10  */
11 #include <linux/cpu.h>
12 #include <linux/err.h>
13 #include <linux/hrtimer.h>
14 #include <linux/interrupt.h>
15 #include <linux/kernel_stat.h>
16 #include <linux/percpu.h>
17 #include <linux/nmi.h>
18 #include <linux/profile.h>
19 #include <linux/sched/signal.h>
20 #include <linux/sched/clock.h>
21 #include <linux/sched/stat.h>
22 #include <linux/sched/nohz.h>
23 #include <linux/module.h>
24 #include <linux/irq_work.h>
25 #include <linux/posix-timers.h>
26 #include <linux/context_tracking.h>
27 #include <linux/mm.h>
28 
29 #include <asm/irq_regs.h>
30 
31 #include "tick-internal.h"
32 
33 #include <trace/events/timer.h>
34 
35 /*
36  * Per-CPU nohz control structure
37  */
38 static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
39 
40 struct tick_sched *tick_get_tick_sched(int cpu)
41 {
42 	return &per_cpu(tick_cpu_sched, cpu);
43 }
44 
45 #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
46 /*
47  * The time when the last jiffy update happened. Protected by jiffies_lock.
48  */
49 static ktime_t last_jiffies_update;
50 
51 /*
52  * Must be called with interrupts disabled !
53  */
54 static void tick_do_update_jiffies64(ktime_t now)
55 {
56 	unsigned long ticks = 0;
57 	ktime_t delta;
58 
59 	/*
60 	 * Do a quick check without holding jiffies_lock:
61 	 */
62 	delta = ktime_sub(now, last_jiffies_update);
63 	if (delta < tick_period)
64 		return;
65 
66 	/* Reevaluate with jiffies_lock held */
67 	write_seqlock(&jiffies_lock);
68 
69 	delta = ktime_sub(now, last_jiffies_update);
70 	if (delta >= tick_period) {
71 
72 		delta = ktime_sub(delta, tick_period);
73 		last_jiffies_update = ktime_add(last_jiffies_update,
74 						tick_period);
75 
76 		/* Slow path for long timeouts */
77 		if (unlikely(delta >= tick_period)) {
78 			s64 incr = ktime_to_ns(tick_period);
79 
80 			ticks = ktime_divns(delta, incr);
81 
82 			last_jiffies_update = ktime_add_ns(last_jiffies_update,
83 							   incr * ticks);
84 		}
85 		do_timer(++ticks);
86 
87 		/* Keep the tick_next_period variable up to date */
88 		tick_next_period = ktime_add(last_jiffies_update, tick_period);
89 	} else {
90 		write_sequnlock(&jiffies_lock);
91 		return;
92 	}
93 	write_sequnlock(&jiffies_lock);
94 	update_wall_time();
95 }
96 
97 /*
98  * Initialize and return the jiffies update.
99  */
100 static ktime_t tick_init_jiffy_update(void)
101 {
102 	ktime_t period;
103 
104 	write_seqlock(&jiffies_lock);
105 	/* Did we start the jiffies update yet? */
106 	if (last_jiffies_update == 0)
107 		last_jiffies_update = tick_next_period;
108 	period = last_jiffies_update;
109 	write_sequnlock(&jiffies_lock);
110 	return period;
111 }
112 
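/*
 * Take over the do_timer() duty if it was dropped, update jiffies when
 * this CPU owns the duty and note for the idle loop that a tick fired.
 */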
113 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
114 {
115 	int cpu = smp_processor_id();
116 
117 #ifdef CONFIG_NO_HZ_COMMON
118 	/*
119 	 * Check if the do_timer duty was dropped. We don't care about
120 	 * concurrency: This happens only when the CPU in charge went
121 	 * into a long sleep. If two CPUs happen to assign themselves to
122 	 * this duty, then the jiffies update is still serialized by
123 	 * jiffies_lock.
124 	 *
125 	 * If nohz_full is enabled, this should not happen because the
126 	 * CPU designated by tick_do_timer_cpu never relinquishes the duty.
127 	 */
128 	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
129 #ifdef CONFIG_NO_HZ_FULL
130 		WARN_ON(tick_nohz_full_running);
131 #endif
132 		tick_do_timer_cpu = cpu;
133 	}
134 #endif
135 
136 	/* Check if the jiffies need an update */
137 	if (tick_do_timer_cpu == cpu)
138 		tick_do_update_jiffies64(now);
139 
140 	if (ts->inidle)
141 		ts->got_idle_tick = 1;
142 }
143 
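/*
 * Per-tick bookkeeping: account process times and run the profiling
 * hook, with extra care when the tick is stopped in idle.
 */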
144 static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
145 {
146 #ifdef CONFIG_NO_HZ_COMMON
147 	/*
148 	 * When we are idle and the tick is stopped, we have to touch
149 	 * the watchdog as we might not schedule for a really long
150 	 * time. This happens on completely idle SMP systems while
151 	 * waiting on the login prompt. We also increment the "start of
152 	 * idle" jiffy stamp so the idle accounting adjustment we do
153 	 * when we go busy again does not account too many ticks.
154 	 */
155 	if (ts->tick_stopped) {
156 		touch_softlockup_watchdog_sched();
157 		if (is_idle_task(current))
158 			ts->idle_jiffies++;
159 		/*
160 		 * In case the current tick fired too early past its expected
161 		 * expiration, make sure we don't bypass the next clock reprogramming
162 		 * to the same deadline.
163 		 */
164 		ts->next_tick = 0;
165 	}
166 #endif
167 	update_process_times(user_mode(regs));
168 	profile_tick(CPU_PROFILING);
169 }
170 #endif
171 
172 #ifdef CONFIG_NO_HZ_FULL
173 cpumask_var_t tick_nohz_full_mask;
174 bool tick_nohz_full_running;
175 static atomic_t tick_dep_mask;
176 
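/*
 * Check whether any tick dependency bit is set in @dep and trace the
 * first one found as the reason the tick has to be kept.
 */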
177 static bool check_tick_dependency(atomic_t *dep)
178 {
179 	int val = atomic_read(dep);
180 
181 	if (val & TICK_DEP_MASK_POSIX_TIMER) {
182 		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
183 		return true;
184 	}
185 
186 	if (val & TICK_DEP_MASK_PERF_EVENTS) {
187 		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
188 		return true;
189 	}
190 
191 	if (val & TICK_DEP_MASK_SCHED) {
192 		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
193 		return true;
194 	}
195 
196 	if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
197 		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
198 		return true;
199 	}
200 
201 	if (val & TICK_DEP_MASK_RCU) {
202 		trace_tick_stop(0, TICK_DEP_MASK_RCU);
203 		return true;
204 	}
205 
206 	return false;
207 }
208 
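/*
 * Check CPU online state and the global, per-CPU, per-task and
 * per-signal tick dependency masks. The full tick can only be stopped
 * when none of them prevents it.
 */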
209 static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
210 {
211 	lockdep_assert_irqs_disabled();
212 
213 	if (unlikely(!cpu_online(cpu)))
214 		return false;
215 
216 	if (check_tick_dependency(&tick_dep_mask))
217 		return false;
218 
219 	if (check_tick_dependency(&ts->tick_dep_mask))
220 		return false;
221 
222 	if (check_tick_dependency(&current->tick_dep_mask))
223 		return false;
224 
225 	if (check_tick_dependency(&current->signal->tick_dep_mask))
226 		return false;
227 
228 	return true;
229 }
230 
231 static void nohz_full_kick_func(struct irq_work *work)
232 {
233 	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
234 }
235 
236 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
237 	.func = nohz_full_kick_func,
238 };
239 
240 /*
241  * Kick this CPU if it's full dynticks in order to force it to
242  * re-evaluate its dependency on the tick and restart it if necessary.
243  * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
244  * is NMI safe.
245  */
246 static void tick_nohz_full_kick(void)
247 {
248 	if (!tick_nohz_full_cpu(smp_processor_id()))
249 		return;
250 
251 	irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
252 }
253 
254 /*
255  * Kick the CPU if it's full dynticks in order to force it to
256  * re-evaluate its dependency on the tick and restart it if necessary.
257  */
258 void tick_nohz_full_kick_cpu(int cpu)
259 {
260 	if (!tick_nohz_full_cpu(cpu))
261 		return;
262 
263 	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
264 }
265 
266 /*
267  * Kick all full dynticks CPUs in order to force these to re-evaluate
268  * their dependency on the tick and restart it if necessary.
269  */
270 static void tick_nohz_full_kick_all(void)
271 {
272 	int cpu;
273 
274 	if (!tick_nohz_full_running)
275 		return;
276 
277 	preempt_disable();
278 	for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
279 		tick_nohz_full_kick_cpu(cpu);
280 	preempt_enable();
281 }
282 
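/*
 * Set a bit in the given tick dependency mask and kick all full
 * dynticks CPUs if the mask was previously empty.
 */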
283 static void tick_nohz_dep_set_all(atomic_t *dep,
284 				  enum tick_dep_bits bit)
285 {
286 	int prev;
287 
288 	prev = atomic_fetch_or(BIT(bit), dep);
289 	if (!prev)
290 		tick_nohz_full_kick_all();
291 }
292 
293 /*
294  * Set a global tick dependency. Used by perf events that rely on freq and
295  * by unstable clock.
296  */
297 void tick_nohz_dep_set(enum tick_dep_bits bit)
298 {
299 	tick_nohz_dep_set_all(&tick_dep_mask, bit);
300 }
301 
302 void tick_nohz_dep_clear(enum tick_dep_bits bit)
303 {
304 	atomic_andnot(BIT(bit), &tick_dep_mask);
305 }
306 
307 /*
308  * Set per-CPU tick dependency. Used by scheduler and perf events in order to
309  * manage events throttling.
310  */
311 void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
312 {
313 	int prev;
314 	struct tick_sched *ts;
315 
316 	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
317 
318 	prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
319 	if (!prev) {
320 		preempt_disable();
321 		/* Perf needs local kick that is NMI safe */
322 		if (cpu == smp_processor_id()) {
323 			tick_nohz_full_kick();
324 		} else {
325 			/* Remote irq work not NMI-safe */
326 			if (!WARN_ON_ONCE(in_nmi()))
327 				tick_nohz_full_kick_cpu(cpu);
328 		}
329 		preempt_enable();
330 	}
331 }
332 EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
333 
334 void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
335 {
336 	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
337 
338 	atomic_andnot(BIT(bit), &ts->tick_dep_mask);
339 }
340 EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
341 
342 /*
343  * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
344  * per task timers.
345  */
346 void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
347 {
348 	/*
349 	 * We could optimize this by kicking only the CPU currently running the
350 	 * task, if that noise matters for nohz full users.
351 	 */
352 	tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
353 }
354 
355 void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
356 {
357 	atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
358 }
359 
360 /*
361  * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
362  * per process timers.
363  */
364 void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
365 {
366 	tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
367 }
368 
369 void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
370 {
371 	atomic_andnot(BIT(bit), &sig->tick_dep_mask);
372 }
373 
374 /*
375  * Re-evaluate the need for the tick as we switch the current task.
376  * It might need the tick due to per task/process properties:
377  * perf events, posix CPU timers, ...
378  */
379 void __tick_nohz_task_switch(void)
380 {
381 	unsigned long flags;
382 	struct tick_sched *ts;
383 
384 	local_irq_save(flags);
385 
386 	if (!tick_nohz_full_cpu(smp_processor_id()))
387 		goto out;
388 
389 	ts = this_cpu_ptr(&tick_cpu_sched);
390 
391 	if (ts->tick_stopped) {
392 		if (atomic_read(&current->tick_dep_mask) ||
393 		    atomic_read(&current->signal->tick_dep_mask))
394 			tick_nohz_full_kick();
395 	}
396 out:
397 	local_irq_restore(flags);
398 }
399 
400 /* Get the boot-time nohz CPU list from the kernel parameters. */
401 void __init tick_nohz_full_setup(cpumask_var_t cpumask)
402 {
403 	alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
404 	cpumask_copy(tick_nohz_full_mask, cpumask);
405 	tick_nohz_full_running = true;
406 }
407 
408 static int tick_nohz_cpu_down(unsigned int cpu)
409 {
410 	/*
411 	 * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
412 	 * timers, workqueues, timekeeping, ...) on behalf of full dynticks
413 	 * CPUs. It must remain online when nohz full is enabled.
414 	 */
415 	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
416 		return -EBUSY;
417 	return 0;
418 }
419 
420 void __init tick_nohz_init(void)
421 {
422 	int cpu, ret;
423 
424 	if (!tick_nohz_full_running)
425 		return;
426 
427 	/*
428 	 * Full dynticks uses irq work to drive the tick rescheduling in safe
429 	 * locking contexts. But then we need irq work to raise its own
430 	 * interrupts to avoid a circular dependency on the tick.
431 	 */
432 	if (!arch_irq_work_has_interrupt()) {
433 		pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
434 		cpumask_clear(tick_nohz_full_mask);
435 		tick_nohz_full_running = false;
436 		return;
437 	}
438 
439 	if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
440 			!IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
441 		cpu = smp_processor_id();
442 
443 		if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
444 			pr_warn("NO_HZ: Clearing %d from nohz_full range "
445 				"for timekeeping\n", cpu);
446 			cpumask_clear_cpu(cpu, tick_nohz_full_mask);
447 		}
448 	}
449 
450 	for_each_cpu(cpu, tick_nohz_full_mask)
451 		context_tracking_cpu_set(cpu);
452 
453 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
454 					"kernel/nohz:predown", NULL,
455 					tick_nohz_cpu_down);
456 	WARN_ON(ret < 0);
457 	pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
458 		cpumask_pr_args(tick_nohz_full_mask));
459 }
460 #endif
461 
462 /*
463  * NOHZ - aka dynamic tick functionality
464  */
465 #ifdef CONFIG_NO_HZ_COMMON
466 /*
467  * NO HZ enabled ?
468  */
469 bool tick_nohz_enabled __read_mostly  = true;
470 unsigned long tick_nohz_active  __read_mostly;
471 /*
472  * Enable / Disable tickless mode
473  */
474 static int __init setup_tick_nohz(char *str)
475 {
476 	return (kstrtobool(str, &tick_nohz_enabled) == 0);
477 }
478 
479 __setup("nohz=", setup_tick_nohz);
480 
481 bool tick_nohz_tick_stopped(void)
482 {
483 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
484 
485 	return ts->tick_stopped;
486 }
487 
488 bool tick_nohz_tick_stopped_cpu(int cpu)
489 {
490 	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
491 
492 	return ts->tick_stopped;
493 }
494 
495 /**
496  * tick_nohz_update_jiffies - update jiffies when idle was interrupted
497  *
498  * Called from interrupt entry when the CPU was idle
499  *
500  * In case the sched_tick was stopped on this CPU, we have to check if jiffies
501  * must be updated. Otherwise an interrupt handler could use a stale jiffy
502  * value. We do this unconditionally on any CPU, as we don't know whether the
503  * CPU which has the update task assigned is in a long sleep.
504  */
505 static void tick_nohz_update_jiffies(ktime_t now)
506 {
507 	unsigned long flags;
508 
509 	__this_cpu_write(tick_cpu_sched.idle_waketime, now);
510 
511 	local_irq_save(flags);
512 	tick_do_update_jiffies64(now);
513 	local_irq_restore(flags);
514 
515 	touch_softlockup_watchdog_sched();
516 }
517 
518 /*
519  * Updates the per-CPU time idle statistics counters
520  */
521 static void
522 update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
523 {
524 	ktime_t delta;
525 
526 	if (ts->idle_active) {
527 		delta = ktime_sub(now, ts->idle_entrytime);
528 		if (nr_iowait_cpu(cpu) > 0)
529 			ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
530 		else
531 			ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
532 		ts->idle_entrytime = now;
533 	}
534 
535 	if (last_update_time)
536 		*last_update_time = ktime_to_us(now);
537 
538 }
539 
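/*
 * Close the current idle accounting period and note the wakeup for the
 * sched clock code.
 */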
540 static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
541 {
542 	update_ts_time_stats(smp_processor_id(), ts, now, NULL);
543 	ts->idle_active = 0;
544 
545 	sched_clock_idle_wakeup_event();
546 }
547 
548 static void tick_nohz_start_idle(struct tick_sched *ts)
549 {
550 	ts->idle_entrytime = ktime_get();
551 	ts->idle_active = 1;
552 	sched_clock_idle_sleep_event();
553 }
554 
555 /**
556  * get_cpu_idle_time_us - get the total idle time of a CPU
557  * @cpu: CPU number to query
558  * @last_update_time: variable to store update time in. Do not update
559  * counters if NULL.
560  *
561  * Return the cumulative idle time (since boot) for a given
562  * CPU, in microseconds.
563  *
564  * This time is measured via accounting rather than sampling,
565  * and is as accurate as ktime_get() is.
566  *
567  * This function returns -1 if NOHZ is not enabled.
568  */
569 u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
570 {
571 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
572 	ktime_t now, idle;
573 
574 	if (!tick_nohz_active)
575 		return -1;
576 
577 	now = ktime_get();
578 	if (last_update_time) {
579 		update_ts_time_stats(cpu, ts, now, last_update_time);
580 		idle = ts->idle_sleeptime;
581 	} else {
582 		if (ts->idle_active && !nr_iowait_cpu(cpu)) {
583 			ktime_t delta = ktime_sub(now, ts->idle_entrytime);
584 
585 			idle = ktime_add(ts->idle_sleeptime, delta);
586 		} else {
587 			idle = ts->idle_sleeptime;
588 		}
589 	}
590 
591 	return ktime_to_us(idle);
592 
593 }
594 EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
595 
596 /**
597  * get_cpu_iowait_time_us - get the total iowait time of a CPU
598  * @cpu: CPU number to query
599  * @last_update_time: variable to store update time in. Do not update
600  * counters if NULL.
601  *
602  * Return the cumulative iowait time (since boot) for a given
603  * CPU, in microseconds.
604  *
605  * This time is measured via accounting rather than sampling,
606  * and is as accurate as ktime_get() is.
607  *
608  * This function returns -1 if NOHZ is not enabled.
609  */
610 u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
611 {
612 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
613 	ktime_t now, iowait;
614 
615 	if (!tick_nohz_active)
616 		return -1;
617 
618 	now = ktime_get();
619 	if (last_update_time) {
620 		update_ts_time_stats(cpu, ts, now, last_update_time);
621 		iowait = ts->iowait_sleeptime;
622 	} else {
623 		if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
624 			ktime_t delta = ktime_sub(now, ts->idle_entrytime);
625 
626 			iowait = ktime_add(ts->iowait_sleeptime, delta);
627 		} else {
628 			iowait = ts->iowait_sleeptime;
629 		}
630 	}
631 
632 	return ktime_to_us(iowait);
633 }
634 EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
635 
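/*
 * Restart the periodic tick from the last tick time: forward the tick
 * hrtimer past now and rearm it (highres) or program the clockevent
 * (lowres).
 */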
636 static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
637 {
638 	hrtimer_cancel(&ts->sched_timer);
639 	hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
640 
641 	/* Forward the time to expire in the future */
642 	hrtimer_forward(&ts->sched_timer, now, tick_period);
643 
644 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
645 		hrtimer_start_expires(&ts->sched_timer,
646 				      HRTIMER_MODE_ABS_PINNED_HARD);
647 	} else {
648 		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
649 	}
650 
651 	/*
652 	 * Reset to make sure next tick stop doesn't get fooled by past
653 	 * cached clock deadline.
654 	 */
655 	ts->next_tick = 0;
656 }
657 
658 static inline bool local_timer_softirq_pending(void)
659 {
660 	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
661 }
662 
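/*
 * Compute the next tick expiry for this CPU, taking pending timers,
 * RCU, irq_work and the timekeeping max deferment into account.
 * Returns 0 when there is no point in stopping the tick.
 */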
663 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
664 {
665 	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
666 	unsigned long basejiff;
667 	unsigned int seq;
668 
669 	/* Read jiffies and the time when jiffies were updated last */
670 	do {
671 		seq = read_seqbegin(&jiffies_lock);
672 		basemono = last_jiffies_update;
673 		basejiff = jiffies;
674 	} while (read_seqretry(&jiffies_lock, seq));
675 	ts->last_jiffies = basejiff;
676 	ts->timer_expires_base = basemono;
677 
678 	/*
679 	 * Keep the periodic tick when RCU, the architecture or irq_work
680 	 * requests it.
681 	 * Aside from that, check whether the local timer softirq is
682 	 * pending. If so, it's a bad idea to call get_next_timer_interrupt()
683 	 * because there is an already expired timer, so it will request
684 	 * immediate expiry, which rearms the hardware timer with a
685 	 * minimal delta which brings us back to this place
686 	 * immediately. Lather, rinse and repeat...
687 	 */
688 	if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
689 	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
690 		next_tick = basemono + TICK_NSEC;
691 	} else {
692 		/*
693 		 * Get the next pending timer. If high resolution
694 		 * timers are enabled this only takes the timer wheel
695 		 * timers into account. If high resolution timers are
696 		 * disabled this also looks at the next expiring
697 		 * hrtimer.
698 		 */
699 		next_tmr = get_next_timer_interrupt(basejiff, basemono);
700 		ts->next_timer = next_tmr;
701 		/* Take the next rcu event into account */
702 		next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
703 	}
704 
705 	/*
706 	 * If the tick is due in the next period, keep it ticking or
707 	 * force prod the timer.
708 	 */
709 	delta = next_tick - basemono;
710 	if (delta <= (u64)TICK_NSEC) {
711 		/*
712 		 * Tell the timer code that the base is not idle, i.e. undo
713 		 * the effect of get_next_timer_interrupt():
714 		 */
715 		timer_clear_idle();
716 		/*
717 		 * We've not stopped the tick yet, and there's a timer in the
718 		 * next period, so no point in stopping it either, bail.
719 		 */
720 		if (!ts->tick_stopped) {
721 			ts->timer_expires = 0;
722 			goto out;
723 		}
724 	}
725 
726 	/*
727 	 * If this CPU is the one which had the do_timer() duty last, we limit
728 	 * the sleep time to the timekeeping max_deferment value.
729 	 * Otherwise we can sleep as long as we want.
730 	 */
731 	delta = timekeeping_max_deferment();
732 	if (cpu != tick_do_timer_cpu &&
733 	    (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
734 		delta = KTIME_MAX;
735 
736 	/* Calculate the next expiry time */
737 	if (delta < (KTIME_MAX - basemono))
738 		expires = basemono + delta;
739 	else
740 		expires = KTIME_MAX;
741 
742 	ts->timer_expires = min_t(u64, expires, next_tick);
743 
744 out:
745 	return ts->timer_expires;
746 }
747 
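/*
 * Stop the tick and program the next event to the expiry computed by
 * tick_nohz_next_event(). Gives up the do_timer() duty if this CPU
 * holds it.
 */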
748 static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
749 {
750 	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
751 	u64 basemono = ts->timer_expires_base;
752 	u64 expires = ts->timer_expires;
753 	ktime_t tick = expires;
754 
755 	/* Make sure we won't be trying to stop it twice in a row. */
756 	ts->timer_expires_base = 0;
757 
758 	/*
759 	 * If this CPU is the one which updates jiffies, then give up
760 	 * the assignment and let it be taken by the CPU which runs
761 	 * the tick timer next, which might be this CPU as well. If we
762 	 * don't drop this here, the jiffies might be stale and
763 	 * do_timer() never be invoked. Keep track of the fact that it
764 	 * was the one which had the do_timer() duty last.
765 	 */
766 	if (cpu == tick_do_timer_cpu) {
767 		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
768 		ts->do_timer_last = 1;
769 	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
770 		ts->do_timer_last = 0;
771 	}
772 
773 	/* Skip reprogramming the event if it's not changed */
774 	if (ts->tick_stopped && (expires == ts->next_tick)) {
775 		/* Sanity check: make sure clockevent is actually programmed */
776 		if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
777 			return;
778 
779 		WARN_ON_ONCE(1);
780 		printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
781 			    basemono, ts->next_tick, dev->next_event,
782 			    hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
783 	}
784 
785 	/*
786 	 * tick_nohz_stop_tick() can be called several times before
787 	 * tick_nohz_restart_sched_tick() is called. This happens when
788 	 * interrupts arrive which do not cause a reschedule. In the
789 	 * first call we save the current tick time, so we can restart
790 	 * the scheduler tick in tick_nohz_restart_sched_tick().
791 	 */
792 	if (!ts->tick_stopped) {
793 		calc_load_nohz_start();
794 		quiet_vmstat();
795 
796 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
797 		ts->tick_stopped = 1;
798 		trace_tick_stop(1, TICK_DEP_MASK_NONE);
799 	}
800 
801 	ts->next_tick = tick;
802 
803 	/*
804 	 * If the expiration time == KTIME_MAX, then we simply stop
805 	 * the tick timer.
806 	 */
807 	if (unlikely(expires == KTIME_MAX)) {
808 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
809 			hrtimer_cancel(&ts->sched_timer);
810 		return;
811 	}
812 
813 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
814 		hrtimer_start(&ts->sched_timer, tick,
815 			      HRTIMER_MODE_ABS_PINNED_HARD);
816 	} else {
817 		hrtimer_set_expires(&ts->sched_timer, tick);
818 		tick_program_event(tick, 1);
819 	}
820 }
821 
822 static void tick_nohz_retain_tick(struct tick_sched *ts)
823 {
824 	ts->timer_expires_base = 0;
825 }
826 
827 #ifdef CONFIG_NO_HZ_FULL
828 static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
829 {
830 	if (tick_nohz_next_event(ts, cpu))
831 		tick_nohz_stop_tick(ts, cpu);
832 	else
833 		tick_nohz_retain_tick(ts);
834 }
835 #endif /* CONFIG_NO_HZ_FULL */
836 
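/*
 * Leave the nohz state: update jiffies and load accounting, then
 * restart the periodic tick on this CPU.
 */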
837 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
838 {
839 	/* Update jiffies first */
840 	tick_do_update_jiffies64(now);
841 
842 	/*
843 	 * Clear the timer idle flag, so we avoid IPIs on remote queueing and
844 	 * the clock forward checks in the enqueue path:
845 	 */
846 	timer_clear_idle();
847 
848 	calc_load_nohz_stop();
849 	touch_softlockup_watchdog_sched();
850 	/*
851 	 * Cancel the scheduled timer and restore the tick
852 	 */
853 	ts->tick_stopped  = 0;
854 	ts->idle_exittime = now;
855 
856 	tick_nohz_restart(ts, now);
857 }
858 
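/*
 * Re-evaluate the tick state of a full dynticks CPU: stop the tick when
 * all dependencies allow it, otherwise restart a stopped tick.
 */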
859 static void tick_nohz_full_update_tick(struct tick_sched *ts)
860 {
861 #ifdef CONFIG_NO_HZ_FULL
862 	int cpu = smp_processor_id();
863 
864 	if (!tick_nohz_full_cpu(cpu))
865 		return;
866 
867 	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
868 		return;
869 
870 	if (can_stop_full_tick(cpu, ts))
871 		tick_nohz_stop_sched_tick(ts, cpu);
872 	else if (ts->tick_stopped)
873 		tick_nohz_restart_sched_tick(ts, ktime_get());
874 #endif
875 }
876 
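/*
 * Check whether this CPU may stop its tick when going idle: it must be
 * online, in nohz mode, without a pending reschedule or raised softirqs
 * and not be required for timekeeping.
 */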
877 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
878 {
879 	/*
880 	 * If this CPU is offline and it is the one which updates
881 	 * jiffies, then give up the assignment and let it be taken by
882 	 * the CPU which runs the tick timer next. If we don't drop
883 	 * this here, the jiffies might be stale and do_timer() never
884 	 * be invoked.
885 	 */
886 	if (unlikely(!cpu_online(cpu))) {
887 		if (cpu == tick_do_timer_cpu)
888 			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
889 		/*
890 		 * Make sure the CPU doesn't get fooled by obsolete tick
891 		 * deadline if it comes back online later.
892 		 */
893 		ts->next_tick = 0;
894 		return false;
895 	}
896 
897 	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
898 		return false;
899 
900 	if (need_resched())
901 		return false;
902 
903 	if (unlikely(local_softirq_pending())) {
904 		static int ratelimit;
905 
906 		if (ratelimit < 10 &&
907 		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
908 			pr_warn("NOHZ: local_softirq_pending %02x\n",
909 				(unsigned int) local_softirq_pending());
910 			ratelimit++;
911 		}
912 		return false;
913 	}
914 
915 	if (tick_nohz_full_enabled()) {
916 		/*
917 		 * Keep the tick alive to guarantee timekeeping progression
918 		 * if there are full dynticks CPUs around
919 		 */
920 		if (tick_do_timer_cpu == cpu)
921 			return false;
922 		/*
923 		 * Boot safety: make sure the timekeeping duty has been
924 		 * assigned before entering dyntick-idle mode, i.e. that
925 		 * tick_do_timer_cpu no longer holds TICK_DO_TIMER_BOOT.
926 		 */
927 		if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT))
928 			return false;
929 
930 		/* Should not happen for nohz-full */
931 		if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
932 			return false;
933 	}
934 
935 	return true;
936 }
937 
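/*
 * Stop the tick from the idle path when possible, reusing an expiry
 * value already computed by tick_nohz_get_sleep_length() if there is
 * one.
 */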
938 static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
939 {
940 	ktime_t expires;
941 	int cpu = smp_processor_id();
942 
943 	/*
944 	 * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
945 	 * tick timer expiration time is known already.
946 	 */
947 	if (ts->timer_expires_base)
948 		expires = ts->timer_expires;
949 	else if (can_stop_idle_tick(cpu, ts))
950 		expires = tick_nohz_next_event(ts, cpu);
951 	else
952 		return;
953 
954 	ts->idle_calls++;
955 
956 	if (expires > 0LL) {
957 		int was_stopped = ts->tick_stopped;
958 
959 		tick_nohz_stop_tick(ts, cpu);
960 
961 		ts->idle_sleeps++;
962 		ts->idle_expires = expires;
963 
964 		if (!was_stopped && ts->tick_stopped) {
965 			ts->idle_jiffies = ts->last_jiffies;
966 			nohz_balance_enter_idle(cpu);
967 		}
968 	} else {
969 		tick_nohz_retain_tick(ts);
970 	}
971 }
972 
973 /**
974  * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
975  *
976  * When the next event is more than a tick into the future, stop the idle tick
977  */
978 void tick_nohz_idle_stop_tick(void)
979 {
980 	__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
981 }
982 
983 void tick_nohz_idle_retain_tick(void)
984 {
985 	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
986 	/*
987 	 * Undo the effect of get_next_timer_interrupt() called from
988 	 * tick_nohz_next_event().
989 	 */
990 	timer_clear_idle();
991 }
992 
993 /**
994  * tick_nohz_idle_enter - prepare for entering idle on the current CPU
995  *
996  * Called when we start the idle loop.
997  */
998 void tick_nohz_idle_enter(void)
999 {
1000 	struct tick_sched *ts;
1001 
1002 	lockdep_assert_irqs_enabled();
1003 
1004 	local_irq_disable();
1005 
1006 	ts = this_cpu_ptr(&tick_cpu_sched);
1007 
1008 	WARN_ON_ONCE(ts->timer_expires_base);
1009 
1010 	ts->inidle = 1;
1011 	tick_nohz_start_idle(ts);
1012 
1013 	local_irq_enable();
1014 }
1015 
1016 /**
1017  * tick_nohz_irq_exit - update next tick event from interrupt exit
1018  *
1019  * When an interrupt fires while we are idle and it doesn't cause
1020  * a reschedule, it may still add, modify or delete a timer, enqueue
1021  * an RCU callback, etc...
1022  * So we need to re-calculate and reprogram the next tick event.
1023  */
1024 void tick_nohz_irq_exit(void)
1025 {
1026 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1027 
1028 	if (ts->inidle)
1029 		tick_nohz_start_idle(ts);
1030 	else
1031 		tick_nohz_full_update_tick(ts);
1032 }
1033 
1034 /**
1035  * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
1036  */
1037 bool tick_nohz_idle_got_tick(void)
1038 {
1039 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1040 
1041 	if (ts->got_idle_tick) {
1042 		ts->got_idle_tick = 0;
1043 		return true;
1044 	}
1045 	return false;
1046 }
1047 
1048 /**
1049  * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
1050  * or the tick, whichever expires first. Note that, if the tick has been
1051  * stopped, it returns the next hrtimer.
1052  *
1053  * Called from power state control code with interrupts disabled
1054  */
1055 ktime_t tick_nohz_get_next_hrtimer(void)
1056 {
1057 	return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
1058 }
1059 
1060 /**
1061  * tick_nohz_get_sleep_length - return the expected length of the current sleep
1062  * @delta_next: duration until the next event if the tick cannot be stopped
1063  *
1064  * Called from power state control code with interrupts disabled
1065  */
1066 ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
1067 {
1068 	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
1069 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1070 	int cpu = smp_processor_id();
1071 	/*
1072 	 * The idle entry time is expected to be a sufficient approximation of
1073 	 * the current time at this point.
1074 	 */
1075 	ktime_t now = ts->idle_entrytime;
1076 	ktime_t next_event;
1077 
1078 	WARN_ON_ONCE(!ts->inidle);
1079 
1080 	*delta_next = ktime_sub(dev->next_event, now);
1081 
1082 	if (!can_stop_idle_tick(cpu, ts))
1083 		return *delta_next;
1084 
1085 	next_event = tick_nohz_next_event(ts, cpu);
1086 	if (!next_event)
1087 		return *delta_next;
1088 
1089 	/*
1090 	 * If the next highres timer to expire is earlier than next_event, the
1091 	 * idle governor needs to know that.
1092 	 */
1093 	next_event = min_t(u64, next_event,
1094 			   hrtimer_next_event_without(&ts->sched_timer));
1095 
1096 	return ktime_sub(next_event, now);
1097 }
1098 
1099 /**
1100  * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
1101  * for a particular CPU.
1102  *
1103  * Called from the schedutil frequency scaling governor in scheduler context.
1104  */
1105 unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
1106 {
1107 	struct tick_sched *ts = tick_get_tick_sched(cpu);
1108 
1109 	return ts->idle_calls;
1110 }
1111 
1112 /**
1113  * tick_nohz_get_idle_calls - return the current idle calls counter value
1114  *
1115  * Called from the schedutil frequency scaling governor in scheduler context.
1116  */
1117 unsigned long tick_nohz_get_idle_calls(void)
1118 {
1119 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1120 
1121 	return ts->idle_calls;
1122 }
1123 
1124 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
1125 {
1126 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
1127 	unsigned long ticks;
1128 
1129 	if (vtime_accounting_cpu_enabled())
1130 		return;
1131 	/*
1132 	 * We stopped the tick in idle. Updating process times would miss the
1133 	 * time we slept, as update_process_times() does only a single tick of
1134 	 * accounting. Enforce that this is accounted to idle!
1135 	 */
1136 	ticks = jiffies - ts->idle_jiffies;
1137 	/*
1138 	 * We might be one off. Do not randomly account a huge number of ticks!
1139 	 */
1140 	if (ticks && ticks < LONG_MAX)
1141 		account_idle_ticks(ticks);
1142 #endif
1143 }
1144 
1145 static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
1146 {
1147 	tick_nohz_restart_sched_tick(ts, now);
1148 	tick_nohz_account_idle_ticks(ts);
1149 }
1150 
1151 void tick_nohz_idle_restart_tick(void)
1152 {
1153 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1154 
1155 	if (ts->tick_stopped)
1156 		__tick_nohz_idle_restart_tick(ts, ktime_get());
1157 }
1158 
1159 /**
1160  * tick_nohz_idle_exit - restart the idle tick from the idle task
1161  *
1162  * Restart the idle tick when the CPU is woken up from idle.
1163  * This also exits the RCU extended quiescent state. The CPU
1164  * can use RCU again after this function is called.
1165  */
1166 void tick_nohz_idle_exit(void)
1167 {
1168 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1169 	bool idle_active, tick_stopped;
1170 	ktime_t now;
1171 
1172 	local_irq_disable();
1173 
1174 	WARN_ON_ONCE(!ts->inidle);
1175 	WARN_ON_ONCE(ts->timer_expires_base);
1176 
1177 	ts->inidle = 0;
1178 	idle_active = ts->idle_active;
1179 	tick_stopped = ts->tick_stopped;
1180 
1181 	if (idle_active || tick_stopped)
1182 		now = ktime_get();
1183 
1184 	if (idle_active)
1185 		tick_nohz_stop_idle(ts, now);
1186 
1187 	if (tick_stopped)
1188 		__tick_nohz_idle_restart_tick(ts, now);
1189 
1190 	local_irq_enable();
1191 }
1192 
1193 /*
1194  * The nohz low res interrupt handler
1195  */
1196 static void tick_nohz_handler(struct clock_event_device *dev)
1197 {
1198 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1199 	struct pt_regs *regs = get_irq_regs();
1200 	ktime_t now = ktime_get();
1201 
1202 	dev->next_event = KTIME_MAX;
1203 
1204 	tick_sched_do_timer(ts, now);
1205 	tick_sched_handle(ts, regs);
1206 
1207 	/* No need to reprogram if we are running tickless  */
1208 	if (unlikely(ts->tick_stopped))
1209 		return;
1210 
1211 	hrtimer_forward(&ts->sched_timer, now, tick_period);
1212 	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
1213 }
1214 
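/*
 * Record the nohz mode of this CPU and switch the timer wheel to nohz
 * operation on the first activation.
 */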
1215 static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
1216 {
1217 	if (!tick_nohz_enabled)
1218 		return;
1219 	ts->nohz_mode = mode;
1220 	/* One update is enough */
1221 	if (!test_and_set_bit(0, &tick_nohz_active))
1222 		timers_update_nohz();
1223 }
1224 
1225 /**
1226  * tick_nohz_switch_to_nohz - switch to nohz mode
1227  */
1228 static void tick_nohz_switch_to_nohz(void)
1229 {
1230 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1231 	ktime_t next;
1232 
1233 	if (!tick_nohz_enabled)
1234 		return;
1235 
1236 	if (tick_switch_to_oneshot(tick_nohz_handler))
1237 		return;
1238 
1239 	/*
1240 	 * Recycle the hrtimer in ts, so we can share the
1241 	 * hrtimer_forward with the highres code.
1242 	 */
1243 	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1244 	/* Get the next period */
1245 	next = tick_init_jiffy_update();
1246 
1247 	hrtimer_set_expires(&ts->sched_timer, next);
1248 	hrtimer_forward_now(&ts->sched_timer, tick_period);
1249 	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
1250 	tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
1251 }
1252 
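/*
 * Called on interrupt entry: close the idle time accounting period and
 * update jiffies if the tick is stopped.
 */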
1253 static inline void tick_nohz_irq_enter(void)
1254 {
1255 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1256 	ktime_t now;
1257 
1258 	if (!ts->idle_active && !ts->tick_stopped)
1259 		return;
1260 	now = ktime_get();
1261 	if (ts->idle_active)
1262 		tick_nohz_stop_idle(ts, now);
1263 	if (ts->tick_stopped)
1264 		tick_nohz_update_jiffies(now);
1265 }
1266 
1267 #else
1268 
1269 static inline void tick_nohz_switch_to_nohz(void) { }
1270 static inline void tick_nohz_irq_enter(void) { }
1271 static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
1272 
1273 #endif /* CONFIG_NO_HZ_COMMON */
1274 
1275 /*
1276  * Called from irq_enter to notify about the possible interruption of idle()
1277  */
1278 void tick_irq_enter(void)
1279 {
1280 	tick_check_oneshot_broadcast_this_cpu();
1281 	tick_nohz_irq_enter();
1282 }
1283 
1284 /*
1285  * High resolution timer specific code
1286  */
1287 #ifdef CONFIG_HIGH_RES_TIMERS
1288 /*
1289  * We rearm the timer until we get disabled by the idle code.
1290  * Called with interrupts disabled.
1291  */
1292 static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
1293 {
1294 	struct tick_sched *ts =
1295 		container_of(timer, struct tick_sched, sched_timer);
1296 	struct pt_regs *regs = get_irq_regs();
1297 	ktime_t now = ktime_get();
1298 
1299 	tick_sched_do_timer(ts, now);
1300 
1301 	/*
1302 	 * Do not call when we are not in irq context and have
1303 	 * no valid regs pointer.
1304 	 */
1305 	if (regs)
1306 		tick_sched_handle(ts, regs);
1307 	else
1308 		ts->next_tick = 0;
1309 
1310 	/* No need to reprogram if we are in idle or full dynticks mode */
1311 	if (unlikely(ts->tick_stopped))
1312 		return HRTIMER_NORESTART;
1313 
1314 	hrtimer_forward(timer, now, tick_period);
1315 
1316 	return HRTIMER_RESTART;
1317 }
1318 
1319 static int sched_skew_tick;
1320 
1321 static int __init skew_tick(char *str)
1322 {
1323 	get_option(&str, &sched_skew_tick);
1324 
1325 	return 0;
1326 }
1327 early_param("skew_tick", skew_tick);
1328 
1329 /**
1330  * tick_setup_sched_timer - setup the tick emulation timer
1331  */
1332 void tick_setup_sched_timer(void)
1333 {
1334 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1335 	ktime_t now = ktime_get();
1336 
1337 	/*
1338 	 * Emulate tick processing via per-CPU hrtimers:
1339 	 */
1340 	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1341 	ts->sched_timer.function = tick_sched_timer;
1342 
1343 	/* Get the next period (per-CPU) */
1344 	hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
1345 
1346 	/* Offset the tick to avert jiffies_lock contention. */
1347 	if (sched_skew_tick) {
1348 		u64 offset = ktime_to_ns(tick_period) >> 1;
1349 		do_div(offset, num_possible_cpus());
1350 		offset *= smp_processor_id();
1351 		hrtimer_add_expires_ns(&ts->sched_timer, offset);
1352 	}
1353 
1354 	hrtimer_forward(&ts->sched_timer, now, tick_period);
1355 	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
1356 	tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
1357 }
1358 #endif /* HIGH_RES_TIMERS */
1359 
1360 #if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
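/*
 * Cancel the per-CPU tick emulation hrtimer and reset the tick_sched
 * state of the given CPU.
 */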
1361 void tick_cancel_sched_timer(int cpu)
1362 {
1363 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
1364 
1365 # ifdef CONFIG_HIGH_RES_TIMERS
1366 	if (ts->sched_timer.base)
1367 		hrtimer_cancel(&ts->sched_timer);
1368 # endif
1369 
1370 	memset(ts, 0, sizeof(*ts));
1371 }
1372 #endif
1373 
1374 /**
1375  * Async notification about clocksource changes
1376  */
1377 void tick_clock_notify(void)
1378 {
1379 	int cpu;
1380 
1381 	for_each_possible_cpu(cpu)
1382 		set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
1383 }
1384 
1385 /*
1386  * Async notification about clock event changes
1387  */
1388 void tick_oneshot_notify(void)
1389 {
1390 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1391 
1392 	set_bit(0, &ts->check_clocks);
1393 }
1394 
1395 /**
1396  * Check if a change happened which makes oneshot possible.
1397  *
1398  * Called cyclically from the hrtimer softirq (driven by the timer
1399  * softirq). allow_nohz signals that we can switch into low-res nohz
1400  * mode, because high resolution timers are disabled (either at compile
1401  * time or at runtime). Called with interrupts disabled.
1402  */
1403 int tick_check_oneshot_change(int allow_nohz)
1404 {
1405 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1406 
1407 	if (!test_and_clear_bit(0, &ts->check_clocks))
1408 		return 0;
1409 
1410 	if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
1411 		return 0;
1412 
1413 	if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
1414 		return 0;
1415 
1416 	if (!allow_nohz)
1417 		return 1;
1418 
1419 	tick_nohz_switch_to_nohz();
1420 	return 0;
1421 }
1422