xref: /openbmc/linux/kernel/time/tick-broadcast.c (revision 5104d265)
1 /*
2  * linux/kernel/time/tick-broadcast.c
3  *
4  * This file contains functions which emulate a local clock-event
5  * device via a broadcast event source.
6  *
7  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
8  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
9  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
10  *
11  * This code is licenced under the GPL version 2. For details see
12  * kernel-base/COPYING.
13  */
14 #include <linux/cpu.h>
15 #include <linux/err.h>
16 #include <linux/hrtimer.h>
17 #include <linux/interrupt.h>
18 #include <linux/percpu.h>
19 #include <linux/profile.h>
20 #include <linux/sched.h>
21 #include <linux/smp.h>
22 #include <linux/module.h>
23 
24 #include "tick-internal.h"
25 
26 /*
27  * Broadcast support for broken x86 hardware, where the local apic
28  * timer stops in C3 state.
29  */
30 
31 static struct tick_device tick_broadcast_device;
32 static cpumask_var_t tick_broadcast_mask;
33 static cpumask_var_t tick_broadcast_on;
34 static cpumask_var_t tmpmask;
35 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
36 static int tick_broadcast_force;
37 
38 #ifdef CONFIG_TICK_ONESHOT
39 static void tick_broadcast_clear_oneshot(int cpu);
40 #else
41 static inline void tick_broadcast_clear_oneshot(int cpu) { }
42 #endif
43 
44 /*
45  * Debugging: see timer_list.c
46  */
47 struct tick_device *tick_get_broadcast_device(void)
48 {
49 	return &tick_broadcast_device;
50 }
51 
52 struct cpumask *tick_get_broadcast_mask(void)
53 {
54 	return tick_broadcast_mask;
55 }
56 
57 /*
58  * Start the device in periodic mode
59  */
60 static void tick_broadcast_start_periodic(struct clock_event_device *bc)
61 {
62 	if (bc)
63 		tick_setup_periodic(bc, 1);
64 }
65 
66 /*
67  * Check, if the device can be utilized as broadcast device:
68  */
69 static bool tick_check_broadcast_device(struct clock_event_device *curdev,
70 					struct clock_event_device *newdev)
71 {
72 	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
73 	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
74 		return false;
75 
76 	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
77 	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
78 		return false;
79 
80 	return !curdev || newdev->rating > curdev->rating;
81 }
82 
83 /*
84  * Conditionally install/replace broadcast device
85  */
86 void tick_install_broadcast_device(struct clock_event_device *dev)
87 {
88 	struct clock_event_device *cur = tick_broadcast_device.evtdev;
89 
90 	if (!tick_check_broadcast_device(cur, dev))
91 		return;
92 
93 	if (!try_module_get(dev->owner))
94 		return;
95 
96 	clockevents_exchange_device(cur, dev);
97 	if (cur)
98 		cur->event_handler = clockevents_handle_noop;
99 	tick_broadcast_device.evtdev = dev;
100 	if (!cpumask_empty(tick_broadcast_mask))
101 		tick_broadcast_start_periodic(dev);
102 	/*
103 	 * Inform all cpus about this. We might be in a situation
104 	 * where we did not switch to oneshot mode because the per cpu
105 	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
106 	 * of a oneshot capable broadcast device. Without that
107 	 * notification the system stays stuck in periodic mode
108 	 * forever.
109 	 */
110 	if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
111 		tick_clock_notify();
112 }
113 
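/*
 * Illustration (editor's sketch, not part of the original file): platform code
 * never calls tick_install_broadcast_device() directly. It registers an
 * ordinary clock event device and the tick core routes it here via
 * tick_check_new_device(). A minimal sketch with hypothetical callbacks
 * (mytimer_set_mode, mytimer_set_next_event) and made-up rating/frequency
 * values:
 *
 *	static struct clock_event_device mytimer_bc_dev = {
 *		.name		= "mytimer-broadcast",
 *		.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 *		.rating		= 250,
 *		.set_mode	= mytimer_set_mode,
 *		.set_next_event	= mytimer_set_next_event,
 *	};
 *
 *	mytimer_bc_dev.cpumask = cpu_possible_mask;
 *	clockevents_config_and_register(&mytimer_bc_dev, rate_hz, 1, 0x7fffffff);
 *
 * Because the device is neither CLOCK_EVT_FEAT_DUMMY nor CLOCK_EVT_FEAT_C3STOP,
 * tick_check_broadcast_device() will accept it, provided its rating beats the
 * currently installed broadcast device.
 */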
114 /*
115  * Check, if the device is the broadcast device
116  */
117 int tick_is_broadcast_device(struct clock_event_device *dev)
118 {
119 	return (dev && tick_broadcast_device.evtdev == dev);
120 }
121 
122 static void err_broadcast(const struct cpumask *mask)
123 {
124 	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
125 }
126 
127 static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
128 {
129 	if (!dev->broadcast)
130 		dev->broadcast = tick_broadcast;
131 	if (!dev->broadcast) {
132 		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
133 			     dev->name);
134 		dev->broadcast = err_broadcast;
135 	}
136 }
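/*
 * Illustration (editor's sketch): dev->broadcast() receives the mask of CPUs
 * whose local timers are stopped and must kick them, typically via an IPI.
 * Architectures that select CONFIG_GENERIC_CLOCKEVENTS_BROADCAST provide
 * tick_broadcast() for this, which is used as the default above. A rough
 * sketch; smp_send_timer_ipi() is a hypothetical arch helper, the real hook
 * differs per architecture:
 *
 *	void tick_broadcast(const struct cpumask *mask)
 *	{
 *		smp_send_timer_ipi(mask);	// IPI handler ends up in
 *						// tick_receive_broadcast()
 *	}
 */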
137 
138 /*
139  * Check, if the device is dysfunctional and a placeholder, which
140  * needs to be handled by the broadcast device.
141  */
142 int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
143 {
144 	struct clock_event_device *bc = tick_broadcast_device.evtdev;
145 	unsigned long flags;
146 	int ret;
147 
148 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
149 
150 	/*
151 	 * Devices might be registered with both periodic and oneshot
152 	 * mode disabled. This signals that the device needs to be
153 	 * operated from the broadcast device and is a placeholder for
154 	 * the cpu local device.
155 	 */
156 	if (!tick_device_is_functional(dev)) {
157 		dev->event_handler = tick_handle_periodic;
158 		tick_device_setup_broadcast_func(dev);
159 		cpumask_set_cpu(cpu, tick_broadcast_mask);
160 		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
161 			tick_broadcast_start_periodic(bc);
162 		else
163 			tick_broadcast_setup_oneshot(bc);
164 		ret = 1;
165 	} else {
166 		/*
167 		 * Clear the broadcast bit for this cpu if the
168 		 * device is not power state affected.
169 		 */
170 		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
171 			cpumask_clear_cpu(cpu, tick_broadcast_mask);
172 		else
173 			tick_device_setup_broadcast_func(dev);
174 
175 		/*
176 		 * Clear the broadcast bit if the CPU is not in
177 		 * periodic broadcast on state.
178 		 */
179 		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
180 			cpumask_clear_cpu(cpu, tick_broadcast_mask);
181 
182 		switch (tick_broadcast_device.mode) {
183 		case TICKDEV_MODE_ONESHOT:
184 			/*
185 			 * If the system is in oneshot mode we can
186 			 * unconditionally clear the oneshot mask bit,
187 			 * because the CPU is running and therefore
188 			 * not in an idle state which causes the power
189 			 * state affected device to stop. Let the
190 			 * caller initialize the device.
191 			 */
192 			tick_broadcast_clear_oneshot(cpu);
193 			ret = 0;
194 			break;
195 
196 		case TICKDEV_MODE_PERIODIC:
197 			/*
198 			 * If the system is in periodic mode, check
199 			 * whether the broadcast device can be
200 			 * switched off now.
201 			 */
202 			if (cpumask_empty(tick_broadcast_mask) && bc)
203 				clockevents_shutdown(bc);
204 			/*
205 			 * If we kept the cpu in the broadcast mask,
206 			 * tell the caller to leave the per cpu device
207 			 * in shutdown state. The periodic interrupt
208 			 * is delivered by the broadcast device.
209 			 */
210 			ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
211 			break;
212 		default:
213 			/* Nothing to do */
214 			ret = 0;
215 			break;
216 		}
217 	}
218 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
219 	return ret;
220 }
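/*
 * Illustration (editor's sketch): the caller is tick_setup_device() in
 * tick-common.c, which uses the return value to decide whether the per-cpu
 * device may stay shut down, roughly:
 *
 *	if (tick_device_uses_broadcast(newdev, cpu))
 *		return;		// broadcast device delivers the tick
 *
 *	if (td->mode == TICKDEV_MODE_PERIODIC)
 *		tick_setup_periodic(newdev, 0);
 *	else
 *		tick_setup_oneshot(newdev, handler, next_event);
 */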
221 
222 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
223 int tick_receive_broadcast(void)
224 {
225 	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
226 	struct clock_event_device *evt = td->evtdev;
227 
228 	if (!evt)
229 		return -ENODEV;
230 
231 	if (!evt->event_handler)
232 		return -EINVAL;
233 
234 	evt->event_handler(evt);
235 	return 0;
236 }
237 #endif
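/*
 * Illustration (editor's sketch): with CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 * (arm/arm64, for example) the broadcast callback raises a dedicated IPI and
 * the arch IPI handler simply forwards it to tick_receive_broadcast(),
 * roughly:
 *
 *	case IPI_TIMER:
 *		irq_enter();
 *		tick_receive_broadcast();
 *		irq_exit();
 *		break;
 */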
238 
239 /*
240  * Broadcast the event to the cpus set in the mask (the mask gets mangled).
241  */
242 static void tick_do_broadcast(struct cpumask *mask)
243 {
244 	int cpu = smp_processor_id();
245 	struct tick_device *td;
246 
247 	/*
248 	 * Check, if the current cpu is in the mask
249 	 */
250 	if (cpumask_test_cpu(cpu, mask)) {
251 		cpumask_clear_cpu(cpu, mask);
252 		td = &per_cpu(tick_cpu_device, cpu);
253 		td->evtdev->event_handler(td->evtdev);
254 	}
255 
256 	if (!cpumask_empty(mask)) {
257 		/*
258 		 * It might be necessary to actually check whether the devices
259 		 * have different broadcast functions. For now, just use the
260 		 * one of the first device. This works as long as we have this
261 		 * misfeature only on x86 (lapic)
262 		 */
263 		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
264 		td->evtdev->broadcast(mask);
265 	}
266 }
267 
268 /*
269  * Periodic broadcast:
270  * - invoke the broadcast handlers
271  */
272 static void tick_do_periodic_broadcast(void)
273 {
274 	raw_spin_lock(&tick_broadcast_lock);
275 
276 	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
277 	tick_do_broadcast(tmpmask);
278 
279 	raw_spin_unlock(&tick_broadcast_lock);
280 }
281 
282 /*
283  * Event handler for periodic broadcast ticks
284  */
285 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
286 {
287 	ktime_t next;
288 
289 	tick_do_periodic_broadcast();
290 
291 	/*
292 	 * The device is in periodic mode. No reprogramming necessary:
293 	 */
294 	if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
295 		return;
296 
297 	/*
298 	 * Setup the next period for devices, which do not have
299 	 * periodic mode. We read dev->next_event first and add to it
300 	 * when the event already expired. clockevents_program_event()
301 	 * sets dev->next_event only when the event is really
302 	 * programmed to the device.
303 	 */
304 	for (next = dev->next_event; ;) {
305 		next = ktime_add(next, tick_period);
306 
307 		if (!clockevents_program_event(dev, next, false))
308 			return;
309 		tick_do_periodic_broadcast();
310 	}
311 }
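/*
 * Worked example for the loop above (editor's note, assuming HZ=250, i.e.
 * tick_period = 4 ms): if the broadcast device is oneshot-only and this
 * handler runs 9 ms after dev->next_event, the first two
 * clockevents_program_event() calls fail because the requested expiry is
 * still in the past; each failure delivers the missed tick via
 * tick_do_periodic_broadcast(), and the third attempt (next_event + 12 ms)
 * lies in the future and arms the device.
 */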
312 
313 /*
314  * Powerstate information: The system enters/leaves a state, where
315  * affected devices might stop
316  */
317 static void tick_do_broadcast_on_off(unsigned long *reason)
318 {
319 	struct clock_event_device *bc, *dev;
320 	struct tick_device *td;
321 	unsigned long flags;
322 	int cpu, bc_stopped;
323 
324 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
325 
326 	cpu = smp_processor_id();
327 	td = &per_cpu(tick_cpu_device, cpu);
328 	dev = td->evtdev;
329 	bc = tick_broadcast_device.evtdev;
330 
331 	/*
332 	 * Is the device not affected by the powerstate ?
333 	 */
334 	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
335 		goto out;
336 
337 	if (!tick_device_is_functional(dev))
338 		goto out;
339 
340 	bc_stopped = cpumask_empty(tick_broadcast_mask);
341 
342 	switch (*reason) {
343 	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
344 	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
345 		cpumask_set_cpu(cpu, tick_broadcast_on);
346 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
347 			if (tick_broadcast_device.mode ==
348 			    TICKDEV_MODE_PERIODIC)
349 				clockevents_shutdown(dev);
350 		}
351 		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
352 			tick_broadcast_force = 1;
353 		break;
354 	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
355 		if (tick_broadcast_force)
356 			break;
357 		cpumask_clear_cpu(cpu, tick_broadcast_on);
358 		if (!tick_device_is_functional(dev))
359 			break;
360 		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
361 			if (tick_broadcast_device.mode ==
362 			    TICKDEV_MODE_PERIODIC)
363 				tick_setup_periodic(dev, 0);
364 		}
365 		break;
366 	}
367 
368 	if (cpumask_empty(tick_broadcast_mask)) {
369 		if (!bc_stopped)
370 			clockevents_shutdown(bc);
371 	} else if (bc_stopped) {
372 		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
373 			tick_broadcast_start_periodic(bc);
374 		else
375 			tick_broadcast_setup_oneshot(bc);
376 	}
377 out:
378 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
379 }
380 
381 /*
382  * Powerstate information: The system enters/leaves a state, where
383  * affected devices might stop.
384  */
385 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
386 {
387 	if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
388 		printk(KERN_ERR "tick-broadcast: ignoring broadcast for offline CPU #%d\n",
389 		       *oncpu);
390 	else
391 		tick_do_broadcast_on_off(&reason);
392 }
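/*
 * Illustration (editor's sketch): callers go through clockevents_notify()
 * rather than calling this directly. A driver which knows that the local
 * timer dies in deep C-states would do something like:
 *
 *	int cpu = get_cpu();
 *	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu);
 *	put_cpu();
 */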
393 
394 /*
395  * Set the periodic handler depending on broadcast on/off
396  */
397 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
398 {
399 	if (!broadcast)
400 		dev->event_handler = tick_handle_periodic;
401 	else
402 		dev->event_handler = tick_handle_periodic_broadcast;
403 }
404 
405 /*
406  * Remove a CPU from broadcasting
407  */
408 void tick_shutdown_broadcast(unsigned int *cpup)
409 {
410 	struct clock_event_device *bc;
411 	unsigned long flags;
412 	unsigned int cpu = *cpup;
413 
414 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
415 
416 	bc = tick_broadcast_device.evtdev;
417 	cpumask_clear_cpu(cpu, tick_broadcast_mask);
418 	cpumask_clear_cpu(cpu, tick_broadcast_on);
419 
420 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
421 		if (bc && cpumask_empty(tick_broadcast_mask))
422 			clockevents_shutdown(bc);
423 	}
424 
425 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
426 }
427 
428 void tick_suspend_broadcast(void)
429 {
430 	struct clock_event_device *bc;
431 	unsigned long flags;
432 
433 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
434 
435 	bc = tick_broadcast_device.evtdev;
436 	if (bc)
437 		clockevents_shutdown(bc);
438 
439 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
440 }
441 
442 int tick_resume_broadcast(void)
443 {
444 	struct clock_event_device *bc;
445 	unsigned long flags;
446 	int broadcast = 0;
447 
448 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
449 
450 	bc = tick_broadcast_device.evtdev;
451 
452 	if (bc) {
453 		clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
454 
455 		switch (tick_broadcast_device.mode) {
456 		case TICKDEV_MODE_PERIODIC:
457 			if (!cpumask_empty(tick_broadcast_mask))
458 				tick_broadcast_start_periodic(bc);
459 			broadcast = cpumask_test_cpu(smp_processor_id(),
460 						     tick_broadcast_mask);
461 			break;
462 		case TICKDEV_MODE_ONESHOT:
463 			if (!cpumask_empty(tick_broadcast_mask))
464 				broadcast = tick_resume_broadcast_oneshot(bc);
465 			break;
466 		}
467 	}
468 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
469 
470 	return broadcast;
471 }
472 
473 
474 #ifdef CONFIG_TICK_ONESHOT
475 
476 static cpumask_var_t tick_broadcast_oneshot_mask;
477 static cpumask_var_t tick_broadcast_pending_mask;
478 static cpumask_var_t tick_broadcast_force_mask;
479 
480 /*
481  * Exposed for debugging: see timer_list.c
482  */
483 struct cpumask *tick_get_broadcast_oneshot_mask(void)
484 {
485 	return tick_broadcast_oneshot_mask;
486 }
487 
488 /*
489  * Called before going idle with interrupts disabled. Checks whether a
490  * broadcast event from the other core is about to happen. We detected
491  * that in tick_broadcast_oneshot_control(). The callsite can use this
492  * to avoid a deep idle transition as we are about to get the
493  * broadcast IPI right away.
494  */
495 int tick_check_broadcast_expired(void)
496 {
497 	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
498 }
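/*
 * Illustration (editor's sketch): the generic idle loop (kernel/cpu/idle.c in
 * this kernel generation) uses this to poll instead of entering a deep idle
 * state when the broadcast IPI is already on its way, roughly:
 *
 *	if (cpu_idle_force_poll || tick_check_broadcast_expired())
 *		cpu_idle_poll();
 *	else
 *		arch_cpu_idle();
 */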
499 
500 /*
501  * Set broadcast interrupt affinity
502  */
503 static void tick_broadcast_set_affinity(struct clock_event_device *bc,
504 					const struct cpumask *cpumask)
505 {
506 	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
507 		return;
508 
509 	if (cpumask_equal(bc->cpumask, cpumask))
510 		return;
511 
512 	bc->cpumask = cpumask;
513 	irq_set_affinity(bc->irq, bc->cpumask);
514 }
515 
516 static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
517 				    ktime_t expires, int force)
518 {
519 	int ret;
520 
521 	if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
522 		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
523 
524 	ret = clockevents_program_event(bc, expires, force);
525 	if (!ret)
526 		tick_broadcast_set_affinity(bc, cpumask_of(cpu));
527 	return ret;
528 }
529 
530 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
531 {
532 	clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
533 	return 0;
534 }
535 
536 /*
537  * Called from irq_enter() when idle was interrupted to reenable the
538  * per cpu device.
539  */
540 void tick_check_oneshot_broadcast(int cpu)
541 {
542 	if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
543 		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
544 
545 		/*
546 		 * We might be in the middle of switching over from
547 		 * periodic to oneshot. If the CPU has not yet
548 		 * switched over, leave the device alone.
549 		 */
550 		if (td->mode == TICKDEV_MODE_ONESHOT) {
551 			clockevents_set_mode(td->evtdev,
552 					     CLOCK_EVT_MODE_ONESHOT);
553 		}
554 	}
555 }
556 
557 /*
558  * Handle oneshot mode broadcasting
559  */
560 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
561 {
562 	struct tick_device *td;
563 	ktime_t now, next_event;
564 	int cpu, next_cpu = 0;
565 
566 	raw_spin_lock(&tick_broadcast_lock);
567 again:
568 	dev->next_event.tv64 = KTIME_MAX;
569 	next_event.tv64 = KTIME_MAX;
570 	cpumask_clear(tmpmask);
571 	now = ktime_get();
572 	/* Find all expired events */
573 	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
574 		td = &per_cpu(tick_cpu_device, cpu);
575 		if (td->evtdev->next_event.tv64 <= now.tv64) {
576 			cpumask_set_cpu(cpu, tmpmask);
577 			/*
578 			 * Mark the remote cpu in the pending mask, so
579 			 * it can avoid reprogramming the cpu local
580 			 * timer in tick_broadcast_oneshot_control().
581 			 */
582 			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
583 		} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
584 			next_event.tv64 = td->evtdev->next_event.tv64;
585 			next_cpu = cpu;
586 		}
587 	}
588 
589 	/*
590 	 * Remove the current cpu from the pending mask. The event is
591 	 * delivered immediately in tick_do_broadcast() !
592 	 */
593 	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
594 
595 	/* Take care of enforced broadcast requests */
596 	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
597 	cpumask_clear(tick_broadcast_force_mask);
598 
599 	/*
600 	 * Sanity check. Catch the case where we try to broadcast to
601 	 * offline cpus.
602 	 */
603 	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
604 		cpumask_and(tmpmask, tmpmask, cpu_online_mask);
605 
606 	/*
607 	 * Wakeup the cpus which have an expired event.
608 	 */
609 	tick_do_broadcast(tmpmask);
610 
611 	/*
612 	 * Two reasons for reprogram:
613 	 *
614 	 * - The global event did not expire any CPU local
615 	 * events. This happens in dyntick mode, as the maximum PIT
616 	 * delta is quite small.
617 	 *
618 	 * - There are pending events on sleeping CPUs which were not
619 	 * in the event mask
620 	 */
621 	if (next_event.tv64 != KTIME_MAX) {
622 		/*
623 		 * Rearm the broadcast device. If event expired,
624 		 * repeat the above
625 		 */
626 		if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
627 			goto again;
628 	}
629 	raw_spin_unlock(&tick_broadcast_lock);
630 }
631 
632 /*
633  * Powerstate information: The system enters/leaves a state, where
634  * affected devices might stop
635  */
636 void tick_broadcast_oneshot_control(unsigned long reason)
637 {
638 	struct clock_event_device *bc, *dev;
639 	struct tick_device *td;
640 	unsigned long flags;
641 	ktime_t now;
642 	int cpu;
643 
644 	/*
645 	 * Periodic mode does not care about the enter/exit of power
646 	 * states
647 	 */
648 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
649 		return;
650 
651 	/*
652 	 * We are called with preemption disabled from the depth of the
653 	 * idle code, so we can't be moved away.
654 	 */
655 	cpu = smp_processor_id();
656 	td = &per_cpu(tick_cpu_device, cpu);
657 	dev = td->evtdev;
658 
659 	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
660 		return;
661 
662 	bc = tick_broadcast_device.evtdev;
663 
664 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
665 	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
666 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
667 			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
668 			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
669 			/*
670 			 * We only reprogram the broadcast timer if we
671 	 * did not mark ourselves in the force mask and
672 			 * if the cpu local event is earlier than the
673 			 * broadcast event. If the current CPU is in
674 			 * the force mask, then we are going to be
675 			 * woken by the IPI right away.
676 			 */
677 			if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
678 			    dev->next_event.tv64 < bc->next_event.tv64)
679 				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
680 		}
681 	} else {
682 		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
683 			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
684 			/*
685 			 * The cpu which was handling the broadcast
686 			 * timer marked this cpu in the broadcast
687 			 * pending mask and fired the broadcast
688 			 * IPI. So we are going to handle the expired
689 			 * event anyway via the broadcast IPI
690 			 * handler. No need to reprogram the timer
691 			 * with an already expired event.
692 			 */
693 			if (cpumask_test_and_clear_cpu(cpu,
694 				       tick_broadcast_pending_mask))
695 				goto out;
696 
697 			/*
698 			 * Bail out if there is no next event.
699 			 */
700 			if (dev->next_event.tv64 == KTIME_MAX)
701 				goto out;
702 			/*
703 			 * If the pending bit is not set, then we are
704 			 * either the CPU handling the broadcast
705 			 * interrupt or we got woken by something else.
706 			 *
707 	 * We are no longer in the broadcast mask, so
708 			 * if the cpu local expiry time is already
709 			 * reached, we would reprogram the cpu local
710 			 * timer with an already expired event.
711 			 *
712 			 * This can lead to a ping-pong when we return
713 	 * to idle and therefore rearm the broadcast
714 			 * timer before the cpu local timer was able
715 			 * to fire. This happens because the forced
716 			 * reprogramming makes sure that the event
717 			 * will happen in the future and depending on
718 			 * the min_delta setting this might be far
719 			 * enough out that the ping-pong starts.
720 			 *
721 			 * If the cpu local next_event has expired
722 			 * then we know that the broadcast timer
723 			 * next_event has expired as well and
724 			 * broadcast is about to be handled. So we
725 			 * avoid reprogramming and enforce that the
726 			 * broadcast handler, which did not run yet,
727 			 * will invoke the cpu local handler.
728 			 *
729 			 * We cannot call the handler directly from
730 			 * here, because we might be in a NOHZ phase
731 			 * and we did not go through the irq_enter()
732 			 * nohz fixups.
733 			 */
734 			now = ktime_get();
735 			if (dev->next_event.tv64 <= now.tv64) {
736 				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
737 				goto out;
738 			}
739 			/*
740 			 * We got woken by something else. Reprogram
741 			 * the cpu local timer device.
742 			 */
743 			tick_program_event(dev->next_event, 1);
744 		}
745 	}
746 out:
747 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
748 }
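/*
 * Illustration (editor's sketch): cpuidle drivers of this era bracket a
 * C3-class idle state with clockevents_notify() calls, which land in
 * tick_broadcast_oneshot_control() above:
 *
 *	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 *	... enter the deep C-state ...
 *	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
 */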
749 
750 /*
751  * Reset the oneshot broadcast for a cpu
752  *
753  * Called with tick_broadcast_lock held
754  */
755 static void tick_broadcast_clear_oneshot(int cpu)
756 {
757 	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
758 }
759 
760 static void tick_broadcast_init_next_event(struct cpumask *mask,
761 					   ktime_t expires)
762 {
763 	struct tick_device *td;
764 	int cpu;
765 
766 	for_each_cpu(cpu, mask) {
767 		td = &per_cpu(tick_cpu_device, cpu);
768 		if (td->evtdev)
769 			td->evtdev->next_event = expires;
770 	}
771 }
772 
773 /**
774  * tick_broadcast_setup_oneshot - setup the broadcast device
775  */
776 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
777 {
778 	int cpu = smp_processor_id();
779 
780 	/* Set it up only once! */
781 	if (bc->event_handler != tick_handle_oneshot_broadcast) {
782 		int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
783 
784 		bc->event_handler = tick_handle_oneshot_broadcast;
785 
786 		/*
787 		 * We must be careful here. There might be other CPUs
788 		 * waiting for periodic broadcast. We need to set the
789 		 * oneshot_mask bits for those and program the
790 		 * broadcast device to fire.
791 		 */
792 		cpumask_copy(tmpmask, tick_broadcast_mask);
793 		cpumask_clear_cpu(cpu, tmpmask);
794 		cpumask_or(tick_broadcast_oneshot_mask,
795 			   tick_broadcast_oneshot_mask, tmpmask);
796 
797 		if (was_periodic && !cpumask_empty(tmpmask)) {
798 			clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
799 			tick_broadcast_init_next_event(tmpmask,
800 						       tick_next_period);
801 			tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
802 		} else
803 			bc->next_event.tv64 = KTIME_MAX;
804 	} else {
805 		/*
806 		 * The first cpu which switches to oneshot mode sets
807 		 * the bit for all other cpus which are in the general
808 		 * (periodic) broadcast mask. So the bit is already set
809 		 * and would prevent the first broadcast enter after this
810 		 * from programming the bc device.
811 		 */
812 		tick_broadcast_clear_oneshot(cpu);
813 	}
814 }
815 
816 /*
817  * Select oneshot operating mode for the broadcast device
818  */
819 void tick_broadcast_switch_to_oneshot(void)
820 {
821 	struct clock_event_device *bc;
822 	unsigned long flags;
823 
824 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
825 
826 	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
827 	bc = tick_broadcast_device.evtdev;
828 	if (bc)
829 		tick_broadcast_setup_oneshot(bc);
830 
831 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
832 }
833 
834 
835 /*
836  * Remove a dead CPU from broadcasting
837  */
838 void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
839 {
840 	unsigned long flags;
841 	unsigned int cpu = *cpup;
842 
843 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
844 
845 	/*
846 	 * Clear the broadcast masks for the dead cpu, but do not stop
847 	 * the broadcast device!
848 	 */
849 	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
850 	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
851 	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
852 
853 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
854 }
855 
856 /*
857  * Check whether the broadcast device is in oneshot mode
858  */
859 int tick_broadcast_oneshot_active(void)
860 {
861 	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
862 }
863 
864 /*
865  * Check whether the broadcast device supports oneshot.
866  */
867 bool tick_broadcast_oneshot_available(void)
868 {
869 	struct clock_event_device *bc = tick_broadcast_device.evtdev;
870 
871 	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
872 }
873 
874 #endif
875 
876 void __init tick_broadcast_init(void)
877 {
878 	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
879 	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
880 	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
881 #ifdef CONFIG_TICK_ONESHOT
882 	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
883 	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
884 	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
885 #endif
886 }
887