xref: /openbmc/linux/kernel/time/tick-broadcast.c (revision 161f4089)
1 /*
2  * linux/kernel/time/tick-broadcast.c
3  *
4  * This file contains functions which emulate a local clock-event
5  * device via a broadcast event source.
6  *
7  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
8  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
9  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
10  *
11  * This code is licenced under the GPL version 2. For details see
12  * kernel-base/COPYING.
13  */
14 #include <linux/cpu.h>
15 #include <linux/err.h>
16 #include <linux/hrtimer.h>
17 #include <linux/interrupt.h>
18 #include <linux/percpu.h>
19 #include <linux/profile.h>
20 #include <linux/sched.h>
21 #include <linux/smp.h>
22 #include <linux/module.h>
23 
24 #include "tick-internal.h"
25 
26 /*
27  * Broadcast support for broken x86 hardware, where the local apic
28  * timer stops in C3 state.
29  */
30 
31 static struct tick_device tick_broadcast_device;
32 static cpumask_var_t tick_broadcast_mask;
33 static cpumask_var_t tick_broadcast_on;
34 static cpumask_var_t tmpmask;
35 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
36 static int tick_broadcast_force;
37 
38 #ifdef CONFIG_TICK_ONESHOT
39 static void tick_broadcast_clear_oneshot(int cpu);
40 #else
41 static inline void tick_broadcast_clear_oneshot(int cpu) { }
42 #endif
43 
44 /*
45  * Debugging: see timer_list.c
46  */
47 struct tick_device *tick_get_broadcast_device(void)
48 {
49 	return &tick_broadcast_device;
50 }
51 
52 struct cpumask *tick_get_broadcast_mask(void)
53 {
54 	return tick_broadcast_mask;
55 }
56 
/*
 * Put the broadcast device into periodic mode. A missing device
 * (@bc == NULL) is silently ignored.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (!bc)
		return;

	tick_setup_periodic(bc, 1);
}
65 
66 /*
67  * Check, if the device can be utilized as broadcast device:
68  */
69 static bool tick_check_broadcast_device(struct clock_event_device *curdev,
70 					struct clock_event_device *newdev)
71 {
72 	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
73 	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
74 	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
75 		return false;
76 
77 	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
78 	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
79 		return false;
80 
81 	return !curdev || newdev->rating > curdev->rating;
82 }
83 
84 /*
85  * Conditionally install/replace broadcast device
86  */
87 void tick_install_broadcast_device(struct clock_event_device *dev)
88 {
89 	struct clock_event_device *cur = tick_broadcast_device.evtdev;
90 
91 	if (!tick_check_broadcast_device(cur, dev))
92 		return;
93 
94 	if (!try_module_get(dev->owner))
95 		return;
96 
97 	clockevents_exchange_device(cur, dev);
98 	if (cur)
99 		cur->event_handler = clockevents_handle_noop;
100 	tick_broadcast_device.evtdev = dev;
101 	if (!cpumask_empty(tick_broadcast_mask))
102 		tick_broadcast_start_periodic(dev);
103 	/*
104 	 * Inform all cpus about this. We might be in a situation
105 	 * where we did not switch to oneshot mode because the per cpu
106 	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
107 	 * of a oneshot capable broadcast device. Without that
108 	 * notification the systems stays stuck in periodic mode
109 	 * forever.
110 	 */
111 	if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
112 		tick_clock_notify();
113 }
114 
115 /*
116  * Check, if the device is the broadcast device
117  */
118 int tick_is_broadcast_device(struct clock_event_device *dev)
119 {
120 	return (dev && tick_broadcast_device.evtdev == dev);
121 }
122 
/*
 * Fallback broadcast function for devices which have no way to
 * broadcast: complain once. @mask is not used.
 */
static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
127 
128 static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
129 {
130 	if (!dev->broadcast)
131 		dev->broadcast = tick_broadcast;
132 	if (!dev->broadcast) {
133 		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
134 			     dev->name);
135 		dev->broadcast = err_broadcast;
136 	}
137 }
138 
139 /*
140  * Check, if the device is disfunctional and a place holder, which
141  * needs to be handled by the broadcast device.
142  */
143 int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
144 {
145 	struct clock_event_device *bc = tick_broadcast_device.evtdev;
146 	unsigned long flags;
147 	int ret;
148 
149 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
150 
151 	/*
152 	 * Devices might be registered with both periodic and oneshot
153 	 * mode disabled. This signals, that the device needs to be
154 	 * operated from the broadcast device and is a placeholder for
155 	 * the cpu local device.
156 	 */
157 	if (!tick_device_is_functional(dev)) {
158 		dev->event_handler = tick_handle_periodic;
159 		tick_device_setup_broadcast_func(dev);
160 		cpumask_set_cpu(cpu, tick_broadcast_mask);
161 		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
162 			tick_broadcast_start_periodic(bc);
163 		else
164 			tick_broadcast_setup_oneshot(bc);
165 		ret = 1;
166 	} else {
167 		/*
168 		 * Clear the broadcast bit for this cpu if the
169 		 * device is not power state affected.
170 		 */
171 		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
172 			cpumask_clear_cpu(cpu, tick_broadcast_mask);
173 		else
174 			tick_device_setup_broadcast_func(dev);
175 
176 		/*
177 		 * Clear the broadcast bit if the CPU is not in
178 		 * periodic broadcast on state.
179 		 */
180 		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
181 			cpumask_clear_cpu(cpu, tick_broadcast_mask);
182 
183 		switch (tick_broadcast_device.mode) {
184 		case TICKDEV_MODE_ONESHOT:
185 			/*
186 			 * If the system is in oneshot mode we can
187 			 * unconditionally clear the oneshot mask bit,
188 			 * because the CPU is running and therefore
189 			 * not in an idle state which causes the power
190 			 * state affected device to stop. Let the
191 			 * caller initialize the device.
192 			 */
193 			tick_broadcast_clear_oneshot(cpu);
194 			ret = 0;
195 			break;
196 
197 		case TICKDEV_MODE_PERIODIC:
198 			/*
199 			 * If the system is in periodic mode, check
200 			 * whether the broadcast device can be
201 			 * switched off now.
202 			 */
203 			if (cpumask_empty(tick_broadcast_mask) && bc)
204 				clockevents_shutdown(bc);
205 			/*
206 			 * If we kept the cpu in the broadcast mask,
207 			 * tell the caller to leave the per cpu device
208 			 * in shutdown state. The periodic interrupt
209 			 * is delivered by the broadcast device.
210 			 */
211 			ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
212 			break;
213 		default:
214 			/* Nothing to do */
215 			ret = 0;
216 			break;
217 		}
218 	}
219 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
220 	return ret;
221 }
222 
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
/*
 * Run the event handler of the tick device of the current CPU.
 *
 * Returns 0 on success, -ENODEV when the CPU has no tick device and
 * -EINVAL when the device has no event handler installed.
 */
int tick_receive_broadcast(void)
{
	struct clock_event_device *evt = this_cpu_ptr(&tick_cpu_device)->evtdev;

	if (!evt)
		return -ENODEV;
	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}
#endif
239 
240 /*
241  * Broadcast the event to the cpus, which are set in the mask (mangled).
242  */
243 static void tick_do_broadcast(struct cpumask *mask)
244 {
245 	int cpu = smp_processor_id();
246 	struct tick_device *td;
247 
248 	/*
249 	 * Check, if the current cpu is in the mask
250 	 */
251 	if (cpumask_test_cpu(cpu, mask)) {
252 		cpumask_clear_cpu(cpu, mask);
253 		td = &per_cpu(tick_cpu_device, cpu);
254 		td->evtdev->event_handler(td->evtdev);
255 	}
256 
257 	if (!cpumask_empty(mask)) {
258 		/*
259 		 * It might be necessary to actually check whether the devices
260 		 * have different broadcast functions. For now, just use the
261 		 * one of the first device. This works as long as we have this
262 		 * misfeature only on x86 (lapic)
263 		 */
264 		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
265 		td->evtdev->broadcast(mask);
266 	}
267 }
268 
269 /*
270  * Periodic broadcast:
271  * - invoke the broadcast handlers
272  */
273 static void tick_do_periodic_broadcast(void)
274 {
275 	raw_spin_lock(&tick_broadcast_lock);
276 
277 	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
278 	tick_do_broadcast(tmpmask);
279 
280 	raw_spin_unlock(&tick_broadcast_lock);
281 }
282 
283 /*
284  * Event handler for periodic broadcast ticks
285  */
286 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
287 {
288 	ktime_t next;
289 
290 	tick_do_periodic_broadcast();
291 
292 	/*
293 	 * The device is in periodic mode. No reprogramming necessary:
294 	 */
295 	if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
296 		return;
297 
298 	/*
299 	 * Setup the next period for devices, which do not have
300 	 * periodic mode. We read dev->next_event first and add to it
301 	 * when the event already expired. clockevents_program_event()
302 	 * sets dev->next_event only when the event is really
303 	 * programmed to the device.
304 	 */
305 	for (next = dev->next_event; ;) {
306 		next = ktime_add(next, tick_period);
307 
308 		if (!clockevents_program_event(dev, next, false))
309 			return;
310 		tick_do_periodic_broadcast();
311 	}
312 }
313 
314 /*
315  * Powerstate information: The system enters/leaves a state, where
316  * affected devices might stop
317  */
318 static void tick_do_broadcast_on_off(unsigned long *reason)
319 {
320 	struct clock_event_device *bc, *dev;
321 	struct tick_device *td;
322 	unsigned long flags;
323 	int cpu, bc_stopped;
324 
325 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
326 
327 	cpu = smp_processor_id();
328 	td = &per_cpu(tick_cpu_device, cpu);
329 	dev = td->evtdev;
330 	bc = tick_broadcast_device.evtdev;
331 
332 	/*
333 	 * Is the device not affected by the powerstate ?
334 	 */
335 	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
336 		goto out;
337 
338 	if (!tick_device_is_functional(dev))
339 		goto out;
340 
341 	bc_stopped = cpumask_empty(tick_broadcast_mask);
342 
343 	switch (*reason) {
344 	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
345 	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
346 		cpumask_set_cpu(cpu, tick_broadcast_on);
347 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
348 			if (tick_broadcast_device.mode ==
349 			    TICKDEV_MODE_PERIODIC)
350 				clockevents_shutdown(dev);
351 		}
352 		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
353 			tick_broadcast_force = 1;
354 		break;
355 	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
356 		if (tick_broadcast_force)
357 			break;
358 		cpumask_clear_cpu(cpu, tick_broadcast_on);
359 		if (!tick_device_is_functional(dev))
360 			break;
361 		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
362 			if (tick_broadcast_device.mode ==
363 			    TICKDEV_MODE_PERIODIC)
364 				tick_setup_periodic(dev, 0);
365 		}
366 		break;
367 	}
368 
369 	if (cpumask_empty(tick_broadcast_mask)) {
370 		if (!bc_stopped)
371 			clockevents_shutdown(bc);
372 	} else if (bc_stopped) {
373 		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
374 			tick_broadcast_start_periodic(bc);
375 		else
376 			tick_broadcast_setup_oneshot(bc);
377 	}
378 out:
379 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
380 }
381 
382 /*
383  * Powerstate information: The system enters/leaves a state, where
384  * affected devices might stop.
385  */
386 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
387 {
388 	if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
389 		printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
390 		       "offline CPU #%d\n", *oncpu);
391 	else
392 		tick_do_broadcast_on_off(&reason);
393 }
394 
395 /*
396  * Set the periodic handler depending on broadcast on/off
397  */
398 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
399 {
400 	if (!broadcast)
401 		dev->event_handler = tick_handle_periodic;
402 	else
403 		dev->event_handler = tick_handle_periodic_broadcast;
404 }
405 
406 /*
407  * Remove a CPU from broadcasting
408  */
409 void tick_shutdown_broadcast(unsigned int *cpup)
410 {
411 	struct clock_event_device *bc;
412 	unsigned long flags;
413 	unsigned int cpu = *cpup;
414 
415 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
416 
417 	bc = tick_broadcast_device.evtdev;
418 	cpumask_clear_cpu(cpu, tick_broadcast_mask);
419 	cpumask_clear_cpu(cpu, tick_broadcast_on);
420 
421 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
422 		if (bc && cpumask_empty(tick_broadcast_mask))
423 			clockevents_shutdown(bc);
424 	}
425 
426 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
427 }
428 
429 void tick_suspend_broadcast(void)
430 {
431 	struct clock_event_device *bc;
432 	unsigned long flags;
433 
434 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
435 
436 	bc = tick_broadcast_device.evtdev;
437 	if (bc)
438 		clockevents_shutdown(bc);
439 
440 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
441 }
442 
443 int tick_resume_broadcast(void)
444 {
445 	struct clock_event_device *bc;
446 	unsigned long flags;
447 	int broadcast = 0;
448 
449 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
450 
451 	bc = tick_broadcast_device.evtdev;
452 
453 	if (bc) {
454 		clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
455 
456 		switch (tick_broadcast_device.mode) {
457 		case TICKDEV_MODE_PERIODIC:
458 			if (!cpumask_empty(tick_broadcast_mask))
459 				tick_broadcast_start_periodic(bc);
460 			broadcast = cpumask_test_cpu(smp_processor_id(),
461 						     tick_broadcast_mask);
462 			break;
463 		case TICKDEV_MODE_ONESHOT:
464 			if (!cpumask_empty(tick_broadcast_mask))
465 				broadcast = tick_resume_broadcast_oneshot(bc);
466 			break;
467 		}
468 	}
469 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
470 
471 	return broadcast;
472 }
473 
474 
475 #ifdef CONFIG_TICK_ONESHOT
476 
477 static cpumask_var_t tick_broadcast_oneshot_mask;
478 static cpumask_var_t tick_broadcast_pending_mask;
479 static cpumask_var_t tick_broadcast_force_mask;
480 
481 /*
482  * Exposed for debugging: see timer_list.c
483  */
484 struct cpumask *tick_get_broadcast_oneshot_mask(void)
485 {
486 	return tick_broadcast_oneshot_mask;
487 }
488 
489 /*
490  * Called before going idle with interrupts disabled. Checks whether a
491  * broadcast event from the other core is about to happen. We detected
492  * that in tick_broadcast_oneshot_control(). The callsite can use this
493  * to avoid a deep idle transition as we are about to get the
494  * broadcast IPI right away.
495  */
496 int tick_check_broadcast_expired(void)
497 {
498 	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
499 }
500 
501 /*
502  * Set broadcast interrupt affinity
503  */
504 static void tick_broadcast_set_affinity(struct clock_event_device *bc,
505 					const struct cpumask *cpumask)
506 {
507 	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
508 		return;
509 
510 	if (cpumask_equal(bc->cpumask, cpumask))
511 		return;
512 
513 	bc->cpumask = cpumask;
514 	irq_set_affinity(bc->irq, bc->cpumask);
515 }
516 
517 static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
518 				    ktime_t expires, int force)
519 {
520 	int ret;
521 
522 	if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
523 		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
524 
525 	ret = clockevents_program_event(bc, expires, force);
526 	if (!ret)
527 		tick_broadcast_set_affinity(bc, cpumask_of(cpu));
528 	return ret;
529 }
530 
531 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
532 {
533 	clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
534 	return 0;
535 }
536 
537 /*
538  * Called from irq_enter() when idle was interrupted to reenable the
539  * per cpu device.
540  */
541 void tick_check_oneshot_broadcast(int cpu)
542 {
543 	if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
544 		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
545 
546 		/*
547 		 * We might be in the middle of switching over from
548 		 * periodic to oneshot. If the CPU has not yet
549 		 * switched over, leave the device alone.
550 		 */
551 		if (td->mode == TICKDEV_MODE_ONESHOT) {
552 			clockevents_set_mode(td->evtdev,
553 					     CLOCK_EVT_MODE_ONESHOT);
554 		}
555 	}
556 }
557 
558 /*
559  * Handle oneshot mode broadcasting
560  */
561 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
562 {
563 	struct tick_device *td;
564 	ktime_t now, next_event;
565 	int cpu, next_cpu = 0;
566 
567 	raw_spin_lock(&tick_broadcast_lock);
568 again:
569 	dev->next_event.tv64 = KTIME_MAX;
570 	next_event.tv64 = KTIME_MAX;
571 	cpumask_clear(tmpmask);
572 	now = ktime_get();
573 	/* Find all expired events */
574 	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
575 		td = &per_cpu(tick_cpu_device, cpu);
576 		if (td->evtdev->next_event.tv64 <= now.tv64) {
577 			cpumask_set_cpu(cpu, tmpmask);
578 			/*
579 			 * Mark the remote cpu in the pending mask, so
580 			 * it can avoid reprogramming the cpu local
581 			 * timer in tick_broadcast_oneshot_control().
582 			 */
583 			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
584 		} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
585 			next_event.tv64 = td->evtdev->next_event.tv64;
586 			next_cpu = cpu;
587 		}
588 	}
589 
590 	/*
591 	 * Remove the current cpu from the pending mask. The event is
592 	 * delivered immediately in tick_do_broadcast() !
593 	 */
594 	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
595 
596 	/* Take care of enforced broadcast requests */
597 	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
598 	cpumask_clear(tick_broadcast_force_mask);
599 
600 	/*
601 	 * Sanity check. Catch the case where we try to broadcast to
602 	 * offline cpus.
603 	 */
604 	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
605 		cpumask_and(tmpmask, tmpmask, cpu_online_mask);
606 
607 	/*
608 	 * Wakeup the cpus which have an expired event.
609 	 */
610 	tick_do_broadcast(tmpmask);
611 
612 	/*
613 	 * Two reasons for reprogram:
614 	 *
615 	 * - The global event did not expire any CPU local
616 	 * events. This happens in dyntick mode, as the maximum PIT
617 	 * delta is quite small.
618 	 *
619 	 * - There are pending events on sleeping CPUs which were not
620 	 * in the event mask
621 	 */
622 	if (next_event.tv64 != KTIME_MAX) {
623 		/*
624 		 * Rearm the broadcast device. If event expired,
625 		 * repeat the above
626 		 */
627 		if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
628 			goto again;
629 	}
630 	raw_spin_unlock(&tick_broadcast_lock);
631 }
632 
633 /*
634  * Powerstate information: The system enters/leaves a state, where
635  * affected devices might stop
636  */
637 void tick_broadcast_oneshot_control(unsigned long reason)
638 {
639 	struct clock_event_device *bc, *dev;
640 	struct tick_device *td;
641 	unsigned long flags;
642 	ktime_t now;
643 	int cpu;
644 
645 	/*
646 	 * Periodic mode does not care about the enter/exit of power
647 	 * states
648 	 */
649 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
650 		return;
651 
652 	/*
653 	 * We are called with preemtion disabled from the depth of the
654 	 * idle code, so we can't be moved away.
655 	 */
656 	cpu = smp_processor_id();
657 	td = &per_cpu(tick_cpu_device, cpu);
658 	dev = td->evtdev;
659 
660 	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
661 		return;
662 
663 	bc = tick_broadcast_device.evtdev;
664 
665 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
666 	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
667 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
668 			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
669 			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
670 			/*
671 			 * We only reprogram the broadcast timer if we
672 			 * did not mark ourself in the force mask and
673 			 * if the cpu local event is earlier than the
674 			 * broadcast event. If the current CPU is in
675 			 * the force mask, then we are going to be
676 			 * woken by the IPI right away.
677 			 */
678 			if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
679 			    dev->next_event.tv64 < bc->next_event.tv64)
680 				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
681 		}
682 	} else {
683 		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
684 			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
685 			/*
686 			 * The cpu which was handling the broadcast
687 			 * timer marked this cpu in the broadcast
688 			 * pending mask and fired the broadcast
689 			 * IPI. So we are going to handle the expired
690 			 * event anyway via the broadcast IPI
691 			 * handler. No need to reprogram the timer
692 			 * with an already expired event.
693 			 */
694 			if (cpumask_test_and_clear_cpu(cpu,
695 				       tick_broadcast_pending_mask))
696 				goto out;
697 
698 			/*
699 			 * Bail out if there is no next event.
700 			 */
701 			if (dev->next_event.tv64 == KTIME_MAX)
702 				goto out;
703 			/*
704 			 * If the pending bit is not set, then we are
705 			 * either the CPU handling the broadcast
706 			 * interrupt or we got woken by something else.
707 			 *
708 			 * We are not longer in the broadcast mask, so
709 			 * if the cpu local expiry time is already
710 			 * reached, we would reprogram the cpu local
711 			 * timer with an already expired event.
712 			 *
713 			 * This can lead to a ping-pong when we return
714 			 * to idle and therefor rearm the broadcast
715 			 * timer before the cpu local timer was able
716 			 * to fire. This happens because the forced
717 			 * reprogramming makes sure that the event
718 			 * will happen in the future and depending on
719 			 * the min_delta setting this might be far
720 			 * enough out that the ping-pong starts.
721 			 *
722 			 * If the cpu local next_event has expired
723 			 * then we know that the broadcast timer
724 			 * next_event has expired as well and
725 			 * broadcast is about to be handled. So we
726 			 * avoid reprogramming and enforce that the
727 			 * broadcast handler, which did not run yet,
728 			 * will invoke the cpu local handler.
729 			 *
730 			 * We cannot call the handler directly from
731 			 * here, because we might be in a NOHZ phase
732 			 * and we did not go through the irq_enter()
733 			 * nohz fixups.
734 			 */
735 			now = ktime_get();
736 			if (dev->next_event.tv64 <= now.tv64) {
737 				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
738 				goto out;
739 			}
740 			/*
741 			 * We got woken by something else. Reprogram
742 			 * the cpu local timer device.
743 			 */
744 			tick_program_event(dev->next_event, 1);
745 		}
746 	}
747 out:
748 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
749 }
750 
751 /*
752  * Reset the one shot broadcast for a cpu
753  *
754  * Called with tick_broadcast_lock held
755  */
756 static void tick_broadcast_clear_oneshot(int cpu)
757 {
758 	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
759 }
760 
761 static void tick_broadcast_init_next_event(struct cpumask *mask,
762 					   ktime_t expires)
763 {
764 	struct tick_device *td;
765 	int cpu;
766 
767 	for_each_cpu(cpu, mask) {
768 		td = &per_cpu(tick_cpu_device, cpu);
769 		if (td->evtdev)
770 			td->evtdev->next_event = expires;
771 	}
772 }
773 
774 /**
775  * tick_broadcast_setup_oneshot - setup the broadcast device
776  */
777 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
778 {
779 	int cpu = smp_processor_id();
780 
781 	/* Set it up only once ! */
782 	if (bc->event_handler != tick_handle_oneshot_broadcast) {
783 		int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
784 
785 		bc->event_handler = tick_handle_oneshot_broadcast;
786 
787 		/*
788 		 * We must be careful here. There might be other CPUs
789 		 * waiting for periodic broadcast. We need to set the
790 		 * oneshot_mask bits for those and program the
791 		 * broadcast device to fire.
792 		 */
793 		cpumask_copy(tmpmask, tick_broadcast_mask);
794 		cpumask_clear_cpu(cpu, tmpmask);
795 		cpumask_or(tick_broadcast_oneshot_mask,
796 			   tick_broadcast_oneshot_mask, tmpmask);
797 
798 		if (was_periodic && !cpumask_empty(tmpmask)) {
799 			clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
800 			tick_broadcast_init_next_event(tmpmask,
801 						       tick_next_period);
802 			tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
803 		} else
804 			bc->next_event.tv64 = KTIME_MAX;
805 	} else {
806 		/*
807 		 * The first cpu which switches to oneshot mode sets
808 		 * the bit for all other cpus which are in the general
809 		 * (periodic) broadcast mask. So the bit is set and
810 		 * would prevent the first broadcast enter after this
811 		 * to program the bc device.
812 		 */
813 		tick_broadcast_clear_oneshot(cpu);
814 	}
815 }
816 
817 /*
818  * Select oneshot operating mode for the broadcast device
819  */
820 void tick_broadcast_switch_to_oneshot(void)
821 {
822 	struct clock_event_device *bc;
823 	unsigned long flags;
824 
825 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
826 
827 	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
828 	bc = tick_broadcast_device.evtdev;
829 	if (bc)
830 		tick_broadcast_setup_oneshot(bc);
831 
832 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
833 }
834 
835 
836 /*
837  * Remove a dead CPU from broadcasting
838  */
839 void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
840 {
841 	unsigned long flags;
842 	unsigned int cpu = *cpup;
843 
844 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
845 
846 	/*
847 	 * Clear the broadcast masks for the dead cpu, but do not stop
848 	 * the broadcast device!
849 	 */
850 	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
851 	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
852 	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
853 
854 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
855 }
856 
857 /*
858  * Check, whether the broadcast device is in one shot mode
859  */
860 int tick_broadcast_oneshot_active(void)
861 {
862 	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
863 }
864 
865 /*
866  * Check whether the broadcast device supports oneshot.
867  */
868 bool tick_broadcast_oneshot_available(void)
869 {
870 	struct clock_event_device *bc = tick_broadcast_device.evtdev;
871 
872 	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
873 }
874 
875 #endif
876 
877 void __init tick_broadcast_init(void)
878 {
879 	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
880 	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
881 	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
882 #ifdef CONFIG_TICK_ONESHOT
883 	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
884 	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
885 	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
886 #endif
887 }
888