/*
 * linux/kernel/time/tick-broadcast.c
 *
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 *
 * This code is licenced under the GPL version 2. For details see
 * kernel-base/COPYING.
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

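/*
 * tick_broadcast_mask:	cpus whose (stopped or dysfunctional) per cpu
 *			device gets its periodic tick from the broadcast
 *			device
 * tick_broadcast_on:	cpus which have periodic broadcast mode switched on
 * tmpmask:		scratch mask, only used with tick_broadcast_lock held
 */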
static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask;
static cpumask_var_t tick_broadcast_on;
static cpumask_var_t tmpmask;
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;

#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_clear_oneshot(int cpu);
#else
static inline void tick_broadcast_clear_oneshot(int cpu) { }
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
	return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
	return tick_broadcast_mask;
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc)
		tick_setup_periodic(bc, 1);
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
					struct clock_event_device *newdev)
{
	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	return !curdev || newdev->rating > curdev->rating;
}

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev)
{
	struct clock_event_device *cur = tick_broadcast_device.evtdev;

	if (!tick_check_broadcast_device(cur, dev))
		return;

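	/*
	 * Pin the new device's module so it cannot be unloaded while it
	 * is acting as the broadcast device.
	 */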
	if (!try_module_get(dev->owner))
		return;

	clockevents_exchange_device(cur, dev);
	if (cur)
		cur->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);
	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the system stays stuck in periodic mode
	 * forever.
	 */
	if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int ret = -ENODEV;

	if (tick_is_broadcast_device(dev)) {
		raw_spin_lock(&tick_broadcast_lock);
		ret = __clockevents_update_freq(dev, freq);
		raw_spin_unlock(&tick_broadcast_lock);
	}
	return ret;
}


static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
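	/*
	 * tick_broadcast() may resolve to NULL: tick-internal.h defines it
	 * away when CONFIG_GENERIC_CLOCKEVENTS_BROADCAST is not enabled,
	 * which is why a second NULL check follows the assignment.
	 */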
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;
	if (!dev->broadcast) {
		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
			     dev->name);
		dev->broadcast = err_broadcast;
	}
}

/*
 * Check, if the device is dysfunctional and a placeholder which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;
	unsigned long flags;
	int ret;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Devices might be registered with both periodic and oneshot
	 * mode disabled. This signals that the device needs to be
	 * operated from the broadcast device and is a placeholder for
	 * the cpu local device.
	 */
	if (!tick_device_is_functional(dev)) {
		dev->event_handler = tick_handle_periodic;
		tick_device_setup_broadcast_func(dev);
		cpumask_set_cpu(cpu, tick_broadcast_mask);
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc);
		ret = 1;
	} else {
		/*
		 * Clear the broadcast bit for this cpu if the
		 * device is not power state affected.
		 */
		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);
		else
			tick_device_setup_broadcast_func(dev);

		/*
		 * Clear the broadcast bit if the CPU is not in
		 * periodic broadcast on state.
		 */
		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_ONESHOT:
			/*
			 * If the system is in oneshot mode we can
			 * unconditionally clear the oneshot mask bit,
			 * because the CPU is running and therefore
			 * not in an idle state which causes the power
			 * state affected device to stop. Let the
			 * caller initialize the device.
			 */
			tick_broadcast_clear_oneshot(cpu);
			ret = 0;
			break;

		case TICKDEV_MODE_PERIODIC:
			/*
			 * If the system is in periodic mode, check
			 * whether the broadcast device can be
			 * switched off now.
			 */
			if (cpumask_empty(tick_broadcast_mask) && bc)
				clockevents_shutdown(bc);
			/*
			 * If we kept the cpu in the broadcast mask,
			 * tell the caller to leave the per cpu device
			 * in shutdown state. The periodic interrupt
			 * is delivered by the broadcast device.
			 */
			ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
			break;
		default:
			/* Nothing to do */
			ret = 0;
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
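/*
 * Called on the target CPU from the architecture's broadcast IPI handler:
 * run the local event handler as if the (stopped) per cpu device had fired.
 */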
int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}
#endif

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static void tick_do_broadcast(struct cpumask *mask)
{
	int cpu = smp_processor_id();
	struct tick_device *td;

	/*
	 * Check, if the current cpu is in the mask
	 */
	if (cpumask_test_cpu(cpu, mask)) {
		cpumask_clear_cpu(cpu, mask);
		td = &per_cpu(tick_cpu_device, cpu);
		td->evtdev->event_handler(td->evtdev);
	}

	if (!cpumask_empty(mask)) {
		/*
		 * It might be necessary to actually check whether the devices
		 * have different broadcast functions. For now, just use the
		 * broadcast function of the first device. This works as long
		 * as we have this misfeature only on x86 (lapic).
		 */
		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
		td->evtdev->broadcast(mask);
	}
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static void tick_do_periodic_broadcast(void)
{
	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
	tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
	ktime_t next;

	raw_spin_lock(&tick_broadcast_lock);

	tick_do_periodic_broadcast();

	/*
	 * The device is in periodic mode. No reprogramming necessary:
	 */
	if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
		goto unlock;

	/*
	 * Setup the next period for devices, which do not have
	 * periodic mode. We read dev->next_event first and add to it
	 * when the event already expired. clockevents_program_event()
	 * sets dev->next_event only when the event is really
	 * programmed to the device.
	 */
	for (next = dev->next_event; ;) {
		next = ktime_add(next, tick_period);

		if (!clockevents_program_event(dev, next, false))
			goto unlock;
		tick_do_periodic_broadcast();
	}
unlock:
	raw_spin_unlock(&tick_broadcast_lock);
}

/*
 * Powerstate information: The system enters/leaves a state, where
 * affected devices might stop
 */
static void tick_do_broadcast_on_off(unsigned long *reason)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	unsigned long flags;
	int cpu, bc_stopped;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	cpu = smp_processor_id();
	td = &per_cpu(tick_cpu_device, cpu);
	dev = td->evtdev;
	bc = tick_broadcast_device.evtdev;

	/*
	 * Is the device not affected by the powerstate?
	 */
	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
		goto out;

	if (!tick_device_is_functional(dev))
		goto out;

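	/*
	 * Remember whether the broadcast device is currently unused so it
	 * is only started or shut down below when the broadcast mask
	 * actually changes between empty and non-empty.
	 */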
	bc_stopped = cpumask_empty(tick_broadcast_mask);

	switch (*reason) {
	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
		cpumask_set_cpu(cpu, tick_broadcast_on);
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				clockevents_shutdown(dev);
		}
		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
			tick_broadcast_force = 1;
		break;
	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
		if (tick_broadcast_force)
			break;
		cpumask_clear_cpu(cpu, tick_broadcast_on);
		if (!tick_device_is_functional(dev))
			break;
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				tick_setup_periodic(dev, 0);
		}
		break;
	}

	if (cpumask_empty(tick_broadcast_mask)) {
		if (!bc_stopped)
			clockevents_shutdown(bc);
	} else if (bc_stopped) {
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc);
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Powerstate information: The system enters/leaves a state, where
 * affected devices might stop.
 */
void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
	if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
		printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
		       "offline CPU #%d\n", *oncpu);
	else
		tick_do_broadcast_on_off(&reason);
}

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

/*
 * Remove a CPU from broadcasting
 */
void tick_shutdown_broadcast(unsigned int *cpup)
{
	struct clock_event_device *bc;
	unsigned long flags;
	unsigned int cpu = *cpup;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (bc && cpumask_empty(tick_broadcast_mask))
			clockevents_shutdown(bc);
	}

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

void tick_suspend_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (bc)
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

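/*
 * Restore the broadcast device on resume. The return value tells the
 * caller whether the current CPU's periodic tick is delivered by the
 * broadcast device, in which case the local device is left shut down.
 */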
int tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;
	int broadcast = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;

	if (bc) {
		clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_PERIODIC:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_broadcast_start_periodic(bc);
			broadcast = cpumask_test_cpu(smp_processor_id(),
						     tick_broadcast_mask);
			break;
		case TICKDEV_MODE_ONESHOT:
			if (!cpumask_empty(tick_broadcast_mask))
				broadcast = tick_resume_broadcast_oneshot(bc);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);

	return broadcast;
}


#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask;
static cpumask_var_t tick_broadcast_pending_mask;
static cpumask_var_t tick_broadcast_force_mask;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
	return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
int tick_check_broadcast_expired(void)
{
	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
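	/*
	 * Devices with CLOCK_EVT_FEAT_DYNIRQ can steer their interrupt to
	 * the CPU whose event expires next, which avoids waking up an
	 * unrelated CPU just to relay the broadcast via IPI.
	 */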
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
		return;

	if (cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}

static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				    ktime_t expires, int force)
{
	int ret;

	if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);

	ret = clockevents_program_event(bc, expires, force);
	if (!ret)
		tick_broadcast_set_affinity(bc, cpumask_of(cpu));
	return ret;
}

int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
	clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
	return 0;
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

		/*
		 * We might be in the middle of switching over from
		 * periodic to oneshot. If the CPU has not yet
		 * switched over, leave the device alone.
		 */
		if (td->mode == TICKDEV_MODE_ONESHOT) {
			clockevents_set_mode(td->evtdev,
					     CLOCK_EVT_MODE_ONESHOT);
		}
	}
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;

	raw_spin_lock(&tick_broadcast_lock);
again:
	dev->next_event.tv64 = KTIME_MAX;
	next_event.tv64 = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event.tv64 <= now.tv64) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
			next_event.tv64 = td->evtdev->next_event.tv64;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast()!
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wakeup the cpus which have an expired event.
	 */
	tick_do_broadcast(tmpmask);

	/*
	 * Two reasons for reprogram:
	 *
	 * - The global event did not expire any CPU local
	 * events. This happens in dyntick mode, as the maximum PIT
	 * delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 * in the event mask
	 */
	if (next_event.tv64 != KTIME_MAX) {
		/*
		 * Rearm the broadcast device. If event expired,
		 * repeat the above
		 */
		if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
			goto again;
	}
	raw_spin_unlock(&tick_broadcast_lock);
}

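/*
 * With a hrtimer based broadcast device the "broadcast" event is a
 * hrtimer running on the CPU the device is bound to. That CPU must not
 * enter a deep idle state while it has an armed broadcast event, so the
 * idle code needs to know whether it is the one serving the broadcast.
 */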
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return 0;
	if (bc->next_event.tv64 == KTIME_MAX)
		return 0;
	return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event.tv64 < bc->next_event.tv64)
			return;
	}
	clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
}

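/*
 * Called when deadcpu goes offline: if the hrtimer based broadcast was
 * bound to the dead cpu, reprogramming it from here moves it over to
 * the current CPU.
 */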
static void broadcast_move_bc(int deadcpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc || !broadcast_needs_cpu(bc, deadcpu))
		return;
	/* This moves the broadcast assignment to this cpu */
	clockevents_program_event(bc, bc->next_event, 1);
}

/*
 * Powerstate information: The system enters/leaves a state, where
 * affected devices might stop
 * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
 */
int tick_broadcast_oneshot_control(unsigned long reason)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	unsigned long flags;
	ktime_t now;
	int cpu, ret = 0;

	/*
	 * Periodic mode does not care about the enter/exit of power
	 * states
	 */
	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
		return 0;

	/*
	 * We are called with preemption disabled from the depth of the
	 * idle code, so we can't be moved away.
	 */
	cpu = smp_processor_id();
	td = &per_cpu(tick_cpu_device, cpu);
	dev = td->evtdev;

	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
		return 0;

	bc = tick_broadcast_device.evtdev;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
			broadcast_shutdown_local(bc, dev);
			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourselves in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away.
			 */
			if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
			    dev->next_event.tv64 < bc->next_event.tv64)
				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
		}
		/*
		 * If the current CPU owns the hrtimer broadcast
		 * mechanism, it cannot go deep idle and we remove the
		 * CPU from the broadcast mask. We don't have to go
		 * through the EXIT path as the local timer is not
		 * shutdown.
		 */
		ret = broadcast_needs_cpu(bc, cpu);
		if (ret)
			cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
				       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event.tv64 == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event.tv64 <= now.tv64) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct tick_device *td;
	int cpu;

	for_each_cpu(cpu, mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev)
			td->evtdev->next_event = expires;
	}
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
	int cpu = smp_processor_id();

	/* Set it up only once ! */
	if (bc->event_handler != tick_handle_oneshot_broadcast) {
		int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;

		bc->event_handler = tick_handle_oneshot_broadcast;

		/*
		 * We must be careful here. There might be other CPUs
		 * waiting for periodic broadcast. We need to set the
		 * oneshot_mask bits for those and program the
		 * broadcast device to fire.
		 */
		cpumask_copy(tmpmask, tick_broadcast_mask);
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask,
			   tick_broadcast_oneshot_mask, tmpmask);

		if (was_periodic && !cpumask_empty(tmpmask)) {
			clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
			tick_broadcast_init_next_event(tmpmask,
						       tick_next_period);
			tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
		} else
			bc->next_event.tv64 = KTIME_MAX;
	} else {
		/*
		 * The first cpu which switches to oneshot mode sets
		 * the bit for all other cpus which are in the general
		 * (periodic) broadcast mask. So the bit is set and
		 * would prevent the first broadcast enter after this
		 * from programming the bc device.
		 */
		tick_broadcast_clear_oneshot(cpu);
	}
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
	bc = tick_broadcast_device.evtdev;
	if (bc)
		tick_broadcast_setup_oneshot(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}


/*
 * Remove a dead CPU from broadcasting
 */
void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
{
	unsigned long flags;
	unsigned int cpu = *cpup;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Clear the broadcast masks for the dead cpu, but do not stop
	 * the broadcast device!
	 */
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);

	broadcast_move_bc(cpu);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Check, whether the broadcast device is in one shot mode
 */
int tick_broadcast_oneshot_active(void)
{
	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#endif

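/*
 * Allocate the cpumasks early during boot. GFP_NOWAIT is used because
 * this runs so early in the boot process that sleeping allocations are
 * not allowed.
 */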
void __init tick_broadcast_init(void)
{
	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}