xref: /openbmc/linux/kernel/softirq.c (revision b34e08d5)
/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether it will result in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

const char * const softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst-case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}

/*
 * preempt_count and SOFTIRQ_OFFSET usage:
 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
 *   softirq processing.
 * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */
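
/*
 * Illustrative sketch (editorial example, not part of this file): the two
 * offsets above are what the in_serving_softirq() and in_softirq() helpers
 * from <linux/preempt_mask.h> distinguish.
 */
#if 0	/* example only */
static void example_context_check(void)
{
	if (in_serving_softirq())
		pr_debug("softirq is being processed (SOFTIRQ_OFFSET set)\n");
	else if (in_softirq())
		pr_debug("softirqs merely disabled via local_bh_disable()\n");
}
#endif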

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt)
		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */

static void __local_bh_enable(unsigned int cnt)
{
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		trace_softirqs_on(_RET_IP_);
	preempt_count_sub(cnt);
}

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirq if any pending. And do it in its own stack
		 * as we may be calling this deep in a task call stack already.
		 */
		do_softirq();
	}

	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);
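
/*
 * Illustrative sketch (editorial example, not part of this file): process
 * context code uses the local_bh_disable()/local_bh_enable() wrappers around
 * the functions above to hold softirqs (and thus tasklets/timers) off the
 * local CPU while touching data it shares with them. The per-cpu data named
 * in the comments is hypothetical.
 */
#if 0	/* example only */
static void example_update_shared_state(void)
{
	local_bh_disable();	/* softirqs now held off on this CPU */
	/* ... safely touch data also used from softirq context ... */
	local_bh_enable();	/* may run pending softirqs right here */
}
#endif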

/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10

#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
 * to keep the lockdep irq context tracking as tight as possible in order
 * not to mis-qualify lock contexts and miss possible deadlocks.
 */

static inline bool lockdep_softirq_start(void)
{
	bool in_hardirq = false;

	if (trace_hardirq_context(current)) {
		in_hardirq = true;
		trace_hardirq_exit();
	}

	lockdep_softirq_enter();

	return in_hardirq;
}

static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (in_hardirq)
		trace_hardirq_enter();
}
#else
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif

asmlinkage void __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;
	int cpu;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler such as network RX might set PF_MEMALLOC
	 * again if the socket is related to swap.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();
	account_irq_enter_time(current);

	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	in_hardirq = lockdep_softirq_start();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		rcu_bh_qs(cpu);
		h++;
		pending >>= softirq_bit;
	}

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}

asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		do_softirq_own_stack();

	local_irq_restore(flags);
}

/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	rcu_irq_enter();
	if (is_idle_task(current) && !in_interrupt()) {
		/*
		 * Prevent raise_softirq from needlessly waking up ksoftirqd
		 * here, as softirq will be serviced on return from interrupt.
		 */
		local_bh_disable();
		tick_irq_enter();
		_local_bh_enable();
	}

	__irq_enter();
}

static inline void invoke_softirq(void)
{
	if (!force_irqthreads) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack, which can
		 * be potentially deep already. So call softirq on its own stack
		 * to prevent any overrun.
		 */
		do_softirq_own_stack();
#endif
	} else {
		wakeup_softirqd();
	}
}
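
/*
 * Editorial note: force_irqthreads is set at boot via the "threadirqs"
 * command line option (CONFIG_IRQ_FORCED_THREADING); in that mode pending
 * softirqs are always deferred to ksoftirqd instead of being run on irq exit.
 */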

static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
		if (!in_interrupt())
			tick_nohz_irq_exit();
	}
#endif
}

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	WARN_ON_ONCE(!irqs_disabled());
#endif

	account_irq_exit_time(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
	rcu_irq_exit();
	trace_hardirq_exit(); /* must be last! */
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
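
/*
 * Illustrative sketch (editorial example, not part of this file): a subsystem
 * registers its handler for one of the fixed NR_SOFTIRQS vectors at init time
 * and raises that vector when it has work, e.g. from its hardirq handler.
 * EXAMPLE_SOFTIRQ is hypothetical; the real vectors are the enum in
 * <linux/interrupt.h>, and new code normally uses tasklets instead of
 * adding a vector.
 */
#if 0	/* example only */
static void example_softirq_action(struct softirq_action *h)
{
	/* drain whatever per-cpu work was queued for this vector */
}

static int __init example_softirq_setup(void)
{
	open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action);
	return 0;
}

static irqreturn_t example_hardirq_handler(int irq, void *dev_id)
{
	/* queue per-cpu work somewhere, then mark the vector pending */
	raise_softirq(EXAMPLE_SOFTIRQ);
	return IRQ_HANDLED;
}
#endif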

/*
 * Tasklets
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__this_cpu_read(tasklet_vec.tail) = t;
	__this_cpu_write(tasklet_vec.tail, &(t->next));
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__this_cpu_read(tasklet_hi_vec.tail) = t;
	__this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);

void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
	BUG_ON(!irqs_disabled());

	t->next = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, t);
	__raise_softirq_irqoff(HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule_first);

static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __this_cpu_read(tasklet_vec.head);
	__this_cpu_write(tasklet_vec.head, NULL);
	__this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED,
							&t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__this_cpu_read(tasklet_vec.tail) = t;
		__this_cpu_write(tasklet_vec.tail, &(t->next));
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}

static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, NULL);
	__this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED,
							&t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__this_cpu_read(tasklet_hi_vec.tail) = t;
		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}
EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do {
			yield();
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}
EXPORT_SYMBOL(tasklet_kill);
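
/*
 * Illustrative sketch (editorial example, not part of this file): typical
 * driver-side use of the tasklet API defined above. "struct my_dev" and its
 * callers are hypothetical.
 */
#if 0	/* example only */
struct my_dev {
	struct tasklet_struct rx_tasklet;
	/* ... device state ... */
};

static void my_dev_rx_tasklet(unsigned long data)
{
	struct my_dev *dev = (struct my_dev *)data;

	/* bottom-half work, runs in TASKLET_SOFTIRQ context */
}

static int my_dev_probe(struct my_dev *dev)
{
	tasklet_init(&dev->rx_tasklet, my_dev_rx_tasklet, (unsigned long)dev);
	return 0;
}

static irqreturn_t my_dev_irq(int irq, void *dev_id)
{
	struct my_dev *dev = dev_id;

	/* defer the heavy lifting out of hardirq context */
	tasklet_schedule(&dev->rx_tasklet);
	return IRQ_HANDLED;
}

static void my_dev_remove(struct my_dev *dev)
{
	/* must not be called from interrupt context, see tasklet_kill() */
	tasklet_kill(&dev->rx_tasklet);
}
#endif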

/*
 * tasklet_hrtimer
 */

/*
 * The trampoline is called when the hrtimer expires. It schedules a tasklet
 * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
 * hrtimer callback, but from softirq context.
 */
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
	struct tasklet_hrtimer *ttimer =
		container_of(timer, struct tasklet_hrtimer, timer);

	tasklet_hi_schedule(&ttimer->tasklet);
	return HRTIMER_NORESTART;
}

/*
 * Helper function which calls the hrtimer callback from
 * tasklet/softirq context
 */
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
	struct tasklet_hrtimer *ttimer = (void *)data;
	enum hrtimer_restart restart;

	restart = ttimer->function(&ttimer->timer);
	if (restart != HRTIMER_NORESTART)
		hrtimer_restart(&ttimer->timer);
}

/**
 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
 * @ttimer:	 tasklet_hrtimer which is initialized
 * @function:	 hrtimer callback function which gets called from softirq context
 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
 * @mode:	 hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
 */
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
			  enum hrtimer_restart (*function)(struct hrtimer *),
			  clockid_t which_clock, enum hrtimer_mode mode)
{
	hrtimer_init(&ttimer->timer, which_clock, mode);
	ttimer->timer.function = __hrtimer_tasklet_trampoline;
	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
		     (unsigned long)ttimer);
	ttimer->function = function;
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
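
/*
 * Illustrative sketch (editorial example, not part of this file): arming a
 * tasklet_hrtimer so the callback runs from HI_SOFTIRQ context rather than
 * hardirq context, using tasklet_hrtimer_start() from <linux/interrupt.h>.
 * The 100ms delay and the caller are hypothetical.
 */
#if 0	/* example only */
static enum hrtimer_restart my_softirq_timer_fn(struct hrtimer *timer)
{
	/* called via the trampolines above, i.e. from softirq context */
	return HRTIMER_NORESTART;
}

static void example_arm_softirq_timer(struct tasklet_hrtimer *ttimer)
{
	tasklet_hrtimer_init(ttimer, my_softirq_timer_fn,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	tasklet_hrtimer_start(ttimer, ktime_set(0, 100 * NSEC_PER_MSEC),
			      HRTIMER_MODE_REL);
}
#endif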

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}

static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();
	if (local_softirq_pending()) {
		/*
		 * We can safely run softirq on the inline stack, as we are
		 * not deep in the task stack here.
		 */
		__do_softirq();
		rcu_note_context_switch(cpu);
		local_irq_enable();
		cond_resched();
		return;
	}
	local_irq_enable();
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int cpu_callback(struct notifier_block *nfb, unsigned long action,
			void *hcpu)
{
	switch (action) {
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		takeover_tasklets((unsigned long)hcpu);
		break;
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}

static struct notifier_block cpu_nfb = {
	.notifier_call = cpu_callback
};

static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

static __init int spawn_ksoftirqd(void)
{
	register_cpu_notifier(&cpu_nfb);

	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}