xref: /openbmc/linux/kernel/softirq.c (revision e23feb16)
1 /*
2  *	linux/kernel/softirq.c
3  *
4  *	Copyright (C) 1992 Linus Torvalds
5  *
6  *	Distribute under GPLv2.
7  *
8  *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  *
10  *	Remote softirq infrastructure is by Jens Axboe.
11  */
12 
13 #include <linux/export.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/smpboot.h>
27 #include <linux/tick.h>
28 
29 #define CREATE_TRACE_POINTS
30 #include <trace/events/irq.h>
31 
32 #include <asm/irq.h>
33 /*
34    - No shared variables, all the data are CPU local.
35    - If a softirq needs serialization, let it serialize itself
36      by its own spinlocks.
37    - Even if softirq is serialized, only local cpu is marked for
38      execution. Hence, we get something sort of weak cpu binding.
39      Though it is still not clear whether it will result in better
40      locality or not.
41 
42    Examples:
43    - NET RX softirq. It is multithreaded and does not require
44      any global serialization.
45    - NET TX softirq. It kicks software netdevice queues, hence
46      it is logically serialized per device, but this serialization
47      is invisible to common code.
48    - Tasklets: serialized wrt itself.
49  */
50 
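/*
 * Illustrative sketch (not part of this file): how a softirq vector is
 * typically wired up.  FOO_SOFTIRQ and foo_* are hypothetical names; a real
 * vector also needs an entry in the softirq enum in <linux/interrupt.h> and
 * in softirq_to_name[] below.
 *
 *	static void foo_softirq_action(struct softirq_action *a)
 *	{
 *		// runs in softirq context on the CPU that raised it
 *	}
 *
 *	// init time:
 *	open_softirq(FOO_SOFTIRQ, foo_softirq_action);
 *
 *	// later, from any context (disables irqs around the raise):
 *	raise_softirq(FOO_SOFTIRQ);
 */
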
51 #ifndef __ARCH_IRQ_STAT
52 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
53 EXPORT_SYMBOL(irq_stat);
54 #endif
55 
56 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
57 
58 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
59 
60 char *softirq_to_name[NR_SOFTIRQS] = {
61 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
62 	"TASKLET", "SCHED", "HRTIMER", "RCU"
63 };
64 
65 /*
66  * we cannot loop indefinitely here to avoid userspace starvation,
67  * but we also don't want to introduce a worst case 1/HZ latency
68  * to the pending events, so let the scheduler balance
69  * the softirq load for us.
70  */
71 static void wakeup_softirqd(void)
72 {
73 	/* Interrupts are disabled: no need to stop preemption */
74 	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
75 
76 	if (tsk && tsk->state != TASK_RUNNING)
77 		wake_up_process(tsk);
78 }
79 
80 /*
81  * preempt_count and SOFTIRQ_OFFSET usage:
82  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
83  *   softirq processing.
84  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
85  *   on local_bh_disable or local_bh_enable.
86  * This lets us distinguish between whether we are currently processing
87  * softirq and whether we just have bh disabled.
88  */
89 
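/*
 * Illustrative sketch (not part of this file) of what the two offsets buy
 * us: in_softirq() is true in both states, while in_serving_softirq() is
 * only true while a handler is actually running.
 *
 *	local_bh_disable();		// adds SOFTIRQ_DISABLE_OFFSET
 *	WARN_ON(!in_softirq());		// some SOFTIRQ_MASK bits are set
 *	WARN_ON(in_serving_softirq());	// but not the SOFTIRQ_OFFSET bit
 *	local_bh_enable();
 */
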
90 /*
91  * This one is for softirq.c-internal use,
92  * where hardirqs are disabled legitimately:
93  */
94 #ifdef CONFIG_TRACE_IRQFLAGS
95 static void __local_bh_disable(unsigned long ip, unsigned int cnt)
96 {
97 	unsigned long flags;
98 
99 	WARN_ON_ONCE(in_irq());
100 
101 	raw_local_irq_save(flags);
102 	/*
103 	 * The preempt tracer hooks into add_preempt_count and will break
104 	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
105 	 * is set and before current->softirq_enabled is cleared.
106 	 * We must manually increment preempt_count here and manually
107 	 * call the trace_preempt_off later.
108 	 */
109 	preempt_count() += cnt;
110 	/*
111 	 * Were softirqs turned off above:
112 	 */
113 	if (softirq_count() == cnt)
114 		trace_softirqs_off(ip);
115 	raw_local_irq_restore(flags);
116 
117 	if (preempt_count() == cnt)
118 		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
119 }
120 #else /* !CONFIG_TRACE_IRQFLAGS */
121 static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
122 {
123 	add_preempt_count(cnt);
124 	barrier();
125 }
126 #endif /* CONFIG_TRACE_IRQFLAGS */
127 
128 void local_bh_disable(void)
129 {
130 	__local_bh_disable(_RET_IP_, SOFTIRQ_DISABLE_OFFSET);
131 }
132 
133 EXPORT_SYMBOL(local_bh_disable);
134 
135 static void __local_bh_enable(unsigned int cnt)
136 {
137 	WARN_ON_ONCE(in_irq());
138 	WARN_ON_ONCE(!irqs_disabled());
139 
140 	if (softirq_count() == cnt)
141 		trace_softirqs_on(_RET_IP_);
142 	sub_preempt_count(cnt);
143 }
144 
145 /*
146  * Special-case - softirqs can safely be enabled in
147  * cond_resched_softirq(), or by __do_softirq(),
148  * without processing still-pending softirqs:
149  */
150 void _local_bh_enable(void)
151 {
152 	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
153 }
154 
155 EXPORT_SYMBOL(_local_bh_enable);
156 
157 static inline void _local_bh_enable_ip(unsigned long ip)
158 {
159 	WARN_ON_ONCE(in_irq() || irqs_disabled());
160 #ifdef CONFIG_TRACE_IRQFLAGS
161 	local_irq_disable();
162 #endif
163 	/*
164 	 * Are softirqs going to be turned on now:
165 	 */
166 	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
167 		trace_softirqs_on(ip);
168 	/*
169 	 * Keep preemption disabled until we are done with
170 	 * softirq processing:
171 	 */
172 	sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);
173 
174 	if (unlikely(!in_interrupt() && local_softirq_pending()))
175 		do_softirq();
176 
177 	dec_preempt_count();
178 #ifdef CONFIG_TRACE_IRQFLAGS
179 	local_irq_enable();
180 #endif
181 	preempt_check_resched();
182 }
183 
184 void local_bh_enable(void)
185 {
186 	_local_bh_enable_ip(_RET_IP_);
187 }
188 EXPORT_SYMBOL(local_bh_enable);
189 
190 void local_bh_enable_ip(unsigned long ip)
191 {
192 	_local_bh_enable_ip(ip);
193 }
194 EXPORT_SYMBOL(local_bh_enable_ip);
195 
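/*
 * Illustrative sketch (hypothetical code, not part of this file): the usual
 * way process context serializes against a softirq handler on the same CPU.
 * local_bh_enable() also runs any softirqs raised in the meantime.
 *
 *	local_bh_disable();
 *	foo_update_counters();		// hypothetical; softirqs cannot run
 *					// on this CPU until bh is re-enabled
 *	local_bh_enable();
 *
 * Data shared with softirqs on other CPUs additionally needs a lock,
 * typically taken with spin_lock_bh().
 */
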
196 /*
197  * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
198  * but break the loop if need_resched() is set or after 2 ms.
199  * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
200  * certain cases, such as stop_machine(), jiffies may cease to
201  * increment and so we need the MAX_SOFTIRQ_RESTART limit as
202  * well to make sure we eventually return from this method.
203  *
204  * These limits have been established via experimentation.
205  * The two things to balance are latency and fairness -
206  * we want to handle softirqs as soon as possible, but they
207  * should not be able to lock up the box.
208  */
209 #define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
210 #define MAX_SOFTIRQ_RESTART 10
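/*
 * For example (HZ-dependent, rounded up by msecs_to_jiffies): at HZ=1000 the
 * 2 ms bound is two jiffies, while at HZ=250 or HZ=100 it rounds up to a
 * single jiffy, so the effective time limit is only jiffy-granular.
 */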
211 
212 asmlinkage void __do_softirq(void)
213 {
214 	struct softirq_action *h;
215 	__u32 pending;
216 	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
217 	int cpu;
218 	unsigned long old_flags = current->flags;
219 	int max_restart = MAX_SOFTIRQ_RESTART;
220 
221 	/*
222 	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
223 	 * softirq. A softirq handler such as network RX might set PF_MEMALLOC
224 	 * again if the socket is related to swap.
225 	 */
226 	current->flags &= ~PF_MEMALLOC;
227 
228 	pending = local_softirq_pending();
229 	account_irq_enter_time(current);
230 
231 	__local_bh_disable(_RET_IP_, SOFTIRQ_OFFSET);
232 	lockdep_softirq_enter();
233 
234 	cpu = smp_processor_id();
235 restart:
236 	/* Reset the pending bitmask before enabling irqs */
237 	set_softirq_pending(0);
238 
239 	local_irq_enable();
240 
241 	h = softirq_vec;
242 
243 	do {
244 		if (pending & 1) {
245 			unsigned int vec_nr = h - softirq_vec;
246 			int prev_count = preempt_count();
247 
248 			kstat_incr_softirqs_this_cpu(vec_nr);
249 
250 			trace_softirq_entry(vec_nr);
251 			h->action(h);
252 			trace_softirq_exit(vec_nr);
253 			if (unlikely(prev_count != preempt_count())) {
254 				printk(KERN_ERR "huh, entered softirq %u %s %p "
255 				       "with preempt_count %08x,"
256 				       " exited with %08x?\n", vec_nr,
257 				       softirq_to_name[vec_nr], h->action,
258 				       prev_count, preempt_count());
259 				preempt_count() = prev_count;
260 			}
261 
262 			rcu_bh_qs(cpu);
263 		}
264 		h++;
265 		pending >>= 1;
266 	} while (pending);
267 
268 	local_irq_disable();
269 
270 	pending = local_softirq_pending();
271 	if (pending) {
272 		if (time_before(jiffies, end) && !need_resched() &&
273 		    --max_restart)
274 			goto restart;
275 
276 		wakeup_softirqd();
277 	}
278 
279 	lockdep_softirq_exit();
280 
281 	account_irq_exit_time(current);
282 	__local_bh_enable(SOFTIRQ_OFFSET);
283 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
284 }
285 
286 #ifndef __ARCH_HAS_DO_SOFTIRQ
287 
288 asmlinkage void do_softirq(void)
289 {
290 	__u32 pending;
291 	unsigned long flags;
292 
293 	if (in_interrupt())
294 		return;
295 
296 	local_irq_save(flags);
297 
298 	pending = local_softirq_pending();
299 
300 	if (pending)
301 		__do_softirq();
302 
303 	local_irq_restore(flags);
304 }
305 
306 #endif
307 
308 /*
309  * Enter an interrupt context.
310  */
311 void irq_enter(void)
312 {
313 	int cpu = smp_processor_id();
314 
315 	rcu_irq_enter();
316 	if (is_idle_task(current) && !in_interrupt()) {
317 		/*
318 		 * Prevent raise_softirq from needlessly waking up ksoftirqd
319 		 * here, as softirq will be serviced on return from interrupt.
320 		 */
321 		local_bh_disable();
322 		tick_check_idle(cpu);
323 		_local_bh_enable();
324 	}
325 
326 	__irq_enter();
327 }
328 
329 static inline void invoke_softirq(void)
330 {
331 	if (!force_irqthreads) {
332 		/*
333 		 * We can safely execute softirq on the current stack if
334 		 * it is the irq stack, because it should be near empty
335 		 * at this stage. But we have no way to know if the arch
336 		 * calls irq_exit() on the irq stack. So call softirq
337 		 * in its own stack to prevent from any overrun on top
338 		 * of a potentially deep task stack.
339 		 */
340 		do_softirq();
341 	} else {
342 		wakeup_softirqd();
343 	}
344 }
345 
346 static inline void tick_irq_exit(void)
347 {
348 #ifdef CONFIG_NO_HZ_COMMON
349 	int cpu = smp_processor_id();
350 
351 	/* Make sure that timer wheel updates are propagated */
352 	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
353 		if (!in_interrupt())
354 			tick_nohz_irq_exit();
355 	}
356 #endif
357 }
358 
359 /*
360  * Exit an interrupt context. Process softirqs if needed and possible:
361  */
362 void irq_exit(void)
363 {
364 #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
365 	local_irq_disable();
366 #else
367 	WARN_ON_ONCE(!irqs_disabled());
368 #endif
369 
370 	account_irq_exit_time(current);
371 	trace_hardirq_exit();
372 	sub_preempt_count(HARDIRQ_OFFSET);
373 	if (!in_interrupt() && local_softirq_pending())
374 		invoke_softirq();
375 
376 	tick_irq_exit();
377 	rcu_irq_exit();
378 }
379 
380 /*
381  * This function must run with irqs disabled!
382  */
383 inline void raise_softirq_irqoff(unsigned int nr)
384 {
385 	__raise_softirq_irqoff(nr);
386 
387 	/*
388 	 * If we're in an interrupt or softirq, we're done
389 	 * (this also catches softirq-disabled code). We will
390 	 * actually run the softirq once we return from
391 	 * the irq or softirq.
392 	 *
393 	 * Otherwise we wake up ksoftirqd to make sure we
394 	 * schedule the softirq soon.
395 	 */
396 	if (!in_interrupt())
397 		wakeup_softirqd();
398 }
399 
400 void raise_softirq(unsigned int nr)
401 {
402 	unsigned long flags;
403 
404 	local_irq_save(flags);
405 	raise_softirq_irqoff(nr);
406 	local_irq_restore(flags);
407 }
408 
409 void __raise_softirq_irqoff(unsigned int nr)
410 {
411 	trace_softirq_raise(nr);
412 	or_softirq_pending(1UL << nr);
413 }
414 
415 void open_softirq(int nr, void (*action)(struct softirq_action *))
416 {
417 	softirq_vec[nr].action = action;
418 }
419 
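/*
 * Illustrative sketch (hypothetical driver code, not part of this file): a
 * hardirq handler already runs with interrupts disabled, so it can use
 * raise_softirq_irqoff() directly; since in_interrupt() is true there, no
 * ksoftirqd wakeup is needed and the vector is serviced from irq_exit().
 *
 *	static irqreturn_t foo_irq_handler(int irq, void *dev_id)
 *	{
 *		// ack hardware, queue work on a per-cpu list, ...
 *		raise_softirq_irqoff(FOO_SOFTIRQ);
 *		return IRQ_HANDLED;
 *	}
 */
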
420 /*
421  * Tasklets
422  */
423 struct tasklet_head
424 {
425 	struct tasklet_struct *head;
426 	struct tasklet_struct **tail;
427 };
428 
429 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
430 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
431 
432 void __tasklet_schedule(struct tasklet_struct *t)
433 {
434 	unsigned long flags;
435 
436 	local_irq_save(flags);
437 	t->next = NULL;
438 	*__this_cpu_read(tasklet_vec.tail) = t;
439 	__this_cpu_write(tasklet_vec.tail, &(t->next));
440 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
441 	local_irq_restore(flags);
442 }
443 
444 EXPORT_SYMBOL(__tasklet_schedule);
445 
446 void __tasklet_hi_schedule(struct tasklet_struct *t)
447 {
448 	unsigned long flags;
449 
450 	local_irq_save(flags);
451 	t->next = NULL;
452 	*__this_cpu_read(tasklet_hi_vec.tail) = t;
453 	__this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
454 	raise_softirq_irqoff(HI_SOFTIRQ);
455 	local_irq_restore(flags);
456 }
457 
458 EXPORT_SYMBOL(__tasklet_hi_schedule);
459 
460 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
461 {
462 	BUG_ON(!irqs_disabled());
463 
464 	t->next = __this_cpu_read(tasklet_hi_vec.head);
465 	__this_cpu_write(tasklet_hi_vec.head, t);
466 	__raise_softirq_irqoff(HI_SOFTIRQ);
467 }
468 
469 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
470 
471 static void tasklet_action(struct softirq_action *a)
472 {
473 	struct tasklet_struct *list;
474 
475 	local_irq_disable();
476 	list = __this_cpu_read(tasklet_vec.head);
477 	__this_cpu_write(tasklet_vec.head, NULL);
478 	__this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
479 	local_irq_enable();
480 
481 	while (list) {
482 		struct tasklet_struct *t = list;
483 
484 		list = list->next;
485 
486 		if (tasklet_trylock(t)) {
487 			if (!atomic_read(&t->count)) {
488 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
489 					BUG();
490 				t->func(t->data);
491 				tasklet_unlock(t);
492 				continue;
493 			}
494 			tasklet_unlock(t);
495 		}
496 
497 		local_irq_disable();
498 		t->next = NULL;
499 		*__this_cpu_read(tasklet_vec.tail) = t;
500 		__this_cpu_write(tasklet_vec.tail, &(t->next));
501 		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
502 		local_irq_enable();
503 	}
504 }
505 
506 static void tasklet_hi_action(struct softirq_action *a)
507 {
508 	struct tasklet_struct *list;
509 
510 	local_irq_disable();
511 	list = __this_cpu_read(tasklet_hi_vec.head);
512 	__this_cpu_write(tasklet_hi_vec.head, NULL);
513 	__this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
514 	local_irq_enable();
515 
516 	while (list) {
517 		struct tasklet_struct *t = list;
518 
519 		list = list->next;
520 
521 		if (tasklet_trylock(t)) {
522 			if (!atomic_read(&t->count)) {
523 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
524 					BUG();
525 				t->func(t->data);
526 				tasklet_unlock(t);
527 				continue;
528 			}
529 			tasklet_unlock(t);
530 		}
531 
532 		local_irq_disable();
533 		t->next = NULL;
534 		*__this_cpu_read(tasklet_hi_vec.tail) = t;
535 		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
536 		__raise_softirq_irqoff(HI_SOFTIRQ);
537 		local_irq_enable();
538 	}
539 }
540 
541 
542 void tasklet_init(struct tasklet_struct *t,
543 		  void (*func)(unsigned long), unsigned long data)
544 {
545 	t->next = NULL;
546 	t->state = 0;
547 	atomic_set(&t->count, 0);
548 	t->func = func;
549 	t->data = data;
550 }
551 
552 EXPORT_SYMBOL(tasklet_init);
553 
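/*
 * Illustrative sketch (hypothetical driver code, not part of this file): the
 * usual tasklet life cycle; foo_* names are made up.
 *
 *	static void foo_tasklet_fn(unsigned long data)
 *	{
 *		struct foo_dev *foo = (struct foo_dev *)data;
 *		// softirq context; never runs on two CPUs at once
 *	}
 *
 *	tasklet_init(&foo->tasklet, foo_tasklet_fn, (unsigned long)foo);
 *	...
 *	tasklet_schedule(&foo->tasklet);	// e.g. from foo's irq handler
 *	...
 *	tasklet_kill(&foo->tasklet);		// before freeing foo
 */
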
554 void tasklet_kill(struct tasklet_struct *t)
555 {
556 	if (in_interrupt())
557 		printk(KERN_WARNING "Attempt to kill tasklet from interrupt\n");
558 
559 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
560 		do {
561 			yield();
562 		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
563 	}
564 	tasklet_unlock_wait(t);
565 	clear_bit(TASKLET_STATE_SCHED, &t->state);
566 }
567 
568 EXPORT_SYMBOL(tasklet_kill);
569 
570 /*
571  * tasklet_hrtimer
572  */
573 
574 /*
575  * The trampoline is called when the hrtimer expires. It schedules a tasklet
576  * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
577  * hrtimer callback, but from softirq context.
578  */
579 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
580 {
581 	struct tasklet_hrtimer *ttimer =
582 		container_of(timer, struct tasklet_hrtimer, timer);
583 
584 	tasklet_hi_schedule(&ttimer->tasklet);
585 	return HRTIMER_NORESTART;
586 }
587 
588 /*
589  * Helper function which calls the hrtimer callback from
590  * tasklet/softirq context
591  */
592 static void __tasklet_hrtimer_trampoline(unsigned long data)
593 {
594 	struct tasklet_hrtimer *ttimer = (void *)data;
595 	enum hrtimer_restart restart;
596 
597 	restart = ttimer->function(&ttimer->timer);
598 	if (restart != HRTIMER_NORESTART)
599 		hrtimer_restart(&ttimer->timer);
600 }
601 
602 /**
603  * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
604  * @ttimer:	 tasklet_hrtimer which is initialized
605  * @function:	 hrtimer callback function which gets called from softirq context
606  * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
607  * @mode:	 hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
608  */
609 void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
610 			  enum hrtimer_restart (*function)(struct hrtimer *),
611 			  clockid_t which_clock, enum hrtimer_mode mode)
612 {
613 	hrtimer_init(&ttimer->timer, which_clock, mode);
614 	ttimer->timer.function = __hrtimer_tasklet_trampoline;
615 	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
616 		     (unsigned long)ttimer);
617 	ttimer->function = function;
618 }
619 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
620 
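/*
 * Illustrative sketch (hypothetical code, not part of this file): using the
 * combo so that foo_timeout() runs from HI_SOFTIRQ tasklet context instead
 * of hard interrupt context.
 *
 *	static enum hrtimer_restart foo_timeout(struct hrtimer *t)
 *	{
 *		// softirq context, courtesy of the trampolines above
 *		return HRTIMER_NORESTART;
 *	}
 *
 *	tasklet_hrtimer_init(&foo->ttimer, foo_timeout,
 *			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	tasklet_hrtimer_start(&foo->ttimer, ktime_set(0, 100 * NSEC_PER_MSEC),
 *			      HRTIMER_MODE_REL);
 */
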
621 /*
622  * Remote softirq bits
623  */
624 
625 DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
626 EXPORT_PER_CPU_SYMBOL(softirq_work_list);
627 
628 static void __local_trigger(struct call_single_data *cp, int softirq)
629 {
630 	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
631 
632 	list_add_tail(&cp->list, head);
633 
634 	/* Trigger the softirq only if the list was previously empty.  */
635 	if (head->next == &cp->list)
636 		raise_softirq_irqoff(softirq);
637 }
638 
639 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
640 static void remote_softirq_receive(void *data)
641 {
642 	struct call_single_data *cp = data;
643 	unsigned long flags;
644 	int softirq;
645 
646 	softirq = *(int *)cp->info;
647 	local_irq_save(flags);
648 	__local_trigger(cp, softirq);
649 	local_irq_restore(flags);
650 }
651 
652 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
653 {
654 	if (cpu_online(cpu)) {
655 		cp->func = remote_softirq_receive;
656 		cp->info = &softirq;
657 		cp->flags = 0;
658 
659 		__smp_call_function_single(cpu, cp, 0);
660 		return 0;
661 	}
662 	return 1;
663 }
664 #else /* CONFIG_USE_GENERIC_SMP_HELPERS */
665 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
666 {
667 	return 1;
668 }
669 #endif
670 
671 /**
672  * __send_remote_softirq - try to schedule softirq work on a remote cpu
673  * @cp: private SMP call function data area
674  * @cpu: the remote cpu
675  * @this_cpu: the currently executing cpu
676  * @softirq: the softirq for the work
677  *
678  * Attempt to schedule softirq work on a remote cpu.  If this cannot be
679  * done, the work is instead queued up on the local cpu.
680  *
681  * Interrupts must be disabled.
682  */
683 void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
684 {
685 	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
686 		__local_trigger(cp, softirq);
687 }
688 EXPORT_SYMBOL(__send_remote_softirq);
689 
690 /**
691  * send_remote_softirq - try to schedule softirq work on a remote cpu
692  * @cp: private SMP call function data area
693  * @cpu: the remote cpu
694  * @softirq: the softirq for the work
695  *
696  * Like __send_remote_softirq except that disabling interrupts and
697  * computing the current cpu is done for the caller.
698  */
699 void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
700 {
701 	unsigned long flags;
702 	int this_cpu;
703 
704 	local_irq_save(flags);
705 	this_cpu = smp_processor_id();
706 	__send_remote_softirq(cp, cpu, this_cpu, softirq);
707 	local_irq_restore(flags);
708 }
709 EXPORT_SYMBOL(send_remote_softirq);
710 
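/*
 * Illustrative sketch (hypothetical code, not part of this file): completing
 * work on the CPU that originally submitted it.  FOO_SOFTIRQ and struct
 * foo_req are made-up names; the handler registered for that vector would
 * consume the per-cpu softirq_work_list[FOO_SOFTIRQ] entries queued by
 * __local_trigger().
 *
 *	struct foo_req {
 *		struct call_single_data csd;
 *		int submit_cpu;
 *	};
 *
 *	static void foo_complete(struct foo_req *req)
 *	{
 *		send_remote_softirq(&req->csd, req->submit_cpu, FOO_SOFTIRQ);
 *	}
 */
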
711 static int remote_softirq_cpu_notify(struct notifier_block *self,
712 					       unsigned long action, void *hcpu)
713 {
714 	/*
715 	 * If a CPU goes away, splice its entries to the current CPU
716 	 * and trigger a run of the softirq
717 	 */
718 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
719 		int cpu = (unsigned long) hcpu;
720 		int i;
721 
722 		local_irq_disable();
723 		for (i = 0; i < NR_SOFTIRQS; i++) {
724 			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
725 			struct list_head *local_head;
726 
727 			if (list_empty(head))
728 				continue;
729 
730 			local_head = &__get_cpu_var(softirq_work_list[i]);
731 			list_splice_init(head, local_head);
732 			raise_softirq_irqoff(i);
733 		}
734 		local_irq_enable();
735 	}
736 
737 	return NOTIFY_OK;
738 }
739 
740 static struct notifier_block remote_softirq_cpu_notifier = {
741 	.notifier_call	= remote_softirq_cpu_notify,
742 };
743 
744 void __init softirq_init(void)
745 {
746 	int cpu;
747 
748 	for_each_possible_cpu(cpu) {
749 		int i;
750 
751 		per_cpu(tasklet_vec, cpu).tail =
752 			&per_cpu(tasklet_vec, cpu).head;
753 		per_cpu(tasklet_hi_vec, cpu).tail =
754 			&per_cpu(tasklet_hi_vec, cpu).head;
755 		for (i = 0; i < NR_SOFTIRQS; i++)
756 			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
757 	}
758 
759 	register_hotcpu_notifier(&remote_softirq_cpu_notifier);
760 
761 	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
762 	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
763 }
764 
765 static int ksoftirqd_should_run(unsigned int cpu)
766 {
767 	return local_softirq_pending();
768 }
769 
770 static void run_ksoftirqd(unsigned int cpu)
771 {
772 	local_irq_disable();
773 	if (local_softirq_pending()) {
774 		__do_softirq();
775 		rcu_note_context_switch(cpu);
776 		local_irq_enable();
777 		cond_resched();
778 		return;
779 	}
780 	local_irq_enable();
781 }
782 
783 #ifdef CONFIG_HOTPLUG_CPU
784 /*
785  * tasklet_kill_immediate is called to remove a tasklet which can already be
786  * scheduled for execution on @cpu.
787  *
788  * Unlike tasklet_kill, this function removes the tasklet
789  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
790  *
791  * When this function is called, @cpu must be in the CPU_DEAD state.
792  */
793 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
794 {
795 	struct tasklet_struct **i;
796 
797 	BUG_ON(cpu_online(cpu));
798 	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
799 
800 	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
801 		return;
802 
803 	/* CPU is dead, so no lock needed. */
804 	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
805 		if (*i == t) {
806 			*i = t->next;
807 			/* If this was the tail element, move the tail ptr */
808 			if (*i == NULL)
809 				per_cpu(tasklet_vec, cpu).tail = i;
810 			return;
811 		}
812 	}
813 	BUG();
814 }
815 
816 static void takeover_tasklets(unsigned int cpu)
817 {
818 	/* CPU is dead, so no lock needed. */
819 	local_irq_disable();
820 
821 	/* Find end, append list for that CPU. */
822 	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
823 		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
824 		this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
825 		per_cpu(tasklet_vec, cpu).head = NULL;
826 		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
827 	}
828 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
829 
830 	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
831 		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
832 		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
833 		per_cpu(tasklet_hi_vec, cpu).head = NULL;
834 		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
835 	}
836 	raise_softirq_irqoff(HI_SOFTIRQ);
837 
838 	local_irq_enable();
839 }
840 #endif /* CONFIG_HOTPLUG_CPU */
841 
842 static int cpu_callback(struct notifier_block *nfb,
843 				  unsigned long action,
844 				  void *hcpu)
845 {
846 	switch (action) {
847 #ifdef CONFIG_HOTPLUG_CPU
848 	case CPU_DEAD:
849 	case CPU_DEAD_FROZEN:
850 		takeover_tasklets((unsigned long)hcpu);
851 		break;
852 #endif /* CONFIG_HOTPLUG_CPU */
853 	}
854 	return NOTIFY_OK;
855 }
856 
857 static struct notifier_block cpu_nfb = {
858 	.notifier_call = cpu_callback
859 };
860 
861 static struct smp_hotplug_thread softirq_threads = {
862 	.store			= &ksoftirqd,
863 	.thread_should_run	= ksoftirqd_should_run,
864 	.thread_fn		= run_ksoftirqd,
865 	.thread_comm		= "ksoftirqd/%u",
866 };
867 
868 static __init int spawn_ksoftirqd(void)
869 {
870 	register_cpu_notifier(&cpu_nfb);
871 
872 	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
873 
874 	return 0;
875 }
876 early_initcall(spawn_ksoftirqd);
877 
878 /*
879  * [ These __weak aliases are kept in a separate compilation unit, so that
880  *   GCC does not inline them incorrectly. ]
881  */
882 
883 int __init __weak early_irq_init(void)
884 {
885 	return 0;
886 }
887 
888 int __init __weak arch_probe_nr_irqs(void)
889 {
890 	return NR_IRQS_LEGACY;
891 }
892 
893 int __init __weak arch_early_irq_init(void)
894 {
895 	return 0;
896 }
897