xref: /openbmc/linux/kernel/softirq.c (revision 5bd8e16d)
1 /*
2  *	linux/kernel/softirq.c
3  *
4  *	Copyright (C) 1992 Linus Torvalds
5  *
6  *	Distribute under GPLv2.
7  *
8  *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  *
10  *	Remote softirq infrastructure is by Jens Axboe.
11  */
12 
13 #include <linux/export.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/smpboot.h>
27 #include <linux/tick.h>
28 
29 #define CREATE_TRACE_POINTS
30 #include <trace/events/irq.h>
31 
32 #include <asm/irq.h>
33 /*
34    - No shared variables, all the data are CPU local.
35    - If a softirq needs serialization, let it serialize itself
36      by its own spinlocks.
37    - Even if softirq is serialized, only local cpu is marked for
38      execution. Hence, we get something sort of weak cpu binding.
39      Though it is still not clear, will it result in better locality
40      or will not.
41 
42    Examples:
43    - NET RX softirq. It is multithreaded and does not require
44      any global serialization.
45    - NET TX softirq. It kicks software netdevice queues, hence
46      it is logically serialized per device, but this serialization
47      is invisible to common code.
48    - Tasklets: serialized wrt itself.
49  */
50 
/*
 * Generic irq_cpustat_t array with NR_CPUS entries, defined (and exported)
 * here only when __ARCH_IRQ_STAT is not defined, i.e. when no other
 * definition has been announced through that macro.
 */
51 #ifndef __ARCH_IRQ_STAT
52 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
53 EXPORT_SYMBOL(irq_stat);
54 #endif
55 
/*
 * Handler table: one softirq_action slot per softirq number.  Slots are
 * filled in by open_softirq() and walked by __do_softirq().
 */
56 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
57 
/*
 * Per-CPU pointer to that CPU's ksoftirqd task.  Read by wakeup_softirqd()
 * and handed to the smpboot layer through softirq_threads.store.
 */
58 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
59 
60 char *softirq_to_name[NR_SOFTIRQS] = {
61 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
62 	"TASKLET", "SCHED", "HRTIMER", "RCU"
63 };
64 
65 /*
66  * we cannot loop indefinitely here to avoid userspace starvation,
67  * but we also don't want to introduce a worst case 1/HZ latency
68  * to the pending events, so lets the scheduler to balance
69  * the softirq load for us.
70  */
71 static void wakeup_softirqd(void)
72 {
73 	/* Interrupts are disabled: no need to stop preemption */
74 	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
75 
76 	if (tsk && tsk->state != TASK_RUNNING)
77 		wake_up_process(tsk);
78 }
79 
80 /*
81  * preempt_count and SOFTIRQ_OFFSET usage:
82  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
83  *   softirq processing.
84  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
85  *   on local_bh_disable or local_bh_enable.
86  * This lets us distinguish between whether we are currently processing
87  * softirq and whether we just have bh disabled.
88  */
89 
90 /*
91  * This one is for softirq.c-internal use,
92  * where hardirqs are disabled legitimately:
93  */
/*
 * __local_bh_disable - raise preempt_count by @cnt to hold off softirqs
 * @ip:  address reported to trace_softirqs_off() (unused without
 *       CONFIG_TRACE_IRQFLAGS)
 * @cnt: amount added to preempt_count; callers in this file pass
 *       SOFTIRQ_OFFSET or SOFTIRQ_DISABLE_OFFSET
 *
 * With CONFIG_TRACE_IRQFLAGS the count is bumped by hand, with interrupts
 * off, so that the softirq-off and preempt-off trace hooks can be called
 * in a controlled order.  Without it this is just add_preempt_count()
 * followed by a compiler barrier.
 */
94 #ifdef CONFIG_TRACE_IRQFLAGS
95 static void __local_bh_disable(unsigned long ip, unsigned int cnt)
96 {
97 	unsigned long flags;
98 
99 	WARN_ON_ONCE(in_irq());
100 
101 	raw_local_irq_save(flags);
102 	/*
103 	 * The preempt tracer hooks into add_preempt_count and will break
104 	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
105 	 * is set and before current->softirq_enabled is cleared.
106 	 * We must manually increment preempt_count here and manually
107 	 * call the trace_preempt_off later.
108 	 */
109 	preempt_count() += cnt;
110 	/*
111 	 * Were softirqs turned off above:
112 	 */
113 	if (softirq_count() == cnt)
114 		trace_softirqs_off(ip);
115 	raw_local_irq_restore(flags);
116 
	/*
	 * The whole count equals what we just added, so preemption went
	 * from enabled to disabled here: emit the deferred preempt-off trace.
	 */
117 	if (preempt_count() == cnt)
118 		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
119 }
120 #else /* !CONFIG_TRACE_IRQFLAGS */
121 static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
122 {
123 	add_preempt_count(cnt);
	/* Compiler barrier placed right after the count update. */
124 	barrier();
125 }
126 #endif /* CONFIG_TRACE_IRQFLAGS */
127 
/*
 * local_bh_disable - disable softirq processing
 *
 * Adds SOFTIRQ_DISABLE_OFFSET to preempt_count through
 * __local_bh_disable(), passing the caller's return address (_RET_IP_)
 * for the tracer.
 */
128 void local_bh_disable(void)
129 {
130 	__local_bh_disable(_RET_IP_, SOFTIRQ_DISABLE_OFFSET);
131 }
132 
133 EXPORT_SYMBOL(local_bh_disable);
134 
135 static void __local_bh_enable(unsigned int cnt)
136 {
137 	WARN_ON_ONCE(in_irq());
138 	WARN_ON_ONCE(!irqs_disabled());
139 
140 	if (softirq_count() == cnt)
141 		trace_softirqs_on(_RET_IP_);
142 	sub_preempt_count(cnt);
143 }
144 
/*
 * _local_bh_enable - drop SOFTIRQ_DISABLE_OFFSET without running softirqs
 *
 * Wrapper around __local_bh_enable(), which warns unless it is called
 * outside hardirq context and with interrupts disabled.
 */
145 /*
146  * Special-case - softirqs can safely be enabled in
147  * cond_resched_softirq(), or by __do_softirq(),
148  * without processing still-pending softirqs:
149  */
150 void _local_bh_enable(void)
151 {
152 	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
153 }
154 
155 EXPORT_SYMBOL(_local_bh_enable);
156 
/*
 * _local_bh_enable_ip - re-enable softirqs and run any that are pending
 * @ip: address reported to trace_softirqs_on()
 *
 * Common body of local_bh_enable() and local_bh_enable_ip().  Warns once
 * if called from hardirq context or with interrupts disabled.  The count
 * is dropped in two steps so that preemption stays disabled while
 * do_softirq() runs.
 */
157 static inline void _local_bh_enable_ip(unsigned long ip)
158 {
159 	WARN_ON_ONCE(in_irq() || irqs_disabled());
160 #ifdef CONFIG_TRACE_IRQFLAGS
161 	local_irq_disable();
162 #endif
163 	/*
164 	 * Are softirqs going to be turned on now:
165 	 */
166 	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
167 		trace_softirqs_on(ip);
168 	/*
169 	 * Keep preemption disabled until we are done with
170 	 * softirq processing:
171 	 */
	/* Leaves a residual count of 1, removed by dec_preempt_count() below. */
172 	sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);
173 
174 	if (unlikely(!in_interrupt() && local_softirq_pending()))
175 		do_softirq();
176 
177 	dec_preempt_count();
178 #ifdef CONFIG_TRACE_IRQFLAGS
179 	local_irq_enable();
180 #endif
181 	preempt_check_resched();
182 }
183 
/*
 * local_bh_enable - re-enable softirqs, running any that are pending
 *
 * Calls _local_bh_enable_ip() with the caller's return address (_RET_IP_)
 * as the address reported to the tracer.
 */
184 void local_bh_enable(void)
185 {
186 	_local_bh_enable_ip(_RET_IP_);
187 }
188 EXPORT_SYMBOL(local_bh_enable);
189 
/*
 * local_bh_enable_ip - like local_bh_enable(), with an explicit trace address
 * @ip: address reported to the tracer instead of this function's caller
 */
190 void local_bh_enable_ip(unsigned long ip)
191 {
192 	_local_bh_enable_ip(ip);
193 }
194 EXPORT_SYMBOL(local_bh_enable_ip);
195 
196 /*
197  * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
198  * but break the loop if need_resched() is set or after 2 ms.
199  * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
200  * certain cases, such as stop_machine(), jiffies may cease to
201  * increment and so we need the MAX_SOFTIRQ_RESTART limit as
202  * well to make sure we eventually return from this method.
203  *
204  * These limits have been established via experimentation.
205  * The two things to balance are latency against fairness -
206  * we want to handle softirqs as soon as possible, but they
207  * should not be able to lock up the box.
208  */
/* Time budget for one __do_softirq() call: 2 ms converted to jiffies. */
209 #define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
/* Initial value of the pass counter that bounds the restart loop in __do_softirq(). */
210 #define MAX_SOFTIRQ_RESTART 10
211 
212 asmlinkage void __do_softirq(void)
213 {
214 	struct softirq_action *h;
215 	__u32 pending;
216 	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
217 	int cpu;
218 	unsigned long old_flags = current->flags;
219 	int max_restart = MAX_SOFTIRQ_RESTART;
220 
221 	/*
222 	 * Mask out PF_MEMALLOC s current task context is borrowed for the
223 	 * softirq. A softirq handled such as network RX might set PF_MEMALLOC
224 	 * again if the socket is related to swap
225 	 */
226 	current->flags &= ~PF_MEMALLOC;
227 
228 	pending = local_softirq_pending();
229 	account_irq_enter_time(current);
230 
231 	__local_bh_disable(_RET_IP_, SOFTIRQ_OFFSET);
232 	lockdep_softirq_enter();
233 
234 	cpu = smp_processor_id();
235 restart:
236 	/* Reset the pending bitmask before enabling irqs */
237 	set_softirq_pending(0);
238 
239 	local_irq_enable();
240 
241 	h = softirq_vec;
242 
243 	do {
244 		if (pending & 1) {
245 			unsigned int vec_nr = h - softirq_vec;
246 			int prev_count = preempt_count();
247 
248 			kstat_incr_softirqs_this_cpu(vec_nr);
249 
250 			trace_softirq_entry(vec_nr);
251 			h->action(h);
252 			trace_softirq_exit(vec_nr);
253 			if (unlikely(prev_count != preempt_count())) {
254 				printk(KERN_ERR "huh, entered softirq %u %s %p"
255 				       "with preempt_count %08x,"
256 				       " exited with %08x?\n", vec_nr,
257 				       softirq_to_name[vec_nr], h->action,
258 				       prev_count, preempt_count());
259 				preempt_count() = prev_count;
260 			}
261 
262 			rcu_bh_qs(cpu);
263 		}
264 		h++;
265 		pending >>= 1;
266 	} while (pending);
267 
268 	local_irq_disable();
269 
270 	pending = local_softirq_pending();
271 	if (pending) {
272 		if (time_before(jiffies, end) && !need_resched() &&
273 		    --max_restart)
274 			goto restart;
275 
276 		wakeup_softirqd();
277 	}
278 
279 	lockdep_softirq_exit();
280 
281 	account_irq_exit_time(current);
282 	__local_bh_enable(SOFTIRQ_OFFSET);
283 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
284 }
285 
286 #ifndef __ARCH_HAS_DO_SOFTIRQ
287 
288 asmlinkage void do_softirq(void)
289 {
290 	__u32 pending;
291 	unsigned long flags;
292 
293 	if (in_interrupt())
294 		return;
295 
296 	local_irq_save(flags);
297 
298 	pending = local_softirq_pending();
299 
300 	if (pending)
301 		__do_softirq();
302 
303 	local_irq_restore(flags);
304 }
305 
306 #endif
307 
308 /*
309  * Enter an interrupt context.
 *
 * Notifies RCU first.  If the interrupt arrived while the idle task was
 * running and we are not already in interrupt context, tick_check_idle()
 * is called with bottom halves disabled; they are re-enabled through
 * _local_bh_enable(), which does not run pending softirqs.  Finally
 * __irq_enter() is called.
310  */
311 void irq_enter(void)
312 {
313 	int cpu = smp_processor_id();
314 
315 	rcu_irq_enter();
316 	if (is_idle_task(current) && !in_interrupt()) {
317 		/*
318 		 * Prevent raise_softirq from needlessly waking up ksoftirqd
319 		 * here, as softirq will be serviced on return from interrupt.
320 		 */
321 		local_bh_disable();
322 		tick_check_idle(cpu);
323 		_local_bh_enable();
324 	}
325 
326 	__irq_enter();
327 }
328 
329 static inline void invoke_softirq(void)
330 {
331 	if (!force_irqthreads)
332 		__do_softirq();
333 	else
334 		wakeup_softirqd();
335 }
336 
/*
 * Tick bookkeeping on interrupt exit.  Compiles to an empty function
 * unless CONFIG_NO_HZ_COMMON is set.
 */
static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();
	bool idle_quiet = idle_cpu(cpu) && !need_resched();

	/* Make sure that timer wheel updates are propagated */
	if (!idle_quiet && !tick_nohz_full_cpu(cpu))
		return;

	if (!in_interrupt())
		tick_nohz_irq_exit();
#endif
}
349 
350 /*
351  * Exit an interrupt context. Process softirqs if needed and possible:
 *
 * Interrupts are disabled here unless __ARCH_IRQ_EXIT_IRQS_DISABLED is
 * defined, in which case they are expected to be disabled already (a
 * one-shot warning fires otherwise).  HARDIRQ_OFFSET is dropped from
 * preempt_count before the softirq check so that in_interrupt() reflects
 * only what remains (an outer interrupt, or softirqs being processed or
 * disabled).
352  */
353 void irq_exit(void)
354 {
355 #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
356 	local_irq_disable();
357 #else
358 	WARN_ON_ONCE(!irqs_disabled());
359 #endif
360 
361 	account_irq_exit_time(current);
362 	trace_hardirq_exit();
363 	sub_preempt_count(HARDIRQ_OFFSET);
364 	if (!in_interrupt() && local_softirq_pending())
365 		invoke_softirq();
366 
367 	tick_irq_exit();
368 	rcu_irq_exit();
369 }
370 
/*
 * Mark softirq @nr pending and make sure it will run soon.
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * In an interrupt or softirq (this also covers code that merely
	 * has softirqs disabled) nothing more is needed: the softirq runs
	 * when we return from the irq or softirq.
	 */
	if (in_interrupt())
		return;

	/* Otherwise wake ksoftirqd so the softirq is scheduled soon. */
	wakeup_softirqd();
}
390 
/*
 * raise_softirq - mark softirq @nr pending from any interrupt state
 * @nr: softirq number
 *
 * Saves and disables local interrupts around raise_softirq_irqoff(),
 * then restores the previous interrupt state.
 */
391 void raise_softirq(unsigned int nr)
392 {
393 	unsigned long flags;
394 
395 	local_irq_save(flags);
396 	raise_softirq_irqoff(nr);
397 	local_irq_restore(flags);
398 }
399 
/*
 * __raise_softirq_irqoff - set the pending bit for softirq @nr
 * @nr: softirq number
 *
 * Emits the softirq_raise tracepoint and ORs bit @nr into the pending
 * mask.  Unlike raise_softirq_irqoff() it never wakes ksoftirqd.
 */
400 void __raise_softirq_irqoff(unsigned int nr)
401 {
402 	trace_softirq_raise(nr);
403 	or_softirq_pending(1UL << nr);
404 }
405 
/*
 * open_softirq - install the handler for a softirq
 * @nr:     softirq number, used directly as the index into softirq_vec
 *          (no range check is performed here)
 * @action: function __do_softirq() calls when softirq @nr is pending
 */
406 void open_softirq(int nr, void (*action)(struct softirq_action *))
407 {
408 	softirq_vec[nr].action = action;
409 }
410 
411 /*
412  * Tasklets
413  */
414 struct tasklet_head
415 {
416 	struct tasklet_struct *head;
417 	struct tasklet_struct **tail;
418 };
419 
420 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
421 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
422 
423 void __tasklet_schedule(struct tasklet_struct *t)
424 {
425 	unsigned long flags;
426 
427 	local_irq_save(flags);
428 	t->next = NULL;
429 	*__this_cpu_read(tasklet_vec.tail) = t;
430 	__this_cpu_write(tasklet_vec.tail, &(t->next));
431 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
432 	local_irq_restore(flags);
433 }
434 
435 EXPORT_SYMBOL(__tasklet_schedule);
436 
437 void __tasklet_hi_schedule(struct tasklet_struct *t)
438 {
439 	unsigned long flags;
440 
441 	local_irq_save(flags);
442 	t->next = NULL;
443 	*__this_cpu_read(tasklet_hi_vec.tail) = t;
444 	__this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
445 	raise_softirq_irqoff(HI_SOFTIRQ);
446 	local_irq_restore(flags);
447 }
448 
449 EXPORT_SYMBOL(__tasklet_hi_schedule);
450 
451 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
452 {
453 	BUG_ON(!irqs_disabled());
454 
455 	t->next = __this_cpu_read(tasklet_hi_vec.head);
456 	__this_cpu_write(tasklet_hi_vec.head, t);
457 	__raise_softirq_irqoff(HI_SOFTIRQ);
458 }
459 
460 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
461 
/*
 * tasklet_action - TASKLET_SOFTIRQ handler (installed by softirq_init())
 * @a: softirq_action slot being run (unused)
 *
 * Detaches this CPU's whole normal-priority queue with interrupts off,
 * then walks it with interrupts on.  A tasklet runs only if its lock can
 * be taken and its count is zero; otherwise it is put back on the queue
 * and the softirq is raised again.
 */
462 static void tasklet_action(struct softirq_action *a)
463 {
464 	struct tasklet_struct *list;
465 
	/* Take the whole queue and leave an empty one behind. */
466 	local_irq_disable();
467 	list = __this_cpu_read(tasklet_vec.head);
468 	__this_cpu_write(tasklet_vec.head, NULL);
469 	__this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
470 	local_irq_enable();
471 
472 	while (list) {
473 		struct tasklet_struct *t = list;
474 
475 		list = list->next;
476 
477 		if (tasklet_trylock(t)) {
478 			if (!atomic_read(&t->count)) {
				/* A queued tasklet must still have SCHED set. */
479 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
480 					BUG();
481 				t->func(t->data);
482 				tasklet_unlock(t);
483 				continue;
484 			}
485 			tasklet_unlock(t);
486 		}
487 
		/* Could not run it now: requeue at the tail and re-raise. */
488 		local_irq_disable();
489 		t->next = NULL;
490 		*__this_cpu_read(tasklet_vec.tail) = t;
491 		__this_cpu_write(tasklet_vec.tail, &(t->next));
492 		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
493 		local_irq_enable();
494 	}
495 }
496 
/*
 * tasklet_hi_action - HI_SOFTIRQ handler (installed by softirq_init())
 * @a: softirq_action slot being run (unused)
 *
 * Same algorithm as tasklet_action(), applied to the per-CPU
 * tasklet_hi_vec queue and re-raising HI_SOFTIRQ for requeued tasklets.
 */
497 static void tasklet_hi_action(struct softirq_action *a)
498 {
499 	struct tasklet_struct *list;
500 
	/* Take the whole queue and leave an empty one behind. */
501 	local_irq_disable();
502 	list = __this_cpu_read(tasklet_hi_vec.head);
503 	__this_cpu_write(tasklet_hi_vec.head, NULL);
504 	__this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
505 	local_irq_enable();
506 
507 	while (list) {
508 		struct tasklet_struct *t = list;
509 
510 		list = list->next;
511 
512 		if (tasklet_trylock(t)) {
513 			if (!atomic_read(&t->count)) {
				/* A queued tasklet must still have SCHED set. */
514 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
515 					BUG();
516 				t->func(t->data);
517 				tasklet_unlock(t);
518 				continue;
519 			}
520 			tasklet_unlock(t);
521 		}
522 
		/* Could not run it now: requeue at the tail and re-raise. */
523 		local_irq_disable();
524 		t->next = NULL;
525 		*__this_cpu_read(tasklet_hi_vec.tail) = t;
526 		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
527 		__raise_softirq_irqoff(HI_SOFTIRQ);
528 		local_irq_enable();
529 	}
530 }
531 
532 
533 void tasklet_init(struct tasklet_struct *t,
534 		  void (*func)(unsigned long), unsigned long data)
535 {
536 	t->next = NULL;
537 	t->state = 0;
538 	atomic_set(&t->count, 0);
539 	t->func = func;
540 	t->data = data;
541 }
542 
543 EXPORT_SYMBOL(tasklet_init);
544 
545 void tasklet_kill(struct tasklet_struct *t)
546 {
547 	if (in_interrupt())
548 		printk("Attempt to kill tasklet from interrupt\n");
549 
550 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
551 		do {
552 			yield();
553 		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
554 	}
555 	tasklet_unlock_wait(t);
556 	clear_bit(TASKLET_STATE_SCHED, &t->state);
557 }
558 
559 EXPORT_SYMBOL(tasklet_kill);
560 
561 /*
562  * tasklet_hrtimer
563  */
564 
565 /*
566  * The trampoline is called when the hrtimer expires. It schedules a tasklet
567  * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
568  * hrtimer callback, but from softirq context.
569  */
570 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
571 {
572 	struct tasklet_hrtimer *ttimer =
573 		container_of(timer, struct tasklet_hrtimer, timer);
574 
575 	tasklet_hi_schedule(&ttimer->tasklet);
576 	return HRTIMER_NORESTART;
577 }
578 
579 /*
580  * Helper function which calls the hrtimer callback from
581  * tasklet/softirq context
582  */
583 static void __tasklet_hrtimer_trampoline(unsigned long data)
584 {
585 	struct tasklet_hrtimer *ttimer = (void *)data;
586 	enum hrtimer_restart restart;
587 
588 	restart = ttimer->function(&ttimer->timer);
589 	if (restart != HRTIMER_NORESTART)
590 		hrtimer_restart(&ttimer->timer);
591 }
592 
593 /**
594  * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
595  * @ttimer:	 tasklet_hrtimer which is initialized
596  * @function:	 hrtimer callback function which gets called from softirq context
597  * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
598  * @mode:	 hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
599  */
600 void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
601 			  enum hrtimer_restart (*function)(struct hrtimer *),
602 			  clockid_t which_clock, enum hrtimer_mode mode)
603 {
604 	hrtimer_init(&ttimer->timer, which_clock, mode);
605 	ttimer->timer.function = __hrtimer_tasklet_trampoline;
606 	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
607 		     (unsigned long)ttimer);
608 	ttimer->function = function;
609 }
610 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
611 
612 /*
613  * Remote softirq bits
614  */
615 
/*
 * Per-CPU array of work lists, one list per softirq number.  Entries are
 * added by __local_trigger(); the heads are initialised in softirq_init().
 */
616 DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
617 EXPORT_PER_CPU_SYMBOL(softirq_work_list);
618 
619 static void __local_trigger(struct call_single_data *cp, int softirq)
620 {
621 	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
622 
623 	list_add_tail(&cp->list, head);
624 
625 	/* Trigger the softirq only if the list was previously empty.  */
626 	if (head->next == &cp->list)
627 		raise_softirq_irqoff(softirq);
628 }
629 
#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
/*
 * Callback executed on the target CPU: queue the work item on the local
 * list of the requested softirq, with interrupts disabled.
 *
 * NOTE(review): this treats @data as the call_single_data itself; confirm
 * that this matches what the SMP call layer actually passes to ->func.
 */
static void remote_softirq_receive(void *data)
{
	struct call_single_data *cp = data;
	unsigned long flags;
	int softirq;

	/* The softirq number travels by value inside the info pointer. */
	softirq = (int)(unsigned long)cp->info;
	local_irq_save(flags);
	__local_trigger(cp, softirq);
	local_irq_restore(flags);
}

/*
 * Try to hand @cp to @cpu.  Returns 0 when the cross-CPU call was issued
 * and 1 when @cpu is not online, in which case the caller queues locally.
 */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	if (cpu_online(cpu)) {
		cp->func = remote_softirq_receive;
		/*
		 * Encode the number in the pointer value.  Storing &softirq
		 * would leave the receiver with a pointer into this stack
		 * frame, which is gone by the time the call (issued with
		 * wait == 0) runs on the other CPU.
		 */
		cp->info = (void *)(unsigned long)softirq;
		cp->flags = 0;

		__smp_call_function_single(cpu, cp, 0);
		return 0;
	}
	return 1;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
/* No generic SMP helpers: always report failure so the caller queues locally. */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	return 1;
}
#endif
661 
/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 * done, or if @cpu is the executing cpu, the work is queued up on the
 * local cpu instead.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	/* Handed over to another cpu successfully: nothing left to do. */
	if (cpu != this_cpu && !__try_remote_softirq(cp, cpu, softirq))
		return;

	__local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);
680 
/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long irq_state;

	local_irq_save(irq_state);
	__send_remote_softirq(cp, cpu, smp_processor_id(), softirq);
	local_irq_restore(irq_state);
}
EXPORT_SYMBOL(send_remote_softirq);
701 
702 static int remote_softirq_cpu_notify(struct notifier_block *self,
703 					       unsigned long action, void *hcpu)
704 {
705 	/*
706 	 * If a CPU goes away, splice its entries to the current CPU
707 	 * and trigger a run of the softirq
708 	 */
709 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
710 		int cpu = (unsigned long) hcpu;
711 		int i;
712 
713 		local_irq_disable();
714 		for (i = 0; i < NR_SOFTIRQS; i++) {
715 			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
716 			struct list_head *local_head;
717 
718 			if (list_empty(head))
719 				continue;
720 
721 			local_head = &__get_cpu_var(softirq_work_list[i]);
722 			list_splice_init(head, local_head);
723 			raise_softirq_irqoff(i);
724 		}
725 		local_irq_enable();
726 	}
727 
728 	return NOTIFY_OK;
729 }
730 
/* Notifier block registered from softirq_init() via register_hotcpu_notifier(). */
731 static struct notifier_block remote_softirq_cpu_notifier = {
732 	.notifier_call	= remote_softirq_cpu_notify,
733 };
734 
735 void __init softirq_init(void)
736 {
737 	int cpu;
738 
739 	for_each_possible_cpu(cpu) {
740 		int i;
741 
742 		per_cpu(tasklet_vec, cpu).tail =
743 			&per_cpu(tasklet_vec, cpu).head;
744 		per_cpu(tasklet_hi_vec, cpu).tail =
745 			&per_cpu(tasklet_hi_vec, cpu).head;
746 		for (i = 0; i < NR_SOFTIRQS; i++)
747 			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
748 	}
749 
750 	register_hotcpu_notifier(&remote_softirq_cpu_notifier);
751 
752 	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
753 	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
754 }
755 
/*
 * thread_should_run callback of softirq_threads.
 * @cpu: unused
 *
 * Returns the current value of local_softirq_pending(), i.e. non-zero
 * when any softirq is pending.
 */
756 static int ksoftirqd_should_run(unsigned int cpu)
757 {
758 	return local_softirq_pending();
759 }
760 
/*
 * thread_fn callback of softirq_threads: one round of ksoftirqd work.
 *
 * With interrupts disabled, run the pending softirqs (if any), report a
 * context switch to RCU for @cpu, then re-enable interrupts and give the
 * scheduler a chance to run something else.
 */
static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();

	if (!local_softirq_pending()) {
		local_irq_enable();
		return;
	}

	__do_softirq();
	rcu_note_context_switch(cpu);
	local_irq_enable();
	cond_resched();
}
773 
774 #ifdef CONFIG_HOTPLUG_CPU
775 /*
776  * tasklet_kill_immediate is called to remove a tasklet which can already be
777  * scheduled for execution on @cpu.
778  *
779  * Unlike tasklet_kill, this function removes the tasklet
780  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
781  *
782  * When this function is called, @cpu must be in the CPU_DEAD state.
783  */
784 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
785 {
786 	struct tasklet_struct **i;
787 
788 	BUG_ON(cpu_online(cpu));
789 	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
790 
791 	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
792 		return;
793 
794 	/* CPU is dead, so no lock needed. */
795 	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
796 		if (*i == t) {
797 			*i = t->next;
798 			/* If this was the tail element, move the tail ptr */
799 			if (*i == NULL)
800 				per_cpu(tasklet_vec, cpu).tail = i;
801 			return;
802 		}
803 	}
804 	BUG();
805 }
806 
807 static void takeover_tasklets(unsigned int cpu)
808 {
809 	/* CPU is dead, so no lock needed. */
810 	local_irq_disable();
811 
812 	/* Find end, append list for that CPU. */
813 	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
814 		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
815 		this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
816 		per_cpu(tasklet_vec, cpu).head = NULL;
817 		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
818 	}
819 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
820 
821 	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
822 		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
823 		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
824 		per_cpu(tasklet_hi_vec, cpu).head = NULL;
825 		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
826 	}
827 	raise_softirq_irqoff(HI_SOFTIRQ);
828 
829 	local_irq_enable();
830 }
831 #endif /* CONFIG_HOTPLUG_CPU */
832 
833 static int cpu_callback(struct notifier_block *nfb,
834 				  unsigned long action,
835 				  void *hcpu)
836 {
837 	switch (action) {
838 #ifdef CONFIG_HOTPLUG_CPU
839 	case CPU_DEAD:
840 	case CPU_DEAD_FROZEN:
841 		takeover_tasklets((unsigned long)hcpu);
842 		break;
843 #endif /* CONFIG_HOTPLUG_CPU */
844 	}
845 	return NOTIFY_OK;
846 }
847 
/* Notifier block registered from spawn_ksoftirqd() via register_cpu_notifier(). */
848 static struct notifier_block cpu_nfb = {
849 	.notifier_call = cpu_callback
850 };
851 
/*
 * Description of the ksoftirqd threads handed to
 * smpboot_register_percpu_thread(): where the task pointer is stored, the
 * "is there work?" predicate, the work function, and the name pattern
 * (the %u is presumably the CPU number - confirm against smpboot).
 */
852 static struct smp_hotplug_thread softirq_threads = {
853 	.store			= &ksoftirqd,
854 	.thread_should_run	= ksoftirqd_should_run,
855 	.thread_fn		= run_ksoftirqd,
856 	.thread_comm		= "ksoftirqd/%u",
857 };
858 
/*
 * Early initcall: register the tasklet CPU notifier and the ksoftirqd
 * threads.  Failure to register the threads is fatal (BUG_ON).
 * Returns 0.
 */
859 static __init int spawn_ksoftirqd(void)
860 {
861 	register_cpu_notifier(&cpu_nfb);
862 
863 	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
864 
865 	return 0;
866 }
867 early_initcall(spawn_ksoftirqd);
868 
869 /*
870  * [ These __weak aliases are kept in a separate compilation unit, so that
871  *   GCC does not inline them incorrectly. ]
872  */
873 
/* Weak default: does nothing and returns 0; a non-weak definition elsewhere overrides it. */
874 int __init __weak early_irq_init(void)
875 {
876 	return 0;
877 }
878 
/* Weak default: returns NR_IRQS_LEGACY; a non-weak definition elsewhere overrides it. */
879 int __init __weak arch_probe_nr_irqs(void)
880 {
881 	return NR_IRQS_LEGACY;
882 }
883 
/* Weak default: does nothing and returns 0; a non-weak definition elsewhere overrides it. */
884 int __init __weak arch_early_irq_init(void)
885 {
886 	return 0;
887 }
888