xref: /openbmc/linux/kernel/softirq.c (revision 161f4089)
1 /*
2  *	linux/kernel/softirq.c
3  *
4  *	Copyright (C) 1992 Linus Torvalds
5  *
6  *	Distribute under GPLv2.
7  *
8  *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  *
10  *	Remote softirq infrastructure is by Jens Axboe.
11  */
12 
13 #include <linux/export.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/smpboot.h>
27 #include <linux/tick.h>
28 
29 #define CREATE_TRACE_POINTS
30 #include <trace/events/irq.h>
31 
32 /*
33    - No shared variables, all the data are CPU local.
34    - If a softirq needs serialization, let it serialize itself
35      by its own spinlocks.
36    - Even if softirq is serialized, only local cpu is marked for
37      execution. Hence, we get something of a weak cpu binding.
38      Though it is still not clear whether this will result in better
39      locality or not.
40 
41    Examples:
42    - NET RX softirq. It is multithreaded and does not require
43      any global serialization.
44    - NET TX softirq. It kicks software netdevice queues, hence
45      it is logically serialized per device, but this serialization
46      is invisible to common code.
47    - Tasklets: each tasklet is serialized with respect to itself.
48  */
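
/*
 * Illustrative sketch (not part of the original file): a handler that
 * follows the rules above.  Per-CPU state needs no locking at all, and any
 * state shared between CPUs is protected by the handler's own spinlock.
 * The names example_lock, example_list, example_hits and
 * example_softirq_action are hypothetical.
 */
static DEFINE_SPINLOCK(example_lock);
static LIST_HEAD(example_list);				/* shared: needs example_lock */
static DEFINE_PER_CPU(unsigned long, example_hits);	/* CPU-local: lock-free */

static void example_softirq_action(struct softirq_action *h)
{
	__this_cpu_inc(example_hits);

	/* producers in process context would pair this with spin_lock_bh() */
	spin_lock(&example_lock);
	if (!list_empty(&example_list)) {
		/* ... consume and list_del() the queued entries ... */
	}
	spin_unlock(&example_lock);
}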
49 
50 #ifndef __ARCH_IRQ_STAT
51 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
52 EXPORT_SYMBOL(irq_stat);
53 #endif
54 
55 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
56 
57 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58 
59 char *softirq_to_name[NR_SOFTIRQS] = {
60 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
61 	"TASKLET", "SCHED", "HRTIMER", "RCU"
62 };
63 
64 /*
65  * we cannot loop indefinitely here to avoid userspace starvation,
66  * but we also don't want to introduce a worst case 1/HZ latency
67  * to the pending events, so let the scheduler balance
68  * the softirq load for us.
69  */
70 static void wakeup_softirqd(void)
71 {
72 	/* Interrupts are disabled: no need to stop preemption */
73 	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
74 
75 	if (tsk && tsk->state != TASK_RUNNING)
76 		wake_up_process(tsk);
77 }
78 
79 /*
80  * preempt_count and SOFTIRQ_OFFSET usage:
81  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
82  *   softirq processing.
83  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
84  *   on local_bh_disable or local_bh_enable.
85  * This lets us distinguish between whether we are currently processing
86  * softirq and whether we just have bh disabled.
87  */
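
/*
 * Illustrative sketch (not part of the original file): how the split above
 * is visible through the standard context helpers.  in_softirq() tests the
 * whole softirq count, while in_serving_softirq() tests only the
 * SOFTIRQ_OFFSET bit that __do_softirq() adds.  The function name is
 * hypothetical.
 */
static void __maybe_unused example_softirq_context_check(void)
{
	local_bh_disable();		/* softirq count += SOFTIRQ_DISABLE_OFFSET */

	WARN_ON(!in_softirq());		/* bottom halves are disabled here */
	WARN_ON(in_serving_softirq());	/* but no softirq is being served */

	local_bh_enable();
}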
88 
89 /*
90  * This one is for softirq.c-internal use,
91  * where hardirqs are disabled legitimately:
92  */
93 #ifdef CONFIG_TRACE_IRQFLAGS
94 static void __local_bh_disable(unsigned long ip, unsigned int cnt)
95 {
96 	unsigned long flags;
97 
98 	WARN_ON_ONCE(in_irq());
99 
100 	raw_local_irq_save(flags);
101 	/*
102 	 * The preempt tracer hooks into preempt_count_add and will break
103 	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
104 	 * is set and before current->softirq_enabled is cleared.
105 	 * We must manually increment preempt_count here and manually
106 	 * call the trace_preempt_off later.
107 	 */
108 	__preempt_count_add(cnt);
109 	/*
110 	 * Were softirqs turned off above:
111 	 */
112 	if (softirq_count() == cnt)
113 		trace_softirqs_off(ip);
114 	raw_local_irq_restore(flags);
115 
116 	if (preempt_count() == cnt)
117 		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
118 }
119 #else /* !CONFIG_TRACE_IRQFLAGS */
120 static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
121 {
122 	preempt_count_add(cnt);
123 	barrier();
124 }
125 #endif /* CONFIG_TRACE_IRQFLAGS */
126 
127 void local_bh_disable(void)
128 {
129 	__local_bh_disable(_RET_IP_, SOFTIRQ_DISABLE_OFFSET);
130 }
131 
132 EXPORT_SYMBOL(local_bh_disable);
133 
134 static void __local_bh_enable(unsigned int cnt)
135 {
136 	WARN_ON_ONCE(!irqs_disabled());
137 
138 	if (softirq_count() == cnt)
139 		trace_softirqs_on(_RET_IP_);
140 	preempt_count_sub(cnt);
141 }
142 
143 /*
144  * Special-case - softirqs can safely be enabled in
145  * cond_resched_softirq(), or by __do_softirq(),
146  * without processing still-pending softirqs:
147  */
148 void _local_bh_enable(void)
149 {
150 	WARN_ON_ONCE(in_irq());
151 	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
152 }
153 
154 EXPORT_SYMBOL(_local_bh_enable);
155 
156 static inline void _local_bh_enable_ip(unsigned long ip)
157 {
158 	WARN_ON_ONCE(in_irq() || irqs_disabled());
159 #ifdef CONFIG_TRACE_IRQFLAGS
160 	local_irq_disable();
161 #endif
162 	/*
163 	 * Are softirqs going to be turned on now:
164 	 */
165 	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
166 		trace_softirqs_on(ip);
167 	/*
168 	 * Keep preemption disabled until we are done with
169 	 * softirq processing:
170  	 */
171 	preempt_count_sub(SOFTIRQ_DISABLE_OFFSET - 1);
172 
173 	if (unlikely(!in_interrupt() && local_softirq_pending())) {
174 		/*
175 		 * Run softirq if any pending. And do it in its own stack
176 		 * as we may be calling this deep in a task call stack already.
177 		 */
178 		do_softirq();
179 	}
180 
181 	preempt_count_dec();
182 #ifdef CONFIG_TRACE_IRQFLAGS
183 	local_irq_enable();
184 #endif
185 	preempt_check_resched();
186 }
187 
188 void local_bh_enable(void)
189 {
190 	_local_bh_enable_ip(_RET_IP_);
191 }
192 EXPORT_SYMBOL(local_bh_enable);
193 
194 void local_bh_enable_ip(unsigned long ip)
195 {
196 	_local_bh_enable_ip(ip);
197 }
198 EXPORT_SYMBOL(local_bh_enable_ip);
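
/*
 * Illustrative sketch (not part of the original file): the classic use of
 * the local_bh_*() pair above -- process context touching data that a
 * softirq or tasklet on this CPU also touches.  example_counter and
 * example_update are hypothetical.
 */
static DEFINE_PER_CPU(u64, example_counter);

static void example_update(u64 delta)
{
	local_bh_disable();			/* keep softirqs off this CPU */
	__this_cpu_add(example_counter, delta);	/* safe against local softirqs */
	local_bh_enable();			/* may run pending softirqs now */
}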
199 
200 /*
201  * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
202  * but break the loop if need_resched() is set or after 2 ms.
203  * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
204  * certain cases, such as stop_machine(), jiffies may cease to
205  * increment and so we need the MAX_SOFTIRQ_RESTART limit as
206  * well to make sure we eventually return from this method.
207  *
208  * These limits have been established via experimentation.
209  * The two things to balance are latency and fairness -
210  * we want to handle softirqs as soon as possible, but they
211  * should not be able to lock up the box.
212  */
213 #define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
214 #define MAX_SOFTIRQ_RESTART 10
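
/*
 * For the common HZ values this time budget is tiny: msecs_to_jiffies(2)
 * is two jiffies at HZ=1000 and rounds up to a single jiffy at HZ=250 or
 * HZ=100.  Once jiffies stops advancing (e.g. stop_machine()), only the
 * MAX_SOFTIRQ_RESTART cap and need_resched() bound the loop below.
 */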
215 
216 asmlinkage void __do_softirq(void)
217 {
218 	struct softirq_action *h;
219 	__u32 pending;
220 	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
221 	int cpu;
222 	unsigned long old_flags = current->flags;
223 	int max_restart = MAX_SOFTIRQ_RESTART;
224 
225 	/*
226 	 * Mask out PF_MEMALLOC since the current task context is borrowed for
227 	 * the softirq. A softirq handler such as network RX might set
228 	 * PF_MEMALLOC again if the socket is related to swap.
229 	 */
230 	current->flags &= ~PF_MEMALLOC;
231 
232 	pending = local_softirq_pending();
233 	account_irq_enter_time(current);
234 
235 	__local_bh_disable(_RET_IP_, SOFTIRQ_OFFSET);
236 	lockdep_softirq_enter();
237 
238 	cpu = smp_processor_id();
239 restart:
240 	/* Reset the pending bitmask before enabling irqs */
241 	set_softirq_pending(0);
242 
243 	local_irq_enable();
244 
245 	h = softirq_vec;
246 
247 	do {
248 		if (pending & 1) {
249 			unsigned int vec_nr = h - softirq_vec;
250 			int prev_count = preempt_count();
251 
252 			kstat_incr_softirqs_this_cpu(vec_nr);
253 
254 			trace_softirq_entry(vec_nr);
255 			h->action(h);
256 			trace_softirq_exit(vec_nr);
257 			if (unlikely(prev_count != preempt_count())) {
258 				printk(KERN_ERR "huh, entered softirq %u %s %p"
259 				       " with preempt_count %08x,"
260 				       " exited with %08x?\n", vec_nr,
261 				       softirq_to_name[vec_nr], h->action,
262 				       prev_count, preempt_count());
263 				preempt_count_set(prev_count);
264 			}
265 
266 			rcu_bh_qs(cpu);
267 		}
268 		h++;
269 		pending >>= 1;
270 	} while (pending);
271 
272 	local_irq_disable();
273 
274 	pending = local_softirq_pending();
275 	if (pending) {
276 		if (time_before(jiffies, end) && !need_resched() &&
277 		    --max_restart)
278 			goto restart;
279 
280 		wakeup_softirqd();
281 	}
282 
283 	lockdep_softirq_exit();
284 
285 	account_irq_exit_time(current);
286 	__local_bh_enable(SOFTIRQ_OFFSET);
287 	WARN_ON_ONCE(in_interrupt());
288 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
289 }
290 
291 
292 
293 asmlinkage void do_softirq(void)
294 {
295 	__u32 pending;
296 	unsigned long flags;
297 
298 	if (in_interrupt())
299 		return;
300 
301 	local_irq_save(flags);
302 
303 	pending = local_softirq_pending();
304 
305 	if (pending)
306 		do_softirq_own_stack();
307 
308 	local_irq_restore(flags);
309 }
310 
311 /*
312  * Enter an interrupt context.
313  */
314 void irq_enter(void)
315 {
316 	int cpu = smp_processor_id();
317 
318 	rcu_irq_enter();
319 	if (is_idle_task(current) && !in_interrupt()) {
320 		/*
321 		 * Prevent raise_softirq from needlessly waking up ksoftirqd
322 		 * here, as softirq will be serviced on return from interrupt.
323 		 */
324 		local_bh_disable();
325 		tick_check_idle(cpu);
326 		_local_bh_enable();
327 	}
328 
329 	__irq_enter();
330 }
331 
332 static inline void invoke_softirq(void)
333 {
334 	if (!force_irqthreads) {
335 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
336 		/*
337 		 * We can safely execute softirq on the current stack if
338 		 * it is the irq stack, because it should be near empty
339 		 * at this stage.
340 		 */
341 		__do_softirq();
342 #else
343 		/*
344 		 * Otherwise, irq_exit() is called on the task stack that can
345 		 * be potentially deep already. So run the softirq on its own stack
346 		 * to prevent any overrun.
347 		 */
348 		do_softirq_own_stack();
349 #endif
350 	} else {
351 		wakeup_softirqd();
352 	}
353 }
354 
355 static inline void tick_irq_exit(void)
356 {
357 #ifdef CONFIG_NO_HZ_COMMON
358 	int cpu = smp_processor_id();
359 
360 	/* Make sure that timer wheel updates are propagated */
361 	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
362 		if (!in_interrupt())
363 			tick_nohz_irq_exit();
364 	}
365 #endif
366 }
367 
368 /*
369  * Exit an interrupt context. Process softirqs if needed and possible:
370  */
371 void irq_exit(void)
372 {
373 #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
374 	local_irq_disable();
375 #else
376 	WARN_ON_ONCE(!irqs_disabled());
377 #endif
378 
379 	account_irq_exit_time(current);
380 	trace_hardirq_exit();
381 	preempt_count_sub(HARDIRQ_OFFSET);
382 	if (!in_interrupt() && local_softirq_pending())
383 		invoke_softirq();
384 
385 	tick_irq_exit();
386 	rcu_irq_exit();
387 }
388 
389 /*
390  * This function must run with irqs disabled!
391  */
392 inline void raise_softirq_irqoff(unsigned int nr)
393 {
394 	__raise_softirq_irqoff(nr);
395 
396 	/*
397 	 * If we're in an interrupt or softirq, we're done
398 	 * (this also catches softirq-disabled code). We will
399 	 * actually run the softirq once we return from
400 	 * the irq or softirq.
401 	 *
402 	 * Otherwise we wake up ksoftirqd to make sure we
403 	 * schedule the softirq soon.
404 	 */
405 	if (!in_interrupt())
406 		wakeup_softirqd();
407 }
408 
409 void raise_softirq(unsigned int nr)
410 {
411 	unsigned long flags;
412 
413 	local_irq_save(flags);
414 	raise_softirq_irqoff(nr);
415 	local_irq_restore(flags);
416 }
417 
418 void __raise_softirq_irqoff(unsigned int nr)
419 {
420 	trace_softirq_raise(nr);
421 	or_softirq_pending(1UL << nr);
422 }
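
/*
 * Summary of the raise_*() flavours above:
 *  - __raise_softirq_irqoff() only sets the pending bit; the caller must
 *    know that a softirq tail (irq_exit(), __do_softirq(), ...) will run
 *    soon, otherwise the event may not be handled promptly.
 *  - raise_softirq_irqoff() additionally wakes ksoftirqd when called from
 *    task context, but requires interrupts to be disabled already.
 *  - raise_softirq() is the irq-safe wrapper that saves/restores flags.
 */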
423 
424 void open_softirq(int nr, void (*action)(struct softirq_action *))
425 {
426 	softirq_vec[nr].action = action;
427 }
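
/*
 * Illustrative sketch (not part of the original file): the usual pattern
 * for wiring up one of the fixed NR_SOFTIRQS vectors.  The handler is
 * registered once at init time with open_softirq(); hard interrupt
 * handlers then mark the vector pending with raise_softirq() and the work
 * runs later from irq_exit() or ksoftirqd.  example_softirq(),
 * example_softirq_init(), example_irq_handler() and the reuse of
 * BLOCK_IOPOLL_SOFTIRQ are hypothetical -- real vectors are owned by
 * their subsystems.
 */
static void example_softirq(struct softirq_action *h)
{
	/* bottom-half work, runs with interrupts enabled */
}

static int __init example_softirq_init(void)
{
	open_softirq(BLOCK_IOPOLL_SOFTIRQ, example_softirq);
	return 0;
}

static irqreturn_t example_irq_handler(int irq, void *dev_id)
{
	/* minimal top half: defer the heavy lifting to the softirq */
	raise_softirq(BLOCK_IOPOLL_SOFTIRQ);
	return IRQ_HANDLED;
}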
428 
429 /*
430  * Tasklets
431  */
432 struct tasklet_head
433 {
434 	struct tasklet_struct *head;
435 	struct tasklet_struct **tail;
436 };
437 
438 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
439 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
440 
441 void __tasklet_schedule(struct tasklet_struct *t)
442 {
443 	unsigned long flags;
444 
445 	local_irq_save(flags);
446 	t->next = NULL;
447 	*__this_cpu_read(tasklet_vec.tail) = t;
448 	__this_cpu_write(tasklet_vec.tail, &(t->next));
449 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
450 	local_irq_restore(flags);
451 }
452 
453 EXPORT_SYMBOL(__tasklet_schedule);
454 
455 void __tasklet_hi_schedule(struct tasklet_struct *t)
456 {
457 	unsigned long flags;
458 
459 	local_irq_save(flags);
460 	t->next = NULL;
461 	*__this_cpu_read(tasklet_hi_vec.tail) = t;
462 	__this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
463 	raise_softirq_irqoff(HI_SOFTIRQ);
464 	local_irq_restore(flags);
465 }
466 
467 EXPORT_SYMBOL(__tasklet_hi_schedule);
468 
469 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
470 {
471 	BUG_ON(!irqs_disabled());
472 
473 	t->next = __this_cpu_read(tasklet_hi_vec.head);
474 	__this_cpu_write(tasklet_hi_vec.head, t);
475 	__raise_softirq_irqoff(HI_SOFTIRQ);
476 }
477 
478 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
479 
480 static void tasklet_action(struct softirq_action *a)
481 {
482 	struct tasklet_struct *list;
483 
484 	local_irq_disable();
485 	list = __this_cpu_read(tasklet_vec.head);
486 	__this_cpu_write(tasklet_vec.head, NULL);
487 	__this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
488 	local_irq_enable();
489 
490 	while (list) {
491 		struct tasklet_struct *t = list;
492 
493 		list = list->next;
494 
495 		if (tasklet_trylock(t)) {
496 			if (!atomic_read(&t->count)) {
497 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
498 					BUG();
499 				t->func(t->data);
500 				tasklet_unlock(t);
501 				continue;
502 			}
503 			tasklet_unlock(t);
504 		}
505 
506 		local_irq_disable();
507 		t->next = NULL;
508 		*__this_cpu_read(tasklet_vec.tail) = t;
509 		__this_cpu_write(tasklet_vec.tail, &(t->next));
510 		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
511 		local_irq_enable();
512 	}
513 }
514 
515 static void tasklet_hi_action(struct softirq_action *a)
516 {
517 	struct tasklet_struct *list;
518 
519 	local_irq_disable();
520 	list = __this_cpu_read(tasklet_hi_vec.head);
521 	__this_cpu_write(tasklet_hi_vec.head, NULL);
522 	__this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
523 	local_irq_enable();
524 
525 	while (list) {
526 		struct tasklet_struct *t = list;
527 
528 		list = list->next;
529 
530 		if (tasklet_trylock(t)) {
531 			if (!atomic_read(&t->count)) {
532 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
533 					BUG();
534 				t->func(t->data);
535 				tasklet_unlock(t);
536 				continue;
537 			}
538 			tasklet_unlock(t);
539 		}
540 
541 		local_irq_disable();
542 		t->next = NULL;
543 		*__this_cpu_read(tasklet_hi_vec.tail) = t;
544 		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
545 		__raise_softirq_irqoff(HI_SOFTIRQ);
546 		local_irq_enable();
547 	}
548 }
549 
550 
551 void tasklet_init(struct tasklet_struct *t,
552 		  void (*func)(unsigned long), unsigned long data)
553 {
554 	t->next = NULL;
555 	t->state = 0;
556 	atomic_set(&t->count, 0);
557 	t->func = func;
558 	t->data = data;
559 }
560 
561 EXPORT_SYMBOL(tasklet_init);
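
/*
 * Illustrative sketch (not part of the original file): the common driver
 * pattern around the tasklet API above.  The interrupt handler does the
 * bare minimum and defers to a tasklet, which will never run concurrently
 * with itself.  struct example_dev, example_tasklet_fn(), example_isr()
 * and example_setup() are hypothetical.
 */
struct example_dev {
	struct tasklet_struct tasklet;
	/* device state ... */
};

static void example_tasklet_fn(unsigned long data)
{
	struct example_dev *dev = (struct example_dev *)data;

	/* bottom-half processing for @dev ... */
	pr_debug("tasklet ran for %p\n", dev);
}

static irqreturn_t example_isr(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;

	tasklet_schedule(&dev->tasklet);	/* sets SCHED, raises TASKLET_SOFTIRQ */
	return IRQ_HANDLED;
}

static void example_setup(struct example_dev *dev)
{
	tasklet_init(&dev->tasklet, example_tasklet_fn, (unsigned long)dev);
	/* teardown must call tasklet_kill(&dev->tasklet) before freeing @dev */
}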
562 
563 void tasklet_kill(struct tasklet_struct *t)
564 {
565 	if (in_interrupt())
566 		printk("Attempt to kill tasklet from interrupt\n");
567 
568 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
569 		do {
570 			yield();
571 		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
572 	}
573 	tasklet_unlock_wait(t);
574 	clear_bit(TASKLET_STATE_SCHED, &t->state);
575 }
576 
577 EXPORT_SYMBOL(tasklet_kill);
578 
579 /*
580  * tasklet_hrtimer
581  */
582 
583 /*
584  * The trampoline is called when the hrtimer expires. It schedules a tasklet
585  * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
586  * hrtimer callback, but from softirq context.
587  */
588 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
589 {
590 	struct tasklet_hrtimer *ttimer =
591 		container_of(timer, struct tasklet_hrtimer, timer);
592 
593 	tasklet_hi_schedule(&ttimer->tasklet);
594 	return HRTIMER_NORESTART;
595 }
596 
597 /*
598  * Helper function which calls the hrtimer callback from
599  * tasklet/softirq context
600  */
601 static void __tasklet_hrtimer_trampoline(unsigned long data)
602 {
603 	struct tasklet_hrtimer *ttimer = (void *)data;
604 	enum hrtimer_restart restart;
605 
606 	restart = ttimer->function(&ttimer->timer);
607 	if (restart != HRTIMER_NORESTART)
608 		hrtimer_restart(&ttimer->timer);
609 }
610 
611 /**
612  * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
613  * @ttimer:	 tasklet_hrtimer which is initialized
614  * @function:	 hrtimer callback function which gets called from softirq context
615  * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
616  * @mode:	 hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
617  */
618 void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
619 			  enum hrtimer_restart (*function)(struct hrtimer *),
620 			  clockid_t which_clock, enum hrtimer_mode mode)
621 {
622 	hrtimer_init(&ttimer->timer, which_clock, mode);
623 	ttimer->timer.function = __hrtimer_tasklet_trampoline;
624 	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
625 		     (unsigned long)ttimer);
626 	ttimer->function = function;
627 }
628 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
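
/*
 * Illustrative sketch (not part of the original file): arming the combo
 * initialized above.  tasklet_hrtimer_start() (declared next to
 * struct tasklet_hrtimer in <linux/interrupt.h>) programs the hrtimer;
 * when it fires, the callback supplied here runs from HI_SOFTIRQ context
 * via the two trampolines above instead of from hard interrupt context.
 * example_timer, example_timer_fn and example_arm are hypothetical.
 */
static struct tasklet_hrtimer example_timer;

static enum hrtimer_restart example_timer_fn(struct hrtimer *t)
{
	/* softirq-context work ... */
	return HRTIMER_NORESTART;
}

static void example_arm(void)
{
	tasklet_hrtimer_init(&example_timer, example_timer_fn,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	tasklet_hrtimer_start(&example_timer, ktime_set(0, 10 * NSEC_PER_MSEC),
			      HRTIMER_MODE_REL);
}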
629 
630 /*
631  * Remote softirq bits
632  */
633 
634 DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
635 EXPORT_PER_CPU_SYMBOL(softirq_work_list);
636 
637 static void __local_trigger(struct call_single_data *cp, int softirq)
638 {
639 	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
640 
641 	list_add_tail(&cp->list, head);
642 
643 	/* Trigger the softirq only if the list was previously empty.  */
644 	if (head->next == &cp->list)
645 		raise_softirq_irqoff(softirq);
646 }
647 
648 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
649 static void remote_softirq_receive(void *data)
650 {
651 	struct call_single_data *cp = data;
652 	unsigned long flags;
653 	int softirq;
654 
655 	softirq = *(int *)cp->info;
656 	local_irq_save(flags);
657 	__local_trigger(cp, softirq);
658 	local_irq_restore(flags);
659 }
660 
661 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
662 {
663 	if (cpu_online(cpu)) {
664 		cp->func = remote_softirq_receive;
665 		cp->info = &softirq;
666 		cp->flags = 0;
667 
668 		__smp_call_function_single(cpu, cp, 0);
669 		return 0;
670 	}
671 	return 1;
672 }
673 #else /* CONFIG_USE_GENERIC_SMP_HELPERS */
674 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
675 {
676 	return 1;
677 }
678 #endif
679 
680 /**
681  * __send_remote_softirq - try to schedule softirq work on a remote cpu
682  * @cp: private SMP call function data area
683  * @cpu: the remote cpu
684  * @this_cpu: the currently executing cpu
685  * @softirq: the softirq for the work
686  *
687  * Attempt to schedule softirq work on a remote cpu.  If this cannot be
688  * done, the work is instead queued up on the local cpu.
689  *
690  * Interrupts must be disabled.
691  */
692 void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
693 {
694 	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
695 		__local_trigger(cp, softirq);
696 }
697 EXPORT_SYMBOL(__send_remote_softirq);
698 
699 /**
700  * send_remote_softirq - try to schedule softirq work on a remote cpu
701  * @cp: private SMP call function data area
702  * @cpu: the remote cpu
703  * @softirq: the softirq for the work
704  *
705  * Like __send_remote_softirq except that disabling interrupts and
706  * computing the current cpu is done for the caller.
707  */
708 void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
709 {
710 	unsigned long flags;
711 	int this_cpu;
712 
713 	local_irq_save(flags);
714 	this_cpu = smp_processor_id();
715 	__send_remote_softirq(cp, cpu, this_cpu, softirq);
716 	local_irq_restore(flags);
717 }
718 EXPORT_SYMBOL(send_remote_softirq);
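
/*
 * Illustrative sketch (not part of the original file): a sender embeds a
 * struct call_single_data in its unit of work and asks for one of the
 * fixed softirq vectors to be raised on a chosen CPU; the handler of that
 * vector is then expected to pick the entry up from its per-CPU
 * softirq_work_list[] entry.  struct example_work, example_queue_on() and
 * the reuse of NET_RX_SOFTIRQ are hypothetical.
 */
struct example_work {
	struct call_single_data csd;	/* ends up on softirq_work_list[] */
	/* payload ... */
};

static void example_queue_on(struct example_work *work, int cpu)
{
	/* falls back to the local CPU if @cpu is this CPU or offline */
	send_remote_softirq(&work->csd, cpu, NET_RX_SOFTIRQ);
}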
719 
720 static int remote_softirq_cpu_notify(struct notifier_block *self,
721 					       unsigned long action, void *hcpu)
722 {
723 	/*
724 	 * If a CPU goes away, splice its entries to the current CPU
725 	 * and trigger a run of the softirq
726 	 */
727 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
728 		int cpu = (unsigned long) hcpu;
729 		int i;
730 
731 		local_irq_disable();
732 		for (i = 0; i < NR_SOFTIRQS; i++) {
733 			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
734 			struct list_head *local_head;
735 
736 			if (list_empty(head))
737 				continue;
738 
739 			local_head = &__get_cpu_var(softirq_work_list[i]);
740 			list_splice_init(head, local_head);
741 			raise_softirq_irqoff(i);
742 		}
743 		local_irq_enable();
744 	}
745 
746 	return NOTIFY_OK;
747 }
748 
749 static struct notifier_block remote_softirq_cpu_notifier = {
750 	.notifier_call	= remote_softirq_cpu_notify,
751 };
752 
753 void __init softirq_init(void)
754 {
755 	int cpu;
756 
757 	for_each_possible_cpu(cpu) {
758 		int i;
759 
760 		per_cpu(tasklet_vec, cpu).tail =
761 			&per_cpu(tasklet_vec, cpu).head;
762 		per_cpu(tasklet_hi_vec, cpu).tail =
763 			&per_cpu(tasklet_hi_vec, cpu).head;
764 		for (i = 0; i < NR_SOFTIRQS; i++)
765 			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
766 	}
767 
768 	register_hotcpu_notifier(&remote_softirq_cpu_notifier);
769 
770 	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
771 	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
772 }
773 
774 static int ksoftirqd_should_run(unsigned int cpu)
775 {
776 	return local_softirq_pending();
777 }
778 
779 static void run_ksoftirqd(unsigned int cpu)
780 {
781 	local_irq_disable();
782 	if (local_softirq_pending()) {
783 		/*
784 		 * We can safely run softirqs inline on the current stack, as we
785 		 * are not deep in the task stack here.
786 		 */
787 		__do_softirq();
788 		rcu_note_context_switch(cpu);
789 		local_irq_enable();
790 		cond_resched();
791 		return;
792 	}
793 	local_irq_enable();
794 }
795 
796 #ifdef CONFIG_HOTPLUG_CPU
797 /*
798  * tasklet_kill_immediate is called to remove a tasklet which may already be
799  * scheduled for execution on @cpu.
800  *
801  * Unlike tasklet_kill, this function removes the tasklet
802  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
803  *
804  * When this function is called, @cpu must be in the CPU_DEAD state.
805  */
806 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
807 {
808 	struct tasklet_struct **i;
809 
810 	BUG_ON(cpu_online(cpu));
811 	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
812 
813 	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
814 		return;
815 
816 	/* CPU is dead, so no lock needed. */
817 	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
818 		if (*i == t) {
819 			*i = t->next;
820 			/* If this was the tail element, move the tail ptr */
821 			if (*i == NULL)
822 				per_cpu(tasklet_vec, cpu).tail = i;
823 			return;
824 		}
825 	}
826 	BUG();
827 }
828 
829 static void takeover_tasklets(unsigned int cpu)
830 {
831 	/* CPU is dead, so no lock needed. */
832 	local_irq_disable();
833 
834 	/* Find end, append list for that CPU. */
835 	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
836 		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
837 		this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
838 		per_cpu(tasklet_vec, cpu).head = NULL;
839 		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
840 	}
841 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
842 
843 	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
844 		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
845 		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
846 		per_cpu(tasklet_hi_vec, cpu).head = NULL;
847 		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
848 	}
849 	raise_softirq_irqoff(HI_SOFTIRQ);
850 
851 	local_irq_enable();
852 }
853 #endif /* CONFIG_HOTPLUG_CPU */
854 
855 static int cpu_callback(struct notifier_block *nfb,
856 				  unsigned long action,
857 				  void *hcpu)
858 {
859 	switch (action) {
860 #ifdef CONFIG_HOTPLUG_CPU
861 	case CPU_DEAD:
862 	case CPU_DEAD_FROZEN:
863 		takeover_tasklets((unsigned long)hcpu);
864 		break;
865 #endif /* CONFIG_HOTPLUG_CPU */
866 	}
867 	return NOTIFY_OK;
868 }
869 
870 static struct notifier_block cpu_nfb = {
871 	.notifier_call = cpu_callback
872 };
873 
874 static struct smp_hotplug_thread softirq_threads = {
875 	.store			= &ksoftirqd,
876 	.thread_should_run	= ksoftirqd_should_run,
877 	.thread_fn		= run_ksoftirqd,
878 	.thread_comm		= "ksoftirqd/%u",
879 };
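
/*
 * smpboot_register_percpu_thread() below creates one "ksoftirqd/N" kthread
 * per online CPU (and for CPUs brought online later), parking and
 * unparking them across CPU hotplug.  Each thread sleeps until
 * ksoftirqd_should_run() reports pending softirqs and then calls
 * run_ksoftirqd() to process them.
 */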
880 
881 static __init int spawn_ksoftirqd(void)
882 {
883 	register_cpu_notifier(&cpu_nfb);
884 
885 	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
886 
887 	return 0;
888 }
889 early_initcall(spawn_ksoftirqd);
890 
891 /*
892  * [ These __weak aliases are kept in a separate compilation unit, so that
893  *   GCC does not inline them incorrectly. ]
894  */
895 
896 int __init __weak early_irq_init(void)
897 {
898 	return 0;
899 }
900 
901 int __init __weak arch_probe_nr_irqs(void)
902 {
903 	return NR_IRQS_LEGACY;
904 }
905 
906 int __init __weak arch_early_irq_init(void)
907 {
908 	return 0;
909 }
910