xref: /openbmc/linux/kernel/softirq.c (revision f4356947)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *	linux/kernel/softirq.c
4  *
5  *	Copyright (C) 1992 Linus Torvalds
6  *
7  *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
8  */
9 
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 
12 #include <linux/export.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/interrupt.h>
15 #include <linux/init.h>
16 #include <linux/local_lock.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/smpboot.h>
27 #include <linux/tick.h>
28 #include <linux/irq.h>
29 #include <linux/wait_bit.h>
30 
31 #include <asm/softirq_stack.h>
32 
33 #define CREATE_TRACE_POINTS
34 #include <trace/events/irq.h>
35 
36 /*
37    - No shared variables, all the data are CPU local.
38    - If a softirq needs serialization, let it serialize itself
39      by its own spinlocks.
40    - Even if softirq is serialized, only local cpu is marked for
41      execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether this will result in better
     locality or not.
44 
45    Examples:
46    - NET RX softirq. It is multithreaded and does not require
47      any global serialization.
48    - NET TX softirq. It kicks software netdevice queues, hence
49      it is logically serialized per device, but this serialization
50      is invisible to common code.
51    - Tasklets: serialized wrt itself.
52  */
53 
/*
 * Per-CPU irq_cpustat_t instance. Only defined here when the architecture
 * does not define __ARCH_IRQ_STAT.
 */
#ifndef __ARCH_IRQ_STAT
DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
#endif

/* Handler table indexed by softirq number; entries are set by open_softirq(). */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

/* Per-CPU softirq daemon task; registered as the .store of softirq_threads. */
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
62 
63 const char * const softirq_to_name[NR_SOFTIRQS] = {
64 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
65 	"TASKLET", "SCHED", "HRTIMER", "RCU"
66 };
67 
68 /*
69  * we cannot loop indefinitely here to avoid userspace starvation,
70  * but we also don't want to introduce a worst case 1/HZ latency
71  * to the pending events, so lets the scheduler to balance
72  * the softirq load for us.
73  */
74 static void wakeup_softirqd(void)
75 {
76 	/* Interrupts are disabled: no need to stop preemption */
77 	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
78 
79 	if (tsk)
80 		wake_up_process(tsk);
81 }
82 
83 /*
84  * If ksoftirqd is scheduled, we do not want to process pending softirqs
85  * right now. Let ksoftirqd handle this at its own rate, to get fairness,
86  * unless we're doing some of the synchronous softirqs.
87  */
88 #define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
89 static bool ksoftirqd_running(unsigned long pending)
90 {
91 	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
92 
93 	if (pending & SOFTIRQ_NOW_MASK)
94 		return false;
95 	return tsk && task_is_running(tsk) && !__kthread_should_park(tsk);
96 }
97 
#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * Per-CPU state that only exists with CONFIG_TRACE_IRQFLAGS. Both symbols
 * are exported GPL-only.
 */
DEFINE_PER_CPU(int, hardirqs_enabled);
DEFINE_PER_CPU(int, hardirq_context);
EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
#endif
104 
105 /*
106  * SOFTIRQ_OFFSET usage:
107  *
108  * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
109  * to a per CPU counter and to task::softirqs_disabled_cnt.
110  *
111  * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
112  *   processing.
113  *
114  * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
115  *   on local_bh_disable or local_bh_enable.
116  *
117  * This lets us distinguish between whether we are currently processing
118  * softirq and whether we just have bh disabled.
119  */
120 #ifdef CONFIG_PREEMPT_RT
121 
/*
 * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and
 * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
 * softirq disabled section to be preempted.
 *
 * The per task counter is used for softirq_count(), in_softirq() and
 * in_serving_softirq() because these counts are only valid when the task
 * holding softirq_ctrl::lock is running.
 *
 * The per CPU counter prevents pointless wakeups of ksoftirqd in case that
 * the task which is in a softirq disabled section is preempted or blocks.
 */
struct softirq_ctrl {
	/* Taken on first entry into a BH disabled section from preemptible context */
	local_lock_t	lock;
	/* Per CPU BH disable count, mirrored into current->softirq_disable_cnt */
	int		cnt;
};

static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
	.lock	= INIT_LOCAL_LOCK(softirq_ctrl.lock),
};
142 
143 /**
144  * local_bh_blocked() - Check for idle whether BH processing is blocked
145  *
146  * Returns false if the per CPU softirq::cnt is 0 otherwise true.
147  *
148  * This is invoked from the idle task to guard against false positive
149  * softirq pending warnings, which would happen when the task which holds
150  * softirq_ctrl::lock was the only running task on the CPU and blocks on
151  * some other lock.
152  */
153 bool local_bh_blocked(void)
154 {
155 	return __this_cpu_read(softirq_ctrl.cnt) != 0;
156 }
157 
/*
 * RT implementation of entering a BH disabled section.
 *
 * @ip:  caller address handed to lockdep_softirqs_off()
 * @cnt: amount added to the per CPU softirq_ctrl::cnt
 *
 * Warns once if called from hard interrupt context.
 */
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;
	int newcnt;

	WARN_ON_ONCE(in_hardirq());

	/* First entry of a task into a BH disabled section? */
	if (!current->softirq_disable_cnt) {
		if (preemptible()) {
			local_lock(&softirq_ctrl.lock);
			/* Required to meet the RCU bottomhalf requirements. */
			rcu_read_lock();
		} else {
			/*
			 * Non-preemptible first entry: the local lock is not
			 * taken, and a non-zero per CPU count is flagged.
			 */
			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
		}
	}

	/*
	 * Track the per CPU softirq disabled state. On RT this is per CPU
	 * state to allow preemption of bottom half disabled sections.
	 */
	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
	/*
	 * Reflect the result in the task state to prevent recursion on the
	 * local lock and to make softirq_count() & al work.
	 */
	current->softirq_disable_cnt = newcnt;

	/* Tell lockdep only when the count went from zero to @cnt. */
	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_off(ip);
		raw_local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
194 
195 static void __local_bh_enable(unsigned int cnt, bool unlock)
196 {
197 	unsigned long flags;
198 	int newcnt;
199 
200 	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
201 			    this_cpu_read(softirq_ctrl.cnt));
202 
203 	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
204 		raw_local_irq_save(flags);
205 		lockdep_softirqs_on(_RET_IP_);
206 		raw_local_irq_restore(flags);
207 	}
208 
209 	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
210 	current->softirq_disable_cnt = newcnt;
211 
212 	if (!newcnt && unlock) {
213 		rcu_read_unlock();
214 		local_unlock(&softirq_ctrl.lock);
215 	}
216 }
217 
/*
 * RT implementation of leaving a BH disabled section.
 *
 * @ip:  caller address (not referenced in this variant's body)
 * @cnt: amount to remove from the per CPU softirq_ctrl::cnt
 *
 * If this call is the one that reenables soft interrupts and some are
 * pending (and ksoftirqd is not already considered running for them), they
 * are run inline via __do_softirq() when the caller was preemptible, or
 * ksoftirqd is woken when it was not.
 */
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	/* Sampled before interrupts are disabled below */
	bool preempt_on = preemptible();
	unsigned long flags;
	u32 pending;
	int curcnt;

	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();

	local_irq_save(flags);
	curcnt = __this_cpu_read(softirq_ctrl.cnt);

	/*
	 * If this is not reenabling soft interrupts, no point in trying to
	 * run pending ones.
	 */
	if (curcnt != cnt)
		goto out;

	pending = local_softirq_pending();
	if (!pending || ksoftirqd_running(pending))
		goto out;

	/*
	 * If this was called from non preemptible context, wake up the
	 * softirq daemon.
	 */
	if (!preempt_on) {
		wakeup_softirqd();
		goto out;
	}

	/*
	 * Adjust softirq count to SOFTIRQ_OFFSET which makes
	 * in_serving_softirq() become true.
	 */
	cnt = SOFTIRQ_OFFSET;
	__local_bh_enable(cnt, false);
	__do_softirq();

out:
	/*
	 * Drop what is left of the count (the caller's @cnt, or SOFTIRQ_OFFSET
	 * after inline processing). The lock is only released when the caller
	 * was preemptible.
	 */
	__local_bh_enable(cnt, preempt_on);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__local_bh_enable_ip);
264 
/*
 * Invoked from run_ksoftirqd() outside of the interrupt disabled section
 * to acquire the per CPU local lock for reentrancy protection.
 * Interrupts are disabled on return.
 */
static inline void ksoftirqd_run_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	local_irq_disable();
}

/*
 * Counterpart to ksoftirqd_run_begin(): drops SOFTIRQ_OFFSET (with unlock),
 * warns once if still in interrupt context, then reenables interrupts.
 */
static inline void ksoftirqd_run_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET, true);
	WARN_ON_ONCE(in_interrupt());
	local_irq_enable();
}

/* On RT the begin/end hooks called by __do_softirq() are empty. */
static inline void softirq_handle_begin(void) { }
static inline void softirq_handle_end(void) { }
285 
286 static inline bool should_wake_ksoftirqd(void)
287 {
288 	return !this_cpu_read(softirq_ctrl.cnt);
289 }
290 
/* RT: softirqs are never run from here; at most ksoftirqd gets woken. */
static inline void invoke_softirq(void)
{
	if (!should_wake_ksoftirqd())
		return;

	wakeup_softirqd();
}
296 
/*
 * A function call run by flush_smp_call_function_queue() can raise a soft
 * interrupt. On RT kernels this is undesired, and the only known user (in
 * the block layer) is disabled on RT. So if the pending mask differs from
 * @was_pending, i.e. the value sampled before the flush, warn once so it
 * can be investigated and make sure the new softirqs get processed.
 */
void do_softirq_post_smp_call_flush(unsigned int was_pending)
{
	if (!WARN_ON_ONCE(was_pending != local_softirq_pending()))
		return;

	invoke_softirq();
}
309 
310 #else /* CONFIG_PREEMPT_RT */
311 
/*
 * This one is for softirq.c-internal use, where hardirqs are disabled
 * legitimately:
 *
 * @ip:  caller address handed to lockdep_softirqs_off()
 * @cnt: amount added to the preempt counter
 *
 * This out-of-line version is only built with CONFIG_TRACE_IRQFLAGS.
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_hardirq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_off(ip);
	raw_local_irq_restore(flags);

	/* The whole preempt count equals @cnt: emit the deferred trace hook. */
	if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = get_lock_parent_ip();
#endif
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */
348 
/*
 * !RT helper: remove @cnt from the preempt counter without running pending
 * softirqs. Asserts that interrupts are disabled.
 *
 * The trace and lockdep hooks run before the counter is decremented, so
 * they compare against the value still including @cnt.
 */
static void __local_bh_enable(unsigned int cnt)
{
	lockdep_assert_irqs_disabled();

	/* @cnt is all that is left in the preempt count */
	if (preempt_count() == cnt)
		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());

	/* The softirq part of the count drops to zero with this call */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_on(_RET_IP_);

	__preempt_count_sub(cnt);
}
361 
/*
 * Special-case - softirqs can safely be enabled by __do_softirq(),
 * without processing still-pending softirqs:
 *
 * Drops SOFTIRQ_DISABLE_OFFSET via __local_bh_enable(), which asserts that
 * interrupts are disabled. Warns once when called from hard interrupt
 * context.
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_hardirq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);
372 
/*
 * !RT implementation of leaving a BH disabled section.
 *
 * @ip:  caller address handed to lockdep_softirqs_on()
 * @cnt: amount to remove from the preempt counter
 *
 * Runs pending softirqs through do_softirq() when not in interrupt context
 * and finally checks whether a reschedule is due. Warns once when called
 * from hard interrupt context; asserts that interrupts are enabled.
 */
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		lockdep_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	__preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirq if any pending. And do it in its own stack
		 * as we may be calling this deep in a task call stack already.
		 */
		do_softirq();
	}

	/* Drop the single count that was left in place above */
	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);
406 
/* !RT: entering softirq processing adds SOFTIRQ_OFFSET to the preempt count. */
static inline void softirq_handle_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
}

/*
 * !RT: leaving softirq processing removes SOFTIRQ_OFFSET again and warns
 * once if the context still counts as in_interrupt() afterwards.
 */
static inline void softirq_handle_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
}
417 
/* !RT: ksoftirqd only disables interrupts before looking at pending softirqs. */
static inline void ksoftirqd_run_begin(void)
{
	local_irq_disable();
}

/* !RT counterpart to ksoftirqd_run_begin(): reenable interrupts. */
static inline void ksoftirqd_run_end(void)
{
	local_irq_enable();
}

/* !RT: there is no per CPU BH disable count to consult; always true. */
static inline bool should_wake_ksoftirqd(void)
{
	return true;
}
432 
433 static inline void invoke_softirq(void)
434 {
435 	if (ksoftirqd_running(local_softirq_pending()))
436 		return;
437 
438 	if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
439 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
440 		/*
441 		 * We can safely execute softirq on the current stack if
442 		 * it is the irq stack, because it should be near empty
443 		 * at this stage.
444 		 */
445 		__do_softirq();
446 #else
447 		/*
448 		 * Otherwise, irq_exit() is called on the task stack that can
449 		 * be potentially deep already. So call softirq in its own stack
450 		 * to prevent from any overrun.
451 		 */
452 		do_softirq_own_stack();
453 #endif
454 	} else {
455 		wakeup_softirqd();
456 	}
457 }
458 
459 asmlinkage __visible void do_softirq(void)
460 {
461 	__u32 pending;
462 	unsigned long flags;
463 
464 	if (in_interrupt())
465 		return;
466 
467 	local_irq_save(flags);
468 
469 	pending = local_softirq_pending();
470 
471 	if (pending && !ksoftirqd_running(pending))
472 		do_softirq_own_stack();
473 
474 	local_irq_restore(flags);
475 }
476 
477 #endif /* !CONFIG_PREEMPT_RT */
478 
/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this function.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10
494 
495 #ifdef CONFIG_TRACE_IRQFLAGS
496 /*
497  * When we run softirqs from irq_exit() and thus on the hardirq stack we need
498  * to keep the lockdep irq context tracking as tight as possible in order to
499  * not miss-qualify lock contexts and miss possible deadlocks.
500  */
501 
502 static inline bool lockdep_softirq_start(void)
503 {
504 	bool in_hardirq = false;
505 
506 	if (lockdep_hardirq_context()) {
507 		in_hardirq = true;
508 		lockdep_hardirq_exit();
509 	}
510 
511 	lockdep_softirq_enter();
512 
513 	return in_hardirq;
514 }
515 
516 static inline void lockdep_softirq_end(bool in_hardirq)
517 {
518 	lockdep_softirq_exit();
519 
520 	if (in_hardirq)
521 		lockdep_hardirq_enter();
522 }
523 #else
524 static inline bool lockdep_softirq_start(void) { return false; }
525 static inline void lockdep_softirq_end(bool in_hardirq) { }
526 #endif
527 
/*
 * Run the handlers of the softirqs pending on this CPU.
 *
 * Handlers are invoked with interrupts enabled, lowest pending bit first.
 * If more softirqs are pending after a pass, the loop restarts, bounded by
 * MAX_SOFTIRQ_TIME, MAX_SOFTIRQ_RESTART and need_resched(); whatever is
 * still pending after that is left to ksoftirqd. Interrupts are disabled
 * again before the final accounting.
 */
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swapping.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();

	softirq_handle_begin();
	in_hardirq = lockdep_softirq_start();
	account_softirq_enter(current);

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	/*
	 * ffs() yields the 1-based position of the lowest set bit. 'pending'
	 * is shifted past each handled bit and 'h' advanced accordingly, so
	 * the position is always relative to the current 'h'.
	 */
	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		/* A handler must not leak a preempt count change; report and repair. */
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	/* Only on !RT and only when running in ksoftirqd itself */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
	    __this_cpu_read(ksoftirqd) == current)
		rcu_softirq_qs();

	local_irq_disable();

	/* Softirqs raised while the handlers ran */
	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		/* Budget exhausted: leave the rest to ksoftirqd */
		wakeup_softirqd();
	}

	account_softirq_exit(current);
	lockdep_softirq_end(in_hardirq);
	softirq_handle_end();
	/* Put back the PF_MEMALLOC state saved on entry */
	current_restore_flags(old_flags, PF_MEMALLOC);
}
603 
/**
 * irq_enter_rcu - Enter an interrupt context with RCU watching
 */
void irq_enter_rcu(void)
{
	__irq_enter_raw();

	/*
	 * tick_irq_enter() is called on nohz_full CPUs, or when the idle
	 * task got interrupted and irq_count() is exactly HARDIRQ_OFFSET.
	 */
	if (tick_nohz_full_cpu(smp_processor_id()) ||
	    (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
		tick_irq_enter();

	account_hardirq_enter(current);
}
617 
/**
 * irq_enter - Enter an interrupt context including RCU update
 *
 * Calls ct_irq_enter() and then irq_enter_rcu().
 */
void irq_enter(void)
{
	ct_irq_enter();
	irq_enter_rcu();
}
626 
/*
 * With CONFIG_NO_HZ_COMMON: call tick_nohz_irq_exit() when leaving the
 * outermost hardirq on a CPU that is idle without a pending reschedule,
 * or on a nohz_full CPU. Without that option this is a no-op.
 */
static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();
	bool tick_update;

	/* Make sure that timer wheel updates are propagated */
	tick_update = (idle_cpu(cpu) && !need_resched()) ||
		      tick_nohz_full_cpu(cpu);

	if (tick_update && !in_hardirq())
		tick_nohz_irq_exit();
#endif
}
639 
/*
 * Common part of irq_exit_rcu() and irq_exit(): account the hardirq exit,
 * drop HARDIRQ_OFFSET from the preempt count, kick softirq processing if
 * softirqs are pending and we are no longer in interrupt context, then let
 * the tick code know.
 */
static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	/* The architecture promises to call this with interrupts disabled */
	lockdep_assert_irqs_disabled();
#endif
	account_hardirq_exit(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
}
654 
/**
 * irq_exit_rcu() - Exit an interrupt context without updating RCU
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit_rcu(void)
{
	__irq_exit_rcu();
	 /* must be last! */
	lockdep_hardirq_exit();
}
666 
/**
 * irq_exit - Exit an interrupt context, update RCU and lockdep
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit(void)
{
	__irq_exit_rcu();
	/* Counterpart of the ct_irq_enter() call in irq_enter() */
	ct_irq_exit();
	 /* must be last! */
	lockdep_hardirq_exit();
}
679 
/*
 * Mark softirq @nr pending and make sure it gets processed soon.
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * In an interrupt or softirq (which also covers softirq-disabled
	 * code) there is nothing more to do: the softirq is run once we
	 * return from the irq or softirq.
	 */
	if (in_interrupt())
		return;

	/* Otherwise wake ksoftirqd so the softirq gets scheduled soon. */
	if (should_wake_ksoftirqd())
		wakeup_softirqd();
}
699 
/*
 * Raise softirq @nr from any interrupt state: interrupts are disabled
 * around raise_softirq_irqoff() and the previous state is restored after.
 */
void raise_softirq(unsigned int nr)
{
	unsigned long irqflags;

	local_irq_save(irqflags);
	raise_softirq_irqoff(nr);
	local_irq_restore(irqflags);
}
708 
/*
 * Set the pending bit for softirq @nr on this CPU and emit the raise
 * tracepoint. Asserts that interrupts are disabled. Unlike
 * raise_softirq_irqoff() this never wakes ksoftirqd.
 */
void __raise_softirq_irqoff(unsigned int nr)
{
	lockdep_assert_irqs_disabled();
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}
715 
/*
 * Install @action as the handler for softirq @nr in softirq_vec.
 * @nr is not range checked here.
 */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
720 
/*
 * Tasklets
 *
 * Singly linked per-CPU queue of tasklets. @tail points at the location
 * where the next tasklet gets linked in: &head while the queue is empty,
 * otherwise the ->next field of the last queued tasklet.
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

/* Queues served by tasklet_action() and tasklet_hi_action() respectively */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
731 
732 static void __tasklet_schedule_common(struct tasklet_struct *t,
733 				      struct tasklet_head __percpu *headp,
734 				      unsigned int softirq_nr)
735 {
736 	struct tasklet_head *head;
737 	unsigned long flags;
738 
739 	local_irq_save(flags);
740 	head = this_cpu_ptr(headp);
741 	t->next = NULL;
742 	*head->tail = t;
743 	head->tail = &(t->next);
744 	raise_softirq_irqoff(softirq_nr);
745 	local_irq_restore(flags);
746 }
747 
748 void __tasklet_schedule(struct tasklet_struct *t)
749 {
750 	__tasklet_schedule_common(t, &tasklet_vec,
751 				  TASKLET_SOFTIRQ);
752 }
753 EXPORT_SYMBOL(__tasklet_schedule);
754 
755 void __tasklet_hi_schedule(struct tasklet_struct *t)
756 {
757 	__tasklet_schedule_common(t, &tasklet_hi_vec,
758 				  HI_SOFTIRQ);
759 }
760 EXPORT_SYMBOL(__tasklet_hi_schedule);
761 
762 static bool tasklet_clear_sched(struct tasklet_struct *t)
763 {
764 	if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
765 		wake_up_var(&t->state);
766 		return true;
767 	}
768 
769 	WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
770 		  t->use_callback ? "callback" : "func",
771 		  t->use_callback ? (void *)t->callback : (void *)t->func);
772 
773 	return false;
774 }
775 
/*
 * Run the tasklets queued on @tl_head.
 *
 * @a:          softirq action (not referenced in the body)
 * @tl_head:    tasklet list to process
 * @softirq_nr: softirq to raise again for tasklets that could not be run
 *
 * The list is detached with interrupts disabled and then walked with them
 * enabled. A tasklet is executed only if tasklet_trylock() succeeds and
 * its count is zero; otherwise it is appended to @tl_head again and
 * @softirq_nr is re-raised.
 */
static void tasklet_action_common(struct softirq_action *a,
				  struct tasklet_head *tl_head,
				  unsigned int softirq_nr)
{
	struct tasklet_struct *list;

	/* Take the whole list and leave an empty one behind */
	local_irq_disable();
	list = tl_head->head;
	tl_head->head = NULL;
	tl_head->tail = &tl_head->head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				/* Invoke the handler only if SCHED was really set */
				if (tasklet_clear_sched(t)) {
					if (t->use_callback) {
						trace_tasklet_entry(t, t->callback);
						t->callback(t);
						trace_tasklet_exit(t, t->callback);
					} else {
						trace_tasklet_entry(t, t->func);
						t->func(t->data);
						trace_tasklet_exit(t, t->func);
					}
				}
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		/* Could not run it now: requeue and ask for another pass */
		local_irq_disable();
		t->next = NULL;
		*tl_head->tail = t;
		tl_head->tail = &t->next;
		__raise_softirq_irqoff(softirq_nr);
		local_irq_enable();
	}
}
820 
821 static __latent_entropy void tasklet_action(struct softirq_action *a)
822 {
823 	tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
824 }
825 
826 static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
827 {
828 	tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
829 }
830 
831 void tasklet_setup(struct tasklet_struct *t,
832 		   void (*callback)(struct tasklet_struct *))
833 {
834 	t->next = NULL;
835 	t->state = 0;
836 	atomic_set(&t->count, 0);
837 	t->callback = callback;
838 	t->use_callback = true;
839 	t->data = 0;
840 }
841 EXPORT_SYMBOL(tasklet_setup);
842 
843 void tasklet_init(struct tasklet_struct *t,
844 		  void (*func)(unsigned long), unsigned long data)
845 {
846 	t->next = NULL;
847 	t->state = 0;
848 	atomic_set(&t->count, 0);
849 	t->func = func;
850 	t->use_callback = false;
851 	t->data = data;
852 }
853 EXPORT_SYMBOL(tasklet_init);
854 
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
 * Busy-wait until TASKLET_STATE_RUN of @t is clear.
 *
 * Do not use in new code. Waiting for tasklets from atomic contexts is
 * error prone and should be avoided.
 */
void tasklet_unlock_spin_wait(struct tasklet_struct *t)
{
	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
			cpu_relax();
			continue;
		}

		/*
		 * Prevent a live lock when the current task has preempted
		 * soft interrupt processing or prevents ksoftirqd from
		 * running. If the tasklet runs on a different CPU, the BH
		 * disable/enable dance has no effect and is done for
		 * nothing.
		 */
		local_bh_disable();
		local_bh_enable();
	}
}
EXPORT_SYMBOL(tasklet_unlock_spin_wait);
#endif
880 
/*
 * Wait until @t is neither scheduled nor running.
 *
 * Prints a notice when called from interrupt context, since the function
 * waits.
 */
void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	/*
	 * Claim the SCHED bit. While somebody else holds it, wait for it to
	 * be cleared and try again.
	 */
	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));

	/* Wait for a running instance to finish, then drop our SCHED claim */
	tasklet_unlock_wait(t);
	tasklet_clear_sched(t);
}
EXPORT_SYMBOL(tasklet_kill);
893 
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
 * Clear TASKLET_STATE_RUN of @t, with memory barriers on both sides of the
 * bit operation, and wake anybody waiting on t->state.
 */
void tasklet_unlock(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	clear_bit(TASKLET_STATE_RUN, &t->state);
	smp_mb__after_atomic();
	wake_up_var(&t->state);
}
EXPORT_SYMBOL_GPL(tasklet_unlock);

/* Wait (via wait_var_event()) until TASKLET_STATE_RUN of @t is clear. */
void tasklet_unlock_wait(struct tasklet_struct *t)
{
	wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
}
EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
#endif
910 
911 void __init softirq_init(void)
912 {
913 	int cpu;
914 
915 	for_each_possible_cpu(cpu) {
916 		per_cpu(tasklet_vec, cpu).tail =
917 			&per_cpu(tasklet_vec, cpu).head;
918 		per_cpu(tasklet_hi_vec, cpu).tail =
919 			&per_cpu(tasklet_hi_vec, cpu).head;
920 	}
921 
922 	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
923 	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
924 }
925 
/*
 * .thread_should_run callback of softirq_threads: non-zero while softirqs
 * are pending on this CPU. @cpu is not used.
 */
static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}
930 
931 static void run_ksoftirqd(unsigned int cpu)
932 {
933 	ksoftirqd_run_begin();
934 	if (local_softirq_pending()) {
935 		/*
936 		 * We can safely run softirq on inline stack, as we are not deep
937 		 * in the task stack here.
938 		 */
939 		__do_softirq();
940 		ksoftirqd_run_end();
941 		cond_resched();
942 		return;
943 	}
944 	ksoftirqd_run_end();
945 }
946 
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Move the tasklets still queued on @cpu to the lists of the current CPU
 * and raise both tasklet softirqs here. Passed to
 * cpuhp_setup_state_nocalls() for CPUHP_SOFTIRQ_DEAD in spawn_ksoftirqd().
 *
 * Always returns 0.
 */
static int takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		__this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		/* Leave the dead CPU's list empty */
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	/* Same again for the high priority list */
	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
	return 0;
}
#else
/* Without CPU hotplug no takeover callback is registered */
#define takeover_tasklets	NULL
#endif /* CONFIG_HOTPLUG_CPU */
976 
/*
 * Description of the per-CPU "ksoftirqd/%u" threads, registered through
 * smpboot_register_percpu_thread() in spawn_ksoftirqd().
 */
static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};
983 
/*
 * Early initcall: register takeover_tasklets() for the CPUHP_SOFTIRQ_DEAD
 * hotplug state and register the per-CPU ksoftirqd threads. A failure to
 * register the threads is fatal (BUG_ON). Returns 0.
 */
static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);
993 
/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

/* Weak default: does nothing and returns 0. */
int __init __weak early_irq_init(void)
{
	return 0;
}
1003 
/* Weak default: returns NR_IRQS_LEGACY. */
int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}
1008 
/* Weak default: does nothing and returns 0. */
int __init __weak arch_early_irq_init(void)
{
	return 0;
}
1013 
/* Weak default: returns @from unchanged. */
unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}
1018