xref: /openbmc/linux/kernel/softirq.c (revision 9144f784f852f9a125cabe9927b986d909bfa439)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   *	linux/kernel/softirq.c
4   *
5   *	Copyright (C) 1992 Linus Torvalds
6   *
7   *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
8   */
9  
10  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11  
12  #include <linux/export.h>
13  #include <linux/kernel_stat.h>
14  #include <linux/interrupt.h>
15  #include <linux/init.h>
16  #include <linux/local_lock.h>
17  #include <linux/mm.h>
18  #include <linux/notifier.h>
19  #include <linux/percpu.h>
20  #include <linux/cpu.h>
21  #include <linux/freezer.h>
22  #include <linux/kthread.h>
23  #include <linux/rcupdate.h>
24  #include <linux/ftrace.h>
25  #include <linux/smp.h>
26  #include <linux/smpboot.h>
27  #include <linux/tick.h>
28  #include <linux/irq.h>
29  #include <linux/wait_bit.h>
30  
31  #include <asm/softirq_stack.h>
32  
33  #define CREATE_TRACE_POINTS
34  #include <trace/events/irq.h>
35  
36  /*
37     - No shared variables, all the data are CPU local.
38     - If a softirq needs serialization, let it serialize itself
39       by its own spinlocks.
40     - Even if softirq is serialized, only local cpu is marked for
41       execution. Hence, we get something sort of weak cpu binding.
42       Though it is still not clear, will it result in better locality
43       or will not.
44  
45     Examples:
46     - NET RX softirq. It is multithreaded and does not require
47       any global serialization.
48     - NET TX softirq. It kicks software netdevice queues, hence
49       it is logically serialized per device, but this serialization
50       is invisible to common code.
51     - Tasklets: serialized wrt itself.
52   */
53  
54  #ifndef __ARCH_IRQ_STAT
55  DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
56  EXPORT_PER_CPU_SYMBOL(irq_stat);
57  #endif
58  
59  static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
60  
61  DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
62  
63  const char * const softirq_to_name[NR_SOFTIRQS] = {
64  	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
65  	"TASKLET", "SCHED", "HRTIMER", "RCU"
66  };
67  
68  /*
69   * we cannot loop indefinitely here to avoid userspace starvation,
70   * but we also don't want to introduce a worst case 1/HZ latency
71   * to the pending events, so lets the scheduler to balance
72   * the softirq load for us.
73   */
wakeup_softirqd(void)74  static void wakeup_softirqd(void)
75  {
76  	/* Interrupts are disabled: no need to stop preemption */
77  	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
78  
79  	if (tsk)
80  		wake_up_process(tsk);
81  }
82  
83  #ifdef CONFIG_TRACE_IRQFLAGS
84  DEFINE_PER_CPU(int, hardirqs_enabled);
85  DEFINE_PER_CPU(int, hardirq_context);
86  EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
87  EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
88  #endif
89  
90  /*
91   * SOFTIRQ_OFFSET usage:
92   *
93   * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
94   * to a per CPU counter and to task::softirqs_disabled_cnt.
95   *
96   * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
97   *   processing.
98   *
99   * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
100   *   on local_bh_disable or local_bh_enable.
101   *
102   * This lets us distinguish between whether we are currently processing
103   * softirq and whether we just have bh disabled.
104   */
105  #ifdef CONFIG_PREEMPT_RT
106  
107  /*
108   * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and
109   * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
110   * softirq disabled section to be preempted.
111   *
112   * The per task counter is used for softirq_count(), in_softirq() and
113   * in_serving_softirqs() because these counts are only valid when the task
114   * holding softirq_ctrl::lock is running.
115   *
116   * The per CPU counter prevents pointless wakeups of ksoftirqd in case that
117   * the task which is in a softirq disabled section is preempted or blocks.
118   */
119  struct softirq_ctrl {
120  	local_lock_t	lock;
121  	int		cnt;
122  };
123  
124  static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
125  	.lock	= INIT_LOCAL_LOCK(softirq_ctrl.lock),
126  };
127  
128  /**
129   * local_bh_blocked() - Check for idle whether BH processing is blocked
130   *
131   * Returns false if the per CPU softirq::cnt is 0 otherwise true.
132   *
133   * This is invoked from the idle task to guard against false positive
134   * softirq pending warnings, which would happen when the task which holds
135   * softirq_ctrl::lock was the only running task on the CPU and blocks on
136   * some other lock.
137   */
local_bh_blocked(void)138  bool local_bh_blocked(void)
139  {
140  	return __this_cpu_read(softirq_ctrl.cnt) != 0;
141  }
142  
__local_bh_disable_ip(unsigned long ip,unsigned int cnt)143  void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
144  {
145  	unsigned long flags;
146  	int newcnt;
147  
148  	WARN_ON_ONCE(in_hardirq());
149  
150  	/* First entry of a task into a BH disabled section? */
151  	if (!current->softirq_disable_cnt) {
152  		if (preemptible()) {
153  			local_lock(&softirq_ctrl.lock);
154  			/* Required to meet the RCU bottomhalf requirements. */
155  			rcu_read_lock();
156  		} else {
157  			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
158  		}
159  	}
160  
161  	/*
162  	 * Track the per CPU softirq disabled state. On RT this is per CPU
163  	 * state to allow preemption of bottom half disabled sections.
164  	 */
165  	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
166  	/*
167  	 * Reflect the result in the task state to prevent recursion on the
168  	 * local lock and to make softirq_count() & al work.
169  	 */
170  	current->softirq_disable_cnt = newcnt;
171  
172  	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
173  		raw_local_irq_save(flags);
174  		lockdep_softirqs_off(ip);
175  		raw_local_irq_restore(flags);
176  	}
177  }
178  EXPORT_SYMBOL(__local_bh_disable_ip);
179  
__local_bh_enable(unsigned int cnt,bool unlock)180  static void __local_bh_enable(unsigned int cnt, bool unlock)
181  {
182  	unsigned long flags;
183  	int newcnt;
184  
185  	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
186  			    this_cpu_read(softirq_ctrl.cnt));
187  
188  	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
189  		raw_local_irq_save(flags);
190  		lockdep_softirqs_on(_RET_IP_);
191  		raw_local_irq_restore(flags);
192  	}
193  
194  	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
195  	current->softirq_disable_cnt = newcnt;
196  
197  	if (!newcnt && unlock) {
198  		rcu_read_unlock();
199  		local_unlock(&softirq_ctrl.lock);
200  	}
201  }
202  
__local_bh_enable_ip(unsigned long ip,unsigned int cnt)203  void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
204  {
205  	bool preempt_on = preemptible();
206  	unsigned long flags;
207  	u32 pending;
208  	int curcnt;
209  
210  	WARN_ON_ONCE(in_hardirq());
211  	lockdep_assert_irqs_enabled();
212  
213  	local_irq_save(flags);
214  	curcnt = __this_cpu_read(softirq_ctrl.cnt);
215  
216  	/*
217  	 * If this is not reenabling soft interrupts, no point in trying to
218  	 * run pending ones.
219  	 */
220  	if (curcnt != cnt)
221  		goto out;
222  
223  	pending = local_softirq_pending();
224  	if (!pending)
225  		goto out;
226  
227  	/*
228  	 * If this was called from non preemptible context, wake up the
229  	 * softirq daemon.
230  	 */
231  	if (!preempt_on) {
232  		wakeup_softirqd();
233  		goto out;
234  	}
235  
236  	/*
237  	 * Adjust softirq count to SOFTIRQ_OFFSET which makes
238  	 * in_serving_softirq() become true.
239  	 */
240  	cnt = SOFTIRQ_OFFSET;
241  	__local_bh_enable(cnt, false);
242  	__do_softirq();
243  
244  out:
245  	__local_bh_enable(cnt, preempt_on);
246  	local_irq_restore(flags);
247  }
248  EXPORT_SYMBOL(__local_bh_enable_ip);
249  
250  /*
251   * Invoked from ksoftirqd_run() outside of the interrupt disabled section
252   * to acquire the per CPU local lock for reentrancy protection.
253   */
ksoftirqd_run_begin(void)254  static inline void ksoftirqd_run_begin(void)
255  {
256  	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
257  	local_irq_disable();
258  }
259  
260  /* Counterpart to ksoftirqd_run_begin() */
ksoftirqd_run_end(void)261  static inline void ksoftirqd_run_end(void)
262  {
263  	__local_bh_enable(SOFTIRQ_OFFSET, true);
264  	WARN_ON_ONCE(in_interrupt());
265  	local_irq_enable();
266  }
267  
softirq_handle_begin(void)268  static inline void softirq_handle_begin(void) { }
softirq_handle_end(void)269  static inline void softirq_handle_end(void) { }
270  
should_wake_ksoftirqd(void)271  static inline bool should_wake_ksoftirqd(void)
272  {
273  	return !this_cpu_read(softirq_ctrl.cnt);
274  }
275  
invoke_softirq(void)276  static inline void invoke_softirq(void)
277  {
278  	if (should_wake_ksoftirqd())
279  		wakeup_softirqd();
280  }
281  
282  #define SCHED_SOFTIRQ_MASK	BIT(SCHED_SOFTIRQ)
283  
284  /*
285   * flush_smp_call_function_queue() can raise a soft interrupt in a function
286   * call. On RT kernels this is undesired and the only known functionalities
287   * are in the block layer which is disabled on RT, and in the scheduler for
288   * idle load balancing. If soft interrupts get raised which haven't been
289   * raised before the flush, warn if it is not a SCHED_SOFTIRQ so it can be
290   * investigated.
291   */
do_softirq_post_smp_call_flush(unsigned int was_pending)292  void do_softirq_post_smp_call_flush(unsigned int was_pending)
293  {
294  	unsigned int is_pending = local_softirq_pending();
295  
296  	if (unlikely(was_pending != is_pending)) {
297  		WARN_ON_ONCE(was_pending != (is_pending & ~SCHED_SOFTIRQ_MASK));
298  		invoke_softirq();
299  	}
300  }
301  
302  #else /* CONFIG_PREEMPT_RT */
303  
304  /*
305   * This one is for softirq.c-internal use, where hardirqs are disabled
306   * legitimately:
307   */
308  #ifdef CONFIG_TRACE_IRQFLAGS
__local_bh_disable_ip(unsigned long ip,unsigned int cnt)309  void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
310  {
311  	unsigned long flags;
312  
313  	WARN_ON_ONCE(in_hardirq());
314  
315  	raw_local_irq_save(flags);
316  	/*
317  	 * The preempt tracer hooks into preempt_count_add and will break
318  	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
319  	 * is set and before current->softirq_enabled is cleared.
320  	 * We must manually increment preempt_count here and manually
321  	 * call the trace_preempt_off later.
322  	 */
323  	__preempt_count_add(cnt);
324  	/*
325  	 * Were softirqs turned off above:
326  	 */
327  	if (softirq_count() == (cnt & SOFTIRQ_MASK))
328  		lockdep_softirqs_off(ip);
329  	raw_local_irq_restore(flags);
330  
331  	if (preempt_count() == cnt) {
332  #ifdef CONFIG_DEBUG_PREEMPT
333  		current->preempt_disable_ip = get_lock_parent_ip();
334  #endif
335  		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
336  	}
337  }
338  EXPORT_SYMBOL(__local_bh_disable_ip);
339  #endif /* CONFIG_TRACE_IRQFLAGS */
340  
__local_bh_enable(unsigned int cnt)341  static void __local_bh_enable(unsigned int cnt)
342  {
343  	lockdep_assert_irqs_disabled();
344  
345  	if (preempt_count() == cnt)
346  		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
347  
348  	if (softirq_count() == (cnt & SOFTIRQ_MASK))
349  		lockdep_softirqs_on(_RET_IP_);
350  
351  	__preempt_count_sub(cnt);
352  }
353  
354  /*
355   * Special-case - softirqs can safely be enabled by __do_softirq(),
356   * without processing still-pending softirqs:
357   */
_local_bh_enable(void)358  void _local_bh_enable(void)
359  {
360  	WARN_ON_ONCE(in_hardirq());
361  	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
362  }
363  EXPORT_SYMBOL(_local_bh_enable);
364  
__local_bh_enable_ip(unsigned long ip,unsigned int cnt)365  void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
366  {
367  	WARN_ON_ONCE(in_hardirq());
368  	lockdep_assert_irqs_enabled();
369  #ifdef CONFIG_TRACE_IRQFLAGS
370  	local_irq_disable();
371  #endif
372  	/*
373  	 * Are softirqs going to be turned on now:
374  	 */
375  	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
376  		lockdep_softirqs_on(ip);
377  	/*
378  	 * Keep preemption disabled until we are done with
379  	 * softirq processing:
380  	 */
381  	__preempt_count_sub(cnt - 1);
382  
383  	if (unlikely(!in_interrupt() && local_softirq_pending())) {
384  		/*
385  		 * Run softirq if any pending. And do it in its own stack
386  		 * as we may be calling this deep in a task call stack already.
387  		 */
388  		do_softirq();
389  	}
390  
391  	preempt_count_dec();
392  #ifdef CONFIG_TRACE_IRQFLAGS
393  	local_irq_enable();
394  #endif
395  	preempt_check_resched();
396  }
397  EXPORT_SYMBOL(__local_bh_enable_ip);
398  
softirq_handle_begin(void)399  static inline void softirq_handle_begin(void)
400  {
401  	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
402  }
403  
softirq_handle_end(void)404  static inline void softirq_handle_end(void)
405  {
406  	__local_bh_enable(SOFTIRQ_OFFSET);
407  	WARN_ON_ONCE(in_interrupt());
408  }
409  
ksoftirqd_run_begin(void)410  static inline void ksoftirqd_run_begin(void)
411  {
412  	local_irq_disable();
413  }
414  
ksoftirqd_run_end(void)415  static inline void ksoftirqd_run_end(void)
416  {
417  	local_irq_enable();
418  }
419  
should_wake_ksoftirqd(void)420  static inline bool should_wake_ksoftirqd(void)
421  {
422  	return true;
423  }
424  
invoke_softirq(void)425  static inline void invoke_softirq(void)
426  {
427  	if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
428  #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
429  		/*
430  		 * We can safely execute softirq on the current stack if
431  		 * it is the irq stack, because it should be near empty
432  		 * at this stage.
433  		 */
434  		__do_softirq();
435  #else
436  		/*
437  		 * Otherwise, irq_exit() is called on the task stack that can
438  		 * be potentially deep already. So call softirq in its own stack
439  		 * to prevent from any overrun.
440  		 */
441  		do_softirq_own_stack();
442  #endif
443  	} else {
444  		wakeup_softirqd();
445  	}
446  }
447  
do_softirq(void)448  asmlinkage __visible void do_softirq(void)
449  {
450  	__u32 pending;
451  	unsigned long flags;
452  
453  	if (in_interrupt())
454  		return;
455  
456  	local_irq_save(flags);
457  
458  	pending = local_softirq_pending();
459  
460  	if (pending)
461  		do_softirq_own_stack();
462  
463  	local_irq_restore(flags);
464  }
465  
466  #endif /* !CONFIG_PREEMPT_RT */
467  
468  /*
469   * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
470   * but break the loop if need_resched() is set or after 2 ms.
471   * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
472   * certain cases, such as stop_machine(), jiffies may cease to
473   * increment and so we need the MAX_SOFTIRQ_RESTART limit as
474   * well to make sure we eventually return from this method.
475   *
476   * These limits have been established via experimentation.
477   * The two things to balance is latency against fairness -
478   * we want to handle softirqs as soon as possible, but they
479   * should not be able to lock up the box.
480   */
481  #define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
482  #define MAX_SOFTIRQ_RESTART 10
483  
484  #ifdef CONFIG_TRACE_IRQFLAGS
485  /*
486   * When we run softirqs from irq_exit() and thus on the hardirq stack we need
487   * to keep the lockdep irq context tracking as tight as possible in order to
488   * not miss-qualify lock contexts and miss possible deadlocks.
489   */
490  
lockdep_softirq_start(void)491  static inline bool lockdep_softirq_start(void)
492  {
493  	bool in_hardirq = false;
494  
495  	if (lockdep_hardirq_context()) {
496  		in_hardirq = true;
497  		lockdep_hardirq_exit();
498  	}
499  
500  	lockdep_softirq_enter();
501  
502  	return in_hardirq;
503  }
504  
lockdep_softirq_end(bool in_hardirq)505  static inline void lockdep_softirq_end(bool in_hardirq)
506  {
507  	lockdep_softirq_exit();
508  
509  	if (in_hardirq)
510  		lockdep_hardirq_enter();
511  }
512  #else
lockdep_softirq_start(void)513  static inline bool lockdep_softirq_start(void) { return false; }
lockdep_softirq_end(bool in_hardirq)514  static inline void lockdep_softirq_end(bool in_hardirq) { }
515  #endif
516  
handle_softirqs(bool ksirqd)517  static void handle_softirqs(bool ksirqd)
518  {
519  	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
520  	unsigned long old_flags = current->flags;
521  	int max_restart = MAX_SOFTIRQ_RESTART;
522  	struct softirq_action *h;
523  	bool in_hardirq;
524  	__u32 pending;
525  	int softirq_bit;
526  
527  	/*
528  	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
529  	 * softirq. A softirq handled, such as network RX, might set PF_MEMALLOC
530  	 * again if the socket is related to swapping.
531  	 */
532  	current->flags &= ~PF_MEMALLOC;
533  
534  	pending = local_softirq_pending();
535  
536  	softirq_handle_begin();
537  	in_hardirq = lockdep_softirq_start();
538  	account_softirq_enter(current);
539  
540  restart:
541  	/* Reset the pending bitmask before enabling irqs */
542  	set_softirq_pending(0);
543  
544  	local_irq_enable();
545  
546  	h = softirq_vec;
547  
548  	while ((softirq_bit = ffs(pending))) {
549  		unsigned int vec_nr;
550  		int prev_count;
551  
552  		h += softirq_bit - 1;
553  
554  		vec_nr = h - softirq_vec;
555  		prev_count = preempt_count();
556  
557  		kstat_incr_softirqs_this_cpu(vec_nr);
558  
559  		trace_softirq_entry(vec_nr);
560  		h->action(h);
561  		trace_softirq_exit(vec_nr);
562  		if (unlikely(prev_count != preempt_count())) {
563  			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
564  			       vec_nr, softirq_to_name[vec_nr], h->action,
565  			       prev_count, preempt_count());
566  			preempt_count_set(prev_count);
567  		}
568  		h++;
569  		pending >>= softirq_bit;
570  	}
571  
572  	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd)
573  		rcu_softirq_qs();
574  
575  	local_irq_disable();
576  
577  	pending = local_softirq_pending();
578  	if (pending) {
579  		if (time_before(jiffies, end) && !need_resched() &&
580  		    --max_restart)
581  			goto restart;
582  
583  		wakeup_softirqd();
584  	}
585  
586  	account_softirq_exit(current);
587  	lockdep_softirq_end(in_hardirq);
588  	softirq_handle_end();
589  	current_restore_flags(old_flags, PF_MEMALLOC);
590  }
591  
__do_softirq(void)592  asmlinkage __visible void __softirq_entry __do_softirq(void)
593  {
594  	handle_softirqs(false);
595  }
596  
597  /**
598   * irq_enter_rcu - Enter an interrupt context with RCU watching
599   */
irq_enter_rcu(void)600  void irq_enter_rcu(void)
601  {
602  	__irq_enter_raw();
603  
604  	if (tick_nohz_full_cpu(smp_processor_id()) ||
605  	    (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
606  		tick_irq_enter();
607  
608  	account_hardirq_enter(current);
609  }
610  
611  /**
612   * irq_enter - Enter an interrupt context including RCU update
613   */
irq_enter(void)614  void irq_enter(void)
615  {
616  	ct_irq_enter();
617  	irq_enter_rcu();
618  }
619  
tick_irq_exit(void)620  static inline void tick_irq_exit(void)
621  {
622  #ifdef CONFIG_NO_HZ_COMMON
623  	int cpu = smp_processor_id();
624  
625  	/* Make sure that timer wheel updates are propagated */
626  	if ((sched_core_idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
627  		if (!in_hardirq())
628  			tick_nohz_irq_exit();
629  	}
630  #endif
631  }
632  
__irq_exit_rcu(void)633  static inline void __irq_exit_rcu(void)
634  {
635  #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
636  	local_irq_disable();
637  #else
638  	lockdep_assert_irqs_disabled();
639  #endif
640  	account_hardirq_exit(current);
641  	preempt_count_sub(HARDIRQ_OFFSET);
642  	if (!in_interrupt() && local_softirq_pending())
643  		invoke_softirq();
644  
645  	tick_irq_exit();
646  }
647  
648  /**
649   * irq_exit_rcu() - Exit an interrupt context without updating RCU
650   *
651   * Also processes softirqs if needed and possible.
652   */
irq_exit_rcu(void)653  void irq_exit_rcu(void)
654  {
655  	__irq_exit_rcu();
656  	 /* must be last! */
657  	lockdep_hardirq_exit();
658  }
659  
660  /**
661   * irq_exit - Exit an interrupt context, update RCU and lockdep
662   *
663   * Also processes softirqs if needed and possible.
664   */
irq_exit(void)665  void irq_exit(void)
666  {
667  	__irq_exit_rcu();
668  	ct_irq_exit();
669  	 /* must be last! */
670  	lockdep_hardirq_exit();
671  }
672  
673  /*
674   * This function must run with irqs disabled!
675   */
raise_softirq_irqoff(unsigned int nr)676  inline void raise_softirq_irqoff(unsigned int nr)
677  {
678  	__raise_softirq_irqoff(nr);
679  
680  	/*
681  	 * If we're in an interrupt or softirq, we're done
682  	 * (this also catches softirq-disabled code). We will
683  	 * actually run the softirq once we return from
684  	 * the irq or softirq.
685  	 *
686  	 * Otherwise we wake up ksoftirqd to make sure we
687  	 * schedule the softirq soon.
688  	 */
689  	if (!in_interrupt() && should_wake_ksoftirqd())
690  		wakeup_softirqd();
691  }
692  
raise_softirq(unsigned int nr)693  void raise_softirq(unsigned int nr)
694  {
695  	unsigned long flags;
696  
697  	local_irq_save(flags);
698  	raise_softirq_irqoff(nr);
699  	local_irq_restore(flags);
700  }
701  
__raise_softirq_irqoff(unsigned int nr)702  void __raise_softirq_irqoff(unsigned int nr)
703  {
704  	lockdep_assert_irqs_disabled();
705  	trace_softirq_raise(nr);
706  	or_softirq_pending(1UL << nr);
707  }
708  
open_softirq(int nr,void (* action)(struct softirq_action *))709  void open_softirq(int nr, void (*action)(struct softirq_action *))
710  {
711  	softirq_vec[nr].action = action;
712  }
713  
714  /*
715   * Tasklets
716   */
717  struct tasklet_head {
718  	struct tasklet_struct *head;
719  	struct tasklet_struct **tail;
720  };
721  
722  static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
723  static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
724  
__tasklet_schedule_common(struct tasklet_struct * t,struct tasklet_head __percpu * headp,unsigned int softirq_nr)725  static void __tasklet_schedule_common(struct tasklet_struct *t,
726  				      struct tasklet_head __percpu *headp,
727  				      unsigned int softirq_nr)
728  {
729  	struct tasklet_head *head;
730  	unsigned long flags;
731  
732  	local_irq_save(flags);
733  	head = this_cpu_ptr(headp);
734  	t->next = NULL;
735  	*head->tail = t;
736  	head->tail = &(t->next);
737  	raise_softirq_irqoff(softirq_nr);
738  	local_irq_restore(flags);
739  }
740  
__tasklet_schedule(struct tasklet_struct * t)741  void __tasklet_schedule(struct tasklet_struct *t)
742  {
743  	__tasklet_schedule_common(t, &tasklet_vec,
744  				  TASKLET_SOFTIRQ);
745  }
746  EXPORT_SYMBOL(__tasklet_schedule);
747  
__tasklet_hi_schedule(struct tasklet_struct * t)748  void __tasklet_hi_schedule(struct tasklet_struct *t)
749  {
750  	__tasklet_schedule_common(t, &tasklet_hi_vec,
751  				  HI_SOFTIRQ);
752  }
753  EXPORT_SYMBOL(__tasklet_hi_schedule);
754  
tasklet_clear_sched(struct tasklet_struct * t)755  static bool tasklet_clear_sched(struct tasklet_struct *t)
756  {
757  	if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
758  		wake_up_var(&t->state);
759  		return true;
760  	}
761  
762  	WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
763  		  t->use_callback ? "callback" : "func",
764  		  t->use_callback ? (void *)t->callback : (void *)t->func);
765  
766  	return false;
767  }
768  
tasklet_action_common(struct softirq_action * a,struct tasklet_head * tl_head,unsigned int softirq_nr)769  static void tasklet_action_common(struct softirq_action *a,
770  				  struct tasklet_head *tl_head,
771  				  unsigned int softirq_nr)
772  {
773  	struct tasklet_struct *list;
774  
775  	local_irq_disable();
776  	list = tl_head->head;
777  	tl_head->head = NULL;
778  	tl_head->tail = &tl_head->head;
779  	local_irq_enable();
780  
781  	while (list) {
782  		struct tasklet_struct *t = list;
783  
784  		list = list->next;
785  
786  		if (tasklet_trylock(t)) {
787  			if (!atomic_read(&t->count)) {
788  				if (tasklet_clear_sched(t)) {
789  					if (t->use_callback) {
790  						trace_tasklet_entry(t, t->callback);
791  						t->callback(t);
792  						trace_tasklet_exit(t, t->callback);
793  					} else {
794  						trace_tasklet_entry(t, t->func);
795  						t->func(t->data);
796  						trace_tasklet_exit(t, t->func);
797  					}
798  				}
799  				tasklet_unlock(t);
800  				continue;
801  			}
802  			tasklet_unlock(t);
803  		}
804  
805  		local_irq_disable();
806  		t->next = NULL;
807  		*tl_head->tail = t;
808  		tl_head->tail = &t->next;
809  		__raise_softirq_irqoff(softirq_nr);
810  		local_irq_enable();
811  	}
812  }
813  
tasklet_action(struct softirq_action * a)814  static __latent_entropy void tasklet_action(struct softirq_action *a)
815  {
816  	tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
817  }
818  
tasklet_hi_action(struct softirq_action * a)819  static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
820  {
821  	tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
822  }
823  
tasklet_setup(struct tasklet_struct * t,void (* callback)(struct tasklet_struct *))824  void tasklet_setup(struct tasklet_struct *t,
825  		   void (*callback)(struct tasklet_struct *))
826  {
827  	t->next = NULL;
828  	t->state = 0;
829  	atomic_set(&t->count, 0);
830  	t->callback = callback;
831  	t->use_callback = true;
832  	t->data = 0;
833  }
834  EXPORT_SYMBOL(tasklet_setup);
835  
tasklet_init(struct tasklet_struct * t,void (* func)(unsigned long),unsigned long data)836  void tasklet_init(struct tasklet_struct *t,
837  		  void (*func)(unsigned long), unsigned long data)
838  {
839  	t->next = NULL;
840  	t->state = 0;
841  	atomic_set(&t->count, 0);
842  	t->func = func;
843  	t->use_callback = false;
844  	t->data = data;
845  }
846  EXPORT_SYMBOL(tasklet_init);
847  
848  #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
849  /*
850   * Do not use in new code. Waiting for tasklets from atomic contexts is
851   * error prone and should be avoided.
852   */
tasklet_unlock_spin_wait(struct tasklet_struct * t)853  void tasklet_unlock_spin_wait(struct tasklet_struct *t)
854  {
855  	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
856  		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
857  			/*
858  			 * Prevent a live lock when current preempted soft
859  			 * interrupt processing or prevents ksoftirqd from
860  			 * running. If the tasklet runs on a different CPU
861  			 * then this has no effect other than doing the BH
862  			 * disable/enable dance for nothing.
863  			 */
864  			local_bh_disable();
865  			local_bh_enable();
866  		} else {
867  			cpu_relax();
868  		}
869  	}
870  }
871  EXPORT_SYMBOL(tasklet_unlock_spin_wait);
872  #endif
873  
tasklet_kill(struct tasklet_struct * t)874  void tasklet_kill(struct tasklet_struct *t)
875  {
876  	if (in_interrupt())
877  		pr_notice("Attempt to kill tasklet from interrupt\n");
878  
879  	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
880  		wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));
881  
882  	tasklet_unlock_wait(t);
883  	tasklet_clear_sched(t);
884  }
885  EXPORT_SYMBOL(tasklet_kill);
886  
887  #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
tasklet_unlock(struct tasklet_struct * t)888  void tasklet_unlock(struct tasklet_struct *t)
889  {
890  	smp_mb__before_atomic();
891  	clear_bit(TASKLET_STATE_RUN, &t->state);
892  	smp_mb__after_atomic();
893  	wake_up_var(&t->state);
894  }
895  EXPORT_SYMBOL_GPL(tasklet_unlock);
896  
tasklet_unlock_wait(struct tasklet_struct * t)897  void tasklet_unlock_wait(struct tasklet_struct *t)
898  {
899  	wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
900  }
901  EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
902  #endif
903  
softirq_init(void)904  void __init softirq_init(void)
905  {
906  	int cpu;
907  
908  	for_each_possible_cpu(cpu) {
909  		per_cpu(tasklet_vec, cpu).tail =
910  			&per_cpu(tasklet_vec, cpu).head;
911  		per_cpu(tasklet_hi_vec, cpu).tail =
912  			&per_cpu(tasklet_hi_vec, cpu).head;
913  	}
914  
915  	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
916  	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
917  }
918  
ksoftirqd_should_run(unsigned int cpu)919  static int ksoftirqd_should_run(unsigned int cpu)
920  {
921  	return local_softirq_pending();
922  }
923  
run_ksoftirqd(unsigned int cpu)924  static void run_ksoftirqd(unsigned int cpu)
925  {
926  	ksoftirqd_run_begin();
927  	if (local_softirq_pending()) {
928  		/*
929  		 * We can safely run softirq on inline stack, as we are not deep
930  		 * in the task stack here.
931  		 */
932  		handle_softirqs(true);
933  		ksoftirqd_run_end();
934  		cond_resched();
935  		return;
936  	}
937  	ksoftirqd_run_end();
938  }
939  
940  #ifdef CONFIG_HOTPLUG_CPU
takeover_tasklets(unsigned int cpu)941  static int takeover_tasklets(unsigned int cpu)
942  {
943  	/* CPU is dead, so no lock needed. */
944  	local_irq_disable();
945  
946  	/* Find end, append list for that CPU. */
947  	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
948  		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
949  		__this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
950  		per_cpu(tasklet_vec, cpu).head = NULL;
951  		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
952  	}
953  	raise_softirq_irqoff(TASKLET_SOFTIRQ);
954  
955  	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
956  		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
957  		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
958  		per_cpu(tasklet_hi_vec, cpu).head = NULL;
959  		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
960  	}
961  	raise_softirq_irqoff(HI_SOFTIRQ);
962  
963  	local_irq_enable();
964  	return 0;
965  }
966  #else
967  #define takeover_tasklets	NULL
968  #endif /* CONFIG_HOTPLUG_CPU */
969  
970  static struct smp_hotplug_thread softirq_threads = {
971  	.store			= &ksoftirqd,
972  	.thread_should_run	= ksoftirqd_should_run,
973  	.thread_fn		= run_ksoftirqd,
974  	.thread_comm		= "ksoftirqd/%u",
975  };
976  
spawn_ksoftirqd(void)977  static __init int spawn_ksoftirqd(void)
978  {
979  	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
980  				  takeover_tasklets);
981  	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
982  
983  	return 0;
984  }
985  early_initcall(spawn_ksoftirqd);
986  
987  /*
988   * [ These __weak aliases are kept in a separate compilation unit, so that
989   *   GCC does not inline them incorrectly. ]
990   */
991  
early_irq_init(void)992  int __init __weak early_irq_init(void)
993  {
994  	return 0;
995  }
996  
arch_probe_nr_irqs(void)997  int __init __weak arch_probe_nr_irqs(void)
998  {
999  	return NR_IRQS_LEGACY;
1000  }
1001  
arch_early_irq_init(void)1002  int __init __weak arch_early_irq_init(void)
1003  {
1004  	return 0;
1005  }
1006  
arch_dynirq_lower_bound(unsigned int from)1007  unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
1008  {
1009  	return from;
1010  }
1011