// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/local_lock.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/wait_bit.h>

#include <asm/softirq_stack.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether this will result in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */
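
/*
 * Illustrative sketch only (not part of this file): a softirq handler that
 * serializes itself with its own spinlock, as described above. All names
 * (my_queue_lock, my_queue, my_softirq_action) are hypothetical.
 *
 *	static DEFINE_SPINLOCK(my_queue_lock);
 *	static LIST_HEAD(my_queue);
 *
 *	static void my_softirq_action(struct softirq_action *h)
 *	{
 *		spin_lock(&my_queue_lock);
 *		// drain my_queue; producers on other CPUs take the same lock
 *		spin_unlock(&my_queue_lock);
 *	}
 */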

#ifndef __ARCH_IRQ_STAT
DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

const char * const softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk)
		wake_up_process(tsk);
}

#ifdef CONFIG_TRACE_IRQFLAGS
DEFINE_PER_CPU(int, hardirqs_enabled);
DEFINE_PER_CPU(int, hardirq_context);
EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
#endif

/*
 * SOFTIRQ_OFFSET usage:
 *
 * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
 * to a per CPU counter and to task::softirqs_disabled_cnt.
 *
 * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
 *   processing.
 *
 * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 *
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */
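
/*
 * Illustrative sketch only (not part of this file): how the two offsets are
 * observed by the generic helpers from <linux/preempt.h> and
 * <linux/bottom_half.h>.
 *
 *	local_bh_disable();		// count += SOFTIRQ_DISABLE_OFFSET
 *	WARN_ON(!in_softirq());		// BH disabled -> true
 *	WARN_ON(in_serving_softirq());	// but not *serving* a softirq
 *	local_bh_enable();
 *
 * Inside a softirq handler the count was changed by SOFTIRQ_OFFSET instead,
 * so there both in_softirq() and in_serving_softirq() are true.
 */
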
#ifdef CONFIG_PREEMPT_RT

/*
 * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and
 * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
 * softirq disabled section to be preempted.
 *
 * The per task counter is used for softirq_count(), in_softirq() and
 * in_serving_softirq() because these counts are only valid when the task
 * holding softirq_ctrl::lock is running.
 *
 * The per CPU counter prevents pointless wakeups of ksoftirqd in case that
 * the task which is in a softirq disabled section is preempted or blocks.
 */
struct softirq_ctrl {
	local_lock_t	lock;
	int		cnt;
};

static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
	.lock	= INIT_LOCAL_LOCK(softirq_ctrl.lock),
};

/**
 * local_bh_blocked() - Check from idle whether BH processing is blocked
 *
 * Returns false if the per CPU softirq_ctrl::cnt is 0, otherwise true.
 *
 * This is invoked from the idle task to guard against false positive
 * softirq pending warnings, which would happen when the task which holds
 * softirq_ctrl::lock was the only running task on the CPU and blocks on
 * some other lock.
 */
bool local_bh_blocked(void)
{
	return __this_cpu_read(softirq_ctrl.cnt) != 0;
}

void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;
	int newcnt;

	WARN_ON_ONCE(in_hardirq());

	/* First entry of a task into a BH disabled section? */
	if (!current->softirq_disable_cnt) {
		if (preemptible()) {
			local_lock(&softirq_ctrl.lock);
			/* Required to meet the RCU bottomhalf requirements. */
			rcu_read_lock();
		} else {
			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
		}
	}

	/*
	 * Track the per CPU softirq disabled state. On RT this is per CPU
	 * state to allow preemption of bottom half disabled sections.
	 */
	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
	/*
	 * Reflect the result in the task state to prevent recursion on the
	 * local lock and to make softirq_count() et al. work.
	 */
	current->softirq_disable_cnt = newcnt;

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_off(ip);
		raw_local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);

static void __local_bh_enable(unsigned int cnt, bool unlock)
{
	unsigned long flags;
	int newcnt;

	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
			    this_cpu_read(softirq_ctrl.cnt));

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_on(_RET_IP_);
		raw_local_irq_restore(flags);
	}

	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
	current->softirq_disable_cnt = newcnt;

	if (!newcnt && unlock) {
		rcu_read_unlock();
		local_unlock(&softirq_ctrl.lock);
	}
}

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	bool preempt_on = preemptible();
	unsigned long flags;
	u32 pending;
	int curcnt;

	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();

	local_irq_save(flags);
	curcnt = __this_cpu_read(softirq_ctrl.cnt);

	/*
	 * If this is not reenabling soft interrupts, no point in trying to
	 * run pending ones.
	 */
	if (curcnt != cnt)
		goto out;

	pending = local_softirq_pending();
	if (!pending)
		goto out;

	/*
	 * If this was called from non-preemptible context, wake up the
	 * softirq daemon.
	 */
	if (!preempt_on) {
		wakeup_softirqd();
		goto out;
	}

	/*
	 * Adjust softirq count to SOFTIRQ_OFFSET which makes
	 * in_serving_softirq() become true.
	 */
	cnt = SOFTIRQ_OFFSET;
	__local_bh_enable(cnt, false);
	__do_softirq();

out:
	__local_bh_enable(cnt, preempt_on);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__local_bh_enable_ip);

/*
 * Invoked from run_ksoftirqd() outside of the interrupt disabled section
 * to acquire the per CPU local lock for reentrancy protection.
 */
static inline void ksoftirqd_run_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	local_irq_disable();
}

/* Counterpart to ksoftirqd_run_begin() */
static inline void ksoftirqd_run_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET, true);
	WARN_ON_ONCE(in_interrupt());
	local_irq_enable();
}

static inline void softirq_handle_begin(void) { }
static inline void softirq_handle_end(void) { }

static inline bool should_wake_ksoftirqd(void)
{
	return !this_cpu_read(softirq_ctrl.cnt);
}

static inline void invoke_softirq(void)
{
	if (should_wake_ksoftirqd())
		wakeup_softirqd();
}

#define SCHED_SOFTIRQ_MASK	BIT(SCHED_SOFTIRQ)

/*
 * flush_smp_call_function_queue() can raise a soft interrupt in a function
 * call. On RT kernels this is undesired and the only known functionalities
 * are in the block layer which is disabled on RT, and in the scheduler for
 * idle load balancing. If soft interrupts get raised which haven't been
 * raised before the flush, warn if it is not a SCHED_SOFTIRQ so it can be
 * investigated.
 */
void do_softirq_post_smp_call_flush(unsigned int was_pending)
{
	unsigned int is_pending = local_softirq_pending();

	if (unlikely(was_pending != is_pending)) {
		WARN_ON_ONCE(was_pending != (is_pending & ~SCHED_SOFTIRQ_MASK));
		invoke_softirq();
	}
}

#else /* CONFIG_PREEMPT_RT */

/*
 * This one is for softirq.c-internal use, where hardirqs are disabled
 * legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_hardirq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = get_lock_parent_ip();
#endif
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */

static void __local_bh_enable(unsigned int cnt)
{
	lockdep_assert_irqs_disabled();

	if (preempt_count() == cnt)
		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());

	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_on(_RET_IP_);

	__preempt_count_sub(cnt);
}

/*
 * Special-case - softirqs can safely be enabled by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_hardirq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		lockdep_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	__preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirq if any pending. And do it in its own stack
		 * as we may be calling this deep in a task call stack already.
		 */
		do_softirq();
	}

	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);

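/*
 * Illustrative sketch only (not part of this file): the usual caller pattern
 * that ends up in __local_bh_disable_ip()/__local_bh_enable_ip() via
 * local_bh_disable()/local_bh_enable(). The per-CPU variable my_stats and
 * its field are hypothetical.
 *
 *	local_bh_disable();			// no softirq can run on this CPU now
 *	__this_cpu_inc(my_stats.rx_packets);	// safe vs. the NET_RX handler
 *	local_bh_enable();			// runs pending softirqs if any
 */
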
static inline void softirq_handle_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
}

static inline void softirq_handle_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
}

static inline void ksoftirqd_run_begin(void)
{
	local_irq_disable();
}

static inline void ksoftirqd_run_end(void)
{
	local_irq_enable();
}

static inline bool should_wake_ksoftirqd(void)
{
	return true;
}

static inline void invoke_softirq(void)
{
	if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack that can
		 * be potentially deep already. So call softirq in its own stack
		 * to prevent any overrun.
		 */
		do_softirq_own_stack();
#endif
	} else {
		wakeup_softirqd();
	}
}

asmlinkage __visible void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		do_softirq_own_stack();

	local_irq_restore(flags);
}

#endif /* !CONFIG_PREEMPT_RT */

/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10

#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
 * to keep the lockdep irq context tracking as tight as possible in order to
 * not mis-qualify lock contexts and miss possible deadlocks.
 */

static inline bool lockdep_softirq_start(void)
{
	bool in_hardirq = false;

	if (lockdep_hardirq_context()) {
		in_hardirq = true;
		lockdep_hardirq_exit();
	}

	lockdep_softirq_enter();

	return in_hardirq;
}

static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (in_hardirq)
		lockdep_hardirq_enter();
}
#else
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif

static void handle_softirqs(bool ksirqd)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swapping.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();

	softirq_handle_begin();
	in_hardirq = lockdep_softirq_start();
	account_softirq_enter(current);

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd)
		rcu_softirq_qs();

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	account_softirq_exit(current);
	lockdep_softirq_end(in_hardirq);
	softirq_handle_end();
	current_restore_flags(old_flags, PF_MEMALLOC);
}

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	handle_softirqs(false);
}

/**
 * irq_enter_rcu - Enter an interrupt context with RCU watching
 */
void irq_enter_rcu(void)
{
	__irq_enter_raw();

	if (tick_nohz_full_cpu(smp_processor_id()) ||
	    (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
		tick_irq_enter();

	account_hardirq_enter(current);
}

/**
 * irq_enter - Enter an interrupt context including RCU update
 */
void irq_enter(void)
{
	ct_irq_enter();
	irq_enter_rcu();
}

static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if ((sched_core_idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
		if (!in_hardirq())
			tick_nohz_irq_exit();
	}
#endif
}

static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	lockdep_assert_irqs_disabled();
#endif
	account_hardirq_exit(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
}

/**
 * irq_exit_rcu() - Exit an interrupt context without updating RCU
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit_rcu(void)
{
	__irq_exit_rcu();
	 /* must be last! */
	lockdep_hardirq_exit();
}

/**
 * irq_exit - Exit an interrupt context, update RCU and lockdep
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit(void)
{
	__irq_exit_rcu();
	ct_irq_exit();
	 /* must be last! */
	lockdep_hardirq_exit();
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt() && should_wake_ksoftirqd())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void __raise_softirq_irqoff(unsigned int nr)
{
	lockdep_assert_irqs_disabled();
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}

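/*
 * Illustrative sketch only (not part of this file): how a softirq vector is
 * wired up by its owning subsystem, e.g. as softirq_init() below does for
 * TASKLET_SOFTIRQ. MY_EXAMPLE_SOFTIRQ and my_softirq_action() are
 * hypothetical; a real vector would need its own entry in the NR_SOFTIRQS
 * enum, which is why new softirqs are rarely added.
 *
 *	// once, at boot:
 *	open_softirq(MY_EXAMPLE_SOFTIRQ, my_softirq_action);
 *
 *	// later, typically from hardirq context:
 *	raise_softirq(MY_EXAMPLE_SOFTIRQ);
 *	// or raise_softirq_irqoff() if interrupts are already disabled
 */
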
/*
 * Tasklets
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

static void __tasklet_schedule_common(struct tasklet_struct *t,
				      struct tasklet_head __percpu *headp,
				      unsigned int softirq_nr)
{
	struct tasklet_head *head;
	unsigned long flags;

	local_irq_save(flags);
	head = this_cpu_ptr(headp);
	t->next = NULL;
	*head->tail = t;
	head->tail = &(t->next);
	raise_softirq_irqoff(softirq_nr);
	local_irq_restore(flags);
}

void __tasklet_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_vec,
				  TASKLET_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_hi_vec,
				  HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);

static bool tasklet_clear_sched(struct tasklet_struct *t)
{
	if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
		wake_up_var(&t->state);
		return true;
	}

	WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
		  t->use_callback ? "callback" : "func",
		  t->use_callback ? (void *)t->callback : (void *)t->func);

	return false;
}

static void tasklet_action_common(struct softirq_action *a,
				  struct tasklet_head *tl_head,
				  unsigned int softirq_nr)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = tl_head->head;
	tl_head->head = NULL;
	tl_head->tail = &tl_head->head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (tasklet_clear_sched(t)) {
					if (t->use_callback) {
						trace_tasklet_entry(t, t->callback);
						t->callback(t);
						trace_tasklet_exit(t, t->callback);
					} else {
						trace_tasklet_entry(t, t->func);
						t->func(t->data);
						trace_tasklet_exit(t, t->func);
					}
				}
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*tl_head->tail = t;
		tl_head->tail = &t->next;
		__raise_softirq_irqoff(softirq_nr);
		local_irq_enable();
	}
}

static __latent_entropy void tasklet_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
}

static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
}

void tasklet_setup(struct tasklet_struct *t,
		   void (*callback)(struct tasklet_struct *))
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->callback = callback;
	t->use_callback = true;
	t->data = 0;
}
EXPORT_SYMBOL(tasklet_setup);

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->use_callback = false;
	t->data = data;
}
EXPORT_SYMBOL(tasklet_init);

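/*
 * Illustrative sketch only (not part of this file): typical driver usage of
 * the tasklet API initialized by tasklet_setup(). struct my_dev and
 * my_tasklet_fn() are hypothetical.
 *
 *	struct my_dev {
 *		struct tasklet_struct tsk;
 *	};
 *
 *	static void my_tasklet_fn(struct tasklet_struct *t)
 *	{
 *		struct my_dev *dev = from_tasklet(dev, t, tsk);
 *		// bottom-half work for dev, runs in TASKLET_SOFTIRQ context
 *	}
 *
 *	// probe:	tasklet_setup(&dev->tsk, my_tasklet_fn);
 *	// hardirq:	tasklet_schedule(&dev->tsk);
 */
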
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
 * Do not use in new code. Waiting for tasklets from atomic contexts is
 * error prone and should be avoided.
 */
void tasklet_unlock_spin_wait(struct tasklet_struct *t)
{
	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
			/*
			 * Prevent a live lock if the current task has
			 * preempted soft interrupt processing or prevents
			 * ksoftirqd from running. If the tasklet runs on a
			 * different CPU then this has no effect other than
			 * doing the BH disable/enable dance for nothing.
			 */
			local_bh_disable();
			local_bh_enable();
		} else {
			cpu_relax();
		}
	}
}
EXPORT_SYMBOL(tasklet_unlock_spin_wait);
#endif

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));

	tasklet_unlock_wait(t);
	tasklet_clear_sched(t);
}
EXPORT_SYMBOL(tasklet_kill);

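/*
 * Illustrative sketch only (not part of this file): teardown must ensure the
 * tasklet can no longer be scheduled and has finished running before the
 * containing object is freed. dev and tsk are the hypothetical names from
 * the sketch above; dev->irq is likewise hypothetical.
 *
 *	disable_irq(dev->irq);		// stop new tasklet_schedule() calls
 *	tasklet_kill(&dev->tsk);	// waits out SCHED and RUN states
 *	kfree(dev);
 */
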
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
void tasklet_unlock(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	clear_bit(TASKLET_STATE_RUN, &t->state);
	smp_mb__after_atomic();
	wake_up_var(&t->state);
}
EXPORT_SYMBOL_GPL(tasklet_unlock);

void tasklet_unlock_wait(struct tasklet_struct *t)
{
	wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
}
EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
#endif

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}

static void run_ksoftirqd(unsigned int cpu)
{
	ksoftirqd_run_begin();
	if (local_softirq_pending()) {
		/*
		 * We can safely run softirq on the current stack, as we are
		 * not deep in the task stack here.
		 */
		handle_softirqs(true);
		ksoftirqd_run_end();
		cond_resched();
		return;
	}
	ksoftirqd_run_end();
}

#ifdef CONFIG_HOTPLUG_CPU
static int takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		__this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
	return 0;
}
#else
#define takeover_tasklets	NULL
#endif /* CONFIG_HOTPLUG_CPU */

static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}