// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/local_lock.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/wait_bit.h>

#include <asm/softirq_stack.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

/*
   - No shared variables; all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     with its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution, so we get a sort of weak CPU binding. Whether this
     results in better locality is still unclear.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */

#ifndef __ARCH_IRQ_STAT
DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

const char * const softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk)
		wake_up_process(tsk);
}

#ifdef CONFIG_TRACE_IRQFLAGS
DEFINE_PER_CPU(int, hardirqs_enabled);
DEFINE_PER_CPU(int, hardirq_context);
EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
#endif

/*
 * SOFTIRQ_OFFSET usage:
 *
 * On !RT kernels 'count' is the preempt counter; on RT kernels this applies
 * to a per CPU counter and to task::softirq_disable_cnt.
 *
 * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
 *   processing.
 *
 * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable() or local_bh_enable().
 *
 * This lets us distinguish between whether we are currently processing
 * a softirq and whether we just have bh disabled.
 */
#ifdef CONFIG_PREEMPT_RT

/*
 * RT accounts for BH disabled sections in task::softirq_disable_cnt and
 * also in the per CPU softirq_ctrl::cnt. This is necessary to allow tasks
 * in a softirq disabled section to be preempted.
 *
 * The per task counter is used for softirq_count(), in_softirq() and
 * in_serving_softirq() because these counts are only valid when the task
 * holding softirq_ctrl::lock is running.
 *
 * The per CPU counter prevents pointless wakeups of ksoftirqd in case that
 * the task which is in a softirq disabled section is preempted or blocks.
 */
struct softirq_ctrl {
	local_lock_t	lock;
	int		cnt;
};

static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
	.lock	= INIT_LOCAL_LOCK(softirq_ctrl.lock),
};

/**
 * local_bh_blocked() - Check from the idle task whether BH processing is blocked
 *
 * Returns false if the per CPU softirq_ctrl::cnt is 0, otherwise true.
 *
 * This is invoked from the idle task to guard against false positive
 * softirq pending warnings, which would happen when the task which holds
 * softirq_ctrl::lock was the only running task on the CPU and blocks on
 * some other lock.
 */
bool local_bh_blocked(void)
{
	return __this_cpu_read(softirq_ctrl.cnt) != 0;
}

void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;
	int newcnt;

	WARN_ON_ONCE(in_hardirq());

	/* First entry of a task into a BH disabled section? */
	if (!current->softirq_disable_cnt) {
		if (preemptible()) {
			local_lock(&softirq_ctrl.lock);
			/* Required to meet the RCU bottomhalf requirements. */
			rcu_read_lock();
		} else {
			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
		}
	}

	/*
	 * Track the per CPU softirq disabled state. On RT this is per CPU
	 * state to allow preemption of bottom half disabled sections.
	 */
	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
	/*
	 * Reflect the result in the task state to prevent recursion on the
	 * local lock and to make softirq_count() et al. work.
	 */
	current->softirq_disable_cnt = newcnt;

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_off(ip);
		raw_local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);

static void __local_bh_enable(unsigned int cnt, bool unlock)
{
	unsigned long flags;
	int newcnt;

	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
			    this_cpu_read(softirq_ctrl.cnt));

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_on(_RET_IP_);
		raw_local_irq_restore(flags);
	}

	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
	current->softirq_disable_cnt = newcnt;

	if (!newcnt && unlock) {
		rcu_read_unlock();
		local_unlock(&softirq_ctrl.lock);
	}
}

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	bool preempt_on = preemptible();
	unsigned long flags;
	u32 pending;
	int curcnt;

	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();

	local_irq_save(flags);
	curcnt = __this_cpu_read(softirq_ctrl.cnt);

	/*
	 * If this is not reenabling soft interrupts, no point in trying to
	 * run pending ones.
	 */
	if (curcnt != cnt)
		goto out;

	pending = local_softirq_pending();
	if (!pending)
		goto out;

	/*
	 * If this was called from non-preemptible context, wake up the
	 * softirq daemon.
	 */
	if (!preempt_on) {
		wakeup_softirqd();
		goto out;
	}

	/*
	 * Adjust the softirq count to SOFTIRQ_OFFSET, which makes
	 * in_serving_softirq() become true.
	 */
	cnt = SOFTIRQ_OFFSET;
	__local_bh_enable(cnt, false);
	__do_softirq();

out:
	__local_bh_enable(cnt, preempt_on);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__local_bh_enable_ip);

/*
 * Invoked from ksoftirqd_run() outside of the interrupt disabled section
 * to acquire the per CPU local lock for reentrancy protection.
 */
static inline void ksoftirqd_run_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	local_irq_disable();
}

/* Counterpart to ksoftirqd_run_begin() */
static inline void ksoftirqd_run_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET, true);
	WARN_ON_ONCE(in_interrupt());
	local_irq_enable();
}

static inline void softirq_handle_begin(void) { }
static inline void softirq_handle_end(void) { }

static inline bool should_wake_ksoftirqd(void)
{
	return !this_cpu_read(softirq_ctrl.cnt);
}

static inline void invoke_softirq(void)
{
	if (should_wake_ksoftirqd())
		wakeup_softirqd();
}

#define SCHED_SOFTIRQ_MASK	BIT(SCHED_SOFTIRQ)

/*
 * flush_smp_call_function_queue() can raise a soft interrupt in a function
 * call. On RT kernels this is undesired and the only known functionalities
 * are in the block layer, which is disabled on RT, and in the scheduler for
 * idle load balancing. If soft interrupts get raised which haven't been
 * raised before the flush, warn if it is not a SCHED_SOFTIRQ so it can be
 * investigated.
 */
void do_softirq_post_smp_call_flush(unsigned int was_pending)
{
	unsigned int is_pending = local_softirq_pending();

	if (unlikely(was_pending != is_pending)) {
		WARN_ON_ONCE(was_pending != (is_pending & ~SCHED_SOFTIRQ_MASK));
		invoke_softirq();
	}
}

#else /* CONFIG_PREEMPT_RT */

/*
 * This one is for softirq.c-internal use, where hardirqs are disabled
 * legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_hardirq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add() and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call trace_preempt_off() later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above?
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = get_lock_parent_ip();
#endif
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */

static void __local_bh_enable(unsigned int cnt)
{
	lockdep_assert_irqs_disabled();

	if (preempt_count() == cnt)
		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());

	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_on(_RET_IP_);

	__preempt_count_sub(cnt);
}

/*
 * Special-case - softirqs can safely be enabled by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_hardirq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now?
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		lockdep_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	__preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirqs if any are pending, and do it on a separate
		 * stack as we may be calling this deep in a task call stack
		 * already.
		 */
		do_softirq();
	}

	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);

static inline void softirq_handle_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
}

static inline void softirq_handle_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
}

static inline void ksoftirqd_run_begin(void)
{
	local_irq_disable();
}

static inline void ksoftirqd_run_end(void)
{
	local_irq_enable();
}

static inline bool should_wake_ksoftirqd(void)
{
	return true;
}

static inline void invoke_softirq(void)
{
	if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute the softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack, which
		 * can be potentially deep already. So call softirq on its own
		 * stack to prevent any overrun.
		 */
		do_softirq_own_stack();
#endif
	} else {
		wakeup_softirqd();
	}
}

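/*
 * Run pending softirqs from task context. Does nothing when called from
 * interrupt or softirq-disabled context; pending work is handled on the
 * dedicated softirq stack where the architecture provides one.
 */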
asmlinkage __visible void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		do_softirq_own_stack();

	local_irq_restore(flags);
}

#endif /* !CONFIG_PREEMPT_RT */

/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME	msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART	10

#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * When we run softirqs from irq_exit() and thus on the hardirq stack, we need
 * to keep the lockdep irq context tracking as tight as possible in order to
 * not mis-qualify lock contexts and miss possible deadlocks.
 */

static inline bool lockdep_softirq_start(void)
{
	bool in_hardirq = false;

	if (lockdep_hardirq_context()) {
		in_hardirq = true;
		lockdep_hardirq_exit();
	}

	lockdep_softirq_enter();

	return in_hardirq;
}

static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (in_hardirq)
		lockdep_hardirq_enter();
}
#else
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif

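/*
 * Core softirq processing loop: consume the pending bitmask with interrupts
 * enabled, invoke each raised handler, and restart for newly raised softirqs
 * until the 2 ms budget (MAX_SOFTIRQ_TIME) is exhausted, MAX_SOFTIRQ_RESTART
 * restarts have been done, or need_resched() is set, at which point the
 * remainder is deferred to ksoftirqd.
 */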
static void handle_softirqs(bool ksirqd)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swapping.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();

	softirq_handle_begin();
	in_hardirq = lockdep_softirq_start();
	account_softirq_enter(current);

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd)
		rcu_softirq_qs();

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	account_softirq_exit(current);
	lockdep_softirq_end(in_hardirq);
	softirq_handle_end();
	current_restore_flags(old_flags, PF_MEMALLOC);
}

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	handle_softirqs(false);
}

/**
 * irq_enter_rcu - Enter an interrupt context with RCU watching
 */
void irq_enter_rcu(void)
{
	__irq_enter_raw();

	if (tick_nohz_full_cpu(smp_processor_id()) ||
	    (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
		tick_irq_enter();

	account_hardirq_enter(current);
}

/**
 * irq_enter - Enter an interrupt context including RCU update
 */
void irq_enter(void)
{
	ct_irq_enter();
	irq_enter_rcu();
}

static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if ((sched_core_idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
		if (!in_hardirq())
			tick_nohz_irq_exit();
	}
#endif
}

static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	lockdep_assert_irqs_disabled();
#endif
	account_hardirq_exit(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
}

/**
 * irq_exit_rcu() - Exit an interrupt context without updating RCU
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit_rcu(void)
{
	__irq_exit_rcu();
	/* must be last! */
	lockdep_hardirq_exit();
}

/**
 * irq_exit - Exit an interrupt context, update RCU and lockdep
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit(void)
{
	__irq_exit_rcu();
	ct_irq_exit();
	/* must be last! */
	lockdep_hardirq_exit();
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt() && should_wake_ksoftirqd())
		wakeup_softirqd();
}

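/*
 * Raise a softirq from any context; interrupts are disabled around the
 * actual raise.
 */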
void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void __raise_softirq_irqoff(unsigned int nr)
{
	lockdep_assert_irqs_disabled();
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}

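/*
 * Register the handler for softirq @nr. Registration happens once at init
 * time; there is no corresponding unregistration.
 */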
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}

/*
 * Tasklets
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

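/*
 * Append the tasklet to the tail of the given per-CPU list and raise the
 * corresponding softirq, with interrupts disabled around the list update.
 */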
static void __tasklet_schedule_common(struct tasklet_struct *t,
				      struct tasklet_head __percpu *headp,
				      unsigned int softirq_nr)
{
	struct tasklet_head *head;
	unsigned long flags;

	local_irq_save(flags);
	head = this_cpu_ptr(headp);
	t->next = NULL;
	*head->tail = t;
	head->tail = &(t->next);
	raise_softirq_irqoff(softirq_nr);
	local_irq_restore(flags);
}

void __tasklet_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_vec,
				  TASKLET_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_hi_vec,
				  HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);

static bool tasklet_clear_sched(struct tasklet_struct *t)
{
	if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
		wake_up_var(&t->state);
		return true;
	}

	WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
		  t->use_callback ? "callback" : "func",
		  t->use_callback ? (void *)t->callback : (void *)t->func);

	return false;
}

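/*
 * Softirq handler backend for both tasklet vectors: detach the per-CPU list,
 * run every tasklet that is enabled and not running on another CPU, and
 * requeue the ones that could not be run now.
 */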
static void tasklet_action_common(struct softirq_action *a,
				  struct tasklet_head *tl_head,
				  unsigned int softirq_nr)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = tl_head->head;
	tl_head->head = NULL;
	tl_head->tail = &tl_head->head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (tasklet_clear_sched(t)) {
					if (t->use_callback) {
						trace_tasklet_entry(t, t->callback);
						t->callback(t);
						trace_tasklet_exit(t, t->callback);
					} else {
						trace_tasklet_entry(t, t->func);
						t->func(t->data);
						trace_tasklet_exit(t, t->func);
					}
				}
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*tl_head->tail = t;
		tl_head->tail = &t->next;
		__raise_softirq_irqoff(softirq_nr);
		local_irq_enable();
	}
}

static __latent_entropy void tasklet_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
}

static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
}

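/*
 * Initialize a tasklet for the callback based interface; the callback
 * receives the tasklet itself. tasklet_init() below covers the legacy
 * func/data variant.
 */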
void tasklet_setup(struct tasklet_struct *t,
		   void (*callback)(struct tasklet_struct *))
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->callback = callback;
	t->use_callback = true;
	t->data = 0;
}
EXPORT_SYMBOL(tasklet_setup);

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->use_callback = false;
	t->data = data;
}
EXPORT_SYMBOL(tasklet_init);

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
 * Do not use in new code. Waiting for tasklets from atomic contexts is
 * error prone and should be avoided.
 */
void tasklet_unlock_spin_wait(struct tasklet_struct *t)
{
	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
			/*
			 * Prevent a live lock when the current task preempted
			 * soft interrupt processing or is preventing ksoftirqd
			 * from running. If the tasklet runs on a different CPU,
			 * this has no effect other than doing the BH
			 * disable/enable dance for nothing.
			 */
			local_bh_disable();
			local_bh_enable();
		} else {
			cpu_relax();
		}
	}
}
EXPORT_SYMBOL(tasklet_unlock_spin_wait);
#endif

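/*
 * Wait until a scheduled or running tasklet has finished and keep it from
 * being scheduled again. Must not be called from interrupt context as it
 * may sleep.
 */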
void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));

	tasklet_unlock_wait(t);
	tasklet_clear_sched(t);
}
EXPORT_SYMBOL(tasklet_kill);

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
void tasklet_unlock(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	clear_bit(TASKLET_STATE_RUN, &t->state);
	smp_mb__after_atomic();
	wake_up_var(&t->state);
}
EXPORT_SYMBOL_GPL(tasklet_unlock);

void tasklet_unlock_wait(struct tasklet_struct *t)
{
	wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
}
EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
#endif

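/*
 * Boot time setup: initialize the per-CPU tasklet list tails and register
 * the two tasklet softirq vectors.
 */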
void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

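/* smpboot callbacks backing the per-CPU ksoftirqd threads registered below. */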
static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}

static void run_ksoftirqd(unsigned int cpu)
{
	ksoftirqd_run_begin();
	if (local_softirq_pending()) {
		/*
		 * We can safely run softirqs on the current stack, as we are
		 * not deep in the task stack here.
		 */
		handle_softirqs(true);
		ksoftirqd_run_end();
		cond_resched();
		return;
	}
	ksoftirqd_run_end();
}

#ifdef CONFIG_HOTPLUG_CPU
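/*
 * CPU hotplug callback: splice the dead CPU's pending tasklets onto the
 * current CPU's lists so they are not lost.
 */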
static int takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		__this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
	return 0;
}
#else
#define takeover_tasklets	NULL
#endif /* CONFIG_HOTPLUG_CPU */

static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}