// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/local_lock.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/wait_bit.h>

#include <asm/softirq_stack.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether it will result in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */

#ifndef __ARCH_IRQ_STAT
DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

const char * const softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk)
		wake_up_process(tsk);
}

/*
 * If ksoftirqd is scheduled, we do not want to process pending softirqs
 * right now. Let ksoftirqd handle this at its own rate, to get fairness,
 * unless we're doing some of the synchronous softirqs.
 */
#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
static bool ksoftirqd_running(unsigned long pending)
{
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (pending & SOFTIRQ_NOW_MASK)
		return false;
	return tsk && task_is_running(tsk) && !__kthread_should_park(tsk);
}

#ifdef CONFIG_TRACE_IRQFLAGS
DEFINE_PER_CPU(int, hardirqs_enabled);
DEFINE_PER_CPU(int, hardirq_context);
EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
#endif

/*
 * SOFTIRQ_OFFSET usage:
 *
 * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
 * to a per CPU counter and to task::softirq_disable_cnt.
 *
 * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
 *   processing.
 *
 * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 *
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */
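
/*
 * For example, on a !RT kernel the counting above works out as follows
 * (illustrative sketch only):
 *
 *	local_bh_disable();	// softirq_count() == SOFTIRQ_DISABLE_OFFSET,
 *				// in_softirq() true, in_serving_softirq() false
 *	...
 *	local_bh_enable();	// softirq_count() == 0 again
 *
 * whereas while a softirq handler is running, softirq_count() contains
 * SOFTIRQ_OFFSET and in_serving_softirq() is true, so the two states stay
 * distinguishable.
 */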

#ifdef CONFIG_PREEMPT_RT

/*
 * RT accounts for BH disabled sections in task::softirq_disable_cnt and
 * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
 * softirq disabled section to be preempted.
 *
 * The per task counter is used for softirq_count(), in_softirq() and
 * in_serving_softirq() because these counts are only valid when the task
 * holding softirq_ctrl::lock is running.
 *
 * The per CPU counter prevents pointless wakeups of ksoftirqd in case that
 * the task which is in a softirq disabled section is preempted or blocks.
 */
struct softirq_ctrl {
	local_lock_t	lock;
	int		cnt;
};

static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
	.lock	= INIT_LOCAL_LOCK(softirq_ctrl.lock),
};

/**
 * local_bh_blocked() - Check for idle whether BH processing is blocked
 *
 * Returns false if the per CPU softirq_ctrl::cnt is 0, otherwise true.
 *
 * This is invoked from the idle task to guard against false positive
 * softirq pending warnings, which would happen when the task which holds
 * softirq_ctrl::lock was the only running task on the CPU and blocks on
 * some other lock.
 */
bool local_bh_blocked(void)
{
	return __this_cpu_read(softirq_ctrl.cnt) != 0;
}

void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;
	int newcnt;

	WARN_ON_ONCE(in_hardirq());

	/* First entry of a task into a BH disabled section? */
	if (!current->softirq_disable_cnt) {
		if (preemptible()) {
			local_lock(&softirq_ctrl.lock);
			/* Required to meet the RCU bottomhalf requirements. */
			rcu_read_lock();
		} else {
			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
		}
	}

	/*
	 * Track the per CPU softirq disabled state. On RT this is per CPU
	 * state to allow preemption of bottom half disabled sections.
	 */
	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
	/*
	 * Reflect the result in the task state to prevent recursion on the
	 * local lock and to make softirq_count() & al work.
	 */
	current->softirq_disable_cnt = newcnt;

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_off(ip);
		raw_local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);

static void __local_bh_enable(unsigned int cnt, bool unlock)
{
	unsigned long flags;
	int newcnt;

	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
			    this_cpu_read(softirq_ctrl.cnt));

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_on(_RET_IP_);
		raw_local_irq_restore(flags);
	}

	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
	current->softirq_disable_cnt = newcnt;

	if (!newcnt && unlock) {
		rcu_read_unlock();
		local_unlock(&softirq_ctrl.lock);
	}
}

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	bool preempt_on = preemptible();
	unsigned long flags;
	u32 pending;
	int curcnt;

	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();

	local_irq_save(flags);
	curcnt = __this_cpu_read(softirq_ctrl.cnt);

	/*
	 * If this is not reenabling soft interrupts, no point in trying to
	 * run pending ones.
	 */
	if (curcnt != cnt)
		goto out;

	pending = local_softirq_pending();
	if (!pending || ksoftirqd_running(pending))
		goto out;

	/*
	 * If this was called from non preemptible context, wake up the
	 * softirq daemon.
	 */
	if (!preempt_on) {
		wakeup_softirqd();
		goto out;
	}

	/*
	 * Adjust softirq count to SOFTIRQ_OFFSET which makes
	 * in_serving_softirq() become true.
	 */
	cnt = SOFTIRQ_OFFSET;
	__local_bh_enable(cnt, false);
	__do_softirq();

out:
	__local_bh_enable(cnt, preempt_on);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__local_bh_enable_ip);

/*
 * Invoked from ksoftirqd_run() outside of the interrupt disabled section
 * to acquire the per CPU local lock for reentrancy protection.
 */
static inline void ksoftirqd_run_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	local_irq_disable();
}

/* Counterpart to ksoftirqd_run_begin() */
static inline void ksoftirqd_run_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET, true);
	WARN_ON_ONCE(in_interrupt());
	local_irq_enable();
}

static inline void softirq_handle_begin(void) { }
static inline void softirq_handle_end(void) { }

static inline bool should_wake_ksoftirqd(void)
{
	return !this_cpu_read(softirq_ctrl.cnt);
}

static inline void invoke_softirq(void)
{
	if (should_wake_ksoftirqd())
		wakeup_softirqd();
}

/*
 * flush_smp_call_function_queue() can raise a soft interrupt in a function
 * call. On RT kernels this is undesired and the only known functionality
 * in the block layer which does this is disabled on RT. If soft interrupts
 * get raised which haven't been raised before the flush, warn so it can be
 * investigated.
 */
void do_softirq_post_smp_call_flush(unsigned int was_pending)
{
	if (WARN_ON_ONCE(was_pending != local_softirq_pending()))
		invoke_softirq();
}

#else /* CONFIG_PREEMPT_RT */

/*
 * This one is for softirq.c-internal use, where hardirqs are disabled
 * legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_hardirq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = get_lock_parent_ip();
#endif
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */

static void __local_bh_enable(unsigned int cnt)
{
	lockdep_assert_irqs_disabled();

	if (preempt_count() == cnt)
		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());

	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_on(_RET_IP_);

	__preempt_count_sub(cnt);
}

/*
 * Special-case - softirqs can safely be enabled by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_hardirq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		lockdep_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	__preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirq if any pending. And do it in its own stack
		 * as we may be calling this deep in a task call stack already.
		 */
		do_softirq();
	}

	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);

static inline void softirq_handle_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
}

static inline void softirq_handle_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
}

static inline void ksoftirqd_run_begin(void)
{
	local_irq_disable();
}

static inline void ksoftirqd_run_end(void)
{
	local_irq_enable();
}

static inline bool should_wake_ksoftirqd(void)
{
	return true;
}

static inline void invoke_softirq(void)
{
	if (ksoftirqd_running(local_softirq_pending()))
		return;

	if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute softirq on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack that can
		 * be potentially deep already. So call softirq in its own stack
		 * to prevent from any overrun.
		 */
		do_softirq_own_stack();
#endif
	} else {
		wakeup_softirqd();
	}
}

asmlinkage __visible void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending && !ksoftirqd_running(pending))
		do_softirq_own_stack();

	local_irq_restore(flags);
}

#endif /* !CONFIG_PREEMPT_RT */

/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10

#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
 * to keep the lockdep irq context tracking as tight as possible in order to
 * not miss-qualify lock contexts and miss possible deadlocks.
 */

static inline bool lockdep_softirq_start(void)
{
	bool in_hardirq = false;

	if (lockdep_hardirq_context()) {
		in_hardirq = true;
		lockdep_hardirq_exit();
	}

	lockdep_softirq_enter();

	return in_hardirq;
}

static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (in_hardirq)
		lockdep_hardirq_enter();
}
#else
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swapping.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();

	softirq_handle_begin();
	in_hardirq = lockdep_softirq_start();
	account_softirq_enter(current);

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
	    __this_cpu_read(ksoftirqd) == current)
		rcu_softirq_qs();

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	account_softirq_exit(current);
	lockdep_softirq_end(in_hardirq);
	softirq_handle_end();
	current_restore_flags(old_flags, PF_MEMALLOC);
}

/**
 * irq_enter_rcu - Enter an interrupt context with RCU watching
 */
void irq_enter_rcu(void)
{
	__irq_enter_raw();

	if (tick_nohz_full_cpu(smp_processor_id()) ||
	    (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
		tick_irq_enter();

	account_hardirq_enter(current);
}

/**
 * irq_enter - Enter an interrupt context including RCU update
 */
void irq_enter(void)
{
	ct_irq_enter();
	irq_enter_rcu();
}

static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
		if (!in_hardirq())
			tick_nohz_irq_exit();
	}
#endif
}

static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	lockdep_assert_irqs_disabled();
#endif
	account_hardirq_exit(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
}

/**
 * irq_exit_rcu() - Exit an interrupt context without updating RCU
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit_rcu(void)
{
	__irq_exit_rcu();
	/* must be last! */
	lockdep_hardirq_exit();
}

/**
 * irq_exit - Exit an interrupt context, update RCU and lockdep
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit(void)
{
	__irq_exit_rcu();
	ct_irq_exit();
	/* must be last! */
	lockdep_hardirq_exit();
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt() && should_wake_ksoftirqd())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void __raise_softirq_irqoff(unsigned int nr)
{
	lockdep_assert_irqs_disabled();
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
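
/*
 * Usage sketch for the calls above (illustrative only; nr stands for one of
 * the fixed softirq numbers from the enum in <linux/interrupt.h> and
 * my_softirq_action is a hypothetical handler): a subsystem registers its
 * vector once at init time and raises it later, usually from hardirq context:
 *
 *	open_softirq(nr, my_softirq_action);	// registration, boot time
 *	...
 *	raise_softirq(nr);			// mark pending, may wake ksoftirqd
 *	raise_softirq_irqoff(nr);		// same, when irqs are already off
 */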

/*
 * Tasklets
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

static void __tasklet_schedule_common(struct tasklet_struct *t,
				      struct tasklet_head __percpu *headp,
				      unsigned int softirq_nr)
{
	struct tasklet_head *head;
	unsigned long flags;

	local_irq_save(flags);
	head = this_cpu_ptr(headp);
	t->next = NULL;
	*head->tail = t;
	head->tail = &(t->next);
	raise_softirq_irqoff(softirq_nr);
	local_irq_restore(flags);
}

void __tasklet_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_vec,
				  TASKLET_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_hi_vec,
				  HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);

static bool tasklet_clear_sched(struct tasklet_struct *t)
{
	if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
		wake_up_var(&t->state);
		return true;
	}

	WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
		  t->use_callback ? "callback" : "func",
		  t->use_callback ? (void *)t->callback : (void *)t->func);

	return false;
}

static void tasklet_action_common(struct softirq_action *a,
				  struct tasklet_head *tl_head,
				  unsigned int softirq_nr)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = tl_head->head;
	tl_head->head = NULL;
	tl_head->tail = &tl_head->head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (tasklet_clear_sched(t)) {
					if (t->use_callback) {
						trace_tasklet_entry(t, t->callback);
						t->callback(t);
						trace_tasklet_exit(t, t->callback);
					} else {
						trace_tasklet_entry(t, t->func);
						t->func(t->data);
						trace_tasklet_exit(t, t->func);
					}
				}
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*tl_head->tail = t;
		tl_head->tail = &t->next;
		__raise_softirq_irqoff(softirq_nr);
		local_irq_enable();
	}
}

static __latent_entropy void tasklet_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
}

static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
	tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
}

void tasklet_setup(struct tasklet_struct *t,
		   void (*callback)(struct tasklet_struct *))
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->callback = callback;
	t->use_callback = true;
	t->data = 0;
}
EXPORT_SYMBOL(tasklet_setup);

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->use_callback = false;
	t->data = data;
}
EXPORT_SYMBOL(tasklet_init);

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
 * Do not use in new code. Waiting for tasklets from atomic contexts is
 * error prone and should be avoided.
 */
void tasklet_unlock_spin_wait(struct tasklet_struct *t)
{
	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
			/*
			 * Prevent a live lock when current preempted soft
			 * interrupt processing or prevents ksoftirqd from
			 * running. If the tasklet runs on a different CPU
			 * then this has no effect other than doing the BH
			 * disable/enable dance for nothing.
			 */
			local_bh_disable();
			local_bh_enable();
		} else {
			cpu_relax();
		}
	}
}
EXPORT_SYMBOL(tasklet_unlock_spin_wait);
#endif

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));

	tasklet_unlock_wait(t);
	tasklet_clear_sched(t);
}
EXPORT_SYMBOL(tasklet_kill);

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
void tasklet_unlock(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	clear_bit(TASKLET_STATE_RUN, &t->state);
	smp_mb__after_atomic();
	wake_up_var(&t->state);
}
EXPORT_SYMBOL_GPL(tasklet_unlock);

void tasklet_unlock_wait(struct tasklet_struct *t)
{
	wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
}
EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
#endif

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}

static void run_ksoftirqd(unsigned int cpu)
{
	ksoftirqd_run_begin();
	if (local_softirq_pending()) {
		/*
		 * We can safely run softirq on inline stack, as we are not deep
		 * in the task stack here.
		 */
		__do_softirq();
		ksoftirqd_run_end();
		cond_resched();
		return;
	}
	ksoftirqd_run_end();
}

#ifdef CONFIG_HOTPLUG_CPU
static int takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		__this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
	return 0;
}
#else
#define takeover_tasklets	NULL
#endif /* CONFIG_HOTPLUG_CPU */

static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}