/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/tick.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution, so we get a sort of weak CPU binding.  It is still not
     clear whether this results in better locality.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * We cannot loop here indefinitely without starving userspace,
 * but we also don't want to introduce a worst-case 1/HZ latency
 * for pending events, so we let the scheduler balance the
 * softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __get_cpu_var(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	add_preempt_count(SOFTIRQ_OFFSET);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
{
	add_preempt_count(SOFTIRQ_OFFSET);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */

void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);

void __local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());

	/*
	 * Softirqs should never be enabled by __local_bh_enable();
	 * it always nests inside local_bh_enable() sections:
	 */
	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);

	sub_preempt_count(SOFTIRQ_OFFSET);
}
EXPORT_SYMBOL_GPL(__local_bh_enable);

/*
 * Special case: softirqs can safely be enabled by
 * cond_resched_softirq() or __do_softirq() without
 * processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);

static inline void _local_bh_enable_ip(unsigned long ip)
{
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}

void local_bh_enable(void)
{
	_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
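
/*
 * Illustrative sketch (not part of the original file): a typical use of
 * the BH-disable API above is to protect data that is also touched from
 * softirq context on this CPU.  The structure and function below are
 * hypothetical.
 *
 *	static struct my_stats {
 *		unsigned long packets;
 *	} my_stats;
 *
 *	static void my_update_stats(unsigned long n)
 *	{
 *		local_bh_disable();		// keep softirqs off this CPU
 *		my_stats.packets += n;		// safe vs. our softirq handler
 *		local_bh_enable();		// may run pending softirqs now
 *	}
 */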

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to ksoftirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness:
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	pending = local_softirq_pending();
	account_system_vtime(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0));
	trace_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	do {
		if (pending & 1) {
			h->action(h);
			rcu_bh_qsctr_inc(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	if (pending)
		wakeup_softirqd();

	trace_softirq_exit();

	account_system_vtime(current);
	_local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		__do_softirq();

	local_irq_restore(flags);
}

#endif

/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
#ifdef CONFIG_NO_HZ
	int cpu = smp_processor_id();
	if (idle_cpu(cpu) && !in_interrupt())
		tick_nohz_stop_idle(cpu);
#endif
	__irq_enter();
#ifdef CONFIG_NO_HZ
	if (idle_cpu(cpu))
		tick_nohz_update_jiffies();
#endif
}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated */
	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
		tick_nohz_stop_sched_tick(0);
	rcu_irq_exit();
#endif
	preempt_enable_no_resched();
}
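
/*
 * Illustrative sketch (not part of the original file): architecture
 * interrupt entry code brackets handler invocation with irq_enter() and
 * irq_exit(), which is what eventually triggers the softirq processing
 * above.  The dispatcher below is a hypothetical, simplified example.
 *
 *	asmlinkage void my_arch_do_IRQ(unsigned int irq)
 *	{
 *		irq_enter();			// enter hardirq context
 *		generic_handle_irq(irq);	// run the registered handler
 *		irq_exit();			// may run softirqs on the way out
 *	}
 */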

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
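
/*
 * Illustrative sketch (not part of the original file): a subsystem that
 * owns a softirq number registers its handler once at init time and then
 * raises the softirq, typically from hardirq context.  MY_SOFTIRQ and
 * my_softirq_action() are hypothetical names; real users are the fixed
 * entries such as NET_RX_SOFTIRQ.
 *
 *	static void my_softirq_action(struct softirq_action *h)
 *	{
 *		// runs with hardirqs enabled and softirqs disabled,
 *		// on the CPU that raised it
 *	}
 *
 *	open_softirq(MY_SOFTIRQ, my_softirq_action);	// boot-time setup
 *	raise_softirq(MY_SOFTIRQ);	// mark pending, maybe wake ksoftirqd
 */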

/* Tasklets */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_vec).tail = t;
	__get_cpu_var(tasklet_vec).tail = &(t->next);
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_hi_vec).tail = t;
	__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_vec).head;
	__get_cpu_var(tasklet_vec).head = NULL;
	__get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_vec).tail = t;
		__get_cpu_var(tasklet_vec).tail = &(t->next);
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}

static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_hi_vec).head;
	__get_cpu_var(tasklet_hi_vec).head = NULL;
	__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_hi_vec).tail = t;
		__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}

EXPORT_SYMBOL(tasklet_init);
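
/*
 * Illustrative sketch (not part of the original file): a driver usually
 * declares a tasklet statically with DECLARE_TASKLET() or initializes it
 * with tasklet_init(), and then schedules it from its interrupt handler.
 * struct my_dev, my_tasklet and my_tasklet_fn are hypothetical.
 *
 *	static void my_tasklet_fn(unsigned long data)
 *	{
 *		struct my_dev *dev = (struct my_dev *)data;
 *		// bottom-half work, runs in softirq context
 *	}
 *
 *	tasklet_init(&dev->my_tasklet, my_tasklet_fn, (unsigned long)dev);
 *	tasklet_schedule(&dev->my_tasklet);	// e.g. from the hardirq handler
 */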

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do
			yield();
		while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
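
/*
 * Illustrative sketch (not part of the original file): on teardown a
 * driver must make sure the tasklet can no longer be scheduled before
 * freeing the data it uses, typically by quiescing the interrupt source
 * and then calling tasklet_kill().  my_dev/my_tasklet are hypothetical.
 *
 *	static void my_dev_shutdown(struct my_dev *dev)
 *	{
 *		// stop whatever calls tasklet_schedule() first
 *		tasklet_kill(&dev->my_tasklet);	// wait out a running instance
 *		kfree(dev);
 *	}
 */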

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int ksoftirqd(void * __bind_cpu)
{
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		if (!local_softirq_pending()) {
			preempt_enable_no_resched();
			schedule();
			preempt_disable();
		}

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preemption being disabled keeps the CPU from
			   going offline.  If we are already offline, we
			   are on the wrong CPU: don't process. */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
			do_softirq();
			preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which may already
 * be scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
		__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
		__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		kthread_bind(p, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = p;
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		wake_up_process(per_cpu(ksoftirqd, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		if (!per_cpu(ksoftirqd, hotcpu))
			break;
		/* Unbind so it can run.  Fall thru. */
		kthread_bind(per_cpu(ksoftirqd, hotcpu),
			     any_online_cpu(cpu_online_map));
	case CPU_DEAD:
	case CPU_DEAD_FROZEN: {
		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

		p = per_cpu(ksoftirqd, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = NULL;
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
		kthread_stop(p);
		takeover_tasklets(hotcpu);
		break;
	}
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};

static __init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

	BUG_ON(err == NOTIFY_BAD);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}
early_initcall(spawn_ksoftirqd);

#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, wait);
	local_irq_disable();
	func(info);
	local_irq_enable();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
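
/*
 * Illustrative sketch (not part of the original file): on_each_cpu() is
 * meant for short, non-sleeping work that must run on every CPU, such as
 * flushing a per-CPU cache.  my_flush_local_cache() is hypothetical.
 *
 *	static void my_flush_local_cache(void *unused)
 *	{
 *		// runs with interrupts disabled on each CPU in turn
 *	}
 *
 *	on_each_cpu(my_flush_local_cache, NULL, 1);	// 1 == wait for completion
 */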
#endif