xref: /openbmc/linux/kernel/softirq.c (revision a1e58bbd)
1 /*
2  *	linux/kernel/softirq.c
3  *
4  *	Copyright (C) 1992 Linus Torvalds
5  *
6  *	Distribute under GPLv2.
7  *
8  *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  */
10 
11 #include <linux/module.h>
12 #include <linux/kernel_stat.h>
13 #include <linux/interrupt.h>
14 #include <linux/init.h>
15 #include <linux/mm.h>
16 #include <linux/notifier.h>
17 #include <linux/percpu.h>
18 #include <linux/cpu.h>
19 #include <linux/freezer.h>
20 #include <linux/kthread.h>
21 #include <linux/rcupdate.h>
22 #include <linux/smp.h>
23 #include <linux/tick.h>
24 
25 #include <asm/irq.h>
26 /*
27    - No shared variables, all the data are CPU local.
28    - If a softirq needs serialization, let it serialize itself
29      by its own spinlocks.
30    - Even if a softirq is serialized, only the local cpu is marked for
31      execution. Hence, we get a sort of weak cpu binding.
32      Though it is still not clear whether this results in better
33      locality or not.
34 
35    Examples:
36    - NET RX softirq. It is multithreaded and does not require
37      any global serialization.
38    - NET TX softirq. It kicks software netdevice queues, hence
39      it is logically serialized per device, but this serialization
40      is invisible to common code.
41    - Tasklets: each one is serialized wrt itself.
42  */
43 
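/*
 * Illustrative sketch (not part of this file): a softirq handler written
 * to the rules above.  It keeps its state CPU-local and brings its own
 * spinlock for the rare case where it must serialize; all example_*
 * names are hypothetical.
 */
#if 0
static DEFINE_PER_CPU(unsigned long, example_events);
static DEFINE_SPINLOCK(example_lock);

static void example_percpu_action(struct softirq_action *h)
{
	/* CPU-local data: no shared variables, no global locking */
	__get_cpu_var(example_events)++;

	/* if serialization is really needed, the softirq serializes itself */
	spin_lock(&example_lock);
	/* ... work on state shared between CPUs ... */
	spin_unlock(&example_lock);
}
#endif
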
44 #ifndef __ARCH_IRQ_STAT
45 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
46 EXPORT_SYMBOL(irq_stat);
47 #endif
48 
49 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
50 
51 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
52 
53 /*
54  * we cannot loop indefinitely here to avoid userspace starvation,
55  * but we also don't want to introduce a worst case 1/HZ latency
56  * to the pending events, so let the scheduler balance
57  * the softirq load for us.
58  */
59 static inline void wakeup_softirqd(void)
60 {
61 	/* Interrupts are disabled: no need to stop preemption */
62 	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
63 
64 	if (tsk && tsk->state != TASK_RUNNING)
65 		wake_up_process(tsk);
66 }
67 
68 /*
69  * This one is for softirq.c-internal use,
70  * where hardirqs are disabled legitimately:
71  */
72 #ifdef CONFIG_TRACE_IRQFLAGS
73 static void __local_bh_disable(unsigned long ip)
74 {
75 	unsigned long flags;
76 
77 	WARN_ON_ONCE(in_irq());
78 
79 	raw_local_irq_save(flags);
80 	add_preempt_count(SOFTIRQ_OFFSET);
81 	/*
82 	 * Were softirqs turned off above:
83 	 */
84 	if (softirq_count() == SOFTIRQ_OFFSET)
85 		trace_softirqs_off(ip);
86 	raw_local_irq_restore(flags);
87 }
88 #else /* !CONFIG_TRACE_IRQFLAGS */
89 static inline void __local_bh_disable(unsigned long ip)
90 {
91 	add_preempt_count(SOFTIRQ_OFFSET);
92 	barrier();
93 }
94 #endif /* CONFIG_TRACE_IRQFLAGS */
95 
96 void local_bh_disable(void)
97 {
98 	__local_bh_disable((unsigned long)__builtin_return_address(0));
99 }
100 
101 EXPORT_SYMBOL(local_bh_disable);
102 
103 void __local_bh_enable(void)
104 {
105 	WARN_ON_ONCE(in_irq());
106 
107 	/*
108 	 * softirqs should never be enabled by __local_bh_enable();
109 	 * it always nests inside local_bh_enable() sections:
110 	 */
111 	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
112 
113 	sub_preempt_count(SOFTIRQ_OFFSET);
114 }
115 EXPORT_SYMBOL_GPL(__local_bh_enable);
116 
117 /*
118  * Special-case - softirqs can safely be enabled in
119  * cond_resched_softirq(), or by __do_softirq(),
120  * without processing still-pending softirqs:
121  */
122 void _local_bh_enable(void)
123 {
124 	WARN_ON_ONCE(in_irq());
125 	WARN_ON_ONCE(!irqs_disabled());
126 
127 	if (softirq_count() == SOFTIRQ_OFFSET)
128 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
129 	sub_preempt_count(SOFTIRQ_OFFSET);
130 }
131 
132 EXPORT_SYMBOL(_local_bh_enable);
133 
134 void local_bh_enable(void)
135 {
136 #ifdef CONFIG_TRACE_IRQFLAGS
137 	unsigned long flags;
138 
139 	WARN_ON_ONCE(in_irq());
140 #endif
141 	WARN_ON_ONCE(irqs_disabled());
142 
143 #ifdef CONFIG_TRACE_IRQFLAGS
144 	local_irq_save(flags);
145 #endif
146 	/*
147 	 * Are softirqs going to be turned on now:
148 	 */
149 	if (softirq_count() == SOFTIRQ_OFFSET)
150 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
151 	/*
152 	 * Keep preemption disabled until we are done with
153 	 * softirq processing:
154  	 */
155  	sub_preempt_count(SOFTIRQ_OFFSET - 1);
156 
157 	if (unlikely(!in_interrupt() && local_softirq_pending()))
158 		do_softirq();
159 
160 	dec_preempt_count();
161 #ifdef CONFIG_TRACE_IRQFLAGS
162 	local_irq_restore(flags);
163 #endif
164 	preempt_check_resched();
165 }
166 EXPORT_SYMBOL(local_bh_enable);
167 
168 void local_bh_enable_ip(unsigned long ip)
169 {
170 #ifdef CONFIG_TRACE_IRQFLAGS
171 	unsigned long flags;
172 
173 	WARN_ON_ONCE(in_irq());
174 
175 	local_irq_save(flags);
176 #endif
177 	/*
178 	 * Are softirqs going to be turned on now:
179 	 */
180 	if (softirq_count() == SOFTIRQ_OFFSET)
181 		trace_softirqs_on(ip);
182 	/*
183 	 * Keep preemption disabled until we are done with
184 	 * softirq processing:
185  	 */
186  	sub_preempt_count(SOFTIRQ_OFFSET - 1);
187 
188 	if (unlikely(!in_interrupt() && local_softirq_pending()))
189 		do_softirq();
190 
191 	dec_preempt_count();
192 #ifdef CONFIG_TRACE_IRQFLAGS
193 	local_irq_restore(flags);
194 #endif
195 	preempt_check_resched();
196 }
197 EXPORT_SYMBOL(local_bh_enable_ip);
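
/*
 * Illustrative sketch (not part of this file): the usual way process
 * context uses the pair above to keep softirqs (and tasklets) off this
 * CPU while it touches data that a softirq also uses.  example_lock and
 * example_list are hypothetical; spin_lock_bh() combines both steps.
 */
#if 0
static DEFINE_SPINLOCK(example_lock);
static LIST_HEAD(example_list);

static void example_add(struct list_head *entry)
{
	local_bh_disable();		/* no softirq can run on this CPU now */
	spin_lock(&example_lock);	/* keep other CPUs out as well */
	list_add(entry, &example_list);
	spin_unlock(&example_lock);
	local_bh_enable();		/* may process pending softirqs here */
}
#endif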
198 
199 /*
200  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
201  * and we fall back to ksoftirqd after that.
202  *
203  * This number has been established via experimentation.
204  * The two things to balance are latency and fairness -
205  * we want to handle softirqs as soon as possible, but they
206  * should not be able to lock up the box.
207  */
208 #define MAX_SOFTIRQ_RESTART 10
209 
210 asmlinkage void __do_softirq(void)
211 {
212 	struct softirq_action *h;
213 	__u32 pending;
214 	int max_restart = MAX_SOFTIRQ_RESTART;
215 	int cpu;
216 
217 	pending = local_softirq_pending();
218 	account_system_vtime(current);
219 
220 	__local_bh_disable((unsigned long)__builtin_return_address(0));
221 	trace_softirq_enter();
222 
223 	cpu = smp_processor_id();
224 restart:
225 	/* Reset the pending bitmask before enabling irqs */
226 	set_softirq_pending(0);
227 
228 	local_irq_enable();
229 
230 	h = softirq_vec;
231 
232 	do {
233 		if (pending & 1) {
234 			h->action(h);
235 			rcu_bh_qsctr_inc(cpu);
236 		}
237 		h++;
238 		pending >>= 1;
239 	} while (pending);
240 
241 	local_irq_disable();
242 
243 	pending = local_softirq_pending();
244 	if (pending && --max_restart)
245 		goto restart;
246 
247 	if (pending)
248 		wakeup_softirqd();
249 
250 	trace_softirq_exit();
251 
252 	account_system_vtime(current);
253 	_local_bh_enable();
254 }
255 
256 #ifndef __ARCH_HAS_DO_SOFTIRQ
257 
258 asmlinkage void do_softirq(void)
259 {
260 	__u32 pending;
261 	unsigned long flags;
262 
263 	if (in_interrupt())
264 		return;
265 
266 	local_irq_save(flags);
267 
268 	pending = local_softirq_pending();
269 
270 	if (pending)
271 		__do_softirq();
272 
273 	local_irq_restore(flags);
274 }
275 
276 #endif
277 
278 /*
279  * Enter an interrupt context.
280  */
281 void irq_enter(void)
282 {
283 #ifdef CONFIG_NO_HZ
284 	int cpu = smp_processor_id();
285 	if (idle_cpu(cpu) && !in_interrupt())
286 		tick_nohz_stop_idle(cpu);
287 #endif
288 	__irq_enter();
289 #ifdef CONFIG_NO_HZ
290 	if (idle_cpu(cpu))
291 		tick_nohz_update_jiffies();
292 #endif
293 }
294 
295 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
296 # define invoke_softirq()	__do_softirq()
297 #else
298 # define invoke_softirq()	do_softirq()
299 #endif
300 
301 /*
302  * Exit an interrupt context. Process softirqs if needed and possible:
303  */
304 void irq_exit(void)
305 {
306 	account_system_vtime(current);
307 	trace_hardirq_exit();
308 	sub_preempt_count(IRQ_EXIT_OFFSET);
309 	if (!in_interrupt() && local_softirq_pending())
310 		invoke_softirq();
311 
312 #ifdef CONFIG_NO_HZ
313 	/* Make sure that timer wheel updates are propagated */
314 	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
315 		tick_nohz_stop_sched_tick();
316 	rcu_irq_exit();
317 #endif
318 	preempt_enable_no_resched();
319 }
320 
321 /*
322  * This function must run with irqs disabled!
323  */
324 inline void raise_softirq_irqoff(unsigned int nr)
325 {
326 	__raise_softirq_irqoff(nr);
327 
328 	/*
329 	 * If we're in an interrupt or softirq, we're done
330 	 * (this also catches softirq-disabled code). We will
331 	 * actually run the softirq once we return from
332 	 * the irq or softirq.
333 	 *
334 	 * Otherwise we wake up ksoftirqd to make sure we
335 	 * schedule the softirq soon.
336 	 */
337 	if (!in_interrupt())
338 		wakeup_softirqd();
339 }
340 
341 void raise_softirq(unsigned int nr)
342 {
343 	unsigned long flags;
344 
345 	local_irq_save(flags);
346 	raise_softirq_irqoff(nr);
347 	local_irq_restore(flags);
348 }
349 
350 void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
351 {
352 	softirq_vec[nr].data = data;
353 	softirq_vec[nr].action = action;
354 }
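
/*
 * Illustrative sketch (not part of this file): how a subsystem wires a
 * softirq up with open_softirq() and asks for it to run with
 * raise_softirq().  EXAMPLE_SOFTIRQ is hypothetical -- real softirq
 * numbers are the small fixed set in <linux/interrupt.h>.
 */
#if 0
static void example_action(struct softirq_action *h)
{
	/* drain a per-CPU queue, poll hardware, etc. */
}

static int __init example_init(void)
{
	open_softirq(EXAMPLE_SOFTIRQ, example_action, NULL);
	return 0;
}

static irqreturn_t example_interrupt(int irq, void *dev_id)
{
	/* mark the softirq pending; it runs on irq_exit() or in ksoftirqd */
	raise_softirq(EXAMPLE_SOFTIRQ);
	return IRQ_HANDLED;
}
#endif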
355 
356 /* Tasklets */
357 struct tasklet_head
358 {
359 	struct tasklet_struct *list;
360 };
361 
362 /* Some compilers disobey section attribute on statics when not
363    initialized -- RR */
364 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
365 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
366 
367 void __tasklet_schedule(struct tasklet_struct *t)
368 {
369 	unsigned long flags;
370 
371 	local_irq_save(flags);
372 	t->next = __get_cpu_var(tasklet_vec).list;
373 	__get_cpu_var(tasklet_vec).list = t;
374 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
375 	local_irq_restore(flags);
376 }
377 
378 EXPORT_SYMBOL(__tasklet_schedule);
379 
380 void __tasklet_hi_schedule(struct tasklet_struct *t)
381 {
382 	unsigned long flags;
383 
384 	local_irq_save(flags);
385 	t->next = __get_cpu_var(tasklet_hi_vec).list;
386 	__get_cpu_var(tasklet_hi_vec).list = t;
387 	raise_softirq_irqoff(HI_SOFTIRQ);
388 	local_irq_restore(flags);
389 }
390 
391 EXPORT_SYMBOL(__tasklet_hi_schedule);
392 
393 static void tasklet_action(struct softirq_action *a)
394 {
395 	struct tasklet_struct *list;
396 
397 	local_irq_disable();
398 	list = __get_cpu_var(tasklet_vec).list;
399 	__get_cpu_var(tasklet_vec).list = NULL;
400 	local_irq_enable();
401 
402 	while (list) {
403 		struct tasklet_struct *t = list;
404 
405 		list = list->next;
406 
407 		if (tasklet_trylock(t)) {
408 			if (!atomic_read(&t->count)) {
409 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
410 					BUG();
411 				t->func(t->data);
412 				tasklet_unlock(t);
413 				continue;
414 			}
415 			tasklet_unlock(t);
416 		}
417 
418 		local_irq_disable();
419 		t->next = __get_cpu_var(tasklet_vec).list;
420 		__get_cpu_var(tasklet_vec).list = t;
421 		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
422 		local_irq_enable();
423 	}
424 }
425 
426 static void tasklet_hi_action(struct softirq_action *a)
427 {
428 	struct tasklet_struct *list;
429 
430 	local_irq_disable();
431 	list = __get_cpu_var(tasklet_hi_vec).list;
432 	__get_cpu_var(tasklet_hi_vec).list = NULL;
433 	local_irq_enable();
434 
435 	while (list) {
436 		struct tasklet_struct *t = list;
437 
438 		list = list->next;
439 
440 		if (tasklet_trylock(t)) {
441 			if (!atomic_read(&t->count)) {
442 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
443 					BUG();
444 				t->func(t->data);
445 				tasklet_unlock(t);
446 				continue;
447 			}
448 			tasklet_unlock(t);
449 		}
450 
451 		local_irq_disable();
452 		t->next = __get_cpu_var(tasklet_hi_vec).list;
453 		__get_cpu_var(tasklet_hi_vec).list = t;
454 		__raise_softirq_irqoff(HI_SOFTIRQ);
455 		local_irq_enable();
456 	}
457 }
458 
459 
460 void tasklet_init(struct tasklet_struct *t,
461 		  void (*func)(unsigned long), unsigned long data)
462 {
463 	t->next = NULL;
464 	t->state = 0;
465 	atomic_set(&t->count, 0);
466 	t->func = func;
467 	t->data = data;
468 }
469 
470 EXPORT_SYMBOL(tasklet_init);
471 
472 void tasklet_kill(struct tasklet_struct *t)
473 {
474 	if (in_interrupt())
475 		printk("Attempt to kill tasklet from interrupt\n");
476 
477 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
478 		do
479 			yield();
480 		while (test_bit(TASKLET_STATE_SCHED, &t->state));
481 	}
482 	tasklet_unlock_wait(t);
483 	clear_bit(TASKLET_STATE_SCHED, &t->state);
484 }
485 
486 EXPORT_SYMBOL(tasklet_kill);
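
/*
 * Illustrative sketch (not part of this file): the usual tasklet life
 * cycle -- declare it, schedule it from the hard interrupt handler, and
 * kill it on teardown.  All example_* names are hypothetical.
 */
#if 0
static void example_tasklet_fn(unsigned long data)
{
	/* bottom-half work: runs with hardirqs enabled, serialized
	   against itself, never on two CPUs at once */
}

static DECLARE_TASKLET(example_tasklet, example_tasklet_fn, 0);

static irqreturn_t example_irq(int irq, void *dev_id)
{
	/* defer the heavy work out of hard interrupt context */
	tasklet_schedule(&example_tasklet);
	return IRQ_HANDLED;
}

static void example_teardown(void)
{
	/* wait until the tasklet is neither scheduled nor running */
	tasklet_kill(&example_tasklet);
}
#endif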
487 
488 void __init softirq_init(void)
489 {
490 	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
491 	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
492 }
493 
494 static int ksoftirqd(void * __bind_cpu)
495 {
496 	set_current_state(TASK_INTERRUPTIBLE);
497 
498 	while (!kthread_should_stop()) {
499 		preempt_disable();
500 		if (!local_softirq_pending()) {
501 			preempt_enable_no_resched();
502 			schedule();
503 			preempt_disable();
504 		}
505 
506 		__set_current_state(TASK_RUNNING);
507 
508 		while (local_softirq_pending()) {
509 			/* Preempt disable stops the cpu from going offline.
510 			   If it is already offline, we're on the wrong CPU:
511 			   don't process */
512 			if (cpu_is_offline((long)__bind_cpu))
513 				goto wait_to_die;
514 			do_softirq();
515 			preempt_enable_no_resched();
516 			cond_resched();
517 			preempt_disable();
518 		}
519 		preempt_enable();
520 		set_current_state(TASK_INTERRUPTIBLE);
521 	}
522 	__set_current_state(TASK_RUNNING);
523 	return 0;
524 
525 wait_to_die:
526 	preempt_enable();
527 	/* Wait for kthread_stop */
528 	set_current_state(TASK_INTERRUPTIBLE);
529 	while (!kthread_should_stop()) {
530 		schedule();
531 		set_current_state(TASK_INTERRUPTIBLE);
532 	}
533 	__set_current_state(TASK_RUNNING);
534 	return 0;
535 }
536 
537 #ifdef CONFIG_HOTPLUG_CPU
538 /*
539  * tasklet_kill_immediate is called to remove a tasklet which may already be
540  * scheduled for execution on @cpu.
541  *
542  * Unlike tasklet_kill, this function removes the tasklet
543  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
544  *
545  * When this function is called, @cpu must be in the CPU_DEAD state.
546  */
547 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
548 {
549 	struct tasklet_struct **i;
550 
551 	BUG_ON(cpu_online(cpu));
552 	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
553 
554 	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
555 		return;
556 
557 	/* CPU is dead, so no lock needed. */
558 	for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
559 		if (*i == t) {
560 			*i = t->next;
561 			return;
562 		}
563 	}
564 	BUG();
565 }
566 
567 static void takeover_tasklets(unsigned int cpu)
568 {
569 	struct tasklet_struct **i;
570 
571 	/* CPU is dead, so no lock needed. */
572 	local_irq_disable();
573 
574 	/* Find end, append list for that CPU. */
575 	for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
576 	*i = per_cpu(tasklet_vec, cpu).list;
577 	per_cpu(tasklet_vec, cpu).list = NULL;
578 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
579 
580 	for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
581 	*i = per_cpu(tasklet_hi_vec, cpu).list;
582 	per_cpu(tasklet_hi_vec, cpu).list = NULL;
583 	raise_softirq_irqoff(HI_SOFTIRQ);
584 
585 	local_irq_enable();
586 }
587 #endif /* CONFIG_HOTPLUG_CPU */
588 
589 static int __cpuinit cpu_callback(struct notifier_block *nfb,
590 				  unsigned long action,
591 				  void *hcpu)
592 {
593 	int hotcpu = (unsigned long)hcpu;
594 	struct task_struct *p;
595 
596 	switch (action) {
597 	case CPU_UP_PREPARE:
598 	case CPU_UP_PREPARE_FROZEN:
599 		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
600 		if (IS_ERR(p)) {
601 			printk("ksoftirqd for %i failed\n", hotcpu);
602 			return NOTIFY_BAD;
603 		}
604 		kthread_bind(p, hotcpu);
605   		per_cpu(ksoftirqd, hotcpu) = p;
606  		break;
607 	case CPU_ONLINE:
608 	case CPU_ONLINE_FROZEN:
609 		wake_up_process(per_cpu(ksoftirqd, hotcpu));
610 		break;
611 #ifdef CONFIG_HOTPLUG_CPU
612 	case CPU_UP_CANCELED:
613 	case CPU_UP_CANCELED_FROZEN:
614 		if (!per_cpu(ksoftirqd, hotcpu))
615 			break;
616 		/* Unbind so it can run.  Fall thru. */
617 		kthread_bind(per_cpu(ksoftirqd, hotcpu),
618 			     any_online_cpu(cpu_online_map));
619 	case CPU_DEAD:
620 	case CPU_DEAD_FROZEN: {
621 		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
622 
623 		p = per_cpu(ksoftirqd, hotcpu);
624 		per_cpu(ksoftirqd, hotcpu) = NULL;
625 		sched_setscheduler(p, SCHED_FIFO, &param);
626 		kthread_stop(p);
627 		takeover_tasklets(hotcpu);
628 		break;
629 	}
630 #endif /* CONFIG_HOTPLUG_CPU */
631  	}
632 	return NOTIFY_OK;
633 }
634 
635 static struct notifier_block __cpuinitdata cpu_nfb = {
636 	.notifier_call = cpu_callback
637 };
638 
639 __init int spawn_ksoftirqd(void)
640 {
641 	void *cpu = (void *)(long)smp_processor_id();
642 	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
643 
644 	BUG_ON(err == NOTIFY_BAD);
645 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
646 	register_cpu_notifier(&cpu_nfb);
647 	return 0;
648 }
649 
650 #ifdef CONFIG_SMP
651 /*
652  * Call a function on all processors
653  */
654 int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
655 {
656 	int ret = 0;
657 
658 	preempt_disable();
659 	ret = smp_call_function(func, info, retry, wait);
660 	local_irq_disable();
661 	func(info);
662 	local_irq_enable();
663 	preempt_enable();
664 	return ret;
665 }
666 EXPORT_SYMBOL(on_each_cpu);
667 #endif
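
/*
 * Illustrative sketch (not part of this file): using on_each_cpu() with
 * the four-argument signature above.  The callback runs with interrupts
 * disabled on every online CPU, so it must be short and must not sleep;
 * the example_* names are hypothetical.
 */
#if 0
static void example_flush(void *info)
{
	/* flush this CPU's private state */
}

static void example_flush_all(void)
{
	/* retry = 0, wait = 1: return only after every CPU has run it */
	on_each_cpu(example_flush, NULL, 0, 1);
}
#endif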
668