xref: /openbmc/linux/kernel/softirq.c (revision 22246614)
1 /*
2  *	linux/kernel/softirq.c
3  *
4  *	Copyright (C) 1992 Linus Torvalds
5  *
6  *	Distribute under GPLv2.
7  *
8  *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  */
10 
11 #include <linux/module.h>
12 #include <linux/kernel_stat.h>
13 #include <linux/interrupt.h>
14 #include <linux/init.h>
15 #include <linux/mm.h>
16 #include <linux/notifier.h>
17 #include <linux/percpu.h>
18 #include <linux/cpu.h>
19 #include <linux/freezer.h>
20 #include <linux/kthread.h>
21 #include <linux/rcupdate.h>
22 #include <linux/smp.h>
23 #include <linux/tick.h>
24 
25 #include <asm/irq.h>
26 /*
27    - No shared variables, all the data are CPU local.
28    - If a softirq needs serialization, let it serialize itself
29      by its own spinlocks.
30    - Even if a softirq is serialized, only the local cpu is marked for
31      execution. Hence, we get a sort of weak cpu binding. Though it is
32      still not clear whether this will result in better locality
33      or not.
34 
35    Examples:
36    - NET RX softirq. It is multithreaded and does not require
37      any global serialization.
38    - NET TX softirq. It kicks software netdevice queues, hence
39      it is logically serialized per device, but this serialization
40      is invisible to common code.
41    - Tasklets: each tasklet is serialized with respect to itself.
42  */
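/*
 * Illustrative sketch, not part of the original file: how a subsystem
 * would plug into the softirq machinery described above.  EXAMPLE_SOFTIRQ
 * is hypothetical - real softirq numbers are fixed entries in the enum in
 * <linux/interrupt.h>.  The handler runs with hardirqs enabled, but only
 * on the CPU that raised it, so per-CPU state needs no cross-CPU locking.
 */
static void example_softirq_action(struct softirq_action *h)
{
	/* drain a per-CPU queue here; no global serialization is required */
}

static void example_subsys_init(void)
{
	open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action, NULL);
}

static void example_hardirq_path(void)
{
	/* typically called from a hardirq handler, or with irqs disabled */
	raise_softirq(EXAMPLE_SOFTIRQ);
}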
43 
44 #ifndef __ARCH_IRQ_STAT
45 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
46 EXPORT_SYMBOL(irq_stat);
47 #endif
48 
49 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
50 
51 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
52 
53 /*
54  * We cannot loop indefinitely here, to avoid userspace starvation,
55  * but we also don't want to introduce a worst case 1/HZ latency
56  * to the pending events, so let the scheduler balance
57  * the softirq load for us.
58  */
59 static inline void wakeup_softirqd(void)
60 {
61 	/* Interrupts are disabled: no need to stop preemption */
62 	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
63 
64 	if (tsk && tsk->state != TASK_RUNNING)
65 		wake_up_process(tsk);
66 }
67 
68 /*
69  * This one is for softirq.c-internal use,
70  * where hardirqs are disabled legitimately:
71  */
72 #ifdef CONFIG_TRACE_IRQFLAGS
73 static void __local_bh_disable(unsigned long ip)
74 {
75 	unsigned long flags;
76 
77 	WARN_ON_ONCE(in_irq());
78 
79 	raw_local_irq_save(flags);
80 	add_preempt_count(SOFTIRQ_OFFSET);
81 	/*
82 	 * Were softirqs turned off above:
83 	 */
84 	if (softirq_count() == SOFTIRQ_OFFSET)
85 		trace_softirqs_off(ip);
86 	raw_local_irq_restore(flags);
87 }
88 #else /* !CONFIG_TRACE_IRQFLAGS */
89 static inline void __local_bh_disable(unsigned long ip)
90 {
91 	add_preempt_count(SOFTIRQ_OFFSET);
92 	barrier();
93 }
94 #endif /* CONFIG_TRACE_IRQFLAGS */
95 
96 void local_bh_disable(void)
97 {
98 	__local_bh_disable((unsigned long)__builtin_return_address(0));
99 }
100 
101 EXPORT_SYMBOL(local_bh_disable);
102 
103 void __local_bh_enable(void)
104 {
105 	WARN_ON_ONCE(in_irq());
106 
107 	/*
108 	 * softirqs should never be enabled by __local_bh_enable();
109 	 * it always nests inside local_bh_enable() sections:
110 	 */
111 	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
112 
113 	sub_preempt_count(SOFTIRQ_OFFSET);
114 }
115 EXPORT_SYMBOL_GPL(__local_bh_enable);
116 
117 /*
118  * Special case: softirqs can safely be enabled in
119  * cond_resched_softirq(), or by __do_softirq(),
120  * without processing still-pending softirqs:
121  */
122 void _local_bh_enable(void)
123 {
124 	WARN_ON_ONCE(in_irq());
125 	WARN_ON_ONCE(!irqs_disabled());
126 
127 	if (softirq_count() == SOFTIRQ_OFFSET)
128 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
129 	sub_preempt_count(SOFTIRQ_OFFSET);
130 }
131 
132 EXPORT_SYMBOL(_local_bh_enable);
133 
134 void local_bh_enable(void)
135 {
136 #ifdef CONFIG_TRACE_IRQFLAGS
137 	unsigned long flags;
138 
139 	WARN_ON_ONCE(in_irq());
140 #endif
141 	WARN_ON_ONCE(irqs_disabled());
142 
143 #ifdef CONFIG_TRACE_IRQFLAGS
144 	local_irq_save(flags);
145 #endif
146 	/*
147 	 * Are softirqs going to be turned on now:
148 	 */
149 	if (softirq_count() == SOFTIRQ_OFFSET)
150 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
151 	/*
152 	 * Keep preemption disabled until we are done with
153 	 * softirq processing:
154  	 */
155  	sub_preempt_count(SOFTIRQ_OFFSET - 1);
156 
157 	if (unlikely(!in_interrupt() && local_softirq_pending()))
158 		do_softirq();
159 
160 	dec_preempt_count();
161 #ifdef CONFIG_TRACE_IRQFLAGS
162 	local_irq_restore(flags);
163 #endif
164 	preempt_check_resched();
165 }
166 EXPORT_SYMBOL(local_bh_enable);
167 
168 void local_bh_enable_ip(unsigned long ip)
169 {
170 #ifdef CONFIG_TRACE_IRQFLAGS
171 	unsigned long flags;
172 
173 	WARN_ON_ONCE(in_irq());
174 
175 	local_irq_save(flags);
176 #endif
177 	/*
178 	 * Are softirqs going to be turned on now:
179 	 */
180 	if (softirq_count() == SOFTIRQ_OFFSET)
181 		trace_softirqs_on(ip);
182 	/*
183 	 * Keep preemption disabled until we are done with
184 	 * softirq processing:
185  	 */
186  	sub_preempt_count(SOFTIRQ_OFFSET - 1);
187 
188 	if (unlikely(!in_interrupt() && local_softirq_pending()))
189 		do_softirq();
190 
191 	dec_preempt_count();
192 #ifdef CONFIG_TRACE_IRQFLAGS
193 	local_irq_restore(flags);
194 #endif
195 	preempt_check_resched();
196 }
197 EXPORT_SYMBOL(local_bh_enable_ip);
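/*
 * Illustrative sketch, not part of the original file: the usual pattern for
 * protecting per-CPU data that is also touched from softirq context on the
 * same CPU.  example_counter and example_bump() are made-up names.
 */
static DEFINE_PER_CPU(unsigned long, example_counter);

static void example_bump(void)
{
	local_bh_disable();		/* no softirq can run on this CPU now */
	__get_cpu_var(example_counter)++;
	local_bh_enable();		/* runs any softirqs that became pending */
}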
198 
199 /*
200  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
201  * and we fall back to ksoftirqd after that.
202  *
203  * This number has been established via experimentation.
204  * The two things to balance are latency and fairness -
205  * we want to handle softirqs as soon as possible, but they
206  * should not be able to lock up the box.
207  */
208 #define MAX_SOFTIRQ_RESTART 10
209 
210 asmlinkage void __do_softirq(void)
211 {
212 	struct softirq_action *h;
213 	__u32 pending;
214 	int max_restart = MAX_SOFTIRQ_RESTART;
215 	int cpu;
216 
217 	pending = local_softirq_pending();
218 	account_system_vtime(current);
219 
220 	__local_bh_disable((unsigned long)__builtin_return_address(0));
221 	trace_softirq_enter();
222 
223 	cpu = smp_processor_id();
224 restart:
225 	/* Reset the pending bitmask before enabling irqs */
226 	set_softirq_pending(0);
227 
228 	local_irq_enable();
229 
230 	h = softirq_vec;
231 
232 	do {
233 		if (pending & 1) {
234 			h->action(h);
235 			rcu_bh_qsctr_inc(cpu);
236 		}
237 		h++;
238 		pending >>= 1;
239 	} while (pending);
240 
241 	local_irq_disable();
242 
243 	pending = local_softirq_pending();
244 	if (pending && --max_restart)
245 		goto restart;
246 
247 	if (pending)
248 		wakeup_softirqd();
249 
250 	trace_softirq_exit();
251 
252 	account_system_vtime(current);
253 	_local_bh_enable();
254 }
255 
256 #ifndef __ARCH_HAS_DO_SOFTIRQ
257 
258 asmlinkage void do_softirq(void)
259 {
260 	__u32 pending;
261 	unsigned long flags;
262 
263 	if (in_interrupt())
264 		return;
265 
266 	local_irq_save(flags);
267 
268 	pending = local_softirq_pending();
269 
270 	if (pending)
271 		__do_softirq();
272 
273 	local_irq_restore(flags);
274 }
275 
276 #endif
277 
278 /*
279  * Enter an interrupt context.
280  */
281 void irq_enter(void)
282 {
283 #ifdef CONFIG_NO_HZ
284 	int cpu = smp_processor_id();
285 	if (idle_cpu(cpu) && !in_interrupt())
286 		tick_nohz_stop_idle(cpu);
287 #endif
288 	__irq_enter();
289 #ifdef CONFIG_NO_HZ
290 	if (idle_cpu(cpu))
291 		tick_nohz_update_jiffies();
292 #endif
293 }
294 
295 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
296 # define invoke_softirq()	__do_softirq()
297 #else
298 # define invoke_softirq()	do_softirq()
299 #endif
300 
301 /*
302  * Exit an interrupt context. Process softirqs if needed and possible:
303  */
304 void irq_exit(void)
305 {
306 	account_system_vtime(current);
307 	trace_hardirq_exit();
308 	sub_preempt_count(IRQ_EXIT_OFFSET);
309 	if (!in_interrupt() && local_softirq_pending())
310 		invoke_softirq();
311 
312 #ifdef CONFIG_NO_HZ
313 	/* Make sure that timer wheel updates are propagated */
314 	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
315 		tick_nohz_stop_sched_tick();
316 	rcu_irq_exit();
317 #endif
318 	preempt_enable_no_resched();
319 }
320 
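/*
 * Illustrative sketch, not part of the original file: the shape of an
 * arch-level interrupt entry path that brackets the handler with
 * irq_enter()/irq_exit().  example_do_IRQ() and example_hw_irq_number()
 * are hypothetical stand-ins for the arch-specific pieces;
 * generic_handle_irq() comes from <linux/irq.h>.
 */
void example_do_IRQ(struct pt_regs *regs)
{
	unsigned int irq = example_hw_irq_number(regs);

	irq_enter();			/* account the hardirq, fix up NO_HZ state */
	generic_handle_irq(irq);	/* run the registered handler(s) */
	irq_exit();			/* may process pending softirqs here */
}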
321 /*
322  * This function must run with irqs disabled!
323  */
324 inline void raise_softirq_irqoff(unsigned int nr)
325 {
326 	__raise_softirq_irqoff(nr);
327 
328 	/*
329 	 * If we're in an interrupt or softirq, we're done
330 	 * (this also catches softirq-disabled code). We will
331 	 * actually run the softirq once we return from
332 	 * the irq or softirq.
333 	 *
334 	 * Otherwise we wake up ksoftirqd to make sure we
335 	 * schedule the softirq soon.
336 	 */
337 	if (!in_interrupt())
338 		wakeup_softirqd();
339 }
340 
341 void raise_softirq(unsigned int nr)
342 {
343 	unsigned long flags;
344 
345 	local_irq_save(flags);
346 	raise_softirq_irqoff(nr);
347 	local_irq_restore(flags);
348 }
349 
350 void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
351 {
352 	softirq_vec[nr].data = data;
353 	softirq_vec[nr].action = action;
354 }
355 
356 /* Tasklets */
357 struct tasklet_head
358 {
359 	struct tasklet_struct *head;
360 	struct tasklet_struct **tail;
361 };
362 
363 /* Some compilers disobey section attribute on statics when not
364    initialized -- RR */
365 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
366 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
367 
368 void __tasklet_schedule(struct tasklet_struct *t)
369 {
370 	unsigned long flags;
371 
372 	local_irq_save(flags);
373 	t->next = NULL;
374 	*__get_cpu_var(tasklet_vec).tail = t;
375 	__get_cpu_var(tasklet_vec).tail = &(t->next);
376 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
377 	local_irq_restore(flags);
378 }
379 
380 EXPORT_SYMBOL(__tasklet_schedule);
381 
382 void __tasklet_hi_schedule(struct tasklet_struct *t)
383 {
384 	unsigned long flags;
385 
386 	local_irq_save(flags);
387 	t->next = NULL;
388 	*__get_cpu_var(tasklet_hi_vec).tail = t;
389 	__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
390 	raise_softirq_irqoff(HI_SOFTIRQ);
391 	local_irq_restore(flags);
392 }
393 
394 EXPORT_SYMBOL(__tasklet_hi_schedule);
395 
396 static void tasklet_action(struct softirq_action *a)
397 {
398 	struct tasklet_struct *list;
399 
400 	local_irq_disable();
401 	list = __get_cpu_var(tasklet_vec).head;
402 	__get_cpu_var(tasklet_vec).head = NULL;
403 	__get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
404 	local_irq_enable();
405 
406 	while (list) {
407 		struct tasklet_struct *t = list;
408 
409 		list = list->next;
410 
411 		if (tasklet_trylock(t)) {
412 			if (!atomic_read(&t->count)) {
413 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
414 					BUG();
415 				t->func(t->data);
416 				tasklet_unlock(t);
417 				continue;
418 			}
419 			tasklet_unlock(t);
420 		}
421 
422 		local_irq_disable();
423 		t->next = NULL;
424 		*__get_cpu_var(tasklet_vec).tail = t;
425 		__get_cpu_var(tasklet_vec).tail = &(t->next);
426 		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
427 		local_irq_enable();
428 	}
429 }
430 
431 static void tasklet_hi_action(struct softirq_action *a)
432 {
433 	struct tasklet_struct *list;
434 
435 	local_irq_disable();
436 	list = __get_cpu_var(tasklet_hi_vec).head;
437 	__get_cpu_var(tasklet_hi_vec).head = NULL;
438 	__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
439 	local_irq_enable();
440 
441 	while (list) {
442 		struct tasklet_struct *t = list;
443 
444 		list = list->next;
445 
446 		if (tasklet_trylock(t)) {
447 			if (!atomic_read(&t->count)) {
448 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
449 					BUG();
450 				t->func(t->data);
451 				tasklet_unlock(t);
452 				continue;
453 			}
454 			tasklet_unlock(t);
455 		}
456 
457 		local_irq_disable();
458 		t->next = NULL;
459 		*__get_cpu_var(tasklet_hi_vec).tail = t;
460 		__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
461 		__raise_softirq_irqoff(HI_SOFTIRQ);
462 		local_irq_enable();
463 	}
464 }
465 
466 
467 void tasklet_init(struct tasklet_struct *t,
468 		  void (*func)(unsigned long), unsigned long data)
469 {
470 	t->next = NULL;
471 	t->state = 0;
472 	atomic_set(&t->count, 0);
473 	t->func = func;
474 	t->data = data;
475 }
476 
477 EXPORT_SYMBOL(tasklet_init);
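/*
 * Illustrative sketch, not part of the original file: typical driver usage
 * of the tasklet API implemented here.  struct example_dev and
 * example_irq_handler() are hypothetical; tasklet_schedule() and
 * tasklet_kill() are declared in <linux/interrupt.h>.
 */
struct example_dev {
	struct tasklet_struct rx_tasklet;
};

static void example_rx_work(unsigned long data)
{
	/* cast data back to struct example_dev * and do the deferred work;
	   this runs in softirq context, serialized against itself */
}

static irqreturn_t example_irq_handler(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;

	tasklet_schedule(&dev->rx_tasklet);	/* defer the heavy lifting */
	return IRQ_HANDLED;
}

static void example_dev_open(struct example_dev *dev)
{
	tasklet_init(&dev->rx_tasklet, example_rx_work, (unsigned long)dev);
}

static void example_dev_close(struct example_dev *dev)
{
	tasklet_kill(&dev->rx_tasklet);	/* wait for a scheduled run to finish */
}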
478 
479 void tasklet_kill(struct tasklet_struct *t)
480 {
481 	if (in_interrupt())
482 		printk("Attempt to kill tasklet from interrupt\n");
483 
484 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
485 		do
486 			yield();
487 		while (test_bit(TASKLET_STATE_SCHED, &t->state));
488 	}
489 	tasklet_unlock_wait(t);
490 	clear_bit(TASKLET_STATE_SCHED, &t->state);
491 }
492 
493 EXPORT_SYMBOL(tasklet_kill);
494 
495 void __init softirq_init(void)
496 {
497 	int cpu;
498 
499 	for_each_possible_cpu(cpu) {
500 		per_cpu(tasklet_vec, cpu).tail =
501 			&per_cpu(tasklet_vec, cpu).head;
502 		per_cpu(tasklet_hi_vec, cpu).tail =
503 			&per_cpu(tasklet_hi_vec, cpu).head;
504 	}
505 
506 	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
507 	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
508 }
509 
510 static int ksoftirqd(void * __bind_cpu)
511 {
512 	set_current_state(TASK_INTERRUPTIBLE);
513 
514 	while (!kthread_should_stop()) {
515 		preempt_disable();
516 		if (!local_softirq_pending()) {
517 			preempt_enable_no_resched();
518 			schedule();
519 			preempt_disable();
520 		}
521 
522 		__set_current_state(TASK_RUNNING);
523 
524 		while (local_softirq_pending()) {
525 			/* Preempt disable stops the cpu from going offline.
526 			   If it is already offline, we'll be on the wrong
527 			   CPU: don't process. */
528 			if (cpu_is_offline((long)__bind_cpu))
529 				goto wait_to_die;
530 			do_softirq();
531 			preempt_enable_no_resched();
532 			cond_resched();
533 			preempt_disable();
534 		}
535 		preempt_enable();
536 		set_current_state(TASK_INTERRUPTIBLE);
537 	}
538 	__set_current_state(TASK_RUNNING);
539 	return 0;
540 
541 wait_to_die:
542 	preempt_enable();
543 	/* Wait for kthread_stop */
544 	set_current_state(TASK_INTERRUPTIBLE);
545 	while (!kthread_should_stop()) {
546 		schedule();
547 		set_current_state(TASK_INTERRUPTIBLE);
548 	}
549 	__set_current_state(TASK_RUNNING);
550 	return 0;
551 }
552 
553 #ifdef CONFIG_HOTPLUG_CPU
554 /*
555  * tasklet_kill_immediate is called to remove a tasklet which may already be
556  * scheduled for execution on @cpu.
557  *
558  * Unlike tasklet_kill, this function removes the tasklet
559  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
560  *
561  * When this function is called, @cpu must be in the CPU_DEAD state.
562  */
563 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
564 {
565 	struct tasklet_struct **i;
566 
567 	BUG_ON(cpu_online(cpu));
568 	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
569 
570 	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
571 		return;
572 
573 	/* CPU is dead, so no lock needed. */
574 	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
575 		if (*i == t) {
576 			*i = t->next;
577 			/* If this was the tail element, move the tail ptr */
578 			if (*i == NULL)
579 				per_cpu(tasklet_vec, cpu).tail = i;
580 			return;
581 		}
582 	}
583 	BUG();
584 }
585 
586 static void takeover_tasklets(unsigned int cpu)
587 {
588 	/* CPU is dead, so no lock needed. */
589 	local_irq_disable();
590 
591 	/* Find end, append list for that CPU. */
592 	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
593 		*(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
594 		__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
595 		per_cpu(tasklet_vec, cpu).head = NULL;
596 		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
597 	}
598 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
599 
600 	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
601 		*__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
602 		__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
603 		per_cpu(tasklet_hi_vec, cpu).head = NULL;
604 		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
605 	}
606 	raise_softirq_irqoff(HI_SOFTIRQ);
607 
608 	local_irq_enable();
609 }
610 #endif /* CONFIG_HOTPLUG_CPU */
611 
612 static int __cpuinit cpu_callback(struct notifier_block *nfb,
613 				  unsigned long action,
614 				  void *hcpu)
615 {
616 	int hotcpu = (unsigned long)hcpu;
617 	struct task_struct *p;
618 
619 	switch (action) {
620 	case CPU_UP_PREPARE:
621 	case CPU_UP_PREPARE_FROZEN:
622 		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
623 		if (IS_ERR(p)) {
624 			printk("ksoftirqd for %i failed\n", hotcpu);
625 			return NOTIFY_BAD;
626 		}
627 		kthread_bind(p, hotcpu);
628   		per_cpu(ksoftirqd, hotcpu) = p;
629  		break;
630 	case CPU_ONLINE:
631 	case CPU_ONLINE_FROZEN:
632 		wake_up_process(per_cpu(ksoftirqd, hotcpu));
633 		break;
634 #ifdef CONFIG_HOTPLUG_CPU
635 	case CPU_UP_CANCELED:
636 	case CPU_UP_CANCELED_FROZEN:
637 		if (!per_cpu(ksoftirqd, hotcpu))
638 			break;
639 		/* Unbind so it can run.  Fall thru. */
640 		kthread_bind(per_cpu(ksoftirqd, hotcpu),
641 			     any_online_cpu(cpu_online_map));
642 	case CPU_DEAD:
643 	case CPU_DEAD_FROZEN: {
644 		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
645 
646 		p = per_cpu(ksoftirqd, hotcpu);
647 		per_cpu(ksoftirqd, hotcpu) = NULL;
648 		sched_setscheduler(p, SCHED_FIFO, &param);
649 		kthread_stop(p);
650 		takeover_tasklets(hotcpu);
651 		break;
652 	}
653 #endif /* CONFIG_HOTPLUG_CPU */
654  	}
655 	return NOTIFY_OK;
656 }
657 
658 static struct notifier_block __cpuinitdata cpu_nfb = {
659 	.notifier_call = cpu_callback
660 };
661 
662 __init int spawn_ksoftirqd(void)
663 {
664 	void *cpu = (void *)(long)smp_processor_id();
665 	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
666 
667 	BUG_ON(err == NOTIFY_BAD);
668 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
669 	register_cpu_notifier(&cpu_nfb);
670 	return 0;
671 }
672 
673 #ifdef CONFIG_SMP
674 /*
675  * Call a function on all processors
676  */
677 int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
678 {
679 	int ret = 0;
680 
681 	preempt_disable();
682 	ret = smp_call_function(func, info, retry, wait);
683 	local_irq_disable();
684 	func(info);
685 	local_irq_enable();
686 	preempt_enable();
687 	return ret;
688 }
689 EXPORT_SYMBOL(on_each_cpu);
690 #endif
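/*
 * Illustrative sketch, not part of the original file: using on_each_cpu()
 * to run a short, non-sleeping callback everywhere.  example_flush_local()
 * is hypothetical; the callback runs with interrupts disabled on the local
 * CPU and from IPI context on the others, so it must not sleep.
 */
static void example_flush_local(void *info)
{
	/* flush or reset some per-CPU state here */
}

static void example_flush_all(void)
{
	/* retry=0, wait=1: return only after every CPU has run the callback */
	on_each_cpu(example_flush_local, NULL, 0, 1);
}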
691