xref: /openbmc/linux/kernel/smp.c (revision f7777dcc)
1 /*
2  * Generic helpers for smp ipi calls
3  *
4  * (C) Jens Axboe <jens.axboe@oracle.com> 2008
5  */
6 #include <linux/rcupdate.h>
7 #include <linux/rculist.h>
8 #include <linux/kernel.h>
9 #include <linux/export.h>
10 #include <linux/percpu.h>
11 #include <linux/init.h>
12 #include <linux/gfp.h>
13 #include <linux/smp.h>
14 #include <linux/cpu.h>
15 
16 #include "smpboot.h"
17 
18 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
/* Bits stored in call_single_data.flags. */
enum {
	/* Set by csd_lock(), cleared by csd_unlock(). */
	CSD_FLAG_LOCK		= 1 << 0,
};
22 
23 struct call_function_data {
24 	struct call_single_data	__percpu *csd;
25 	cpumask_var_t		cpumask;
26 	cpumask_var_t		cpumask_ipi;
27 };
28 
29 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
30 
31 struct call_single_queue {
32 	struct list_head	list;
33 	raw_spinlock_t		lock;
34 };
35 
36 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue);
37 
38 static int
39 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
40 {
41 	long cpu = (long)hcpu;
42 	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
43 
44 	switch (action) {
45 	case CPU_UP_PREPARE:
46 	case CPU_UP_PREPARE_FROZEN:
47 		if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
48 				cpu_to_node(cpu)))
49 			return notifier_from_errno(-ENOMEM);
50 		if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
51 				cpu_to_node(cpu))) {
52 			free_cpumask_var(cfd->cpumask);
53 			return notifier_from_errno(-ENOMEM);
54 		}
55 		cfd->csd = alloc_percpu(struct call_single_data);
56 		if (!cfd->csd) {
57 			free_cpumask_var(cfd->cpumask_ipi);
58 			free_cpumask_var(cfd->cpumask);
59 			return notifier_from_errno(-ENOMEM);
60 		}
61 		break;
62 
63 #ifdef CONFIG_HOTPLUG_CPU
64 	case CPU_UP_CANCELED:
65 	case CPU_UP_CANCELED_FROZEN:
66 
67 	case CPU_DEAD:
68 	case CPU_DEAD_FROZEN:
69 		free_cpumask_var(cfd->cpumask);
70 		free_cpumask_var(cfd->cpumask_ipi);
71 		free_percpu(cfd->csd);
72 		break;
73 #endif
74 	};
75 
76 	return NOTIFY_OK;
77 }
78 
79 static struct notifier_block hotplug_cfd_notifier = {
80 	.notifier_call		= hotplug_cfd,
81 };
82 
83 void __init call_function_init(void)
84 {
85 	void *cpu = (void *)(long)smp_processor_id();
86 	int i;
87 
88 	for_each_possible_cpu(i) {
89 		struct call_single_queue *q = &per_cpu(call_single_queue, i);
90 
91 		raw_spin_lock_init(&q->lock);
92 		INIT_LIST_HEAD(&q->list);
93 	}
94 
95 	hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
96 	register_cpu_notifier(&hotplug_cfd_notifier);
97 }
98 
99 /*
100  * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
101  *
102  * For non-synchronous ipi calls the csd can still be in use by the
103  * previous function call. For multi-cpu calls its even more interesting
104  * as we'll have to ensure no other cpu is observing our csd.
105  */
106 static void csd_lock_wait(struct call_single_data *csd)
107 {
108 	while (csd->flags & CSD_FLAG_LOCK)
109 		cpu_relax();
110 }
111 
112 static void csd_lock(struct call_single_data *csd)
113 {
114 	csd_lock_wait(csd);
115 	csd->flags |= CSD_FLAG_LOCK;
116 
117 	/*
118 	 * prevent CPU from reordering the above assignment
119 	 * to ->flags with any subsequent assignments to other
120 	 * fields of the specified call_single_data structure:
121 	 */
122 	smp_mb();
123 }
124 
125 static void csd_unlock(struct call_single_data *csd)
126 {
127 	WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
128 
129 	/*
130 	 * ensure we're all done before releasing data:
131 	 */
132 	smp_mb();
133 
134 	csd->flags &= ~CSD_FLAG_LOCK;
135 }
136 
137 /*
138  * Insert a previously allocated call_single_data element
139  * for execution on the given CPU. data must already have
140  * ->func, ->info, and ->flags set.
141  */
142 static
143 void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
144 {
145 	struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
146 	unsigned long flags;
147 	int ipi;
148 
149 	raw_spin_lock_irqsave(&dst->lock, flags);
150 	ipi = list_empty(&dst->list);
151 	list_add_tail(&csd->list, &dst->list);
152 	raw_spin_unlock_irqrestore(&dst->lock, flags);
153 
154 	/*
155 	 * The list addition should be visible before sending the IPI
156 	 * handler locks the list to pull the entry off it because of
157 	 * normal cache coherency rules implied by spinlocks.
158 	 *
159 	 * If IPIs can go out of order to the cache coherency protocol
160 	 * in an architecture, sufficient synchronisation should be added
161 	 * to arch code to make it appear to obey cache coherency WRT
162 	 * locking and barrier primitives. Generic code isn't really
163 	 * equipped to do the right thing...
164 	 */
165 	if (ipi)
166 		arch_send_call_function_single_ipi(cpu);
167 
168 	if (wait)
169 		csd_lock_wait(csd);
170 }
171 
172 /*
173  * Invoked by arch to handle an IPI for call function single. Must be
174  * called from the arch with interrupts disabled.
175  */
176 void generic_smp_call_function_single_interrupt(void)
177 {
178 	struct call_single_queue *q = &__get_cpu_var(call_single_queue);
179 	LIST_HEAD(list);
180 
181 	/*
182 	 * Shouldn't receive this interrupt on a cpu that is not yet online.
183 	 */
184 	WARN_ON_ONCE(!cpu_online(smp_processor_id()));
185 
186 	raw_spin_lock(&q->lock);
187 	list_replace_init(&q->list, &list);
188 	raw_spin_unlock(&q->lock);
189 
190 	while (!list_empty(&list)) {
191 		struct call_single_data *csd;
192 
193 		csd = list_entry(list.next, struct call_single_data, list);
194 		list_del(&csd->list);
195 
196 		csd->func(csd->info);
197 
198 		csd_unlock(csd);
199 	}
200 }
201 
202 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
203 
204 /*
205  * smp_call_function_single - Run a function on a specific CPU
206  * @func: The function to run. This must be fast and non-blocking.
207  * @info: An arbitrary pointer to pass to the function.
208  * @wait: If true, wait until function has completed on other CPUs.
209  *
210  * Returns 0 on success, else a negative status code.
211  */
212 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
213 			     int wait)
214 {
215 	struct call_single_data d = {
216 		.flags = 0,
217 	};
218 	unsigned long flags;
219 	int this_cpu;
220 	int err = 0;
221 
222 	/*
223 	 * prevent preemption and reschedule on another processor,
224 	 * as well as CPU removal
225 	 */
226 	this_cpu = get_cpu();
227 
228 	/*
229 	 * Can deadlock when called with interrupts disabled.
230 	 * We allow cpu's that are not yet online though, as no one else can
231 	 * send smp call function interrupt to this cpu and as such deadlocks
232 	 * can't happen.
233 	 */
234 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
235 		     && !oops_in_progress);
236 
237 	if (cpu == this_cpu) {
238 		local_irq_save(flags);
239 		func(info);
240 		local_irq_restore(flags);
241 	} else {
242 		if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
243 			struct call_single_data *csd = &d;
244 
245 			if (!wait)
246 				csd = &__get_cpu_var(csd_data);
247 
248 			csd_lock(csd);
249 
250 			csd->func = func;
251 			csd->info = info;
252 			generic_exec_single(cpu, csd, wait);
253 		} else {
254 			err = -ENXIO;	/* CPU not online */
255 		}
256 	}
257 
258 	put_cpu();
259 
260 	return err;
261 }
262 EXPORT_SYMBOL(smp_call_function_single);
263 
264 /*
265  * smp_call_function_any - Run a function on any of the given cpus
266  * @mask: The mask of cpus it can run on.
267  * @func: The function to run. This must be fast and non-blocking.
268  * @info: An arbitrary pointer to pass to the function.
269  * @wait: If true, wait until function has completed.
270  *
271  * Returns 0 on success, else a negative status code (if no cpus were online).
272  *
273  * Selection preference:
274  *	1) current cpu if in @mask
275  *	2) any cpu of current node if in @mask
276  *	3) any other online cpu in @mask
277  */
278 int smp_call_function_any(const struct cpumask *mask,
279 			  smp_call_func_t func, void *info, int wait)
280 {
281 	unsigned int cpu;
282 	const struct cpumask *nodemask;
283 	int ret;
284 
285 	/* Try for same CPU (cheapest) */
286 	cpu = get_cpu();
287 	if (cpumask_test_cpu(cpu, mask))
288 		goto call;
289 
290 	/* Try for same node. */
291 	nodemask = cpumask_of_node(cpu_to_node(cpu));
292 	for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
293 	     cpu = cpumask_next_and(cpu, nodemask, mask)) {
294 		if (cpu_online(cpu))
295 			goto call;
296 	}
297 
298 	/* Any online will do: smp_call_function_single handles nr_cpu_ids. */
299 	cpu = cpumask_any_and(mask, cpu_online_mask);
300 call:
301 	ret = smp_call_function_single(cpu, func, info, wait);
302 	put_cpu();
303 	return ret;
304 }
305 EXPORT_SYMBOL_GPL(smp_call_function_any);
306 
307 /**
308  * __smp_call_function_single(): Run a function on a specific CPU
309  * @cpu: The CPU to run on.
310  * @data: Pre-allocated and setup data structure
311  * @wait: If true, wait until function has completed on specified CPU.
312  *
313  * Like smp_call_function_single(), but allow caller to pass in a
314  * pre-allocated data structure. Useful for embedding @data inside
315  * other structures, for instance.
316  */
317 void __smp_call_function_single(int cpu, struct call_single_data *csd,
318 				int wait)
319 {
320 	unsigned int this_cpu;
321 	unsigned long flags;
322 
323 	this_cpu = get_cpu();
324 	/*
325 	 * Can deadlock when called with interrupts disabled.
326 	 * We allow cpu's that are not yet online though, as no one else can
327 	 * send smp call function interrupt to this cpu and as such deadlocks
328 	 * can't happen.
329 	 */
330 	WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
331 		     && !oops_in_progress);
332 
333 	if (cpu == this_cpu) {
334 		local_irq_save(flags);
335 		csd->func(csd->info);
336 		local_irq_restore(flags);
337 	} else {
338 		csd_lock(csd);
339 		generic_exec_single(cpu, csd, wait);
340 	}
341 	put_cpu();
342 }
343 
344 /**
345  * smp_call_function_many(): Run a function on a set of other CPUs.
346  * @mask: The set of cpus to run on (only runs on online subset).
347  * @func: The function to run. This must be fast and non-blocking.
348  * @info: An arbitrary pointer to pass to the function.
349  * @wait: If true, wait (atomically) until function has completed
350  *        on other CPUs.
351  *
352  * If @wait is true, then returns once @func has returned.
353  *
354  * You must not call this function with disabled interrupts or from a
355  * hardware interrupt handler or from a bottom half handler. Preemption
356  * must be disabled when calling this function.
357  */
358 void smp_call_function_many(const struct cpumask *mask,
359 			    smp_call_func_t func, void *info, bool wait)
360 {
361 	struct call_function_data *cfd;
362 	int cpu, next_cpu, this_cpu = smp_processor_id();
363 
364 	/*
365 	 * Can deadlock when called with interrupts disabled.
366 	 * We allow cpu's that are not yet online though, as no one else can
367 	 * send smp call function interrupt to this cpu and as such deadlocks
368 	 * can't happen.
369 	 */
370 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
371 		     && !oops_in_progress && !early_boot_irqs_disabled);
372 
373 	/* Try to fastpath.  So, what's a CPU they want? Ignoring this one. */
374 	cpu = cpumask_first_and(mask, cpu_online_mask);
375 	if (cpu == this_cpu)
376 		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
377 
378 	/* No online cpus?  We're done. */
379 	if (cpu >= nr_cpu_ids)
380 		return;
381 
382 	/* Do we have another CPU which isn't us? */
383 	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
384 	if (next_cpu == this_cpu)
385 		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
386 
387 	/* Fastpath: do that cpu by itself. */
388 	if (next_cpu >= nr_cpu_ids) {
389 		smp_call_function_single(cpu, func, info, wait);
390 		return;
391 	}
392 
393 	cfd = &__get_cpu_var(cfd_data);
394 
395 	cpumask_and(cfd->cpumask, mask, cpu_online_mask);
396 	cpumask_clear_cpu(this_cpu, cfd->cpumask);
397 
398 	/* Some callers race with other cpus changing the passed mask */
399 	if (unlikely(!cpumask_weight(cfd->cpumask)))
400 		return;
401 
402 	/*
403 	 * After we put an entry into the list, cfd->cpumask may be cleared
404 	 * again when another CPU sends another IPI for a SMP function call, so
405 	 * cfd->cpumask will be zero.
406 	 */
407 	cpumask_copy(cfd->cpumask_ipi, cfd->cpumask);
408 
409 	for_each_cpu(cpu, cfd->cpumask) {
410 		struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
411 		struct call_single_queue *dst =
412 					&per_cpu(call_single_queue, cpu);
413 		unsigned long flags;
414 
415 		csd_lock(csd);
416 		csd->func = func;
417 		csd->info = info;
418 
419 		raw_spin_lock_irqsave(&dst->lock, flags);
420 		list_add_tail(&csd->list, &dst->list);
421 		raw_spin_unlock_irqrestore(&dst->lock, flags);
422 	}
423 
424 	/* Send a message to all CPUs in the map */
425 	arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
426 
427 	if (wait) {
428 		for_each_cpu(cpu, cfd->cpumask) {
429 			struct call_single_data *csd;
430 
431 			csd = per_cpu_ptr(cfd->csd, cpu);
432 			csd_lock_wait(csd);
433 		}
434 	}
435 }
436 EXPORT_SYMBOL(smp_call_function_many);
437 
438 /**
439  * smp_call_function(): Run a function on all other CPUs.
440  * @func: The function to run. This must be fast and non-blocking.
441  * @info: An arbitrary pointer to pass to the function.
442  * @wait: If true, wait (atomically) until function has completed
443  *        on other CPUs.
444  *
445  * Returns 0.
446  *
447  * If @wait is true, then returns once @func has returned; otherwise
448  * it returns just before the target cpu calls @func.
449  *
450  * You must not call this function with disabled interrupts or from a
451  * hardware interrupt handler or from a bottom half handler.
452  */
453 int smp_call_function(smp_call_func_t func, void *info, int wait)
454 {
455 	preempt_disable();
456 	smp_call_function_many(cpu_online_mask, func, info, wait);
457 	preempt_enable();
458 
459 	return 0;
460 }
461 EXPORT_SYMBOL(smp_call_function);
462 #endif /* USE_GENERIC_SMP_HELPERS */
463 
464 /* Setup configured maximum number of CPUs to activate */
465 unsigned int setup_max_cpus = NR_CPUS;
466 EXPORT_SYMBOL(setup_max_cpus);
467 
468 
469 /*
470  * Setup routine for controlling SMP activation
471  *
472  * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
473  * activation entirely (the MPS table probe still happens, though).
474  *
475  * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
476  * greater than 0, limits the maximum number of CPUs activated in
477  * SMP mode to <NUM>.
478  */
479 
480 void __weak arch_disable_smp_support(void) { }
481 
482 static int __init nosmp(char *str)
483 {
484 	setup_max_cpus = 0;
485 	arch_disable_smp_support();
486 
487 	return 0;
488 }
489 
490 early_param("nosmp", nosmp);
491 
492 /* this is hard limit */
493 static int __init nrcpus(char *str)
494 {
495 	int nr_cpus;
496 
497 	get_option(&str, &nr_cpus);
498 	if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
499 		nr_cpu_ids = nr_cpus;
500 
501 	return 0;
502 }
503 
504 early_param("nr_cpus", nrcpus);
505 
506 static int __init maxcpus(char *str)
507 {
508 	get_option(&str, &setup_max_cpus);
509 	if (setup_max_cpus == 0)
510 		arch_disable_smp_support();
511 
512 	return 0;
513 }
514 
515 early_param("maxcpus", maxcpus);
516 
517 /* Setup number of possible processor ids */
518 int nr_cpu_ids __read_mostly = NR_CPUS;
519 EXPORT_SYMBOL(nr_cpu_ids);
520 
521 /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
522 void __init setup_nr_cpu_ids(void)
523 {
524 	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
525 }
526 
527 /* Called by boot processor to activate the rest. */
528 void __init smp_init(void)
529 {
530 	unsigned int cpu;
531 
532 	idle_threads_init();
533 
534 	/* FIXME: This should be done in userspace --RR */
535 	for_each_present_cpu(cpu) {
536 		if (num_online_cpus() >= setup_max_cpus)
537 			break;
538 		if (!cpu_online(cpu))
539 			cpu_up(cpu);
540 	}
541 
542 	/* Any cleanup work */
543 	printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
544 	smp_cpus_done(setup_max_cpus);
545 }
546 
/*
 * Call a function on all processors.  May be used during early boot while
 * early_boot_irqs_disabled is set.  Use local_irq_save/restore() instead
 * of local_irq_disable/enable().
 *
 * The other cpus are handled through smp_call_function(); afterwards @func
 * is run on the calling cpu with local interrupts disabled.  Returns the
 * value returned by smp_call_function().
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	unsigned long irq_flags;
	int ret;

	preempt_disable();

	ret = smp_call_function(func, info, wait);

	local_irq_save(irq_flags);
	func(info);
	local_irq_restore(irq_flags);

	preempt_enable();

	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
566 
567 /**
568  * on_each_cpu_mask(): Run a function on processors specified by
569  * cpumask, which may include the local processor.
570  * @mask: The set of cpus to run on (only runs on online subset).
571  * @func: The function to run. This must be fast and non-blocking.
572  * @info: An arbitrary pointer to pass to the function.
573  * @wait: If true, wait (atomically) until function has completed
574  *        on other CPUs.
575  *
576  * If @wait is true, then returns once @func has returned.
577  *
578  * You must not call this function with disabled interrupts or from a
579  * hardware interrupt handler or from a bottom half handler.  The
580  * exception is that it may be used during early boot while
581  * early_boot_irqs_disabled is set.
582  */
583 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
584 			void *info, bool wait)
585 {
586 	int cpu = get_cpu();
587 
588 	smp_call_function_many(mask, func, info, wait);
589 	if (cpumask_test_cpu(cpu, mask)) {
590 		unsigned long flags;
591 		local_irq_save(flags);
592 		func(info);
593 		local_irq_restore(flags);
594 	}
595 	put_cpu();
596 }
597 EXPORT_SYMBOL(on_each_cpu_mask);
598 
599 /*
600  * on_each_cpu_cond(): Call a function on each processor for which
601  * the supplied function cond_func returns true, optionally waiting
602  * for all the required CPUs to finish. This may include the local
603  * processor.
604  * @cond_func:	A callback function that is passed a cpu id and
605  *		the the info parameter. The function is called
606  *		with preemption disabled. The function should
607  *		return a blooean value indicating whether to IPI
608  *		the specified CPU.
609  * @func:	The function to run on all applicable CPUs.
610  *		This must be fast and non-blocking.
611  * @info:	An arbitrary pointer to pass to both functions.
612  * @wait:	If true, wait (atomically) until function has
613  *		completed on other CPUs.
614  * @gfp_flags:	GFP flags to use when allocating the cpumask
615  *		used internally by the function.
616  *
617  * The function might sleep if the GFP flags indicates a non
618  * atomic allocation is allowed.
619  *
620  * Preemption is disabled to protect against CPUs going offline but not online.
621  * CPUs going online during the call will not be seen or sent an IPI.
622  *
623  * You must not call this function with disabled interrupts or
624  * from a hardware interrupt handler or from a bottom half handler.
625  */
626 void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
627 			smp_call_func_t func, void *info, bool wait,
628 			gfp_t gfp_flags)
629 {
630 	cpumask_var_t cpus;
631 	int cpu, ret;
632 
633 	might_sleep_if(gfp_flags & __GFP_WAIT);
634 
635 	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
636 		preempt_disable();
637 		for_each_online_cpu(cpu)
638 			if (cond_func(cpu, info))
639 				cpumask_set_cpu(cpu, cpus);
640 		on_each_cpu_mask(cpus, func, info, wait);
641 		preempt_enable();
642 		free_cpumask_var(cpus);
643 	} else {
644 		/*
645 		 * No free cpumask, bother. No matter, we'll
646 		 * just have to IPI them one by one.
647 		 */
648 		preempt_disable();
649 		for_each_online_cpu(cpu)
650 			if (cond_func(cpu, info)) {
651 				ret = smp_call_function_single(cpu, func,
652 								info, wait);
653 				WARN_ON_ONCE(!ret);
654 			}
655 		preempt_enable();
656 	}
657 }
658 EXPORT_SYMBOL(on_each_cpu_cond);
659 
/* Empty callback: kick_all_cpus_sync() only needs the IPI to be taken. */
static void do_nothing(void *unused)
{
	(void)unused;
}
663 
664 /**
665  * kick_all_cpus_sync - Force all cpus out of idle
666  *
667  * Used to synchronize the update of pm_idle function pointer. It's
668  * called after the pointer is updated and returns after the dummy
669  * callback function has been executed on all cpus. The execution of
670  * the function can only happen on the remote cpus after they have
671  * left the idle function which had been called via pm_idle function
672  * pointer. So it's guaranteed that nothing uses the previous pointer
673  * anymore.
674  */
675 void kick_all_cpus_sync(void)
676 {
677 	/* Make sure the change is visible before we kick the cpus */
678 	smp_mb();
679 	smp_call_function(do_nothing, NULL, 1);
680 }
681 EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
682