xref: /openbmc/linux/kernel/smp.c (revision a395b8d1)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Generic helpers for smp ipi calls
4   *
5   * (C) Jens Axboe <jens.axboe@oracle.com> 2008
6   */
7  
8  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9  
10  #include <linux/irq_work.h>
11  #include <linux/rcupdate.h>
12  #include <linux/rculist.h>
13  #include <linux/kernel.h>
14  #include <linux/export.h>
15  #include <linux/percpu.h>
16  #include <linux/init.h>
17  #include <linux/interrupt.h>
18  #include <linux/gfp.h>
19  #include <linux/smp.h>
20  #include <linux/cpu.h>
21  #include <linux/sched.h>
22  #include <linux/sched/idle.h>
23  #include <linux/hypervisor.h>
24  #include <linux/sched/clock.h>
25  #include <linux/nmi.h>
26  #include <linux/sched/debug.h>
27  #include <linux/jump_label.h>
28  
29  #include <trace/events/ipi.h>
30  
31  #include "smpboot.h"
32  #include "sched/smp.h"
33  
34  #define CSD_TYPE(_csd)	((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
35  
36  struct call_function_data {
37  	call_single_data_t	__percpu *csd;
38  	cpumask_var_t		cpumask;
39  	cpumask_var_t		cpumask_ipi;
40  };
41  
42  static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
43  
44  static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
45  
46  static void __flush_smp_call_function_queue(bool warn_cpu_offline);
47  
48  int smpcfd_prepare_cpu(unsigned int cpu)
49  {
50  	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
51  
52  	if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
53  				     cpu_to_node(cpu)))
54  		return -ENOMEM;
55  	if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
56  				     cpu_to_node(cpu))) {
57  		free_cpumask_var(cfd->cpumask);
58  		return -ENOMEM;
59  	}
60  	cfd->csd = alloc_percpu(call_single_data_t);
61  	if (!cfd->csd) {
62  		free_cpumask_var(cfd->cpumask);
63  		free_cpumask_var(cfd->cpumask_ipi);
64  		return -ENOMEM;
65  	}
66  
67  	return 0;
68  }
69  
70  int smpcfd_dead_cpu(unsigned int cpu)
71  {
72  	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
73  
74  	free_cpumask_var(cfd->cpumask);
75  	free_cpumask_var(cfd->cpumask_ipi);
76  	free_percpu(cfd->csd);
77  	return 0;
78  }
79  
80  int smpcfd_dying_cpu(unsigned int cpu)
81  {
82  	/*
83  	 * The IPIs for the smp-call-function callbacks queued by other
84  	 * CPUs might arrive late, either due to hardware latencies or
85  	 * because this CPU disabled interrupts (inside stop-machine)
86  	 * before the IPIs were sent. So flush out any pending callbacks
87  	 * explicitly (without waiting for the IPIs to arrive), to
88  	 * ensure that the outgoing CPU doesn't go offline with work
89  	 * still pending.
90  	 */
91  	__flush_smp_call_function_queue(false);
92  	irq_work_run();
93  	return 0;
94  }
95  
96  void __init call_function_init(void)
97  {
98  	int i;
99  
100  	for_each_possible_cpu(i)
101  		init_llist_head(&per_cpu(call_single_queue, i));
102  
103  	smpcfd_prepare_cpu(smp_processor_id());
104  }
105  
106  static __always_inline void
107  send_call_function_single_ipi(int cpu)
108  {
109  	if (call_function_single_prep_ipi(cpu)) {
110  		trace_ipi_send_cpu(cpu, _RET_IP_,
111  				   generic_smp_call_function_single_interrupt);
112  		arch_send_call_function_single_ipi(cpu);
113  	}
114  }
115  
116  static __always_inline void
117  send_call_function_ipi_mask(struct cpumask *mask)
118  {
119  	trace_ipi_send_cpumask(mask, _RET_IP_,
120  			       generic_smp_call_function_single_interrupt);
121  	arch_send_call_function_ipi_mask(mask);
122  }
123  
124  #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
125  
126  static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled);
127  
128  /*
129   * Parse the csdlock_debug= kernel boot parameter.
130   *
131   * If you need to restore the old "ext" value that once provided
132   * additional debugging information, reapply the following commits:
133   *
134   * de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging")
135   * a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
136   */
137  static int __init csdlock_debug(char *str)
138  {
139  	int ret;
140  	unsigned int val = 0;
141  
142  	ret = get_option(&str, &val);
143  	if (ret) {
144  		if (val)
145  			static_branch_enable(&csdlock_debug_enabled);
146  		else
147  			static_branch_disable(&csdlock_debug_enabled);
148  	}
149  
150  	return 1;
151  }
152  __setup("csdlock_debug=", csdlock_debug);
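/*
 * Illustrative note (not part of the upstream file): with
 * CONFIG_CSD_LOCK_WAIT_DEBUG=y, the handler above lets CSD-lock wait
 * debugging be toggled from the kernel command line, e.g.:
 *
 *	csdlock_debug=1		enable the static branch
 *	csdlock_debug=0		disable it, even if it defaults to on
 */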
153  
154  static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
155  static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
156  static DEFINE_PER_CPU(void *, cur_csd_info);
157  
158  static ulong csd_lock_timeout = 5000;  /* CSD lock timeout in milliseconds. */
159  module_param(csd_lock_timeout, ulong, 0444);
160  
161  static atomic_t csd_bug_count = ATOMIC_INIT(0);
162  
163  /* Record current CSD work for current CPU, NULL to erase. */
164  static void __csd_lock_record(struct __call_single_data *csd)
165  {
166  	if (!csd) {
167  		smp_mb(); /* NULL cur_csd after unlock. */
168  		__this_cpu_write(cur_csd, NULL);
169  		return;
170  	}
171  	__this_cpu_write(cur_csd_func, csd->func);
172  	__this_cpu_write(cur_csd_info, csd->info);
173  	smp_wmb(); /* func and info before csd. */
174  	__this_cpu_write(cur_csd, csd);
175  	smp_mb(); /* Update cur_csd before function call. */
176  		  /* Or before unlock, as the case may be. */
177  }
178  
179  static __always_inline void csd_lock_record(struct __call_single_data *csd)
180  {
181  	if (static_branch_unlikely(&csdlock_debug_enabled))
182  		__csd_lock_record(csd);
183  }
184  
185  static int csd_lock_wait_getcpu(struct __call_single_data *csd)
186  {
187  	unsigned int csd_type;
188  
189  	csd_type = CSD_TYPE(csd);
190  	if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
191  		return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */
192  	return -1;
193  }
194  
195  /*
196   * Complain if too much time is spent waiting.  Note that only
197   * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
198   * so waiting on other types gets much less information.
199   */
200  static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *ts1, int *bug_id)
201  {
202  	int cpu = -1;
203  	int cpux;
204  	bool firsttime;
205  	u64 ts2, ts_delta;
206  	call_single_data_t *cpu_cur_csd;
207  	unsigned int flags = READ_ONCE(csd->node.u_flags);
208  	unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC;
209  
210  	if (!(flags & CSD_FLAG_LOCK)) {
211  		if (!unlikely(*bug_id))
212  			return true;
213  		cpu = csd_lock_wait_getcpu(csd);
214  		pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
215  			 *bug_id, raw_smp_processor_id(), cpu);
216  		return true;
217  	}
218  
219  	ts2 = sched_clock();
220  	ts_delta = ts2 - *ts1;
221  	if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
222  		return false;
223  
224  	firsttime = !*bug_id;
225  	if (firsttime)
226  		*bug_id = atomic_inc_return(&csd_bug_count);
227  	cpu = csd_lock_wait_getcpu(csd);
228  	if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
229  		cpux = 0;
230  	else
231  		cpux = cpu;
232  	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
233  	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
234  		 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
235  		 cpu, csd->func, csd->info);
236  	if (cpu_cur_csd && csd != cpu_cur_csd) {
237  		pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
238  			 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
239  			 READ_ONCE(per_cpu(cur_csd_info, cpux)));
240  	} else {
241  		pr_alert("\tcsd: CSD lock (#%d) %s.\n",
242  			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
243  	}
244  	if (cpu >= 0) {
245  		dump_cpu_task(cpu);
246  		if (!cpu_cur_csd) {
247  			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
248  			arch_send_call_function_single_ipi(cpu);
249  		}
250  	}
251  	dump_stack();
252  	*ts1 = ts2;
253  
254  	return false;
255  }
256  
257  /*
258   * csd_lock/csd_unlock are used to serialize access to per-cpu csd resources
259   *
260   * For non-synchronous ipi calls the csd can still be in use by the
261   * previous function call. For multi-cpu calls it's even more interesting
262   * as we'll have to ensure no other cpu is observing our csd.
263   */
264  static void __csd_lock_wait(struct __call_single_data *csd)
265  {
266  	int bug_id = 0;
267  	u64 ts0, ts1;
268  
269  	ts1 = ts0 = sched_clock();
270  	for (;;) {
271  		if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
272  			break;
273  		cpu_relax();
274  	}
275  	smp_acquire__after_ctrl_dep();
276  }
277  
278  static __always_inline void csd_lock_wait(struct __call_single_data *csd)
279  {
280  	if (static_branch_unlikely(&csdlock_debug_enabled)) {
281  		__csd_lock_wait(csd);
282  		return;
283  	}
284  
285  	smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
286  }
287  #else
288  static void csd_lock_record(struct __call_single_data *csd)
289  {
290  }
291  
292  static __always_inline void csd_lock_wait(struct __call_single_data *csd)
293  {
294  	smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
295  }
296  #endif
297  
298  static __always_inline void csd_lock(struct __call_single_data *csd)
299  {
300  	csd_lock_wait(csd);
301  	csd->node.u_flags |= CSD_FLAG_LOCK;
302  
303  	/*
304  	 * prevent CPU from reordering the above assignment
305  	 * to ->flags with any subsequent assignments to other
306  	 * fields of the specified call_single_data_t structure:
307  	 */
308  	smp_wmb();
309  }
310  
311  static __always_inline void csd_unlock(struct __call_single_data *csd)
312  {
313  	WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
314  
315  	/*
316  	 * ensure we're all done before releasing data:
317  	 */
318  	smp_store_release(&csd->node.u_flags, 0);
319  }
320  
321  static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
322  
323  void __smp_call_single_queue(int cpu, struct llist_node *node)
324  {
325  	/*
326  	 * We have to check the type of the CSD before queueing it, because
327  	 * once queued it can have its flags cleared by
328  	 *   flush_smp_call_function_queue()
329  	 * even if we haven't sent the smp_call IPI yet (e.g. the stopper
330  	 * executes migration_cpu_stop() on the remote CPU).
331  	 */
332  	if (trace_ipi_send_cpu_enabled()) {
333  		call_single_data_t *csd;
334  		smp_call_func_t func;
335  
336  		csd = container_of(node, call_single_data_t, node.llist);
337  		func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
338  			sched_ttwu_pending : csd->func;
339  
340  		trace_ipi_send_cpu(cpu, _RET_IP_, func);
341  	}
342  
343  	/*
344  	 * The list addition should be visible to the target CPU when it pops
345  	 * the head of the list to pull the entry off it in the IPI handler
346  	 * because of normal cache coherency rules implied by the underlying
347  	 * llist ops.
348  	 *
349  	 * If IPIs can go out of order with respect to the cache coherency
350  	 * protocol in an architecture, sufficient synchronisation should be
351  	 * added to arch code to make it appear to obey cache coherency WRT
352  	 * locking and barrier primitives. Generic code isn't really
353  	 * equipped to do the right thing...
354  	 */
355  	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
356  		send_call_function_single_ipi(cpu);
357  }
358  
359  /*
360   * Insert a previously allocated call_single_data_t element
361   * for execution on the given CPU. data must already have
362   * ->func, ->info, and ->flags set.
363   */
364  static int generic_exec_single(int cpu, struct __call_single_data *csd)
365  {
366  	if (cpu == smp_processor_id()) {
367  		smp_call_func_t func = csd->func;
368  		void *info = csd->info;
369  		unsigned long flags;
370  
371  		/*
372  		 * We can unlock early even for the synchronous on-stack case,
373  		 * since we're doing this from the same CPU..
374  		 */
375  		csd_lock_record(csd);
376  		csd_unlock(csd);
377  		local_irq_save(flags);
378  		func(info);
379  		csd_lock_record(NULL);
380  		local_irq_restore(flags);
381  		return 0;
382  	}
383  
384  	if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
385  		csd_unlock(csd);
386  		return -ENXIO;
387  	}
388  
389  	__smp_call_single_queue(cpu, &csd->node.llist);
390  
391  	return 0;
392  }
393  
394  /**
395   * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
396   *
397   * Invoked by arch to handle an IPI for call function single.
398   * Must be called with interrupts disabled.
399   */
400  void generic_smp_call_function_single_interrupt(void)
401  {
402  	__flush_smp_call_function_queue(true);
403  }
404  
405  /**
406   * __flush_smp_call_function_queue - Flush pending smp-call-function callbacks
407   *
408   * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
409   *		      offline CPU. Skip this check if set to 'false'.
410   *
411   * Flush any pending smp-call-function callbacks queued on this CPU. This is
412   * invoked by the generic IPI handler, as well as by a CPU about to go offline,
413   * to ensure that all pending IPI callbacks are run before it goes completely
414   * offline.
415   *
416   * Loop through the call_single_queue and run all the queued callbacks.
417   * Must be called with interrupts disabled.
418   */
419  static void __flush_smp_call_function_queue(bool warn_cpu_offline)
420  {
421  	call_single_data_t *csd, *csd_next;
422  	struct llist_node *entry, *prev;
423  	struct llist_head *head;
424  	static bool warned;
425  
426  	lockdep_assert_irqs_disabled();
427  
428  	head = this_cpu_ptr(&call_single_queue);
429  	entry = llist_del_all(head);
430  	entry = llist_reverse_order(entry);
431  
432  	/* There shouldn't be any pending callbacks on an offline CPU. */
433  	if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
434  		     !warned && entry != NULL)) {
435  		warned = true;
436  		WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
437  
438  		/*
439  		 * We don't have to use the _safe() variant here
440  		 * because we are not invoking the IPI handlers yet.
441  		 */
442  		llist_for_each_entry(csd, entry, node.llist) {
443  			switch (CSD_TYPE(csd)) {
444  			case CSD_TYPE_ASYNC:
445  			case CSD_TYPE_SYNC:
446  			case CSD_TYPE_IRQ_WORK:
447  				pr_warn("IPI callback %pS sent to offline CPU\n",
448  					csd->func);
449  				break;
450  
451  			case CSD_TYPE_TTWU:
452  				pr_warn("IPI task-wakeup sent to offline CPU\n");
453  				break;
454  
455  			default:
456  				pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
457  					CSD_TYPE(csd));
458  				break;
459  			}
460  		}
461  	}
462  
463  	/*
464  	 * First; run all SYNC callbacks, people are waiting for us.
465  	 */
466  	prev = NULL;
467  	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
468  		/* Do we wait until *after* callback? */
469  		if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
470  			smp_call_func_t func = csd->func;
471  			void *info = csd->info;
472  
473  			if (prev) {
474  				prev->next = &csd_next->node.llist;
475  			} else {
476  				entry = &csd_next->node.llist;
477  			}
478  
479  			csd_lock_record(csd);
480  			func(info);
481  			csd_unlock(csd);
482  			csd_lock_record(NULL);
483  		} else {
484  			prev = &csd->node.llist;
485  		}
486  	}
487  
488  	if (!entry)
489  		return;
490  
491  	/*
492  	 * Second; run all !SYNC callbacks.
493  	 */
494  	prev = NULL;
495  	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
496  		int type = CSD_TYPE(csd);
497  
498  		if (type != CSD_TYPE_TTWU) {
499  			if (prev) {
500  				prev->next = &csd_next->node.llist;
501  			} else {
502  				entry = &csd_next->node.llist;
503  			}
504  
505  			if (type == CSD_TYPE_ASYNC) {
506  				smp_call_func_t func = csd->func;
507  				void *info = csd->info;
508  
509  				csd_lock_record(csd);
510  				csd_unlock(csd);
511  				func(info);
512  				csd_lock_record(NULL);
513  			} else if (type == CSD_TYPE_IRQ_WORK) {
514  				irq_work_single(csd);
515  			}
516  
517  		} else {
518  			prev = &csd->node.llist;
519  		}
520  	}
521  
522  	/*
523  	 * Third; only CSD_TYPE_TTWU is left, issue those.
524  	 */
525  	if (entry)
526  		sched_ttwu_pending(entry);
527  }
528  
529  
530  /**
531   * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
532   *				   from task context (idle, migration thread)
533   *
534   * When TIF_POLLING_NRFLAG is supported and a CPU is in idle and has it
535   * set, then remote CPUs can avoid sending IPIs and wake the idle CPU by
536   * setting TIF_NEED_RESCHED. The idle task on the woken up CPU has to
537   * handle queued SMP function calls before scheduling.
538   *
539   * The migration thread has to ensure that a possibly pending wakeup has
540   * been handled before it migrates a task.
541   */
542  void flush_smp_call_function_queue(void)
543  {
544  	unsigned int was_pending;
545  	unsigned long flags;
546  
547  	if (llist_empty(this_cpu_ptr(&call_single_queue)))
548  		return;
549  
550  	local_irq_save(flags);
551  	/* Get the already pending soft interrupts for RT enabled kernels */
552  	was_pending = local_softirq_pending();
553  	__flush_smp_call_function_queue(true);
554  	if (local_softirq_pending())
555  		do_softirq_post_smp_call_flush(was_pending);
556  
557  	local_irq_restore(flags);
558  }
559  
560  /*
561   * smp_call_function_single - Run a function on a specific CPU
562   * @func: The function to run. This must be fast and non-blocking.
563   * @info: An arbitrary pointer to pass to the function.
564   * @wait: If true, wait until function has completed on other CPUs.
565   *
566   * Returns 0 on success, else a negative status code.
567   */
568  int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
569  			     int wait)
570  {
571  	call_single_data_t *csd;
572  	call_single_data_t csd_stack = {
573  		.node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, },
574  	};
575  	int this_cpu;
576  	int err;
577  
578  	/*
579  	 * prevent preemption and reschedule on another processor,
580  	 * as well as CPU removal
581  	 */
582  	this_cpu = get_cpu();
583  
584  	/*
585  	 * Can deadlock when called with interrupts disabled.
586  	 * We allow CPUs that are not yet online though, as no one else can
587  	 * send an smp-call-function interrupt to this CPU, so deadlocks
588  	 * can't happen.
589  	 */
590  	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
591  		     && !oops_in_progress);
592  
593  	/*
594  	 * When @wait we can deadlock when we interrupt between llist_add() and
595  	 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
596  	 * csd_lock(), because the interrupt context uses the same csd
597  	 * storage.
598  	 */
599  	WARN_ON_ONCE(!in_task());
600  
601  	csd = &csd_stack;
602  	if (!wait) {
603  		csd = this_cpu_ptr(&csd_data);
604  		csd_lock(csd);
605  	}
606  
607  	csd->func = func;
608  	csd->info = info;
609  #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
610  	csd->node.src = smp_processor_id();
611  	csd->node.dst = cpu;
612  #endif
613  
614  	err = generic_exec_single(cpu, csd);
615  
616  	if (wait)
617  		csd_lock_wait(csd);
618  
619  	put_cpu();
620  
621  	return err;
622  }
623  EXPORT_SYMBOL(smp_call_function_single);
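/*
 * Illustrative usage sketch (not part of the upstream file): a caller runs a
 * fast, non-blocking callback on one CPU and waits for it to finish. The
 * example_*() names and the atomic counter are hypothetical.
 */
#if 0	/* usage sketch only, not compiled */
static void example_count_hit(void *info)
{
	/* Runs in IPI context on the target CPU: must be fast, must not sleep. */
	atomic_inc((atomic_t *)info);
}

static int example_run_on_cpu(int cpu)
{
	atomic_t hits = ATOMIC_INIT(0);
	int ret;

	/* wait == 1: return only after the callback has completed on @cpu. */
	ret = smp_call_function_single(cpu, example_count_hit, &hits, 1);
	if (ret)
		pr_warn("example: CPU %d not reachable: %d\n", cpu, ret);
	return ret;
}
#endif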
624  
625  /**
626   * smp_call_function_single_async() - Run an asynchronous function on a
627   * 			         specific CPU.
628   * @cpu: The CPU to run on.
629   * @csd: Pre-allocated and setup data structure
630   *
631   * Like smp_call_function_single(), but the call is asynchronous and
632   * can thus be done from contexts with disabled interrupts.
633   *
634   * The caller passes its own pre-allocated data structure
635   * (i.e. embedded in an object) and is responsible for synchronizing it
636   * such that the IPIs performed on the @csd are strictly serialized.
637   *
638   * If the function is called with a csd which has not yet been
639   * processed by a previous call to smp_call_function_single_async(), the
640   * function will return immediately with -EBUSY, indicating that the csd
641   * object is still in progress.
642   *
643   * NOTE: Be careful, there is unfortunately no current debugging facility to
644   * validate the correctness of this serialization.
645   *
646   * Return: %0 on success or negative errno value on error
647   */
648  int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
649  {
650  	int err = 0;
651  
652  	preempt_disable();
653  
654  	if (csd->node.u_flags & CSD_FLAG_LOCK) {
655  		err = -EBUSY;
656  		goto out;
657  	}
658  
659  	csd->node.u_flags = CSD_FLAG_LOCK;
660  	smp_wmb();
661  
662  	err = generic_exec_single(cpu, csd);
663  
664  out:
665  	preempt_enable();
666  
667  	return err;
668  }
669  EXPORT_SYMBOL_GPL(smp_call_function_single_async);
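/*
 * Illustrative usage sketch (not part of the upstream file): an object embeds
 * its own call_single_data_t and fires it asynchronously; the csd must not be
 * reused until the previous request has been processed (-EBUSY otherwise).
 * The struct example_dev and helpers are hypothetical; INIT_CSD() is assumed
 * to be the initializer provided by <linux/smp.h>.
 */
#if 0	/* usage sketch only, not compiled */
struct example_dev {
	call_single_data_t	csd;
	int			target_cpu;
};

static void example_kick_remote(void *info)
{
	/* Runs on @target_cpu in IPI context. */
}

static void example_dev_init(struct example_dev *dev, int cpu)
{
	dev->target_cpu = cpu;
	INIT_CSD(&dev->csd, example_kick_remote, dev);
}

static void example_poke(struct example_dev *dev)
{
	/* Safe from IRQ-disabled context; -EBUSY means the previous
	 * request on this csd is still in flight. */
	if (smp_call_function_single_async(dev->target_cpu, &dev->csd) == -EBUSY)
		pr_debug("example: previous request still pending\n");
}
#endif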
670  
671  /*
672   * smp_call_function_any - Run a function on any of the given cpus
673   * @mask: The mask of cpus it can run on.
674   * @func: The function to run. This must be fast and non-blocking.
675   * @info: An arbitrary pointer to pass to the function.
676   * @wait: If true, wait until function has completed.
677   *
678   * Returns 0 on success, else a negative status code (if no cpus were online).
679   *
680   * Selection preference:
681   *	1) current cpu if in @mask
682   *	2) any cpu of current node if in @mask
683   *	3) any other online cpu in @mask
684   */
685  int smp_call_function_any(const struct cpumask *mask,
686  			  smp_call_func_t func, void *info, int wait)
687  {
688  	unsigned int cpu;
689  	const struct cpumask *nodemask;
690  	int ret;
691  
692  	/* Try for same CPU (cheapest) */
693  	cpu = get_cpu();
694  	if (cpumask_test_cpu(cpu, mask))
695  		goto call;
696  
697  	/* Try for same node. */
698  	nodemask = cpumask_of_node(cpu_to_node(cpu));
699  	for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
700  	     cpu = cpumask_next_and(cpu, nodemask, mask)) {
701  		if (cpu_online(cpu))
702  			goto call;
703  	}
704  
705  	/* Any online will do: smp_call_function_single handles nr_cpu_ids. */
706  	cpu = cpumask_any_and(mask, cpu_online_mask);
707  call:
708  	ret = smp_call_function_single(cpu, func, info, wait);
709  	put_cpu();
710  	return ret;
711  }
712  EXPORT_SYMBOL_GPL(smp_call_function_any);
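/*
 * Illustrative usage sketch (not part of the upstream file): run a callback on
 * any CPU sharing a package with @cpu, letting the selection preference above
 * (current CPU, then local node, then any online CPU) pick the cheapest one.
 * The example_*() name is hypothetical; topology_core_cpumask() is assumed
 * from <linux/topology.h>.
 */
#if 0	/* usage sketch only, not compiled */
static int example_read_on_package(int cpu, smp_call_func_t read_fn, void *buf)
{
	return smp_call_function_any(topology_core_cpumask(cpu), read_fn, buf, 1);
}
#endif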
713  
714  /*
715   * Flags to be used as scf_flags argument of smp_call_function_many_cond().
716   *
717   * %SCF_WAIT:		Wait until function execution is completed
718   * %SCF_RUN_LOCAL:	Run also locally if local cpu is set in cpumask
719   */
720  #define SCF_WAIT	(1U << 0)
721  #define SCF_RUN_LOCAL	(1U << 1)
722  
723  static void smp_call_function_many_cond(const struct cpumask *mask,
724  					smp_call_func_t func, void *info,
725  					unsigned int scf_flags,
726  					smp_cond_func_t cond_func)
727  {
728  	int cpu, last_cpu, this_cpu = smp_processor_id();
729  	struct call_function_data *cfd;
730  	bool wait = scf_flags & SCF_WAIT;
731  	int nr_cpus = 0, nr_queued = 0;
732  	bool run_remote = false;
733  	bool run_local = false;
734  
735  	lockdep_assert_preemption_disabled();
736  
737  	/*
738  	 * Can deadlock when called with interrupts disabled.
739  	 * We allow CPUs that are not yet online though, as no one else can
740  	 * send an smp-call-function interrupt to this CPU, so deadlocks
741  	 * can't happen.
742  	 */
743  	if (cpu_online(this_cpu) && !oops_in_progress &&
744  	    !early_boot_irqs_disabled)
745  		lockdep_assert_irqs_enabled();
746  
747  	/*
748  	 * When @wait we can deadlock when we interrupt between llist_add() and
749  	 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
750  	 * csd_lock(), because the interrupt context uses the same csd
751  	 * storage.
752  	 */
753  	WARN_ON_ONCE(!in_task());
754  
755  	/* Check if we need local execution. */
756  	if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
757  		run_local = true;
758  
759  	/* Check if we need remote execution, i.e., any CPU excluding this one. */
760  	cpu = cpumask_first_and(mask, cpu_online_mask);
761  	if (cpu == this_cpu)
762  		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
763  	if (cpu < nr_cpu_ids)
764  		run_remote = true;
765  
766  	if (run_remote) {
767  		cfd = this_cpu_ptr(&cfd_data);
768  		cpumask_and(cfd->cpumask, mask, cpu_online_mask);
769  		__cpumask_clear_cpu(this_cpu, cfd->cpumask);
770  
771  		cpumask_clear(cfd->cpumask_ipi);
772  		for_each_cpu(cpu, cfd->cpumask) {
773  			call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
774  
775  			if (cond_func && !cond_func(cpu, info)) {
776  				__cpumask_clear_cpu(cpu, cfd->cpumask);
777  				continue;
778  			}
779  
780  			csd_lock(csd);
781  			if (wait)
782  				csd->node.u_flags |= CSD_TYPE_SYNC;
783  			csd->func = func;
784  			csd->info = info;
785  #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
786  			csd->node.src = smp_processor_id();
787  			csd->node.dst = cpu;
788  #endif
789  			if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
790  				__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
791  				nr_cpus++;
792  				last_cpu = cpu;
793  			}
794  			nr_queued++;
795  		}
796  
797  		/*
798  		 * Trace each smp_call_function_*() as an IPI; actual IPIs
799  		 * will be traced with func==generic_smp_call_function_single_ipi().
800  		 */
801  		if (nr_queued)
802  			trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func);
803  
804  		/*
805  		 * Choose the most efficient way to send an IPI. Note that the
806  		 * number of CPUs might be zero due to concurrent changes to the
807  		 * provided mask.
808  		 */
809  		if (nr_cpus == 1)
810  			send_call_function_single_ipi(last_cpu);
811  		else if (likely(nr_cpus > 1))
812  			send_call_function_ipi_mask(cfd->cpumask_ipi);
813  	}
814  
815  	if (run_local && (!cond_func || cond_func(this_cpu, info))) {
816  		unsigned long flags;
817  
818  		local_irq_save(flags);
819  		func(info);
820  		local_irq_restore(flags);
821  	}
822  
823  	if (run_remote && wait) {
824  		for_each_cpu(cpu, cfd->cpumask) {
825  			call_single_data_t *csd;
826  
827  			csd = per_cpu_ptr(cfd->csd, cpu);
828  			csd_lock_wait(csd);
829  		}
830  	}
831  }
832  
833  /**
834   * smp_call_function_many(): Run a function on a set of CPUs.
835   * @mask: The set of cpus to run on (only runs on online subset).
836   * @func: The function to run. This must be fast and non-blocking.
837   * @info: An arbitrary pointer to pass to the function.
838   * @wait: If true, wait (atomically) until function has completed on
839   *        other CPUs. Note that @func is only run on the remote CPUs
840   *        in @mask; the local CPU is skipped even if it is set in
841   *        @mask.
842   *
843   * If @wait is true, then returns once @func has returned.
844   *
845   * You must not call this function with disabled interrupts or from a
846   * hardware interrupt handler or from a bottom half handler. Preemption
847   * must be disabled when calling this function.
848   */
849  void smp_call_function_many(const struct cpumask *mask,
850  			    smp_call_func_t func, void *info, bool wait)
851  {
852  	smp_call_function_many_cond(mask, func, info, wait * SCF_WAIT, NULL);
853  }
854  EXPORT_SYMBOL(smp_call_function_many);
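/*
 * Illustrative usage sketch (not part of the upstream file): invoke a fast
 * callback on a set of CPUs and wait. Preemption must be disabled around the
 * call, as required by the comment above. The example_*() names are
 * hypothetical.
 */
#if 0	/* usage sketch only, not compiled */
static void example_sync_state(void *info)
{
	/* Fast and non-blocking; runs on every online CPU in the mask,
	 * but not on the calling CPU. */
}

static void example_call_mask(const struct cpumask *mask)
{
	preempt_disable();
	smp_call_function_many(mask, example_sync_state, NULL, true);
	preempt_enable();
}
#endif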
855  
856  /**
857   * smp_call_function(): Run a function on all other CPUs.
858   * @func: The function to run. This must be fast and non-blocking.
859   * @info: An arbitrary pointer to pass to the function.
860   * @wait: If true, wait (atomically) until function has completed
861   *        on other CPUs.
862   *
863   * Returns nothing (this function has a void return type).
864   *
865   * If @wait is true, then returns once @func has returned; otherwise
866   * it returns just before the target cpu calls @func.
867   *
868   * You must not call this function with disabled interrupts or from a
869   * hardware interrupt handler or from a bottom half handler.
870   */
871  void smp_call_function(smp_call_func_t func, void *info, int wait)
872  {
873  	preempt_disable();
874  	smp_call_function_many(cpu_online_mask, func, info, wait);
875  	preempt_enable();
876  }
877  EXPORT_SYMBOL(smp_call_function);
878  
879  /* Setup configured maximum number of CPUs to activate */
880  unsigned int setup_max_cpus = NR_CPUS;
881  EXPORT_SYMBOL(setup_max_cpus);
882  
883  
884  /*
885   * Setup routine for controlling SMP activation
886   *
887   * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
888   * activation entirely (the MPS table probe still happens, though).
889   *
890   * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
891   * greater than 0, limits the maximum number of CPUs activated in
892   * SMP mode to <NUM>.
893   */
894  
895  void __weak arch_disable_smp_support(void) { }
896  
897  static int __init nosmp(char *str)
898  {
899  	setup_max_cpus = 0;
900  	arch_disable_smp_support();
901  
902  	return 0;
903  }
904  
905  early_param("nosmp", nosmp);
906  
907  /* this is the hard limit */
908  static int __init nrcpus(char *str)
909  {
910  	int nr_cpus;
911  
912  	if (get_option(&str, &nr_cpus) && nr_cpus > 0 && nr_cpus < nr_cpu_ids)
913  		set_nr_cpu_ids(nr_cpus);
914  
915  	return 0;
916  }
917  
918  early_param("nr_cpus", nrcpus);
919  
920  static int __init maxcpus(char *str)
921  {
922  	get_option(&str, &setup_max_cpus);
923  	if (setup_max_cpus == 0)
924  		arch_disable_smp_support();
925  
926  	return 0;
927  }
928  
929  early_param("maxcpus", maxcpus);
930  
931  #if (NR_CPUS > 1) && !defined(CONFIG_FORCE_NR_CPUS)
932  /* Setup number of possible processor ids */
933  unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
934  EXPORT_SYMBOL(nr_cpu_ids);
935  #endif
936  
937  /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
938  void __init setup_nr_cpu_ids(void)
939  {
940  	set_nr_cpu_ids(find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1);
941  }
942  
943  /* Called by boot processor to activate the rest. */
944  void __init smp_init(void)
945  {
946  	int num_nodes, num_cpus;
947  
948  	idle_threads_init();
949  	cpuhp_threads_init();
950  
951  	pr_info("Bringing up secondary CPUs ...\n");
952  
953  	bringup_nonboot_cpus(setup_max_cpus);
954  
955  	num_nodes = num_online_nodes();
956  	num_cpus  = num_online_cpus();
957  	pr_info("Brought up %d node%s, %d CPU%s\n",
958  		num_nodes, (num_nodes > 1 ? "s" : ""),
959  		num_cpus,  (num_cpus  > 1 ? "s" : ""));
960  
961  	/* Any cleanup work */
962  	smp_cpus_done(setup_max_cpus);
963  }
964  
965  /*
966   * on_each_cpu_cond_mask(): Call a function on each processor for which
967   * the supplied function cond_func returns true, optionally waiting
968   * for all the required CPUs to finish. This may include the local
969   * processor.
970   * @cond_func:	A callback function that is passed a cpu id and
971   *		the info parameter. The function is called
972   *		with preemption disabled. The function should
973   *		return a boolean value indicating whether to IPI
974   *		the specified CPU.
975   * @func:	The function to run on all applicable CPUs.
976   *		This must be fast and non-blocking.
977   * @info:	An arbitrary pointer to pass to both functions.
978   * @wait:	If true, wait (atomically) until function has
979   *		completed on other CPUs.
980   *
981   * Preemption is disabled to protect against CPUs going offline but not online.
982   * CPUs going online during the call will not be seen or sent an IPI.
983   *
984   * You must not call this function with disabled interrupts or
985   * from a hardware interrupt handler or from a bottom half handler.
986   */
987  void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
988  			   void *info, bool wait, const struct cpumask *mask)
989  {
990  	unsigned int scf_flags = SCF_RUN_LOCAL;
991  
992  	if (wait)
993  		scf_flags |= SCF_WAIT;
994  
995  	preempt_disable();
996  	smp_call_function_many_cond(mask, func, info, scf_flags, cond_func);
997  	preempt_enable();
998  }
999  EXPORT_SYMBOL(on_each_cpu_cond_mask);
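/*
 * Illustrative usage sketch (not part of the upstream file): only IPI the CPUs
 * whose per-CPU flag is set; the predicate runs on the calling CPU with
 * preemption disabled. The example_*() names and the per-CPU flag are
 * hypothetical.
 */
#if 0	/* usage sketch only, not compiled */
static DEFINE_PER_CPU(bool, example_dirty);

static bool example_cpu_is_dirty(int cpu, void *info)
{
	return per_cpu(example_dirty, cpu);
}

static void example_flush(void *info)
{
	/* Runs on each selected CPU, including this one if it qualifies. */
}

static void example_flush_dirty_cpus(void)
{
	on_each_cpu_cond_mask(example_cpu_is_dirty, example_flush, NULL,
			      true, cpu_online_mask);
}
#endif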
1000  
1001  static void do_nothing(void *unused)
1002  {
1003  }
1004  
1005  /**
1006   * kick_all_cpus_sync - Force all cpus out of idle
1007   *
1008   * Used to synchronize the update of pm_idle function pointer. It's
1009   * called after the pointer is updated and returns after the dummy
1010   * callback function has been executed on all cpus. The execution of
1011   * the function can only happen on the remote cpus after they have
1012   * left the idle function which had been called via pm_idle function
1013   * pointer. So it's guaranteed that nothing uses the previous pointer
1014   * anymore.
1015   */
1016  void kick_all_cpus_sync(void)
1017  {
1018  	/* Make sure the change is visible before we kick the cpus */
1019  	smp_mb();
1020  	smp_call_function(do_nothing, NULL, 1);
1021  }
1022  EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
1023  
1024  /**
1025   * wake_up_all_idle_cpus - break all cpus out of idle
1026   * wake_up_all_idle_cpus tries to break all cpus which are in idle state
1027   * out of idle, including idle-polling cpus; nothing is done for cpus
1028   * that are not idle.
1029   */
1030  void wake_up_all_idle_cpus(void)
1031  {
1032  	int cpu;
1033  
1034  	for_each_possible_cpu(cpu) {
1035  		preempt_disable();
1036  		if (cpu != smp_processor_id() && cpu_online(cpu))
1037  			wake_up_if_idle(cpu);
1038  		preempt_enable();
1039  	}
1040  }
1041  EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
1042  
1043  /**
1044   * struct smp_call_on_cpu_struct - Call a function on a specific CPU
1045   * @work: &work_struct
1046   * @done: &completion to signal
1047   * @func: function to call
1048   * @data: function's data argument
1049   * @ret: return value from @func
1050   * @cpu: target CPU (%-1 for any CPU)
1051   *
1052   * Used to call a function on a specific cpu and wait for it to return.
1053   * Optionally make sure the call is done on a specified physical cpu via vcpu
1054   * pinning in order to support virtualized environments.
1055   */
1056  struct smp_call_on_cpu_struct {
1057  	struct work_struct	work;
1058  	struct completion	done;
1059  	int			(*func)(void *);
1060  	void			*data;
1061  	int			ret;
1062  	int			cpu;
1063  };
1064  
1065  static void smp_call_on_cpu_callback(struct work_struct *work)
1066  {
1067  	struct smp_call_on_cpu_struct *sscs;
1068  
1069  	sscs = container_of(work, struct smp_call_on_cpu_struct, work);
1070  	if (sscs->cpu >= 0)
1071  		hypervisor_pin_vcpu(sscs->cpu);
1072  	sscs->ret = sscs->func(sscs->data);
1073  	if (sscs->cpu >= 0)
1074  		hypervisor_pin_vcpu(-1);
1075  
1076  	complete(&sscs->done);
1077  }
1078  
1079  int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
1080  {
1081  	struct smp_call_on_cpu_struct sscs = {
1082  		.done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
1083  		.func = func,
1084  		.data = par,
1085  		.cpu  = phys ? cpu : -1,
1086  	};
1087  
1088  	INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);
1089  
1090  	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
1091  		return -ENXIO;
1092  
1093  	queue_work_on(cpu, system_wq, &sscs.work);
1094  	wait_for_completion(&sscs.done);
1095  
1096  	return sscs.ret;
1097  }
1098  EXPORT_SYMBOL_GPL(smp_call_on_cpu);
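/*
 * Illustrative usage sketch (not part of the upstream file): unlike the
 * IPI-based helpers above, smp_call_on_cpu() runs @func from a workqueue on
 * the target CPU, so @func may sleep; with phys == true the vCPU is also
 * pinned to the physical CPU for the duration. The example_*() names are
 * hypothetical.
 */
#if 0	/* usage sketch only, not compiled */
static int example_query_firmware(void *data)
{
	u32 *val = data;

	/* May sleep, e.g. while talking to firmware. */
	*val = 0;
	return 0;
}

static int example_query_on_cpu0(u32 *val)
{
	return smp_call_on_cpu(0, example_query_firmware, val, true);
}
#endif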
1099