xref: /openbmc/linux/kernel/cpu.c (revision 93f5715e)
1 /* CPU control.
2  * (C) 2001, 2002, 2003, 2004 Rusty Russell
3  *
4  * This code is licenced under the GPL.
5  */
6 #include <linux/proc_fs.h>
7 #include <linux/smp.h>
8 #include <linux/init.h>
9 #include <linux/notifier.h>
10 #include <linux/sched/signal.h>
11 #include <linux/sched/hotplug.h>
12 #include <linux/sched/task.h>
13 #include <linux/unistd.h>
14 #include <linux/cpu.h>
15 #include <linux/oom.h>
16 #include <linux/rcupdate.h>
17 #include <linux/export.h>
18 #include <linux/bug.h>
19 #include <linux/kthread.h>
20 #include <linux/stop_machine.h>
21 #include <linux/mutex.h>
22 #include <linux/gfp.h>
23 #include <linux/suspend.h>
24 #include <linux/lockdep.h>
25 #include <linux/tick.h>
26 #include <linux/irq.h>
27 #include <linux/nmi.h>
28 #include <linux/smpboot.h>
29 #include <linux/relay.h>
30 #include <linux/slab.h>
31 #include <linux/percpu-rwsem.h>
32 
33 #include <trace/events/power.h>
34 #define CREATE_TRACE_POINTS
35 #include <trace/events/cpuhp.h>
36 
37 #include "smpboot.h"
38 
39 /**
40  * cpuhp_cpu_state - Per cpu hotplug state storage
41  * @state:	The current cpu state
42  * @target:	The target state
 * @fail:	State at which to inject a callback failure (testing aid)
43  * @thread:	Pointer to the hotplug thread
44  * @should_run:	Thread should execute
45  * @rollback:	Perform a rollback
46  * @single:	Single callback invocation
47  * @bringup:	Single callback bringup or teardown selector
48  * @cb_state:	The state for a single callback (install/uninstall)
49  * @result:	Result of the operation
50  * @done_up:	Signal completion to the issuer of the task for cpu-up
51  * @done_down:	Signal completion to the issuer of the task for cpu-down
52  */
53 struct cpuhp_cpu_state {
54 	enum cpuhp_state	state;
55 	enum cpuhp_state	target;
56 	enum cpuhp_state	fail;
57 #ifdef CONFIG_SMP
58 	struct task_struct	*thread;
59 	bool			should_run;
60 	bool			rollback;
61 	bool			single;
62 	bool			bringup;
63 	bool			booted_once;
64 	struct hlist_node	*node;
65 	struct hlist_node	*last;
66 	enum cpuhp_state	cb_state;
67 	int			result;
68 	struct completion	done_up;
69 	struct completion	done_down;
70 #endif
71 };
72 
73 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
74 	.fail = CPUHP_INVALID,
75 };
76 
77 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
78 static struct lockdep_map cpuhp_state_up_map =
79 	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
80 static struct lockdep_map cpuhp_state_down_map =
81 	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
82 
83 
84 static inline void cpuhp_lock_acquire(bool bringup)
85 {
86 	lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
87 }
88 
89 static inline void cpuhp_lock_release(bool bringup)
90 {
91 	lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
92 }
93 #else
94 
95 static inline void cpuhp_lock_acquire(bool bringup) { }
96 static inline void cpuhp_lock_release(bool bringup) { }
97 
98 #endif
99 
100 /**
101  * cpuhp_step - Hotplug state machine step
102  * @name:	Name of the step
103  * @startup:	Startup function of the step
104  * @teardown:	Teardown function of the step
105  * @skip_onerr:	Do not invoke the functions on error rollback
106  *		Will go away once the notifiers are gone
107  * @cant_stop:	Bringup/teardown can't be stopped at this step
108  */
109 struct cpuhp_step {
110 	const char		*name;
111 	union {
112 		int		(*single)(unsigned int cpu);
113 		int		(*multi)(unsigned int cpu,
114 					 struct hlist_node *node);
115 	} startup;
116 	union {
117 		int		(*single)(unsigned int cpu);
118 		int		(*multi)(unsigned int cpu,
119 					 struct hlist_node *node);
120 	} teardown;
121 	struct hlist_head	list;
122 	bool			skip_onerr;
123 	bool			cant_stop;
124 	bool			multi_instance;
125 };
126 
127 static DEFINE_MUTEX(cpuhp_state_mutex);
128 static struct cpuhp_step cpuhp_hp_states[];
129 
130 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
131 {
132 	return cpuhp_hp_states + state;
133 }
134 
135 /**
136  * cpuhp_invoke_callback - Invoke the callbacks for a given state
137  * @cpu:	The cpu for which the callback should be invoked
138  * @state:	The state to do callbacks for
139  * @bringup:	True if the bringup callback should be invoked
140  * @node:	For multi-instance, do a single entry callback for install/remove
141  * @lastp:	For multi-instance rollback, remember how far we got
142  *
143  * Called from cpu hotplug and from the state register machinery.
144  */
145 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
146 				 bool bringup, struct hlist_node *node,
147 				 struct hlist_node **lastp)
148 {
149 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
150 	struct cpuhp_step *step = cpuhp_get_step(state);
151 	int (*cbm)(unsigned int cpu, struct hlist_node *node);
152 	int (*cb)(unsigned int cpu);
153 	int ret, cnt;
154 
155 	if (st->fail == state) {
156 		st->fail = CPUHP_INVALID;
157 
158 		if (!(bringup ? step->startup.single : step->teardown.single))
159 			return 0;
160 
161 		return -EAGAIN;
162 	}
163 
164 	if (!step->multi_instance) {
165 		WARN_ON_ONCE(lastp && *lastp);
166 		cb = bringup ? step->startup.single : step->teardown.single;
167 		if (!cb)
168 			return 0;
169 		trace_cpuhp_enter(cpu, st->target, state, cb);
170 		ret = cb(cpu);
171 		trace_cpuhp_exit(cpu, st->state, state, ret);
172 		return ret;
173 	}
174 	cbm = bringup ? step->startup.multi : step->teardown.multi;
175 	if (!cbm)
176 		return 0;
177 
178 	/* Single invocation for instance add/remove */
179 	if (node) {
180 		WARN_ON_ONCE(lastp && *lastp);
181 		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
182 		ret = cbm(cpu, node);
183 		trace_cpuhp_exit(cpu, st->state, state, ret);
184 		return ret;
185 	}
186 
187 	/* State transition. Invoke on all instances */
188 	cnt = 0;
189 	hlist_for_each(node, &step->list) {
190 		if (lastp && node == *lastp)
191 			break;
192 
193 		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
194 		ret = cbm(cpu, node);
195 		trace_cpuhp_exit(cpu, st->state, state, ret);
196 		if (ret) {
197 			if (!lastp)
198 				goto err;
199 
200 			*lastp = node;
201 			return ret;
202 		}
203 		cnt++;
204 	}
205 	if (lastp)
206 		*lastp = NULL;
207 	return 0;
208 err:
209 	/* Rollback the instances if one failed */
210 	cbm = !bringup ? step->startup.multi : step->teardown.multi;
211 	if (!cbm)
212 		return ret;
213 
214 	hlist_for_each(node, &step->list) {
215 		if (!cnt--)
216 			break;
217 
218 		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
219 		ret = cbm(cpu, node);
220 		trace_cpuhp_exit(cpu, st->state, state, ret);
221 		/*
222 		 * Rollback must not fail!
223 		 */
224 		WARN_ON_ONCE(ret);
225 	}
226 	return ret;
227 }
228 
229 #ifdef CONFIG_SMP
230 static bool cpuhp_is_ap_state(enum cpuhp_state state)
231 {
232 	/*
233 	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
234 	 * purposes as that state is handled explicitly in cpu_down.
235 	 */
236 	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
237 }
238 
239 static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
240 {
241 	struct completion *done = bringup ? &st->done_up : &st->done_down;
242 	wait_for_completion(done);
243 }
244 
245 static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
246 {
247 	struct completion *done = bringup ? &st->done_up : &st->done_down;
248 	complete(done);
249 }
250 
251 /*
252  * The former STARTING/DYING states run with IRQs disabled and must not fail.
253  */
254 static bool cpuhp_is_atomic_state(enum cpuhp_state state)
255 {
256 	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
257 }
258 
259 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
260 static DEFINE_MUTEX(cpu_add_remove_lock);
261 bool cpuhp_tasks_frozen;
262 EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
263 
264 /*
265  * The following two APIs (cpu_maps_update_begin/done) must be used when
266  * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
267  */
268 void cpu_maps_update_begin(void)
269 {
270 	mutex_lock(&cpu_add_remove_lock);
271 }
272 
273 void cpu_maps_update_done(void)
274 {
275 	mutex_unlock(&cpu_add_remove_lock);
276 }
277 
278 /*
279  * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
280  * Should always be manipulated under cpu_add_remove_lock
281  */
282 static int cpu_hotplug_disabled;
283 
284 #ifdef CONFIG_HOTPLUG_CPU
285 
286 DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
287 
288 void cpus_read_lock(void)
289 {
290 	percpu_down_read(&cpu_hotplug_lock);
291 }
292 EXPORT_SYMBOL_GPL(cpus_read_lock);
293 
294 int cpus_read_trylock(void)
295 {
296 	return percpu_down_read_trylock(&cpu_hotplug_lock);
297 }
298 EXPORT_SYMBOL_GPL(cpus_read_trylock);
299 
300 void cpus_read_unlock(void)
301 {
302 	percpu_up_read(&cpu_hotplug_lock);
303 }
304 EXPORT_SYMBOL_GPL(cpus_read_unlock);
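/*
 * Usage sketch (illustrative, not part of this file): a caller that walks
 * the online CPUs and must not race with a CPU going away brackets the walk
 * with the hotplug read lock. The helper name below is hypothetical.
 *
 *	static unsigned int example_count_online_cpus(void)
 *	{
 *		unsigned int cpu, cnt = 0;
 *
 *		cpus_read_lock();
 *		for_each_online_cpu(cpu)
 *			cnt++;
 *		cpus_read_unlock();
 *		return cnt;
 *	}
 */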
305 
306 void cpus_write_lock(void)
307 {
308 	percpu_down_write(&cpu_hotplug_lock);
309 }
310 
311 void cpus_write_unlock(void)
312 {
313 	percpu_up_write(&cpu_hotplug_lock);
314 }
315 
316 void lockdep_assert_cpus_held(void)
317 {
318 	percpu_rwsem_assert_held(&cpu_hotplug_lock);
319 }
320 
321 /*
322  * Wait for currently running CPU hotplug operations to complete (if any) and
323  * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
324  * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
325  * hotplug path before performing hotplug operations. So acquiring that lock
326  * guarantees mutual exclusion from any currently running hotplug operations.
327  */
328 void cpu_hotplug_disable(void)
329 {
330 	cpu_maps_update_begin();
331 	cpu_hotplug_disabled++;
332 	cpu_maps_update_done();
333 }
334 EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
335 
336 static void __cpu_hotplug_enable(void)
337 {
338 	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
339 		return;
340 	cpu_hotplug_disabled--;
341 }
342 
343 void cpu_hotplug_enable(void)
344 {
345 	cpu_maps_update_begin();
346 	__cpu_hotplug_enable();
347 	cpu_maps_update_done();
348 }
349 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
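/*
 * Usage sketch (illustrative, not part of this file): code that needs the
 * CPU topology to stay stable across a longer, sleepable section and wants
 * concurrent sysfs online/offline requests to fail with -EBUSY brackets the
 * section with cpu_hotplug_disable()/cpu_hotplug_enable(). The helper and
 * message below are hypothetical.
 *
 *	static void example_stable_topology_section(void)
 *	{
 *		cpu_hotplug_disable();
 *		pr_info("example: %u CPUs online, hotplug disabled\n",
 *			num_online_cpus());
 *		cpu_hotplug_enable();
 *	}
 */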
350 #endif	/* CONFIG_HOTPLUG_CPU */
351 
352 #ifdef CONFIG_HOTPLUG_SMT
353 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
354 EXPORT_SYMBOL_GPL(cpu_smt_control);
355 
356 static bool cpu_smt_available __read_mostly;
357 
358 void __init cpu_smt_disable(bool force)
359 {
360 	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
361 		cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
362 		return;
363 
364 	if (force) {
365 		pr_info("SMT: Force disabled\n");
366 		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
367 	} else {
368 		cpu_smt_control = CPU_SMT_DISABLED;
369 	}
370 }
371 
372 /*
373  * The decision whether SMT is supported can only be made after the full
374  * CPU identification. Called from architecture code before the non-boot CPUs
375  * are brought up.
376  */
377 void __init cpu_smt_check_topology_early(void)
378 {
379 	if (!topology_smt_supported())
380 		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
381 }
382 
383 /*
384  * If SMT was disabled by BIOS, detect it here, after the CPUs have been
385  * brought online. This ensures the smt/l1tf sysfs entries are consistent
386  * with reality. cpu_smt_available is set to true during the bringup of
387  * non-boot CPUs when an SMT sibling is detected. Note, this may overwrite
388  * cpu_smt_control's previous setting.
389  */
390 void __init cpu_smt_check_topology(void)
391 {
392 	if (!cpu_smt_available)
393 		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
394 }
395 
396 static int __init smt_cmdline_disable(char *str)
397 {
398 	cpu_smt_disable(str && !strcmp(str, "force"));
399 	return 0;
400 }
401 early_param("nosmt", smt_cmdline_disable);
402 
403 static inline bool cpu_smt_allowed(unsigned int cpu)
404 {
405 	if (topology_is_primary_thread(cpu))
406 		return true;
407 
408 	/*
409 	 * If the CPU is not a 'primary' thread and the booted_once bit is
410 	 * set then the processor has SMT support. Store this information
411 	 * for the late check of SMT support in cpu_smt_check_topology().
412 	 */
413 	if (per_cpu(cpuhp_state, cpu).booted_once)
414 		cpu_smt_available = true;
415 
416 	if (cpu_smt_control == CPU_SMT_ENABLED)
417 		return true;
418 
419 	/*
420 	 * On x86 it's required to boot all logical CPUs at least once so
421 	 * that the init code can get a chance to set CR4.MCE on each
422 	 * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
423 	 * core will shut down the machine.
424 	 */
425 	return !per_cpu(cpuhp_state, cpu).booted_once;
426 }
427 #else
428 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
429 #endif
430 
431 static inline enum cpuhp_state
432 cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
433 {
434 	enum cpuhp_state prev_state = st->state;
435 
436 	st->rollback = false;
437 	st->last = NULL;
438 
439 	st->target = target;
440 	st->single = false;
441 	st->bringup = st->state < target;
442 
443 	return prev_state;
444 }
445 
446 static inline void
447 cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
448 {
449 	st->rollback = true;
450 
451 	/*
452 	 * If we have st->last we need to undo partial multi_instance of this
453 	 * state first. Otherwise start undo at the previous state.
454 	 */
455 	if (!st->last) {
456 		if (st->bringup)
457 			st->state--;
458 		else
459 			st->state++;
460 	}
461 
462 	st->target = prev_state;
463 	st->bringup = !st->bringup;
464 }
465 
466 /* Regular hotplug invocation of the AP hotplug thread */
467 static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
468 {
469 	if (!st->single && st->state == st->target)
470 		return;
471 
472 	st->result = 0;
473 	/*
474 	 * Make sure the above stores are visible before should_run becomes
475 	 * true. Paired with the smp_mb() at the top of cpuhp_thread_fun().
476 	 */
477 	smp_mb();
478 	st->should_run = true;
479 	wake_up_process(st->thread);
480 	wait_for_ap_thread(st, st->bringup);
481 }
482 
483 static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
484 {
485 	enum cpuhp_state prev_state;
486 	int ret;
487 
488 	prev_state = cpuhp_set_state(st, target);
489 	__cpuhp_kick_ap(st);
490 	if ((ret = st->result)) {
491 		cpuhp_reset_state(st, prev_state);
492 		__cpuhp_kick_ap(st);
493 	}
494 
495 	return ret;
496 }
497 
498 static int bringup_wait_for_ap(unsigned int cpu)
499 {
500 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
501 
502 	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
503 	wait_for_ap_thread(st, true);
504 	if (WARN_ON_ONCE((!cpu_online(cpu))))
505 		return -ECANCELED;
506 
507 	/* Unpark the stopper thread and the hotplug thread of the target cpu */
508 	stop_machine_unpark(cpu);
509 	kthread_unpark(st->thread);
510 
511 	/*
512 	 * SMT soft disabling on X86 requires bringing the CPU out of the
513 	 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
514 	 * CPU marked itself as booted_once in notify_cpu_starting() so the
515 	 * cpu_smt_allowed() check will now return false if this is not the
516 	 * primary sibling.
517 	 */
518 	if (!cpu_smt_allowed(cpu))
519 		return -ECANCELED;
520 
521 	if (st->target <= CPUHP_AP_ONLINE_IDLE)
522 		return 0;
523 
524 	return cpuhp_kick_ap(st, st->target);
525 }
526 
527 static int bringup_cpu(unsigned int cpu)
528 {
529 	struct task_struct *idle = idle_thread_get(cpu);
530 	int ret;
531 
532 	/*
533 	 * Some architectures have to walk the irq descriptors to
534 	 * set up the vector space for the cpu which comes online.
535 	 * Prevent irq alloc/free across the bringup.
536 	 */
537 	irq_lock_sparse();
538 
539 	/* Arch-specific enabling code. */
540 	ret = __cpu_up(cpu, idle);
541 	irq_unlock_sparse();
542 	if (ret)
543 		return ret;
544 	return bringup_wait_for_ap(cpu);
545 }
546 
547 /*
548  * Hotplug state machine related functions
549  */
550 
551 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
552 {
553 	for (st->state--; st->state > st->target; st->state--) {
554 		struct cpuhp_step *step = cpuhp_get_step(st->state);
555 
556 		if (!step->skip_onerr)
557 			cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
558 	}
559 }
560 
561 static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
562 			      enum cpuhp_state target)
563 {
564 	enum cpuhp_state prev_state = st->state;
565 	int ret = 0;
566 
567 	while (st->state < target) {
568 		st->state++;
569 		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
570 		if (ret) {
571 			st->target = prev_state;
572 			undo_cpu_up(cpu, st);
573 			break;
574 		}
575 	}
576 	return ret;
577 }
578 
579 /*
580  * The cpu hotplug threads manage the bringup and teardown of the cpus
581  */
582 static void cpuhp_create(unsigned int cpu)
583 {
584 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
585 
586 	init_completion(&st->done_up);
587 	init_completion(&st->done_down);
588 }
589 
590 static int cpuhp_should_run(unsigned int cpu)
591 {
592 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
593 
594 	return st->should_run;
595 }
596 
597 /*
598  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
599  * callbacks when a state gets [un]installed at runtime.
600  *
601  * Each invocation of this function by the smpboot thread does a single AP
602  * state callback.
603  *
604  * It has 3 modes of operation:
605  *  - single: runs st->cb_state
606  *  - up:     runs ++st->state, while st->state < st->target
607  *  - down:   runs st->state--, while st->state > st->target
608  *
609  * When complete or on error, should_run is cleared and the completion is fired.
610  */
611 static void cpuhp_thread_fun(unsigned int cpu)
612 {
613 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
614 	bool bringup = st->bringup;
615 	enum cpuhp_state state;
616 
617 	/*
618 	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
619 	 * that if we see ->should_run we also see the rest of the state.
620 	 */
621 	smp_mb();
622 
623 	if (WARN_ON_ONCE(!st->should_run))
624 		return;
625 
626 	cpuhp_lock_acquire(bringup);
627 
628 	if (st->single) {
629 		state = st->cb_state;
630 		st->should_run = false;
631 	} else {
632 		if (bringup) {
633 			st->state++;
634 			state = st->state;
635 			st->should_run = (st->state < st->target);
636 			WARN_ON_ONCE(st->state > st->target);
637 		} else {
638 			state = st->state;
639 			st->state--;
640 			st->should_run = (st->state > st->target);
641 			WARN_ON_ONCE(st->state < st->target);
642 		}
643 	}
644 
645 	WARN_ON_ONCE(!cpuhp_is_ap_state(state));
646 
647 	if (st->rollback) {
648 		struct cpuhp_step *step = cpuhp_get_step(state);
649 		if (step->skip_onerr)
650 			goto next;
651 	}
652 
653 	if (cpuhp_is_atomic_state(state)) {
654 		local_irq_disable();
655 		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
656 		local_irq_enable();
657 
658 		/*
659 		 * STARTING/DYING must not fail!
660 		 */
661 		WARN_ON_ONCE(st->result);
662 	} else {
663 		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
664 	}
665 
666 	if (st->result) {
667 		/*
668 		 * If we fail on a rollback, we're up a creek without a
669 		 * paddle, no way forward, no way back. We lose, thanks for
670 		 * playing.
671 		 */
672 		WARN_ON_ONCE(st->rollback);
673 		st->should_run = false;
674 	}
675 
676 next:
677 	cpuhp_lock_release(bringup);
678 
679 	if (!st->should_run)
680 		complete_ap_thread(st, bringup);
681 }
682 
683 /* Invoke a single callback on a remote cpu */
684 static int
685 cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
686 			 struct hlist_node *node)
687 {
688 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
689 	int ret;
690 
691 	if (!cpu_online(cpu))
692 		return 0;
693 
694 	cpuhp_lock_acquire(false);
695 	cpuhp_lock_release(false);
696 
697 	cpuhp_lock_acquire(true);
698 	cpuhp_lock_release(true);
699 
700 	/*
701 	 * If we are up and running, use the hotplug thread. For early calls
702 	 * we invoke the thread function directly.
703 	 */
704 	if (!st->thread)
705 		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
706 
707 	st->rollback = false;
708 	st->last = NULL;
709 
710 	st->node = node;
711 	st->bringup = bringup;
712 	st->cb_state = state;
713 	st->single = true;
714 
715 	__cpuhp_kick_ap(st);
716 
717 	/*
718 	 * If we failed and did a partial, do a rollback.
719 	 */
720 	if ((ret = st->result) && st->last) {
721 		st->rollback = true;
722 		st->bringup = !bringup;
723 
724 		__cpuhp_kick_ap(st);
725 	}
726 
727 	/*
728 	 * Clean up the leftovers so the next hotplug operation won't use stale
729 	 * data.
730 	 */
731 	st->node = st->last = NULL;
732 	return ret;
733 }
734 
735 static int cpuhp_kick_ap_work(unsigned int cpu)
736 {
737 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
738 	enum cpuhp_state prev_state = st->state;
739 	int ret;
740 
741 	cpuhp_lock_acquire(false);
742 	cpuhp_lock_release(false);
743 
744 	cpuhp_lock_acquire(true);
745 	cpuhp_lock_release(true);
746 
747 	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
748 	ret = cpuhp_kick_ap(st, st->target);
749 	trace_cpuhp_exit(cpu, st->state, prev_state, ret);
750 
751 	return ret;
752 }
753 
754 static struct smp_hotplug_thread cpuhp_threads = {
755 	.store			= &cpuhp_state.thread,
756 	.create			= &cpuhp_create,
757 	.thread_should_run	= cpuhp_should_run,
758 	.thread_fn		= cpuhp_thread_fun,
759 	.thread_comm		= "cpuhp/%u",
760 	.selfparking		= true,
761 };
762 
763 void __init cpuhp_threads_init(void)
764 {
765 	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
766 	kthread_unpark(this_cpu_read(cpuhp_state.thread));
767 }
768 
769 #ifdef CONFIG_HOTPLUG_CPU
770 /**
771  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
772  * @cpu: a CPU id
773  *
774  * This function walks all processes, finds a valid mm struct for each one and
775  * then clears a corresponding bit in mm's cpumask.  While this all sounds
776  * trivial, there are various non-obvious corner cases, which this function
777  * tries to solve in a safe manner.
778  *
779  * Also note that the function uses a somewhat relaxed locking scheme, so it may
780  * be called only for an already offlined CPU.
781  */
782 void clear_tasks_mm_cpumask(int cpu)
783 {
784 	struct task_struct *p;
785 
786 	/*
787 	 * This function is called after the cpu is taken down and marked
788 	 * offline, so it's not like new tasks will ever get this cpu set in
789 	 * their mm mask. -- Peter Zijlstra
790 	 * Thus, we may use rcu_read_lock() here, instead of grabbing
791 	 * full-fledged tasklist_lock.
792 	 */
793 	WARN_ON(cpu_online(cpu));
794 	rcu_read_lock();
795 	for_each_process(p) {
796 		struct task_struct *t;
797 
798 		/*
799 		 * Main thread might exit, but other threads may still have
800 		 * a valid mm. Find one.
801 		 */
802 		t = find_lock_task_mm(p);
803 		if (!t)
804 			continue;
805 		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
806 		task_unlock(t);
807 	}
808 	rcu_read_unlock();
809 }
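/*
 * Usage sketch (illustrative, not part of this file): architecture code
 * typically calls this from its CPU-death path, once the CPU is already
 * marked offline, so no stale mm_cpumask bits survive the unplug. The hook
 * below is hypothetical; the real call sites live in arch code.
 *
 *	static void example_arch_cpu_dead(unsigned int cpu)
 *	{
 *		clear_tasks_mm_cpumask(cpu);
 *	}
 */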
810 
811 /* Take this CPU down. */
812 static int take_cpu_down(void *_param)
813 {
814 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
815 	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
816 	int err, cpu = smp_processor_id();
817 	int ret;
818 
819 	/* Ensure this CPU doesn't handle any more interrupts. */
820 	err = __cpu_disable();
821 	if (err < 0)
822 		return err;
823 
824 	/*
825 	 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
826 	 * do this step again.
827 	 */
828 	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
829 	st->state--;
830 	/* Invoke the former CPU_DYING callbacks */
831 	for (; st->state > target; st->state--) {
832 		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
833 		/*
834 		 * DYING must not fail!
835 		 */
836 		WARN_ON_ONCE(ret);
837 	}
838 
839 	/* Give up timekeeping duties */
840 	tick_handover_do_timer();
841 	/* Park the stopper thread */
842 	stop_machine_park(cpu);
843 	return 0;
844 }
845 
846 static int takedown_cpu(unsigned int cpu)
847 {
848 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
849 	int err;
850 
851 	/* Park the smpboot threads */
852 	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
853 
854 	/*
855 	 * Prevent irq alloc/free while the dying cpu reorganizes the
856 	 * interrupt affinities.
857 	 */
858 	irq_lock_sparse();
859 
860 	/*
861 	 * So now all preempt/rcu users must observe !cpu_active().
862 	 */
863 	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
864 	if (err) {
865 		/* CPU refused to die */
866 		irq_unlock_sparse();
867 		/* Unpark the hotplug thread so we can rollback there */
868 		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
869 		return err;
870 	}
871 	BUG_ON(cpu_online(cpu));
872 
873 	/*
874 	 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
875 	 * all runnable tasks from the CPU, there's only the idle task left now
876 	 * that the migration thread is done doing the stop_machine thing.
877 	 *
878 	 * Wait for the stop thread to go away.
879 	 */
880 	wait_for_ap_thread(st, false);
881 	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
882 
883 	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
884 	irq_unlock_sparse();
885 
886 	hotplug_cpu__broadcast_tick_pull(cpu);
887 	/* This actually kills the CPU. */
888 	__cpu_die(cpu);
889 
890 	tick_cleanup_dead_cpu(cpu);
891 	rcutree_migrate_callbacks(cpu);
892 	return 0;
893 }
894 
895 static void cpuhp_complete_idle_dead(void *arg)
896 {
897 	struct cpuhp_cpu_state *st = arg;
898 
899 	complete_ap_thread(st, false);
900 }
901 
902 void cpuhp_report_idle_dead(void)
903 {
904 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
905 
906 	BUG_ON(st->state != CPUHP_AP_OFFLINE);
907 	rcu_report_dead(smp_processor_id());
908 	st->state = CPUHP_AP_IDLE_DEAD;
909 	/*
910 	 * We cannot call complete() after rcu_report_dead() so we delegate it
911 	 * to an online cpu.
912 	 */
913 	smp_call_function_single(cpumask_first(cpu_online_mask),
914 				 cpuhp_complete_idle_dead, st, 0);
915 }
916 
917 static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
918 {
919 	for (st->state++; st->state < st->target; st->state++) {
920 		struct cpuhp_step *step = cpuhp_get_step(st->state);
921 
922 		if (!step->skip_onerr)
923 			cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
924 	}
925 }
926 
927 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
928 				enum cpuhp_state target)
929 {
930 	enum cpuhp_state prev_state = st->state;
931 	int ret = 0;
932 
933 	for (; st->state > target; st->state--) {
934 		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
935 		if (ret) {
936 			st->target = prev_state;
937 			undo_cpu_down(cpu, st);
938 			break;
939 		}
940 	}
941 	return ret;
942 }
943 
944 /* Requires cpu_add_remove_lock to be held */
945 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
946 			   enum cpuhp_state target)
947 {
948 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
949 	int prev_state, ret = 0;
950 
951 	if (num_online_cpus() == 1)
952 		return -EBUSY;
953 
954 	if (!cpu_present(cpu))
955 		return -EINVAL;
956 
957 	cpus_write_lock();
958 
959 	cpuhp_tasks_frozen = tasks_frozen;
960 
961 	prev_state = cpuhp_set_state(st, target);
962 	/*
963 	 * If the current CPU state is in the range of the AP hotplug thread,
964 	 * then we need to kick the thread.
965 	 */
966 	if (st->state > CPUHP_TEARDOWN_CPU) {
967 		st->target = max((int)target, CPUHP_TEARDOWN_CPU);
968 		ret = cpuhp_kick_ap_work(cpu);
969 		/*
970 		 * The AP side has done the error rollback already. Just
971 		 * return the error code.
972 		 */
973 		if (ret)
974 			goto out;
975 
976 		/*
977 		 * We might have stopped while still in the range of the AP hotplug
978 		 * thread. Nothing to do anymore.
979 		 */
980 		if (st->state > CPUHP_TEARDOWN_CPU)
981 			goto out;
982 
983 		st->target = target;
984 	}
985 	/*
986 	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
987 	 * to do the further cleanups.
988 	 */
989 	ret = cpuhp_down_callbacks(cpu, st, target);
990 	if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
991 		cpuhp_reset_state(st, prev_state);
992 		__cpuhp_kick_ap(st);
993 	}
994 
995 out:
996 	cpus_write_unlock();
997 	/*
998 	 * Do post unplug cleanup. This is still protected against
999 	 * concurrent CPU hotplug via cpu_add_remove_lock.
1000 	 */
1001 	lockup_detector_cleanup();
1002 	return ret;
1003 }
1004 
1005 static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1006 {
1007 	if (cpu_hotplug_disabled)
1008 		return -EBUSY;
1009 	return _cpu_down(cpu, 0, target);
1010 }
1011 
1012 static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
1013 {
1014 	int err;
1015 
1016 	cpu_maps_update_begin();
1017 	err = cpu_down_maps_locked(cpu, target);
1018 	cpu_maps_update_done();
1019 	return err;
1020 }
1021 
1022 int cpu_down(unsigned int cpu)
1023 {
1024 	return do_cpu_down(cpu, CPUHP_OFFLINE);
1025 }
1026 EXPORT_SYMBOL(cpu_down);
1027 
1028 #else
1029 #define takedown_cpu		NULL
1030 #endif /*CONFIG_HOTPLUG_CPU*/
1031 
1032 /**
1033  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1034  * @cpu: cpu that just started
1035  *
1036  * It must be called by the arch code on the new cpu, before the new cpu
1037  * enables interrupts and before the "boot" cpu returns from __cpu_up().
1038  */
1039 void notify_cpu_starting(unsigned int cpu)
1040 {
1041 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1042 	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1043 	int ret;
1044 
1045 	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
1046 	st->booted_once = true;
1047 	while (st->state < target) {
1048 		st->state++;
1049 		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1050 		/*
1051 		 * STARTING must not fail!
1052 		 */
1053 		WARN_ON_ONCE(ret);
1054 	}
1055 }
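/*
 * Usage sketch (illustrative, not part of this file): a secondary CPU entry
 * path in arch code calls notify_cpu_starting() before marking the CPU
 * online and before enabling interrupts, as required above. The function
 * below is hypothetical and only shows the ordering.
 *
 *	static void example_arch_secondary_start(void)
 *	{
 *		unsigned int cpu = smp_processor_id();
 *
 *		notify_cpu_starting(cpu);
 *		set_cpu_online(cpu, true);
 *		local_irq_enable();
 *		cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 *	}
 */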
1056 
1057 /*
1058  * Called from the idle task. Wake up the controlling task which brings the
1059  * stopper and the hotplug thread of the upcoming CPU up and then delegates
1060  * the rest of the online bringup to the hotplug thread.
1061  */
1062 void cpuhp_online_idle(enum cpuhp_state state)
1063 {
1064 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1065 
1066 	/* Happens for the boot cpu */
1067 	if (state != CPUHP_AP_ONLINE_IDLE)
1068 		return;
1069 
1070 	st->state = CPUHP_AP_ONLINE_IDLE;
1071 	complete_ap_thread(st, true);
1072 }
1073 
1074 /* Requires cpu_add_remove_lock to be held */
1075 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1076 {
1077 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1078 	struct task_struct *idle;
1079 	int ret = 0;
1080 
1081 	cpus_write_lock();
1082 
1083 	if (!cpu_present(cpu)) {
1084 		ret = -EINVAL;
1085 		goto out;
1086 	}
1087 
1088 	/*
1089 	 * The caller of do_cpu_up might have raced with another
1090 	 * caller. Ignore it for now.
1091 	 */
1092 	if (st->state >= target)
1093 		goto out;
1094 
1095 	if (st->state == CPUHP_OFFLINE) {
1096 		/* Let it fail before we try to bring the cpu up */
1097 		idle = idle_thread_get(cpu);
1098 		if (IS_ERR(idle)) {
1099 			ret = PTR_ERR(idle);
1100 			goto out;
1101 		}
1102 	}
1103 
1104 	cpuhp_tasks_frozen = tasks_frozen;
1105 
1106 	cpuhp_set_state(st, target);
1107 	/*
1108 	 * If the current CPU state is in the range of the AP hotplug thread,
1109 	 * then we need to kick the thread once more.
1110 	 */
1111 	if (st->state > CPUHP_BRINGUP_CPU) {
1112 		ret = cpuhp_kick_ap_work(cpu);
1113 		/*
1114 		 * The AP side has done the error rollback already. Just
1115 		 * return the error code.
1116 		 */
1117 		if (ret)
1118 			goto out;
1119 	}
1120 
1121 	/*
1122 	 * Try to reach the target state. We max out on the BP at
1123 	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1124 	 * responsible for bringing it up to the target state.
1125 	 */
1126 	target = min((int)target, CPUHP_BRINGUP_CPU);
1127 	ret = cpuhp_up_callbacks(cpu, st, target);
1128 out:
1129 	cpus_write_unlock();
1130 	return ret;
1131 }
1132 
1133 static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
1134 {
1135 	int err = 0;
1136 
1137 	if (!cpu_possible(cpu)) {
1138 		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1139 		       cpu);
1140 #if defined(CONFIG_IA64)
1141 		pr_err("please check additional_cpus= boot parameter\n");
1142 #endif
1143 		return -EINVAL;
1144 	}
1145 
1146 	err = try_online_node(cpu_to_node(cpu));
1147 	if (err)
1148 		return err;
1149 
1150 	cpu_maps_update_begin();
1151 
1152 	if (cpu_hotplug_disabled) {
1153 		err = -EBUSY;
1154 		goto out;
1155 	}
1156 	if (!cpu_smt_allowed(cpu)) {
1157 		err = -EPERM;
1158 		goto out;
1159 	}
1160 
1161 	err = _cpu_up(cpu, 0, target);
1162 out:
1163 	cpu_maps_update_done();
1164 	return err;
1165 }
1166 
1167 int cpu_up(unsigned int cpu)
1168 {
1169 	return do_cpu_up(cpu, CPUHP_ONLINE);
1170 }
1171 EXPORT_SYMBOL_GPL(cpu_up);
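/*
 * Usage sketch (illustrative, not part of this file): in-kernel users can
 * offline and re-online a CPU with cpu_down()/cpu_up(); both return 0 or a
 * negative errno, and cpu_down() requires CONFIG_HOTPLUG_CPU. The helper
 * below is hypothetical.
 *
 *	static int example_cycle_cpu(unsigned int cpu)
 *	{
 *		int ret = cpu_down(cpu);
 *
 *		if (ret)
 *			return ret;
 *		return cpu_up(cpu);
 *	}
 */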
1172 
1173 #ifdef CONFIG_PM_SLEEP_SMP
1174 static cpumask_var_t frozen_cpus;
1175 
1176 int freeze_secondary_cpus(int primary)
1177 {
1178 	int cpu, error = 0;
1179 
1180 	cpu_maps_update_begin();
1181 	if (!cpu_online(primary))
1182 		primary = cpumask_first(cpu_online_mask);
1183 	/*
1184 	 * We take down all of the non-boot CPUs in one shot to avoid races
1185 	 * with userspace trying to use CPU hotplug at the same time.
1186 	 */
1187 	cpumask_clear(frozen_cpus);
1188 
1189 	pr_info("Disabling non-boot CPUs ...\n");
1190 	for_each_online_cpu(cpu) {
1191 		if (cpu == primary)
1192 			continue;
1193 		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1194 		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1195 		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1196 		if (!error)
1197 			cpumask_set_cpu(cpu, frozen_cpus);
1198 		else {
1199 			pr_err("Error taking CPU%d down: %d\n", cpu, error);
1200 			break;
1201 		}
1202 	}
1203 
1204 	if (!error)
1205 		BUG_ON(num_online_cpus() > 1);
1206 	else
1207 		pr_err("Non-boot CPUs are not disabled\n");
1208 
1209 	/*
1210 	 * Make sure the CPUs won't be enabled by someone else. We need to do
1211 	 * this even in case of failure as all disable_nonboot_cpus() users are
1212 	 * supposed to do enable_nonboot_cpus() on the failure path.
1213 	 */
1214 	cpu_hotplug_disabled++;
1215 
1216 	cpu_maps_update_done();
1217 	return error;
1218 }
1219 
1220 void __weak arch_enable_nonboot_cpus_begin(void)
1221 {
1222 }
1223 
1224 void __weak arch_enable_nonboot_cpus_end(void)
1225 {
1226 }
1227 
1228 void enable_nonboot_cpus(void)
1229 {
1230 	int cpu, error;
1231 
1232 	/* Allow everyone to use the CPU hotplug again */
1233 	cpu_maps_update_begin();
1234 	__cpu_hotplug_enable();
1235 	if (cpumask_empty(frozen_cpus))
1236 		goto out;
1237 
1238 	pr_info("Enabling non-boot CPUs ...\n");
1239 
1240 	arch_enable_nonboot_cpus_begin();
1241 
1242 	for_each_cpu(cpu, frozen_cpus) {
1243 		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1244 		error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1245 		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1246 		if (!error) {
1247 			pr_info("CPU%d is up\n", cpu);
1248 			continue;
1249 		}
1250 		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1251 	}
1252 
1253 	arch_enable_nonboot_cpus_end();
1254 
1255 	cpumask_clear(frozen_cpus);
1256 out:
1257 	cpu_maps_update_done();
1258 }
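/*
 * Usage sketch (illustrative, not part of this file): the suspend/hibernate
 * core pairs these calls around its single-CPU phase; disable_nonboot_cpus()
 * in linux/cpu.h is a thin wrapper around freeze_secondary_cpus(0). Note
 * that enable_nonboot_cpus() is expected even on the failure path, as the
 * comment in freeze_secondary_cpus() explains. The function below is a
 * hypothetical, heavily simplified outline.
 *
 *	static int example_single_cpu_phase(void)
 *	{
 *		int error = freeze_secondary_cpus(0);
 *
 *		if (!error)
 *			pr_info("example: running with one CPU online\n");
 *		enable_nonboot_cpus();
 *		return error;
 *	}
 */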
1259 
1260 static int __init alloc_frozen_cpus(void)
1261 {
1262 	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1263 		return -ENOMEM;
1264 	return 0;
1265 }
1266 core_initcall(alloc_frozen_cpus);
1267 
1268 /*
1269  * When callbacks for CPU hotplug notifications are being executed, we must
1270  * ensure that the state of the system with respect to the tasks being frozen
1271  * or not, as reported by the notification, remains unchanged *throughout the
1272  * duration* of the execution of the callbacks.
1273  * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1274  *
1275  * This synchronization is implemented by mutually excluding regular CPU
1276  * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1277  * Hibernate notifications.
1278  */
1279 static int
1280 cpu_hotplug_pm_callback(struct notifier_block *nb,
1281 			unsigned long action, void *ptr)
1282 {
1283 	switch (action) {
1284 
1285 	case PM_SUSPEND_PREPARE:
1286 	case PM_HIBERNATION_PREPARE:
1287 		cpu_hotplug_disable();
1288 		break;
1289 
1290 	case PM_POST_SUSPEND:
1291 	case PM_POST_HIBERNATION:
1292 		cpu_hotplug_enable();
1293 		break;
1294 
1295 	default:
1296 		return NOTIFY_DONE;
1297 	}
1298 
1299 	return NOTIFY_OK;
1300 }
1301 
1302 
1303 static int __init cpu_hotplug_pm_sync_init(void)
1304 {
1305 	/*
1306 	 * cpu_hotplug_pm_callback has higher priority than x86's
1307 	 * bsp_pm_callback, which depends on cpu_hotplug_pm_callback
1308 	 * having disabled cpu hotplug, to avoid cpu hotplug races.
1309 	 */
1310 	pm_notifier(cpu_hotplug_pm_callback, 0);
1311 	return 0;
1312 }
1313 core_initcall(cpu_hotplug_pm_sync_init);
1314 
1315 #endif /* CONFIG_PM_SLEEP_SMP */
1316 
1317 int __boot_cpu_id;
1318 
1319 #endif /* CONFIG_SMP */
1320 
1321 /* Boot processor state steps */
1322 static struct cpuhp_step cpuhp_hp_states[] = {
1323 	[CPUHP_OFFLINE] = {
1324 		.name			= "offline",
1325 		.startup.single		= NULL,
1326 		.teardown.single	= NULL,
1327 	},
1328 #ifdef CONFIG_SMP
1329 	[CPUHP_CREATE_THREADS]= {
1330 		.name			= "threads:prepare",
1331 		.startup.single		= smpboot_create_threads,
1332 		.teardown.single	= NULL,
1333 		.cant_stop		= true,
1334 	},
1335 	[CPUHP_PERF_PREPARE] = {
1336 		.name			= "perf:prepare",
1337 		.startup.single		= perf_event_init_cpu,
1338 		.teardown.single	= perf_event_exit_cpu,
1339 	},
1340 	[CPUHP_WORKQUEUE_PREP] = {
1341 		.name			= "workqueue:prepare",
1342 		.startup.single		= workqueue_prepare_cpu,
1343 		.teardown.single	= NULL,
1344 	},
1345 	[CPUHP_HRTIMERS_PREPARE] = {
1346 		.name			= "hrtimers:prepare",
1347 		.startup.single		= hrtimers_prepare_cpu,
1348 		.teardown.single	= hrtimers_dead_cpu,
1349 	},
1350 	[CPUHP_SMPCFD_PREPARE] = {
1351 		.name			= "smpcfd:prepare",
1352 		.startup.single		= smpcfd_prepare_cpu,
1353 		.teardown.single	= smpcfd_dead_cpu,
1354 	},
1355 	[CPUHP_RELAY_PREPARE] = {
1356 		.name			= "relay:prepare",
1357 		.startup.single		= relay_prepare_cpu,
1358 		.teardown.single	= NULL,
1359 	},
1360 	[CPUHP_SLAB_PREPARE] = {
1361 		.name			= "slab:prepare",
1362 		.startup.single		= slab_prepare_cpu,
1363 		.teardown.single	= slab_dead_cpu,
1364 	},
1365 	[CPUHP_RCUTREE_PREP] = {
1366 		.name			= "RCU/tree:prepare",
1367 		.startup.single		= rcutree_prepare_cpu,
1368 		.teardown.single	= rcutree_dead_cpu,
1369 	},
1370 	/*
1371 	 * On the tear-down path, timers_dead_cpu() must be invoked
1372 	 * before blk_mq_queue_reinit_notify() from notify_dead(),
1373 	 * otherwise an RCU stall occurs.
1374 	 */
1375 	[CPUHP_TIMERS_PREPARE] = {
1376 		.name			= "timers:prepare",
1377 		.startup.single		= timers_prepare_cpu,
1378 		.teardown.single	= timers_dead_cpu,
1379 	},
1380 	/* Kicks the plugged cpu into life */
1381 	[CPUHP_BRINGUP_CPU] = {
1382 		.name			= "cpu:bringup",
1383 		.startup.single		= bringup_cpu,
1384 		.teardown.single	= NULL,
1385 		.cant_stop		= true,
1386 	},
1387 	/* Final state before CPU kills itself */
1388 	[CPUHP_AP_IDLE_DEAD] = {
1389 		.name			= "idle:dead",
1390 	},
1391 	/*
1392 	 * Last state before CPU enters the idle loop to die. Transient state
1393 	 * for synchronization.
1394 	 */
1395 	[CPUHP_AP_OFFLINE] = {
1396 		.name			= "ap:offline",
1397 		.cant_stop		= true,
1398 	},
1399 	/* First state is scheduler control. Interrupts are disabled */
1400 	[CPUHP_AP_SCHED_STARTING] = {
1401 		.name			= "sched:starting",
1402 		.startup.single		= sched_cpu_starting,
1403 		.teardown.single	= sched_cpu_dying,
1404 	},
1405 	[CPUHP_AP_RCUTREE_DYING] = {
1406 		.name			= "RCU/tree:dying",
1407 		.startup.single		= NULL,
1408 		.teardown.single	= rcutree_dying_cpu,
1409 	},
1410 	[CPUHP_AP_SMPCFD_DYING] = {
1411 		.name			= "smpcfd:dying",
1412 		.startup.single		= NULL,
1413 		.teardown.single	= smpcfd_dying_cpu,
1414 	},
1415 	/* Entry state on starting. Interrupts enabled from here on. Transient
1416 	 * state for synchronization */
1417 	[CPUHP_AP_ONLINE] = {
1418 		.name			= "ap:online",
1419 	},
1420 	/*
1421 	 * Handled on the control processor until the plugged processor manages
1422 	 * this itself.
1423 	 */
1424 	[CPUHP_TEARDOWN_CPU] = {
1425 		.name			= "cpu:teardown",
1426 		.startup.single		= NULL,
1427 		.teardown.single	= takedown_cpu,
1428 		.cant_stop		= true,
1429 	},
1430 	/* Handle smpboot threads park/unpark */
1431 	[CPUHP_AP_SMPBOOT_THREADS] = {
1432 		.name			= "smpboot/threads:online",
1433 		.startup.single		= smpboot_unpark_threads,
1434 		.teardown.single	= smpboot_park_threads,
1435 	},
1436 	[CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
1437 		.name			= "irq/affinity:online",
1438 		.startup.single		= irq_affinity_online_cpu,
1439 		.teardown.single	= NULL,
1440 	},
1441 	[CPUHP_AP_PERF_ONLINE] = {
1442 		.name			= "perf:online",
1443 		.startup.single		= perf_event_init_cpu,
1444 		.teardown.single	= perf_event_exit_cpu,
1445 	},
1446 	[CPUHP_AP_WATCHDOG_ONLINE] = {
1447 		.name			= "lockup_detector:online",
1448 		.startup.single		= lockup_detector_online_cpu,
1449 		.teardown.single	= lockup_detector_offline_cpu,
1450 	},
1451 	[CPUHP_AP_WORKQUEUE_ONLINE] = {
1452 		.name			= "workqueue:online",
1453 		.startup.single		= workqueue_online_cpu,
1454 		.teardown.single	= workqueue_offline_cpu,
1455 	},
1456 	[CPUHP_AP_RCUTREE_ONLINE] = {
1457 		.name			= "RCU/tree:online",
1458 		.startup.single		= rcutree_online_cpu,
1459 		.teardown.single	= rcutree_offline_cpu,
1460 	},
1461 #endif
1462 	/*
1463 	 * The dynamically registered state space is here
1464 	 */
1465 
1466 #ifdef CONFIG_SMP
1467 	/* Last state is scheduler control setting the cpu active */
1468 	[CPUHP_AP_ACTIVE] = {
1469 		.name			= "sched:active",
1470 		.startup.single		= sched_cpu_activate,
1471 		.teardown.single	= sched_cpu_deactivate,
1472 	},
1473 #endif
1474 
1475 	/* CPU is fully up and running. */
1476 	[CPUHP_ONLINE] = {
1477 		.name			= "online",
1478 		.startup.single		= NULL,
1479 		.teardown.single	= NULL,
1480 	},
1481 };
1482 
1483 /* Sanity check for callbacks */
1484 static int cpuhp_cb_check(enum cpuhp_state state)
1485 {
1486 	if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1487 		return -EINVAL;
1488 	return 0;
1489 }
1490 
1491 /*
1492  * Returns a free slot for dynamic state assignment. The states are protected
1493  * by the cpuhp_state_mutex and an empty slot is identified
1494  * by having no name assigned.
1495  */
1496 static int cpuhp_reserve_state(enum cpuhp_state state)
1497 {
1498 	enum cpuhp_state i, end;
1499 	struct cpuhp_step *step;
1500 
1501 	switch (state) {
1502 	case CPUHP_AP_ONLINE_DYN:
1503 		step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
1504 		end = CPUHP_AP_ONLINE_DYN_END;
1505 		break;
1506 	case CPUHP_BP_PREPARE_DYN:
1507 		step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
1508 		end = CPUHP_BP_PREPARE_DYN_END;
1509 		break;
1510 	default:
1511 		return -EINVAL;
1512 	}
1513 
1514 	for (i = state; i <= end; i++, step++) {
1515 		if (!step->name)
1516 			return i;
1517 	}
1518 	WARN(1, "No more dynamic states available for CPU hotplug\n");
1519 	return -ENOSPC;
1520 }
1521 
1522 static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
1523 				 int (*startup)(unsigned int cpu),
1524 				 int (*teardown)(unsigned int cpu),
1525 				 bool multi_instance)
1526 {
1527 	/* (Un)Install the callbacks for further cpu hotplug operations */
1528 	struct cpuhp_step *sp;
1529 	int ret = 0;
1530 
1531 	/*
1532 	 * If name is NULL, then the state gets removed.
1533 	 *
1534 	 * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
1535 	 * the first allocation from these dynamic ranges, so the removal
1536 	 * would trigger a new allocation and clear the wrong (already
1537 	 * empty) state, leaving the callbacks of the to be cleared state
1538 	 * dangling, which causes wreckage on the next hotplug operation.
1539 	 */
1540 	if (name && (state == CPUHP_AP_ONLINE_DYN ||
1541 		     state == CPUHP_BP_PREPARE_DYN)) {
1542 		ret = cpuhp_reserve_state(state);
1543 		if (ret < 0)
1544 			return ret;
1545 		state = ret;
1546 	}
1547 	sp = cpuhp_get_step(state);
1548 	if (name && sp->name)
1549 		return -EBUSY;
1550 
1551 	sp->startup.single = startup;
1552 	sp->teardown.single = teardown;
1553 	sp->name = name;
1554 	sp->multi_instance = multi_instance;
1555 	INIT_HLIST_HEAD(&sp->list);
1556 	return ret;
1557 }
1558 
1559 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1560 {
1561 	return cpuhp_get_step(state)->teardown.single;
1562 }
1563 
1564 /*
1565  * Call the startup/teardown function for a step either on the AP or
1566  * on the current CPU.
1567  */
1568 static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1569 			    struct hlist_node *node)
1570 {
1571 	struct cpuhp_step *sp = cpuhp_get_step(state);
1572 	int ret;
1573 
1574 	/*
1575 	 * If there's nothing to do, we're done.
1576 	 * Relies on the union for multi_instance.
1577 	 */
1578 	if ((bringup && !sp->startup.single) ||
1579 	    (!bringup && !sp->teardown.single))
1580 		return 0;
1581 	/*
1582 	 * The non-AP-bound callbacks can fail on bringup. On teardown,
1583 	 * e.g. module removal, we crash for now.
1584 	 */
1585 #ifdef CONFIG_SMP
1586 	if (cpuhp_is_ap_state(state))
1587 		ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1588 	else
1589 		ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1590 #else
1591 	ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1592 #endif
1593 	BUG_ON(ret && !bringup);
1594 	return ret;
1595 }
1596 
1597 /*
1598  * Called from __cpuhp_setup_state on a recoverable failure.
1599  *
1600  * Note: The teardown callbacks for rollback are not allowed to fail!
1601  */
1602 static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1603 				   struct hlist_node *node)
1604 {
1605 	int cpu;
1606 
1607 	/* Roll back the already executed steps on the other cpus */
1608 	for_each_present_cpu(cpu) {
1609 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1610 		int cpustate = st->state;
1611 
1612 		if (cpu >= failedcpu)
1613 			break;
1614 
1615 		/* Did we invoke the startup call on that cpu ? */
1616 		if (cpustate >= state)
1617 			cpuhp_issue_call(cpu, state, false, node);
1618 	}
1619 }
1620 
1621 int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
1622 					  struct hlist_node *node,
1623 					  bool invoke)
1624 {
1625 	struct cpuhp_step *sp;
1626 	int cpu;
1627 	int ret;
1628 
1629 	lockdep_assert_cpus_held();
1630 
1631 	sp = cpuhp_get_step(state);
1632 	if (sp->multi_instance == false)
1633 		return -EINVAL;
1634 
1635 	mutex_lock(&cpuhp_state_mutex);
1636 
1637 	if (!invoke || !sp->startup.multi)
1638 		goto add_node;
1639 
1640 	/*
1641 	 * Try to call the startup callback for each present cpu
1642 	 * depending on the hotplug state of the cpu.
1643 	 */
1644 	for_each_present_cpu(cpu) {
1645 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1646 		int cpustate = st->state;
1647 
1648 		if (cpustate < state)
1649 			continue;
1650 
1651 		ret = cpuhp_issue_call(cpu, state, true, node);
1652 		if (ret) {
1653 			if (sp->teardown.multi)
1654 				cpuhp_rollback_install(cpu, state, node);
1655 			goto unlock;
1656 		}
1657 	}
1658 add_node:
1659 	ret = 0;
1660 	hlist_add_head(node, &sp->list);
1661 unlock:
1662 	mutex_unlock(&cpuhp_state_mutex);
1663 	return ret;
1664 }
1665 
1666 int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
1667 			       bool invoke)
1668 {
1669 	int ret;
1670 
1671 	cpus_read_lock();
1672 	ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
1673 	cpus_read_unlock();
1674 	return ret;
1675 }
1676 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
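/*
 * Usage sketch (illustrative, not part of this file): a driver with
 * per-device hotplug work registers a multi-instance state once (e.g. via
 * cpuhp_setup_state_multi() from linux/cpuhotplug.h) and then adds one
 * hlist_node per device. All names below are hypothetical.
 *
 *	struct example_dev {
 *		struct hlist_node node;
 *	};
 *
 *	static int example_dev_online(unsigned int cpu, struct hlist_node *node)
 *	{
 *		struct example_dev *d = hlist_entry(node, struct example_dev, node);
 *
 *		pr_info("example: device %p sees CPU%u online\n", d, cpu);
 *		return 0;
 *	}
 *
 *	static int example_add_dev(enum cpuhp_state state, struct example_dev *d)
 *	{
 *		return cpuhp_state_add_instance(state, &d->node);
 *	}
 */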
1677 
1678 /**
1679  * __cpuhp_setup_state_cpuslocked - Setup the callbacks for a hotplug machine state
1680  * @state:		The state to setup
 * @name:		Name of the state
1681  * @invoke:		If true, the startup function is invoked for cpus where
1682  *			cpu state >= @state
1683  * @startup:		startup callback function
1684  * @teardown:		teardown callback function
1685  * @multi_instance:	State is set up for multiple instances which get
1686  *			added afterwards.
1687  *
1688  * The caller needs to hold cpus read locked while calling this function.
1689  * Returns:
1690  *   On success:
1691  *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
1692  *      0 for all other states
1693  *   On failure: proper (negative) error code
1694  */
1695 int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
1696 				   const char *name, bool invoke,
1697 				   int (*startup)(unsigned int cpu),
1698 				   int (*teardown)(unsigned int cpu),
1699 				   bool multi_instance)
1700 {
1701 	int cpu, ret = 0;
1702 	bool dynstate;
1703 
1704 	lockdep_assert_cpus_held();
1705 
1706 	if (cpuhp_cb_check(state) || !name)
1707 		return -EINVAL;
1708 
1709 	mutex_lock(&cpuhp_state_mutex);
1710 
1711 	ret = cpuhp_store_callbacks(state, name, startup, teardown,
1712 				    multi_instance);
1713 
1714 	dynstate = state == CPUHP_AP_ONLINE_DYN;
1715 	if (ret > 0 && dynstate) {
1716 		state = ret;
1717 		ret = 0;
1718 	}
1719 
1720 	if (ret || !invoke || !startup)
1721 		goto out;
1722 
1723 	/*
1724 	 * Try to call the startup callback for each present cpu
1725 	 * depending on the hotplug state of the cpu.
1726 	 */
1727 	for_each_present_cpu(cpu) {
1728 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1729 		int cpustate = st->state;
1730 
1731 		if (cpustate < state)
1732 			continue;
1733 
1734 		ret = cpuhp_issue_call(cpu, state, true, NULL);
1735 		if (ret) {
1736 			if (teardown)
1737 				cpuhp_rollback_install(cpu, state, NULL);
1738 			cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1739 			goto out;
1740 		}
1741 	}
1742 out:
1743 	mutex_unlock(&cpuhp_state_mutex);
1744 	/*
1745 	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
1746 	 * dynamically allocated state in case of success.
1747 	 */
1748 	if (!ret && dynstate)
1749 		return state;
1750 	return ret;
1751 }
1752 EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
1753 
1754 int __cpuhp_setup_state(enum cpuhp_state state,
1755 			const char *name, bool invoke,
1756 			int (*startup)(unsigned int cpu),
1757 			int (*teardown)(unsigned int cpu),
1758 			bool multi_instance)
1759 {
1760 	int ret;
1761 
1762 	cpus_read_lock();
1763 	ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
1764 					     teardown, multi_instance);
1765 	cpus_read_unlock();
1766 	return ret;
1767 }
1768 EXPORT_SYMBOL(__cpuhp_setup_state);
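/*
 * Usage sketch (illustrative, not part of this file): typical callers use
 * the cpuhp_setup_state() wrapper from linux/cpuhotplug.h. With
 * CPUHP_AP_ONLINE_DYN a positive return value is the dynamically allocated
 * state and must be saved for later removal. The callback and variable
 * names are hypothetical.
 *
 *	static enum cpuhp_state example_hp_state;
 *
 *	static int example_online(unsigned int cpu)
 *	{
 *		pr_info("example: CPU%u came online\n", cpu);
 *		return 0;
 *	}
 *
 *	static int example_offline(unsigned int cpu)
 *	{
 *		pr_info("example: CPU%u going offline\n", cpu);
 *		return 0;
 *	}
 *
 *	static int __init example_init(void)
 *	{
 *		int ret;
 *
 *		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 *					"example/subsys:online",
 *					example_online, example_offline);
 *		if (ret < 0)
 *			return ret;
 *		example_hp_state = ret;
 *		return 0;
 *	}
 */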
1769 
1770 int __cpuhp_state_remove_instance(enum cpuhp_state state,
1771 				  struct hlist_node *node, bool invoke)
1772 {
1773 	struct cpuhp_step *sp = cpuhp_get_step(state);
1774 	int cpu;
1775 
1776 	BUG_ON(cpuhp_cb_check(state));
1777 
1778 	if (!sp->multi_instance)
1779 		return -EINVAL;
1780 
1781 	cpus_read_lock();
1782 	mutex_lock(&cpuhp_state_mutex);
1783 
1784 	if (!invoke || !cpuhp_get_teardown_cb(state))
1785 		goto remove;
1786 	/*
1787 	 * Call the teardown callback for each present cpu depending
1788 	 * on the hotplug state of the cpu. This function is not
1789 	 * allowed to fail currently!
1790 	 */
1791 	for_each_present_cpu(cpu) {
1792 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1793 		int cpustate = st->state;
1794 
1795 		if (cpustate >= state)
1796 			cpuhp_issue_call(cpu, state, false, node);
1797 	}
1798 
1799 remove:
1800 	hlist_del(node);
1801 	mutex_unlock(&cpuhp_state_mutex);
1802 	cpus_read_unlock();
1803 
1804 	return 0;
1805 }
1806 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
1807 
1808 /**
1809  * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
1810  * @state:	The state to remove
1811  * @invoke:	If true, the teardown function is invoked for cpus where
1812  *		cpu state >= @state
1813  *
1814  * The caller needs to hold cpus read locked while calling this function.
1815  * The teardown callback is currently not allowed to fail. Think
1816  * about module removal!
1817  */
1818 void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
1819 {
1820 	struct cpuhp_step *sp = cpuhp_get_step(state);
1821 	int cpu;
1822 
1823 	BUG_ON(cpuhp_cb_check(state));
1824 
1825 	lockdep_assert_cpus_held();
1826 
1827 	mutex_lock(&cpuhp_state_mutex);
1828 	if (sp->multi_instance) {
1829 		WARN(!hlist_empty(&sp->list),
1830 		     "Error: Removing state %d which has instances left.\n",
1831 		     state);
1832 		goto remove;
1833 	}
1834 
1835 	if (!invoke || !cpuhp_get_teardown_cb(state))
1836 		goto remove;
1837 
1838 	/*
1839 	 * Call the teardown callback for each present cpu depending
1840 	 * on the hotplug state of the cpu. This function is not
1841 	 * allowed to fail currently!
1842 	 */
1843 	for_each_present_cpu(cpu) {
1844 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1845 		int cpustate = st->state;
1846 
1847 		if (cpustate >= state)
1848 			cpuhp_issue_call(cpu, state, false, NULL);
1849 	}
1850 remove:
1851 	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1852 	mutex_unlock(&cpuhp_state_mutex);
1853 }
1854 EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
1855 
1856 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
1857 {
1858 	cpus_read_lock();
1859 	__cpuhp_remove_state_cpuslocked(state, invoke);
1860 	cpus_read_unlock();
1861 }
1862 EXPORT_SYMBOL(__cpuhp_remove_state);
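/*
 * Usage sketch (illustrative, not part of this file), continuing the
 * hypothetical example above __cpuhp_setup_state(): module exit removes the
 * dynamically allocated state, which runs the teardown callback on every
 * CPU whose state is at or above it.
 *
 *	static void __exit example_exit(void)
 *	{
 *		cpuhp_remove_state(example_hp_state);
 *	}
 */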
1863 
1864 #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1865 static ssize_t show_cpuhp_state(struct device *dev,
1866 				struct device_attribute *attr, char *buf)
1867 {
1868 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1869 
1870 	return sprintf(buf, "%d\n", st->state);
1871 }
1872 static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
1873 
1874 static ssize_t write_cpuhp_target(struct device *dev,
1875 				  struct device_attribute *attr,
1876 				  const char *buf, size_t count)
1877 {
1878 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1879 	struct cpuhp_step *sp;
1880 	int target, ret;
1881 
1882 	ret = kstrtoint(buf, 10, &target);
1883 	if (ret)
1884 		return ret;
1885 
1886 #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
1887 	if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
1888 		return -EINVAL;
1889 #else
1890 	if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
1891 		return -EINVAL;
1892 #endif
1893 
1894 	ret = lock_device_hotplug_sysfs();
1895 	if (ret)
1896 		return ret;
1897 
1898 	mutex_lock(&cpuhp_state_mutex);
1899 	sp = cpuhp_get_step(target);
1900 	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
1901 	mutex_unlock(&cpuhp_state_mutex);
1902 	if (ret)
1903 		goto out;
1904 
1905 	if (st->state < target)
1906 		ret = do_cpu_up(dev->id, target);
1907 	else
1908 		ret = do_cpu_down(dev->id, target);
1909 out:
1910 	unlock_device_hotplug();
1911 	return ret ? ret : count;
1912 }
1913 
1914 static ssize_t show_cpuhp_target(struct device *dev,
1915 				 struct device_attribute *attr, char *buf)
1916 {
1917 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1918 
1919 	return sprintf(buf, "%d\n", st->target);
1920 }
1921 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
1922 
1923 
1924 static ssize_t write_cpuhp_fail(struct device *dev,
1925 				struct device_attribute *attr,
1926 				const char *buf, size_t count)
1927 {
1928 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1929 	struct cpuhp_step *sp;
1930 	int fail, ret;
1931 
1932 	ret = kstrtoint(buf, 10, &fail);
1933 	if (ret)
1934 		return ret;
1935 
	/* Writing CPUHP_INVALID (-1) clears a previously armed failure injection */
	if (fail == CPUHP_INVALID) {
		st->fail = fail;
		return count;
	}

	/* Reject states outside the valid range to avoid out-of-bounds access */
	if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
		return -EINVAL;

1936 	/*
1937 	 * Cannot fail STARTING/DYING callbacks.
1938 	 */
1939 	if (cpuhp_is_atomic_state(fail))
1940 		return -EINVAL;
1941 
1942 	/*
1943 	 * Cannot fail anything that doesn't have callbacks.
1944 	 */
1945 	mutex_lock(&cpuhp_state_mutex);
1946 	sp = cpuhp_get_step(fail);
1947 	if (!sp->startup.single && !sp->teardown.single)
1948 		ret = -EINVAL;
1949 	mutex_unlock(&cpuhp_state_mutex);
1950 	if (ret)
1951 		return ret;
1952 
1953 	st->fail = fail;
1954 
1955 	return count;
1956 }
1957 
1958 static ssize_t show_cpuhp_fail(struct device *dev,
1959 			       struct device_attribute *attr, char *buf)
1960 {
1961 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1962 
1963 	return sprintf(buf, "%d\n", st->fail);
1964 }
1965 
1966 static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
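/*
 * Fault-injection sketch (illustrative, not part of this file): arm the
 * "fail" attribute with a state number picked from
 * /sys/devices/system/cpu/hotplug/states (one that actually has
 * callbacks), then request an offline via "target". When the hotplug
 * machinery reaches the armed state on this CPU the callback reports a
 * failure and the operation rolls back, so the write to "target" is
 * expected to fail. The CPU number and the state value 147 are made-up
 * assumptions; atomic STARTING/DYING states and states without
 * callbacks are rejected with -EINVAL, as write_cpuhp_fail() shows.
 */
#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);	/* sysfs write errors surface on close/flush */
}

int main(void)
{
	if (write_str("/sys/devices/system/cpu/cpu1/hotplug/fail", "147"))
		perror("fail");
	/* Expected to fail and roll the CPU back towards CPUHP_ONLINE. */
	if (write_str("/sys/devices/system/cpu/cpu1/hotplug/target", "0"))
		perror("target");
	return 0;
}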
1967 
1968 static struct attribute *cpuhp_cpu_attrs[] = {
1969 	&dev_attr_state.attr,
1970 	&dev_attr_target.attr,
1971 	&dev_attr_fail.attr,
1972 	NULL
1973 };
1974 
1975 static const struct attribute_group cpuhp_cpu_attr_group = {
1976 	.attrs = cpuhp_cpu_attrs,
1977 	.name = "hotplug",
1978 	NULL
1979 };
1980 
1981 static ssize_t show_cpuhp_states(struct device *dev,
1982 				 struct device_attribute *attr, char *buf)
1983 {
1984 	ssize_t cur, res = 0;
1985 	int i;
1986 
1987 	mutex_lock(&cpuhp_state_mutex);
1988 	for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
1989 		struct cpuhp_step *sp = cpuhp_get_step(i);
1990 
1991 		if (sp->name) {
1992 			cur = sprintf(buf, "%3d: %s\n", i, sp->name);
1993 			buf += cur;
1994 			res += cur;
1995 		}
1996 	}
1997 	mutex_unlock(&cpuhp_state_mutex);
1998 	return res;
1999 }
2000 static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
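/*
 * For reference, the read-only "states" file emits one "%3d: %s" line
 * per named step; the numeric values depend on the kernel
 * configuration, so the output looks roughly like:
 *
 *	  0: offline
 *	  1: threads:prepare
 *	...
 *	NNN: online
 */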
2001 
2002 static struct attribute *cpuhp_cpu_root_attrs[] = {
2003 	&dev_attr_states.attr,
2004 	NULL
2005 };
2006 
2007 static const struct attribute_group cpuhp_cpu_root_attr_group = {
2008 	.attrs = cpuhp_cpu_root_attrs,
2009 	.name = "hotplug",
2010 	NULL
2011 };
2012 
2013 #ifdef CONFIG_HOTPLUG_SMT
2014 
2015 static const char *smt_states[] = {
2016 	[CPU_SMT_ENABLED]		= "on",
2017 	[CPU_SMT_DISABLED]		= "off",
2018 	[CPU_SMT_FORCE_DISABLED]	= "forceoff",
2019 	[CPU_SMT_NOT_SUPPORTED]		= "notsupported",
2020 };
2021 
2022 static ssize_t
2023 show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
2024 {
2025 	return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
2026 }
2027 
2028 static void cpuhp_offline_cpu_device(unsigned int cpu)
2029 {
2030 	struct device *dev = get_cpu_device(cpu);
2031 
2032 	dev->offline = true;
2033 	/* Tell user space about the state change */
2034 	kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2035 }
2036 
2037 static void cpuhp_online_cpu_device(unsigned int cpu)
2038 {
2039 	struct device *dev = get_cpu_device(cpu);
2040 
2041 	dev->offline = false;
2042 	/* Tell user space about the state change */
2043 	kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2044 }
2045 
2046 static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2047 {
2048 	int cpu, ret = 0;
2049 
2050 	cpu_maps_update_begin();
2051 	for_each_online_cpu(cpu) {
2052 		if (topology_is_primary_thread(cpu))
2053 			continue;
2054 		ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2055 		if (ret)
2056 			break;
2057 		/*
2058 		 * As this needs to hold the cpu maps lock, it's impossible
2059 		 * to call device_offline() here, because that ends up
2060 		 * calling cpu_down(), which takes the cpu maps lock. The
2061 		 * cpu maps lock needs to be held as this might race against
2062 		 * in-kernel abusers of the hotplug machinery (thermal management).
2063 		 *
2064 		 * So nothing else would update the device's offline state. That
2065 		 * would leave the sysfs entry stale and prevent onlining after
2066 		 * SMT control has been changed to 'off' again. This is
2067 		 * called under the sysfs hotplug lock, so it is properly
2068 		 * serialized against regular offline usage.
2069 		 */
2070 		cpuhp_offline_cpu_device(cpu);
2071 	}
2072 	if (!ret)
2073 		cpu_smt_control = ctrlval;
2074 	cpu_maps_update_done();
2075 	return ret;
2076 }
2077 
2078 static int cpuhp_smt_enable(void)
2079 {
2080 	int cpu, ret = 0;
2081 
2082 	cpu_maps_update_begin();
2083 	cpu_smt_control = CPU_SMT_ENABLED;
2084 	for_each_present_cpu(cpu) {
2085 		/* Skip online CPUs and CPUs on offline nodes */
2086 		if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2087 			continue;
2088 		ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2089 		if (ret)
2090 			break;
2091 		/* See comment in cpuhp_smt_disable() */
2092 		cpuhp_online_cpu_device(cpu);
2093 	}
2094 	cpu_maps_update_done();
2095 	return ret;
2096 }
2097 
2098 static ssize_t
2099 store_smt_control(struct device *dev, struct device_attribute *attr,
2100 		  const char *buf, size_t count)
2101 {
2102 	int ctrlval, ret;
2103 
2104 	if (sysfs_streq(buf, "on"))
2105 		ctrlval = CPU_SMT_ENABLED;
2106 	else if (sysfs_streq(buf, "off"))
2107 		ctrlval = CPU_SMT_DISABLED;
2108 	else if (sysfs_streq(buf, "forceoff"))
2109 		ctrlval = CPU_SMT_FORCE_DISABLED;
2110 	else
2111 		return -EINVAL;
2112 
2113 	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2114 		return -EPERM;
2115 
2116 	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2117 		return -ENODEV;
2118 
2119 	ret = lock_device_hotplug_sysfs();
2120 	if (ret)
2121 		return ret;
2122 
2123 	if (ctrlval != cpu_smt_control) {
2124 		switch (ctrlval) {
2125 		case CPU_SMT_ENABLED:
2126 			ret = cpuhp_smt_enable();
2127 			break;
2128 		case CPU_SMT_DISABLED:
2129 		case CPU_SMT_FORCE_DISABLED:
2130 			ret = cpuhp_smt_disable(ctrlval);
2131 			break;
2132 		}
2133 	}
2134 
2135 	unlock_device_hotplug();
2136 	return ret ? ret : count;
2137 }
2138 static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
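/*
 * Userspace sketch (illustrative, not part of this file) for the global
 * SMT switch registered below under /sys/devices/system/cpu/smt/.
 * "control" accepts "on", "off" and "forceoff"; as store_smt_control()
 * shows, writes are refused with -EPERM once "forceoff" is in effect and
 * with -ENODEV when SMT is not supported. The read-only companion
 * "active" reports whether more than one thread per core is in use.
 */
#include <stdio.h>

int main(void)
{
	const char *ctl = "/sys/devices/system/cpu/smt/control";
	char buf[32] = "";
	FILE *f;

	/* Take the sibling threads offline; primary threads stay online. */
	f = fopen(ctl, "w");
	if (!f) {
		perror(ctl);
		return 1;
	}
	fputs("off", f);
	if (fclose(f) == EOF) {	/* the kernel's error, if any, shows up here */
		perror(ctl);
		return 1;
	}

	/* Read back the resulting control state ("off" is expected). */
	f = fopen(ctl, "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("smt control: %s", buf);
	if (f)
		fclose(f);
	return 0;
}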
2139 
2140 static ssize_t
2141 show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2142 {
2143 	bool active = topology_max_smt_threads() > 1;
2144 
2145 	return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
2146 }
2147 static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2148 
2149 static struct attribute *cpuhp_smt_attrs[] = {
2150 	&dev_attr_control.attr,
2151 	&dev_attr_active.attr,
2152 	NULL
2153 };
2154 
2155 static const struct attribute_group cpuhp_smt_attr_group = {
2156 	.attrs = cpuhp_smt_attrs,
2157 	.name = "smt",
2158 	NULL
2159 };
2160 
2161 static int __init cpu_smt_state_init(void)
2162 {
2163 	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2164 				  &cpuhp_smt_attr_group);
2165 }
2166 
2167 #else
2168 static inline int cpu_smt_state_init(void) { return 0; }
2169 #endif
2170 
2171 static int __init cpuhp_sysfs_init(void)
2172 {
2173 	int cpu, ret;
2174 
2175 	ret = cpu_smt_state_init();
2176 	if (ret)
2177 		return ret;
2178 
2179 	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
2180 				 &cpuhp_cpu_root_attr_group);
2181 	if (ret)
2182 		return ret;
2183 
2184 	for_each_possible_cpu(cpu) {
2185 		struct device *dev = get_cpu_device(cpu);
2186 
2187 		if (!dev)
2188 			continue;
2189 		ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2190 		if (ret)
2191 			return ret;
2192 	}
2193 	return 0;
2194 }
2195 device_initcall(cpuhp_sysfs_init);
2196 #endif
2197 
2198 /*
2199  * cpu_bit_bitmap[] is a special, "compressed" data structure that
2200  * represents all the NR_CPUS-bit binary values of the form 1<<nr.
2201  *
2202  * It is used by cpumask_of() to get a constant address for a CPU
2203  * mask value that has only a single bit set.
2204  */
2205 
2206 /* cpu_bit_bitmap[0] is empty - so we can back into it */
2207 #define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
2208 #define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
2209 #define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
2210 #define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
2211 
2212 const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
2213 
2214 	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
2215 	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
2216 #if BITS_PER_LONG > 32
2217 	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
2218 	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
2219 #endif
2220 };
2221 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
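/*
 * Standalone sketch (userspace, illustrative only) of the trick the
 * table above relies on. Roughly, cpumask_of()/get_cpu_mask() in
 * <linux/cpumask.h> pick the row whose word 0 carries
 * 1UL << (cpu % BITS_PER_LONG) and then step the pointer back by
 * cpu / BITS_PER_LONG words, so the single set bit lands in the right
 * word of the returned mask. The words stepped over belong to lower
 * rows and are all zero, which is why row 0 must stay empty.
 */
#include <stdio.h>

#define NBITS	(8 * sizeof(unsigned long))	/* stands in for BITS_PER_LONG */
#define NR	128				/* stands in for NR_CPUS       */
#define LONGS	((NR + NBITS - 1) / NBITS)	/* == BITS_TO_LONGS(NR)        */

/* Row 0 stays all-zero; row r (r >= 1) has 1UL << (r - 1) in word 0. */
static unsigned long bitmap[NBITS + 1][LONGS];

static const unsigned long *one_bit_mask(unsigned int cpu)
{
	const unsigned long *p = bitmap[1 + cpu % NBITS];

	return p - cpu / NBITS;
}

int main(void)
{
	const unsigned long *mask;
	unsigned int cpu = 70;			/* any value below NR */
	unsigned int i;

	for (i = 0; i < NBITS; i++)
		bitmap[i + 1][0] = 1UL << i;

	mask = one_bit_mask(cpu);
	for (i = 0; i < LONGS; i++)		/* only word cpu/NBITS is non-zero */
		printf("word %u: %#lx\n", i, mask[i]);
	return 0;
}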
2222 
2223 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
2224 EXPORT_SYMBOL(cpu_all_bits);
2225 
2226 #ifdef CONFIG_INIT_ALL_POSSIBLE
2227 struct cpumask __cpu_possible_mask __read_mostly
2228 	= {CPU_BITS_ALL};
2229 #else
2230 struct cpumask __cpu_possible_mask __read_mostly;
2231 #endif
2232 EXPORT_SYMBOL(__cpu_possible_mask);
2233 
2234 struct cpumask __cpu_online_mask __read_mostly;
2235 EXPORT_SYMBOL(__cpu_online_mask);
2236 
2237 struct cpumask __cpu_present_mask __read_mostly;
2238 EXPORT_SYMBOL(__cpu_present_mask);
2239 
2240 struct cpumask __cpu_active_mask __read_mostly;
2241 EXPORT_SYMBOL(__cpu_active_mask);
2242 
2243 void init_cpu_present(const struct cpumask *src)
2244 {
2245 	cpumask_copy(&__cpu_present_mask, src);
2246 }
2247 
2248 void init_cpu_possible(const struct cpumask *src)
2249 {
2250 	cpumask_copy(&__cpu_possible_mask, src);
2251 }
2252 
2253 void init_cpu_online(const struct cpumask *src)
2254 {
2255 	cpumask_copy(&__cpu_online_mask, src);
2256 }
2257 
2258 /*
2259  * Activate the first processor.
2260  */
2261 void __init boot_cpu_init(void)
2262 {
2263 	int cpu = smp_processor_id();
2264 
2265 	/* Mark the boot cpu "present", "online" etc. for SMP and UP cases */
2266 	set_cpu_online(cpu, true);
2267 	set_cpu_active(cpu, true);
2268 	set_cpu_present(cpu, true);
2269 	set_cpu_possible(cpu, true);
2270 
2271 #ifdef CONFIG_SMP
2272 	__boot_cpu_id = cpu;
2273 #endif
2274 }
2275 
2276 /*
2277  * Must be called _AFTER_ setting up the per_cpu areas
2278  */
2279 void __init boot_cpu_hotplug_init(void)
2280 {
2281 #ifdef CONFIG_SMP
2282 	this_cpu_write(cpuhp_state.booted_once, true);
2283 #endif
2284 	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2285 }
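/*
 * Call-order sketch, paraphrased from init/main.c of this era (the
 * surrounding calls are elided and may differ between versions): the
 * boot CPU is entered into the masks before the per_cpu areas exist,
 * and only afterwards is its hotplug state recorded per-CPU.
 *
 *	start_kernel()
 *		boot_cpu_init();		// masks only, no per_cpu access
 *		...
 *		setup_per_cpu_areas();
 *		smp_prepare_boot_cpu();
 *		boot_cpu_hotplug_init();	// this_cpu_write() is now safe
 *		...
 */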
2286