/*
 * pseries CPU Hotplug infrastructure.
 *
 * Split out from arch/powerpc/platforms/pseries/setup.c
 *  arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
 *
 * Peter Bergner, IBM	March 2001.
 * Copyright (C) 2001 IBM.
 * Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 * Plus various changes from other IBM teams...
 *
 * Copyright (C) 2006 Michael Ellerman, IBM Corporation
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/sched.h>	/* for idle_task_exit */
#include <linux/sched/hotplug.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/xics.h>
#include <asm/plpar_wrappers.h>

#include "pseries.h"
#include "offline_states.h"

/* This version can't take the spinlock, because it never returns */
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;

static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
							CPU_STATE_OFFLINE;
static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;

static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;

static bool cede_offline_enabled __read_mostly = true;

/*
 * Enable/disable cede_offline when available.
 */
static int __init setup_cede_offline(char *str)
{
	return (kstrtobool(str, &cede_offline_enabled) == 0);
}

__setup("cede_offline=", setup_cede_offline);
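
/*
 * Example (illustrative, editorial note): booting with "cede_offline=n"
 * on the kernel command line keeps default_offline_state at
 * CPU_STATE_OFFLINE, so offlined CPUs are handed back to firmware via
 * stop-self instead of being parked in an extended cede loop.
 */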

enum cpu_state_vals get_cpu_current_state(int cpu)
{
	return per_cpu(current_state, cpu);
}

void set_cpu_current_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(current_state, cpu) = state;
}

enum cpu_state_vals get_preferred_offline_state(int cpu)
{
	return per_cpu(preferred_offline_state, cpu);
}

void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(preferred_offline_state, cpu) = state;
}

void set_default_offline_state(int cpu)
{
	per_cpu(preferred_offline_state, cpu) = default_offline_state;
}

static void rtas_stop_self(void)
{
	static struct rtas_args args;

	local_irq_disable();

	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);

	printk("cpu %u (hwid %u) Ready to die...\n",
	       smp_processor_id(), hard_smp_processor_id());

	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);

	panic("Alas, I survived.\n");
}

static void pseries_mach_cpu_die(void)
{
	unsigned int cpu = smp_processor_id();
	unsigned int hwcpu = hard_smp_processor_id();
	u8 cede_latency_hint = 0;

	local_irq_disable();
	idle_task_exit();
	xics_teardown_cpu();

	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
		if (ppc_md.suspend_disable_cpu)
			ppc_md.suspend_disable_cpu();

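		/*
		 * Latency hint passed to extended_cede_processor(); its
		 * exact interpretation is firmware-defined, but a non-zero
		 * value indicates this CPU can tolerate a longer wakeup
		 * latency while parked (editorial note).
		 */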
		cede_latency_hint = 2;

		get_lppaca()->idle = 1;
		if (!lppaca_shared_proc(get_lppaca()))
			get_lppaca()->donate_dedicated_cpu = 1;

		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
			while (!prep_irq_for_idle()) {
				local_irq_enable();
				local_irq_disable();
			}

			extended_cede_processor(cede_latency_hint);
		}

		local_irq_disable();

		if (!lppaca_shared_proc(get_lppaca()))
			get_lppaca()->donate_dedicated_cpu = 0;
		get_lppaca()->idle = 0;

		if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
			unregister_slb_shadow(hwcpu);

			hard_irq_disable();
			/*
			 * Call to start_secondary_resume() will not return.
			 * Kernel stack will be reset and start_secondary()
			 * will be called to continue the online operation.
			 */
			start_secondary_resume();
		}
	}

	/* Requested state is CPU_STATE_OFFLINE at this point */
	WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);

	set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
	unregister_slb_shadow(hwcpu);
	rtas_stop_self();

	/* Should never get here... */
	BUG();
	for (;;)
		;
}
164 
165 static int pseries_cpu_disable(void)
166 {
167 	int cpu = smp_processor_id();
168 
169 	set_cpu_online(cpu, false);
170 	vdso_data->processorCount--;
171 
172 	/*fix boot_cpuid here*/
173 	if (cpu == boot_cpuid)
174 		boot_cpuid = cpumask_any(cpu_online_mask);
175 
176 	/* FIXME: abstract this to not be platform specific later on */
177 	xics_migrate_irqs_away();
178 	return 0;
179 }
180 
181 /*
182  * pseries_cpu_die: Wait for the cpu to die.
183  * @cpu: logical processor id of the CPU whose death we're awaiting.
184  *
185  * This function is called from the context of the thread which is performing
186  * the cpu-offline. Here we wait for long enough to allow the cpu in question
187  * to self-destroy so that the cpu-offline thread can send the CPU_DEAD
188  * notifications.
189  *
190  * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
191  * self-destruct.
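 *
 * Two different waits are used below: a cede-offline (CPU_STATE_INACTIVE)
 * is detected by polling the per-cpu state for up to roughly five seconds,
 * while a stop-self offline is polled with a short loop around the
 * query-cpu-stopped-state RTAS call.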
 */
static void pseries_cpu_die(unsigned int cpu)
{
	int tries;
	int cpu_status = 1;
	unsigned int pcpu = get_hard_smp_processor_id(cpu);

	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		cpu_status = 1;
		for (tries = 0; tries < 5000; tries++) {
			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
				cpu_status = 0;
				break;
			}
			msleep(1);
		}
	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {

		for (tries = 0; tries < 25; tries++) {
			cpu_status = smp_query_cpu_stopped(pcpu);
			if (cpu_status == QCSS_STOPPED ||
			    cpu_status == QCSS_HARDWARE_ERROR)
				break;
			cpu_relax();
		}
	}

	if (cpu_status != 0) {
		printk("Querying DEAD? cpu %i (%i) shows %i\n",
		       cpu, pcpu, cpu_status);
	}

	/* Isolation and deallocation are definitely done by
	 * drslot_chrp_cpu.  If they were not they would be
	 * done here.  Change isolate state to Isolate and
	 * change allocation-state to Unusable.
	 */
	paca[cpu].cpu_start = 0;
}

/*
 * Update cpu_present_mask and paca(s) for a new cpu node.  The wrinkle
 * here is that a cpu device node may represent up to two logical cpus
 * in the SMT case.  We must honor the assumption in other code that
 * the logical ids for sibling SMT threads x and y are adjacent, such
 * that x^1 == y and y^1 == x.
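 * For example, with nthreads == 2 a new core's threads may land on
 * logical cpus {2,3} or {4,5}, but never {3,4}, since 3^1 == 2, not 4.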
 */
static int pseries_add_processor(struct device_node *np)
{
	unsigned int cpu;
	cpumask_var_t candidate_mask, tmp;
	int err = -ENOSPC, len, nthreads, i;
	const __be32 *intserv;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return 0;

	zalloc_cpumask_var(&candidate_mask, GFP_KERNEL);
	zalloc_cpumask_var(&tmp, GFP_KERNEL);

	nthreads = len / sizeof(u32);
	for (i = 0; i < nthreads; i++)
		cpumask_set_cpu(i, tmp);

	cpu_maps_update_begin();

	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));

	/* Get a bitmap of unoccupied slots. */
	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
	if (cpumask_empty(candidate_mask)) {
		/* If we get here, it most likely means that NR_CPUS is
		 * less than the partition's max processors setting.
		 */
		printk(KERN_ERR "Cannot add cpu %s; this system configuration"
		       " supports %d logical cpus.\n", np->full_name,
		       num_possible_cpus());
		goto out_unlock;
	}

	while (!cpumask_empty(tmp))
		if (cpumask_subset(tmp, candidate_mask))
			/* Found a range where we can insert the new cpu(s) */
			break;
		else
			cpumask_shift_left(tmp, tmp, nthreads);

	if (cpumask_empty(tmp)) {
		printk(KERN_ERR "Unable to find space in cpu_present_mask for"
		       " processor %s with %d thread(s)\n", np->name,
		       nthreads);
		goto out_unlock;
	}

	for_each_cpu(cpu, tmp) {
		BUG_ON(cpu_present(cpu));
		set_cpu_present(cpu, true);
		set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
	}
	err = 0;
out_unlock:
	cpu_maps_update_done();
	free_cpumask_var(candidate_mask);
	free_cpumask_var(tmp);
	return err;
}

/*
 * Update the present map for a cpu node which is going away, and set
 * the hard id in the paca(s) to -1 to be consistent with boot time
 * convention for non-present cpus.
 */
static void pseries_remove_processor(struct device_node *np)
{
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(cpu_online(cpu));
			set_cpu_present(cpu, false);
			set_hard_smp_processor_id(cpu, -1);
			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to remove "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();
}

static int dlpar_online_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(get_cpu_current_state(cpu)
					!= CPU_STATE_OFFLINE);
			cpu_maps_update_done();
			rc = device_online(get_cpu_device(cpu));
			if (rc)
				goto out;
			cpu_maps_update_begin();

			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to online "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;
}

static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
{
	struct device_node *child = NULL;
	u32 my_drc_index;
	bool found;
	int rc;

	/* Assume cpu doesn't exist */
	found = false;

	for_each_child_of_node(parent, child) {
		rc = of_property_read_u32(child, "ibm,my-drc-index",
					  &my_drc_index);
		if (rc)
			continue;

		if (my_drc_index == drc_index) {
			of_node_put(child);
			found = true;
			break;
		}
	}

	return found;
}

static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
{
	bool found = false;
	int rc, index;

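	/*
	 * Per the note in find_dlpar_cpus_to_add(), cell 0 of
	 * "ibm,drc-indexes" holds the entry count and the drc values
	 * follow; the scan below starts at cell 0 (so it also reads the
	 * leading count cell) and walks until the property is exhausted.
	 */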
	index = 0;
	while (!found) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (drc == drc_index)
			found = true;
	}

	return found;
}

static ssize_t dlpar_cpu_add(u32 drc_index)
{
	struct device_node *dn, *parent;
	int rc, saved_rc;

	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Failed to find CPU root node \"/cpus\"\n");
		return -ENODEV;
	}

	if (dlpar_cpu_exists(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("CPU with drc index %x already exists\n", drc_index);
		return -EINVAL;
	}

	if (!valid_cpu_drc_index(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
		return -EINVAL;
	}

	rc = dlpar_acquire_drc(drc_index);
	if (rc) {
		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
			rc, drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
	of_node_put(parent);
	if (!dn) {
		pr_warn("Failed call to configure-connector, drc index: %x\n",
			drc_index);
		dlpar_release_drc(drc_index);
		return -EINVAL;
	}

	rc = dlpar_attach_node(dn);
	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
			dn->name, rc, drc_index);

		rc = dlpar_release_drc(drc_index);
		if (!rc)
			dlpar_free_cc_nodes(dn);

		return saved_rc;
	}

	rc = dlpar_online_cpu(dn);
	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n",
			dn->name, rc, drc_index);

		rc = dlpar_detach_node(dn);
		if (!rc)
			dlpar_release_drc(drc_index);

		return saved_rc;
	}

	pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name,
		 drc_index);
	return rc;
}

static int dlpar_offline_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;

			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
				break;

			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
				set_preferred_offline_state(cpu,
							    CPU_STATE_OFFLINE);
				cpu_maps_update_done();
				rc = device_offline(get_cpu_device(cpu));
				if (rc)
					goto out;
				cpu_maps_update_begin();
				break;
			}

			/*
			 * The cpu is in CPU_STATE_INACTIVE.
			 * Upgrade its state to CPU_STATE_OFFLINE.
			 */
			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
			BUG_ON(plpar_hcall_norets(H_PROD, thread)
								!= H_SUCCESS);
			__cpu_die(cpu);
			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;
}

static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
	int rc;

	pr_debug("Attempting to remove CPU %s, drc index: %x\n",
		 dn->name, drc_index);

	rc = dlpar_offline_cpu(dn);
	if (rc) {
		pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc);
		return -EINVAL;
	}

	rc = dlpar_release_drc(drc_index);
	if (rc) {
		pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
			drc_index, dn->name, rc);
		dlpar_online_cpu(dn);
		return rc;
	}

	rc = dlpar_detach_node(dn);
	if (rc) {
		int saved_rc = rc;

		pr_warn("Failed to detach CPU %s, rc: %d\n", dn->name, rc);

		rc = dlpar_acquire_drc(drc_index);
		if (!rc)
			dlpar_online_cpu(dn);

		return saved_rc;
	}

	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
	return 0;
}

static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
{
	struct device_node *dn;
	u32 my_index;
	int rc;

	for_each_node_by_type(dn, "cpu") {
		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
		if (rc)
			continue;

		if (my_index == drc_index)
			break;
	}

	return dn;
}

static int dlpar_cpu_remove_by_index(u32 drc_index)
{
	struct device_node *dn;
	int rc;

	dn = cpu_drc_index_to_dn(drc_index);
	if (!dn) {
		pr_warn("Cannot find CPU (drc index %x) to remove\n",
			drc_index);
		return -ENODEV;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);
	return rc;
}

static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
{
	struct device_node *dn;
	int cpus_found = 0;
	int rc;

	/* We want to find cpus_to_remove + 1 CPUs to ensure we do not
	 * remove the last CPU.
	 */
	for_each_node_by_type(dn, "cpu") {
		cpus_found++;

		if (cpus_found > cpus_to_remove) {
			of_node_put(dn);
			break;
		}

		/* Note that cpus_found is always 1 ahead of the index
		 * into the cpu_drcs array, so we use cpus_found - 1
		 */
		rc = of_property_read_u32(dn, "ibm,my-drc-index",
					  &cpu_drcs[cpus_found - 1]);
		if (rc) {
			pr_warn("Error occurred getting drc-index for %s\n",
				dn->name);
			of_node_put(dn);
			return -1;
		}
	}

	if (cpus_found < cpus_to_remove) {
		pr_warn("Failed to find enough CPUs (%d of %d) to remove\n",
			cpus_found, cpus_to_remove);
	} else if (cpus_found == cpus_to_remove) {
		pr_warn("Cannot remove all CPUs\n");
	}

	return cpus_found;
}

static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
{
	u32 *cpu_drcs;
	int cpus_found;
	int cpus_removed = 0;
	int i, rc;

	pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove);

	cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove);
	if (cpus_found <= cpus_to_remove) {
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_remove; i++) {
		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
		if (rc)
			break;

		cpus_removed++;
	}

	if (cpus_removed != cpus_to_remove) {
		pr_warn("CPU hot-remove failed, adding back removed CPUs\n");

		for (i = 0; i < cpus_removed; i++)
			dlpar_cpu_add(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}

static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
{
	struct device_node *parent;
	int cpus_found = 0;
	int index, rc;

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Could not find CPU root node in device tree\n");
		kfree(cpu_drcs);
		return -1;
	}

	/* Search the ibm,drc-indexes array for possible CPU drcs to
	 * add. Note that the format of the ibm,drc-indexes array is
	 * the number of entries in the array followed by the array
	 * of drc values so we start looking at index = 1.
	 */
	index = 1;
	while (cpus_found < cpus_to_add) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (dlpar_cpu_exists(parent, drc))
			continue;

		cpu_drcs[cpus_found++] = drc;
	}

	of_node_put(parent);
	return cpus_found;
}

static int dlpar_cpu_add_by_count(u32 cpus_to_add)
{
	u32 *cpu_drcs;
	int cpus_added = 0;
	int cpus_found;
	int i, rc;

	pr_debug("Attempting to hot-add %d CPUs\n", cpus_to_add);

	cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
	if (cpus_found < cpus_to_add) {
		pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
			cpus_found, cpus_to_add);
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_add; i++) {
		rc = dlpar_cpu_add(cpu_drcs[i]);
		if (rc)
			break;

		cpus_added++;
	}

	if (cpus_added < cpus_to_add) {
		pr_warn("CPU hot-add failed, removing any added CPUs\n");

		for (i = 0; i < cpus_added; i++)
			dlpar_cpu_remove_by_index(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}

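/*
 * Entry point for CPU hotplug requests delivered as hotplug error logs
 * (e.g. from the drmgr tool or the management console); the target CPUs
 * are identified either by a count or by an explicit drc index.
 */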
int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
{
	u32 count, drc_index;
	int rc;

	count = hp_elog->_drc_u.drc_count;
	drc_index = hp_elog->_drc_u.drc_index;

	lock_device_hotplug();

	switch (hp_elog->action) {
	case PSERIES_HP_ELOG_ACTION_REMOVE:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
			rc = dlpar_cpu_remove_by_count(count);
		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_remove_by_index(drc_index);
		else
			rc = -EINVAL;
		break;
	case PSERIES_HP_ELOG_ACTION_ADD:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
			rc = dlpar_cpu_add_by_count(count);
		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_add(drc_index);
		else
			rc = -EINVAL;
		break;
	default:
		pr_err("Invalid action (%d) specified\n", hp_elog->action);
		rc = -EINVAL;
		break;
	}

	unlock_device_hotplug();
	return rc;
}

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE

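/*
 * These hooks back the probe/release files under /sys/devices/system/cpu/
 * through ppc_md.cpu_probe/cpu_release (set in pseries_cpu_hotplug_init()
 * below).  Illustrative usage, with a hypothetical drc index and device
 * tree path:
 *
 *	echo 0x10000000 > /sys/devices/system/cpu/probe
 *	echo /cpus/PowerPC,POWER8@10 > /sys/devices/system/cpu/release
 */
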
static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
{
	u32 drc_index;
	int rc;

	rc = kstrtou32(buf, 0, &drc_index);
	if (rc)
		return -EINVAL;

	rc = dlpar_cpu_add(drc_index);

	return rc ? rc : count;
}

static ssize_t dlpar_cpu_release(const char *buf, size_t count)
{
	struct device_node *dn;
	u32 drc_index;
	int rc;

	dn = of_find_node_by_path(buf);
	if (!dn)
		return -EINVAL;

	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
	if (rc) {
		of_node_put(dn);
		return -EINVAL;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);

	return rc ? rc : count;
}

#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

static int pseries_smp_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	int err = 0;

	switch (action) {
	case OF_RECONFIG_ATTACH_NODE:
		err = pseries_add_processor(rd->dn);
		break;
	case OF_RECONFIG_DETACH_NODE:
		pseries_remove_processor(rd->dn);
		break;
	}
	return notifier_from_errno(err);
}

static struct notifier_block pseries_smp_nb = {
	.notifier_call = pseries_smp_notifier,
};

#define MAX_CEDE_LATENCY_LEVELS		4
#define CEDE_LATENCY_PARAM_LENGTH	10
#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
#define CEDE_LATENCY_TOKEN		45

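/*
 * Buffer for the CEDE latency settings returned by the
 * ibm,get-system-parameter RTAS call with token CEDE_LATENCY_TOKEN:
 * room for up to MAX_CEDE_LATENCY_LEVELS records of
 * CEDE_LATENCY_PARAM_LENGTH bytes each (the record layout itself is
 * defined by the platform architecture, not by this file).
 */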
static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];

static int parse_cede_parameters(void)
{
	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
	return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
			 NULL,
			 CEDE_LATENCY_TOKEN,
			 __pa(cede_parameters),
			 CEDE_LATENCY_PARAM_MAX_LENGTH);
}

static int __init pseries_cpu_hotplug_init(void)
{
	int cpu;
	int qcss_tok;

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
	ppc_md.cpu_probe = dlpar_cpu_probe;
	ppc_md.cpu_release = dlpar_cpu_release;
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

	rtas_stop_self_token = rtas_token("stop-self");
	qcss_tok = rtas_token("query-cpu-stopped-state");

	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
			qcss_tok == RTAS_UNKNOWN_SERVICE) {
		printk(KERN_INFO "CPU Hotplug not supported by firmware "
				"- disabling.\n");
		return 0;
	}

	ppc_md.cpu_die = pseries_mach_cpu_die;
	smp_ops->cpu_disable = pseries_cpu_disable;
	smp_ops->cpu_die = pseries_cpu_die;

	/* Processors can be added/removed only on LPAR */
	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		of_reconfig_notifier_register(&pseries_smp_nb);
		cpu_maps_update_begin();
		if (cede_offline_enabled && parse_cede_parameters() == 0) {
			default_offline_state = CPU_STATE_INACTIVE;
			for_each_online_cpu(cpu)
				set_default_offline_state(cpu);
		}
		cpu_maps_update_done();
	}

	return 0;
}
machine_arch_initcall(pseries, pseries_cpu_hotplug_init);