1 /*
2  * PowerNV OPAL high level interfaces
3  *
4  * Copyright 2011 IBM Corp.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11 
12 #define pr_fmt(fmt)	"opal: " fmt
13 
14 #include <linux/printk.h>
15 #include <linux/types.h>
16 #include <linux/of.h>
17 #include <linux/of_fdt.h>
18 #include <linux/of_platform.h>
19 #include <linux/interrupt.h>
20 #include <linux/notifier.h>
21 #include <linux/slab.h>
22 #include <linux/sched.h>
23 #include <linux/kobject.h>
24 #include <linux/delay.h>
25 #include <linux/memblock.h>
26 #include <linux/kthread.h>
27 #include <linux/freezer.h>
28 
29 #include <asm/machdep.h>
30 #include <asm/opal.h>
31 #include <asm/firmware.h>
32 #include <asm/mce.h>
33 
34 #include "powernv.h"
35 
/* Kobject backing /sys/firmware/opal; created by opal_sysfs_init() */
struct kobject *opal_kobj;
38 
/*
 * Location of the OPAL firmware image, filled in from the "ibm,opal"
 * device-tree node by early_init_dt_scan_opal().
 */
struct opal {
	u64 base;	/* value of the "opal-base-address" property */
	u64 entry;	/* value of the "opal-entry-address" property */
	u64 size;	/* value of the "opal-runtime-size" property */
} opal;
44 
/*
 * One machine check recoverable range, decoded from the
 * "mcheck-recoverable-ranges" device-tree property.
 */
struct mcheck_recoverable_range {
	u64 start_addr;		/* first address covered by the range */
	u64 end_addr;		/* start_addr + length; exclusive on lookup */
	u64 recover_addr;	/* address handed back when a NIP falls in range */
};
50 
/* Table of recoverable ranges and its entry count, filled in at early boot */
static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

/* The "/ibm,opal" device-tree node, looked up in opal_init() */
struct device_node *opal_node;
/* Serialises console output in opal_put_chars() */
static DEFINE_SPINLOCK(opal_write_lock);
/* Virtual IRQs requested for OPAL (0 marks an unused slot) and slot count */
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
/* Notifier chain fed with OPAL event bits by opal_do_notifier() */
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
/* One notifier chain per OPAL message type */
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
/* Protects last_notified_mask */
static DEFINE_SPINLOCK(opal_notifier_lock);
/* Event bits recorded by the most recent notification */
static uint64_t last_notified_mask = 0x0ul;
/* Non-zero while notifications are held off, see opal_notifier_disable() */
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
/* kopald polling period in milliseconds; 0 means no heartbeat thread */
static uint32_t opal_heartbeat;
64 
65 static void opal_reinit_cores(void)
66 {
67 	/* Do the actual re-init, This will clobber all FPRs, VRs, etc...
68 	 *
69 	 * It will preserve non volatile GPRs and HSPRG0/1. It will
70 	 * also restore HIDs and other SPRs to their original value
71 	 * but it might clobber a bunch.
72 	 */
73 #ifdef __BIG_ENDIAN__
74 	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
75 #else
76 	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
77 #endif
78 }
79 
80 int __init early_init_dt_scan_opal(unsigned long node,
81 				   const char *uname, int depth, void *data)
82 {
83 	const void *basep, *entryp, *sizep;
84 	int basesz, entrysz, runtimesz;
85 
86 	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
87 		return 0;
88 
89 	basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
90 	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
91 	sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
92 
93 	if (!basep || !entryp || !sizep)
94 		return 1;
95 
96 	opal.base = of_read_number(basep, basesz/4);
97 	opal.entry = of_read_number(entryp, entrysz/4);
98 	opal.size = of_read_number(sizep, runtimesz/4);
99 
100 	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
101 		 opal.base, basep, basesz);
102 	pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n",
103 		 opal.entry, entryp, entrysz);
104 	pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
105 		 opal.size, sizep, runtimesz);
106 
107 	powerpc_firmware_features |= FW_FEATURE_OPAL;
108 	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
109 		powerpc_firmware_features |= FW_FEATURE_OPALv2;
110 		powerpc_firmware_features |= FW_FEATURE_OPALv3;
111 		pr_info("OPAL V3 detected !\n");
112 	} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
113 		powerpc_firmware_features |= FW_FEATURE_OPALv2;
114 		pr_info("OPAL V2 detected !\n");
115 	} else {
116 		pr_info("OPAL V1 detected !\n");
117 	}
118 
119 	/* Reinit all cores with the right endian */
120 	opal_reinit_cores();
121 
122 	/* Restore some bits */
123 	if (cur_cpu_spec->cpu_restore)
124 		cur_cpu_spec->cpu_restore();
125 
126 	return 1;
127 }
128 
129 int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
130 				   const char *uname, int depth, void *data)
131 {
132 	int i, psize, size;
133 	const __be32 *prop;
134 
135 	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
136 		return 0;
137 
138 	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
139 
140 	if (!prop)
141 		return 1;
142 
143 	pr_debug("Found machine check recoverable ranges.\n");
144 
145 	/*
146 	 * Calculate number of available entries.
147 	 *
148 	 * Each recoverable address range entry is (start address, len,
149 	 * recovery address), 2 cells each for start and recovery address,
150 	 * 1 cell for len, totalling 5 cells per entry.
151 	 */
152 	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
153 
154 	/* Sanity check */
155 	if (!mc_recoverable_range_len)
156 		return 1;
157 
158 	/* Size required to hold all the entries. */
159 	size = mc_recoverable_range_len *
160 			sizeof(struct mcheck_recoverable_range);
161 
162 	/*
163 	 * Allocate a buffer to hold the MC recoverable ranges. We would be
164 	 * accessing them in real mode, hence it needs to be within
165 	 * RMO region.
166 	 */
167 	mc_recoverable_range =__va(memblock_alloc_base(size, __alignof__(u64),
168 							ppc64_rma_size));
169 	memset(mc_recoverable_range, 0, size);
170 
171 	for (i = 0; i < mc_recoverable_range_len; i++) {
172 		mc_recoverable_range[i].start_addr =
173 					of_read_number(prop + (i * 5) + 0, 2);
174 		mc_recoverable_range[i].end_addr =
175 					mc_recoverable_range[i].start_addr +
176 					of_read_number(prop + (i * 5) + 2, 1);
177 		mc_recoverable_range[i].recover_addr =
178 					of_read_number(prop + (i * 5) + 3, 2);
179 
180 		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
181 				mc_recoverable_range[i].start_addr,
182 				mc_recoverable_range[i].end_addr,
183 				mc_recoverable_range[i].recover_addr);
184 	}
185 	return 1;
186 }
187 
/*
 * Early initcall that registers exception handler glue with OPAL.
 *
 * Only does work on big-endian builds; on other builds it just returns 0.
 * On big-endian builds it returns -ENODEV when the OPAL firmware feature
 * is not set, 0 otherwise. The results of the OPAL registration calls
 * themselves are not checked.
 */
static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
	u64 glue;

	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
		return -ENODEV;

	/* Hookup some exception handlers except machine check. We use the
	 * fwnmi area at 0x7000 to provide the glue space to OPAL
	 */
	glue = 0x7000;

	/*
	 * Check if we are running on newer firmware that exports
	 * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
	 * the HMI interrupt and we catch it directly in Linux.
	 *
	 * For older firmware (i.e currently released POWER8 System Firmware
	 * as of today <= SV810_087), we fallback to old behavior and let OPAL
	 * patch the HMI vector and handle it inside OPAL firmware.
	 *
	 * For newer firmware (in development/yet to be released) we will
	 * start catching/handling HMI directly in Linux.
	 */
	if (!opal_check_token(OPAL_HANDLE_HMI)) {
		pr_info("Old firmware detected, OPAL handles HMIs.\n");
		opal_register_exception_handler(
				OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
				0, glue);
		/* Advance so the softpatch handler gets its own 128-byte slot */
		glue += 128;
	}

	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif

	return 0;
}
machine_early_initcall(powernv, opal_register_exception_handlers);
227 
228 int opal_notifier_register(struct notifier_block *nb)
229 {
230 	if (!nb) {
231 		pr_warning("%s: Invalid argument (%p)\n",
232 			   __func__, nb);
233 		return -EINVAL;
234 	}
235 
236 	atomic_notifier_chain_register(&opal_notifier_head, nb);
237 	return 0;
238 }
239 EXPORT_SYMBOL_GPL(opal_notifier_register);
240 
241 int opal_notifier_unregister(struct notifier_block *nb)
242 {
243 	if (!nb) {
244 		pr_warning("%s: Invalid argument (%p)\n",
245 			   __func__, nb);
246 		return -EINVAL;
247 	}
248 
249 	atomic_notifier_chain_unregister(&opal_notifier_head, nb);
250 	return 0;
251 }
252 EXPORT_SYMBOL_GPL(opal_notifier_unregister);
253 
254 static void opal_do_notifier(uint64_t events)
255 {
256 	unsigned long flags;
257 	uint64_t changed_mask;
258 
259 	if (atomic_read(&opal_notifier_hold))
260 		return;
261 
262 	spin_lock_irqsave(&opal_notifier_lock, flags);
263 	changed_mask = last_notified_mask ^ events;
264 	last_notified_mask = events;
265 	spin_unlock_irqrestore(&opal_notifier_lock, flags);
266 
267 	/*
268 	 * We feed with the event bits and changed bits for
269 	 * enough information to the callback.
270 	 */
271 	atomic_notifier_call_chain(&opal_notifier_head,
272 				   events, (void *)changed_mask);
273 }
274 
275 void opal_notifier_update_evt(uint64_t evt_mask,
276 			      uint64_t evt_val)
277 {
278 	unsigned long flags;
279 
280 	spin_lock_irqsave(&opal_notifier_lock, flags);
281 	last_notified_mask &= ~evt_mask;
282 	last_notified_mask |= evt_val;
283 	spin_unlock_irqrestore(&opal_notifier_lock, flags);
284 }
285 
286 void opal_notifier_enable(void)
287 {
288 	int64_t rc;
289 	__be64 evt = 0;
290 
291 	atomic_set(&opal_notifier_hold, 0);
292 
293 	/* Process pending events */
294 	rc = opal_poll_events(&evt);
295 	if (rc == OPAL_SUCCESS && evt)
296 		opal_do_notifier(be64_to_cpu(evt));
297 }
298 
/*
 * Hold off event notification: while the hold flag is set,
 * opal_do_notifier() returns without calling the notifier chain.
 */
void opal_notifier_disable(void)
{
	atomic_set(&opal_notifier_hold, 1);
}
303 
304 /*
305  * Opal message notifier based on message type. Allow subscribers to get
306  * notified for specific messgae type.
307  */
308 int opal_message_notifier_register(enum opal_msg_type msg_type,
309 					struct notifier_block *nb)
310 {
311 	if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
312 		pr_warning("%s: Invalid arguments, msg_type:%d\n",
313 			   __func__, msg_type);
314 		return -EINVAL;
315 	}
316 
317 	return atomic_notifier_chain_register(
318 				&opal_msg_notifier_head[msg_type], nb);
319 }
320 
321 int opal_message_notifier_unregister(enum opal_msg_type msg_type,
322 				     struct notifier_block *nb)
323 {
324 	return atomic_notifier_chain_unregister(
325 			&opal_msg_notifier_head[msg_type], nb);
326 }
327 
/*
 * Call every subscriber registered for @msg_type, passing the type as the
 * action value and @msg as the data pointer.
 *
 * @msg_type indexes opal_msg_notifier_head[] without a bounds check here;
 * the caller in this file checks it against OPAL_MSG_TYPE_MAX first.
 */
static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
	/* notify subscribers */
	atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
					msg_type, msg);
}
334 
335 static void opal_handle_message(void)
336 {
337 	s64 ret;
338 	/*
339 	 * TODO: pre-allocate a message buffer depending on opal-msg-size
340 	 * value in /proc/device-tree.
341 	 */
342 	static struct opal_msg msg;
343 	u32 type;
344 
345 	ret = opal_get_msg(__pa(&msg), sizeof(msg));
346 	/* No opal message pending. */
347 	if (ret == OPAL_RESOURCE)
348 		return;
349 
350 	/* check for errors. */
351 	if (ret) {
352 		pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
353 				__func__, ret);
354 		return;
355 	}
356 
357 	type = be32_to_cpu(msg.msg_type);
358 
359 	/* Sanity check */
360 	if (type >= OPAL_MSG_TYPE_MAX) {
361 		pr_warning("%s: Unknown message type: %u\n", __func__, type);
362 		return;
363 	}
364 	opal_message_do_notify(type, (void *)&msg);
365 }
366 
367 static int opal_message_notify(struct notifier_block *nb,
368 			  unsigned long events, void *change)
369 {
370 	if (events & OPAL_EVENT_MSG_PENDING)
371 		opal_handle_message();
372 	return 0;
373 }
374 
/*
 * Notifier block that hooks opal_message_notify() into the OPAL event
 * chain; registered by opal_message_init().
 */
static struct notifier_block opal_message_nb = {
	.notifier_call	= opal_message_notify,
	.next		= NULL,
	.priority	= 0,
};
380 
381 static int __init opal_message_init(void)
382 {
383 	int ret, i;
384 
385 	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
386 		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
387 
388 	ret = opal_notifier_register(&opal_message_nb);
389 	if (ret) {
390 		pr_err("%s: Can't register OPAL event notifier (%d)\n",
391 		       __func__, ret);
392 		return ret;
393 	}
394 	return 0;
395 }
396 machine_early_initcall(powernv, opal_message_init);
397 
398 int opal_get_chars(uint32_t vtermno, char *buf, int count)
399 {
400 	s64 rc;
401 	__be64 evt, len;
402 
403 	if (!opal.entry)
404 		return -ENODEV;
405 	opal_poll_events(&evt);
406 	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
407 		return 0;
408 	len = cpu_to_be64(count);
409 	rc = opal_console_read(vtermno, &len, buf);
410 	if (rc == OPAL_SUCCESS)
411 		return be64_to_cpu(len);
412 	return 0;
413 }
414 
415 int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
416 {
417 	int written = 0;
418 	__be64 olen;
419 	s64 len, rc;
420 	unsigned long flags;
421 	__be64 evt;
422 
423 	if (!opal.entry)
424 		return -ENODEV;
425 
426 	/* We want put_chars to be atomic to avoid mangling of hvsi
427 	 * packets. To do that, we first test for room and return
428 	 * -EAGAIN if there isn't enough.
429 	 *
430 	 * Unfortunately, opal_console_write_buffer_space() doesn't
431 	 * appear to work on opal v1, so we just assume there is
432 	 * enough room and be done with it
433 	 */
434 	spin_lock_irqsave(&opal_write_lock, flags);
435 	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
436 		rc = opal_console_write_buffer_space(vtermno, &olen);
437 		len = be64_to_cpu(olen);
438 		if (rc || len < total_len) {
439 			spin_unlock_irqrestore(&opal_write_lock, flags);
440 			/* Closed -> drop characters */
441 			if (rc)
442 				return total_len;
443 			opal_poll_events(NULL);
444 			return -EAGAIN;
445 		}
446 	}
447 
448 	/* We still try to handle partial completions, though they
449 	 * should no longer happen.
450 	 */
451 	rc = OPAL_BUSY;
452 	while(total_len > 0 && (rc == OPAL_BUSY ||
453 				rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
454 		olen = cpu_to_be64(total_len);
455 		rc = opal_console_write(vtermno, &olen, data);
456 		len = be64_to_cpu(olen);
457 
458 		/* Closed or other error drop */
459 		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
460 		    rc != OPAL_BUSY_EVENT) {
461 			written = total_len;
462 			break;
463 		}
464 		if (rc == OPAL_SUCCESS) {
465 			total_len -= len;
466 			data += len;
467 			written += len;
468 		}
469 		/* This is a bit nasty but we need that for the console to
470 		 * flush when there aren't any interrupts. We will clean
471 		 * things a bit later to limit that to synchronous path
472 		 * such as the kernel console and xmon/udbg
473 		 */
474 		do
475 			opal_poll_events(&evt);
476 		while(rc == OPAL_SUCCESS &&
477 			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
478 	}
479 	spin_unlock_irqrestore(&opal_write_lock, flags);
480 	return written;
481 }
482 
483 static int opal_recover_mce(struct pt_regs *regs,
484 					struct machine_check_event *evt)
485 {
486 	int recovered = 0;
487 	uint64_t ea = get_mce_fault_addr(evt);
488 
489 	if (!(regs->msr & MSR_RI)) {
490 		/* If MSR_RI isn't set, we cannot recover */
491 		recovered = 0;
492 	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
493 		/* Platform corrected itself */
494 		recovered = 1;
495 	} else if (ea && !is_kernel_addr(ea)) {
496 		/*
497 		 * Faulting address is not in kernel text. We should be fine.
498 		 * We need to find which process uses this address.
499 		 * For now, kill the task if we have received exception when
500 		 * in userspace.
501 		 *
502 		 * TODO: Queue up this address for hwpoisioning later.
503 		 */
504 		if (user_mode(regs) && !is_global_init(current)) {
505 			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
506 			recovered = 1;
507 		} else
508 			recovered = 0;
509 	} else if (user_mode(regs) && !is_global_init(current) &&
510 		evt->severity == MCE_SEV_ERROR_SYNC) {
511 		/*
512 		 * If we have received a synchronous error when in userspace
513 		 * kill the task.
514 		 */
515 		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
516 		recovered = 1;
517 	}
518 	return recovered;
519 }
520 
521 int opal_machine_check(struct pt_regs *regs)
522 {
523 	struct machine_check_event evt;
524 
525 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
526 		return 0;
527 
528 	/* Print things out */
529 	if (evt.version != MCE_V1) {
530 		pr_err("Machine Check Exception, Unknown event version %d !\n",
531 		       evt.version);
532 		return 0;
533 	}
534 	machine_check_print_event_info(&evt);
535 
536 	if (opal_recover_mce(regs, &evt))
537 		return 1;
538 	return 0;
539 }
540 
541 /* Early hmi handler called in real mode. */
542 int opal_hmi_exception_early(struct pt_regs *regs)
543 {
544 	s64 rc;
545 
546 	/*
547 	 * call opal hmi handler. Pass paca address as token.
548 	 * The return value OPAL_SUCCESS is an indication that there is
549 	 * an HMI event generated waiting to pull by Linux.
550 	 */
551 	rc = opal_handle_hmi();
552 	if (rc == OPAL_SUCCESS) {
553 		local_paca->hmi_event_available = 1;
554 		return 1;
555 	}
556 	return 0;
557 }
558 
559 /* HMI exception handler called in virtual mode during check_irq_replay. */
560 int opal_handle_hmi_exception(struct pt_regs *regs)
561 {
562 	s64 rc;
563 	__be64 evt = 0;
564 
565 	/*
566 	 * Check if HMI event is available.
567 	 * if Yes, then call opal_poll_events to pull opal messages and
568 	 * process them.
569 	 */
570 	if (!local_paca->hmi_event_available)
571 		return 0;
572 
573 	local_paca->hmi_event_available = 0;
574 	rc = opal_poll_events(&evt);
575 	if (rc == OPAL_SUCCESS && evt)
576 		opal_do_notifier(be64_to_cpu(evt));
577 
578 	return 1;
579 }
580 
581 static uint64_t find_recovery_address(uint64_t nip)
582 {
583 	int i;
584 
585 	for (i = 0; i < mc_recoverable_range_len; i++)
586 		if ((nip >= mc_recoverable_range[i].start_addr) &&
587 		    (nip < mc_recoverable_range[i].end_addr))
588 		    return mc_recoverable_range[i].recover_addr;
589 	return 0;
590 }
591 
592 bool opal_mce_check_early_recovery(struct pt_regs *regs)
593 {
594 	uint64_t recover_addr = 0;
595 
596 	if (!opal.base || !opal.size)
597 		goto out;
598 
599 	if ((regs->nip >= opal.base) &&
600 			(regs->nip <= (opal.base + opal.size)))
601 		recover_addr = find_recovery_address(regs->nip);
602 
603 	/*
604 	 * Setup regs->nip to rfi into fixup address.
605 	 */
606 	if (recover_addr)
607 		regs->nip = recover_addr;
608 
609 out:
610 	return !!recover_addr;
611 }
612 
/*
 * Handler for the interrupts requested in opal_irq_init(): passes the
 * hardware IRQ number to OPAL and forwards the event bits it reports to
 * the OPAL event notifier chain. Always returns IRQ_HANDLED.
 *
 * NOTE(review): the result of opal_handle_interrupt() is not checked and
 * 'events' is not initialised, so this relies on OPAL always filling in
 * 'events' -- confirm.
 */
static irqreturn_t opal_interrupt(int irq, void *data)
{
	__be64 events;

	opal_handle_interrupt(virq_to_hw(irq), &events);

	opal_do_notifier(be64_to_cpu(events));

	return IRQ_HANDLED;
}
623 
624 static int opal_sysfs_init(void)
625 {
626 	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
627 	if (!opal_kobj) {
628 		pr_warn("kobject_create_and_add opal failed\n");
629 		return -ENOMEM;
630 	}
631 
632 	return 0;
633 }
634 
/*
 * sysfs read handler for the "symbol_map" binary attribute: copies up to
 * @count bytes, starting at offset @off, from the buffer at
 * bin_attr->private (bin_attr->size bytes long) into @buf and returns the
 * result of memory_read_from_buffer().
 */
static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
			       struct bin_attribute *bin_attr,
			       char *buf, loff_t off, size_t count)
{
	return memory_read_from_buffer(buf, count, &off, bin_attr->private,
				       bin_attr->size);
}

/* Declared with size 0; private and size are set in opal_export_symmap() */
static BIN_ATTR_RO(symbol_map, 0);
644 
645 static void opal_export_symmap(void)
646 {
647 	const __be64 *syms;
648 	unsigned int size;
649 	struct device_node *fw;
650 	int rc;
651 
652 	fw = of_find_node_by_path("/ibm,opal/firmware");
653 	if (!fw)
654 		return;
655 	syms = of_get_property(fw, "symbol-map", &size);
656 	if (!syms || size != 2 * sizeof(__be64))
657 		return;
658 
659 	/* Setup attributes */
660 	bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
661 	bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
662 
663 	rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
664 	if (rc)
665 		pr_warn("Error %d creating OPAL symbols file\n", rc);
666 }
667 
668 static void __init opal_dump_region_init(void)
669 {
670 	void *addr;
671 	uint64_t size;
672 	int rc;
673 
674 	if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
675 		return;
676 
677 	/* Register kernel log buffer */
678 	addr = log_buf_addr_get();
679 	if (addr == NULL)
680 		return;
681 
682 	size = log_buf_len_get();
683 	if (size == 0)
684 		return;
685 
686 	rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
687 				       __pa(addr), size);
688 	/* Don't warn if this is just an older OPAL that doesn't
689 	 * know about that call
690 	 */
691 	if (rc && rc != OPAL_UNSUPPORTED)
692 		pr_warn("DUMP: Failed to register kernel log buffer. "
693 			"rc = %d\n", rc);
694 }
695 
696 static void opal_flash_init(struct device_node *opal_node)
697 {
698 	struct device_node *np;
699 
700 	for_each_child_of_node(opal_node, np)
701 		if (of_device_is_compatible(np, "ibm,opal-flash"))
702 			of_platform_device_create(np, NULL, NULL);
703 }
704 
705 static void opal_ipmi_init(struct device_node *opal_node)
706 {
707 	struct device_node *np;
708 
709 	for_each_child_of_node(opal_node, np)
710 		if (of_device_is_compatible(np, "ibm,opal-ipmi"))
711 			of_platform_device_create(np, NULL, NULL);
712 }
713 
/*
 * Create a platform device for every device-tree node compatible with
 * "ibm,opal-i2c". The result of each creation is not checked.
 */
static void opal_i2c_create_devs(void)
{
	struct device_node *np;

	for_each_compatible_node(np, NULL, "ibm,opal-i2c")
		of_platform_device_create(np, NULL, NULL);
}
721 
722 static void __init opal_irq_init(struct device_node *dn)
723 {
724 	const __be32 *irqs;
725 	int i, irqlen;
726 
727 	/* Get interrupt property */
728 	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
729 	opal_irq_count = irqs ? (irqlen / 4) : 0;
730 	pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count);
731 	if (!opal_irq_count)
732 		return;
733 
734 	/* Install interrupt handlers */
735 	opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
736 	for (i = 0; irqs && i < opal_irq_count; i++, irqs++) {
737 		unsigned int irq, virq;
738 		int rc;
739 
740 		/* Get hardware and virtual IRQ */
741 		irq = be32_to_cpup(irqs);
742 		virq = irq_create_mapping(NULL, irq);
743 		if (virq == NO_IRQ) {
744 			pr_warn("Failed to map irq 0x%x\n", irq);
745 			continue;
746 		}
747 
748 		/* Install interrupt handler */
749 		rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
750 		if (rc) {
751 			irq_dispose_mapping(virq);
752 			pr_warn("Error %d requesting irq %d (0x%x)\n",
753 				 rc, virq, irq);
754 			continue;
755 		}
756 
757 		/* Cache IRQ */
758 		opal_irqs[i] = virq;
759 	}
760 }
761 
/*
 * Body of the "kopald" kernel thread: polls OPAL for events, then sleeps
 * for opal_heartbeat milliseconds, until the thread is asked to stop.
 * The thread marks itself freezable and checks for a freeze request at
 * the top of every iteration. Always returns 0.
 */
static int kopald(void *unused)
{
	set_freezable();
	do {
		try_to_freeze();
		/* NULL: the resulting event mask is not needed here */
		opal_poll_events(NULL);
		msleep_interruptible(opal_heartbeat);
	} while (!kthread_should_stop());

	return 0;
}
773 
774 static void opal_init_heartbeat(void)
775 {
776 	/* Old firwmware, we assume the HVC heartbeat is sufficient */
777 	if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
778 				 &opal_heartbeat) != 0)
779 		opal_heartbeat = 0;
780 
781 	if (opal_heartbeat)
782 		kthread_run(kopald, NULL, "kopald");
783 }
784 
/*
 * Subsystem initcall: look up the "/ibm,opal" node and bring up the OPAL
 * services that depend on it (console and i2c platform devices, heartbeat
 * thread, interrupts, sysfs interfaces, IPMI and flash devices).
 *
 * Returns -ENODEV when the OPAL node does not exist, 0 otherwise; failures
 * of the individual setup steps do not change the return value.
 */
static int __init opal_init(void)
{
	struct device_node *np, *consoles;
	int rc;

	opal_node = of_find_node_by_path("/ibm,opal");
	if (!opal_node) {
		pr_warn("Device node not found\n");
		return -ENODEV;
	}

	/*
	 * Register OPAL consoles if any ports. With OPALv2 they live under
	 * a dedicated "consoles" node, otherwise directly under the OPAL node.
	 */
	if (firmware_has_feature(FW_FEATURE_OPALv2))
		consoles = of_find_node_by_path("/ibm,opal/consoles");
	else
		consoles = of_node_get(opal_node);
	if (consoles) {
		for_each_child_of_node(consoles, np) {
			if (strcmp(np->name, "serial"))
				continue;
			of_platform_device_create(np, NULL, NULL);
		}
		of_node_put(consoles);
	}

	/* Create i2c platform devices */
	opal_i2c_create_devs();

	/* Setup a heartbeat thread if requested by OPAL */
	opal_init_heartbeat();

	/* Find all OPAL interrupts and request them */
	opal_irq_init(opal_node);

	/* Create "opal" kobject under /sys/firmware */
	rc = opal_sysfs_init();
	if (rc == 0) {
		/* Export symbol map to userspace */
		opal_export_symmap();
		/* Setup dump region interface */
		opal_dump_region_init();
		/*
		 * Setup error log interface.
		 * NOTE(review): rc is assigned here but never looked at again.
		 */
		rc = opal_elog_init();
		/* Setup code update interface */
		opal_flash_update_init();
		/* Setup platform dump extract interface */
		opal_platform_dump_init();
		/* Setup system parameters interface */
		opal_sys_param_init();
		/* Setup message log interface. */
		opal_msglog_init();
	}

	/* Initialize OPAL IPMI backend */
	opal_ipmi_init(opal_node);

	/* Create platform devices for OPAL flash nodes */
	opal_flash_init(opal_node);

	return 0;
}
machine_subsys_initcall(powernv, opal_init);
846 
/*
 * Quiesce OPAL before shutdown/reboot: free the OPAL interrupts, wait for
 * OPAL to finish outstanding work, then unregister the log buffer dump
 * region if the firmware supports that call.
 *
 * NOTE(review): opal_irqs[] is indexed without a NULL check, so this
 * relies on the table having been allocated whenever opal_irq_count is
 * non-zero -- confirm against opal_irq_init().
 */
void opal_shutdown(void)
{
	unsigned int i;
	long rc = OPAL_BUSY;

	/* First free interrupts, which will also mask them */
	for (i = 0; i < opal_irq_count; i++) {
		/* A zero slot means the interrupt was never requested */
		if (opal_irqs[i])
			free_irq(opal_irqs[i], NULL);
		opal_irqs[i] = 0;
	}

	/*
	 * Then sync with OPAL which ensure anything that can
	 * potentially write to our memory has completed such
	 * as an ongoing dump retrieval
	 */
	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		rc = opal_sync_host_reboot();
		/*
		 * OPAL_BUSY: poll so OPAL can make progress. Any other
		 * result, including the final one that ends the loop,
		 * is followed by a 10ms delay.
		 */
		if (rc == OPAL_BUSY)
			opal_poll_events(NULL);
		else
			mdelay(10);
	}

	/* Unregister memory dump region */
	if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
		opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
}
876 
/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
/* OPAL IPMI and flash calls, exported GPL-only for use by modules */
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
EXPORT_SYMBOL_GPL(opal_flash_read);
EXPORT_SYMBOL_GPL(opal_flash_write);
EXPORT_SYMBOL_GPL(opal_flash_erase);
884 
/*
 * Convert a region of vmalloc memory to an opal sg list.
 *
 * @vmalloc_addr: start of the vmalloc region
 * @vmalloc_size: size of the region in bytes
 *
 * The region is described in chunks of at most PAGE_SIZE, one sg entry
 * per chunk holding the chunk's physical address and length as big-endian
 * 64-bit values. Entries are stored in page-sized nodes that are chained
 * through the physical address in ->next.
 *
 * Returns the first node of the list, or NULL if a node could not be
 * allocated (any nodes already built are freed). The caller releases the
 * list with opal_free_sg_list().
 */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
					     unsigned long vmalloc_size)
{
	struct opal_sg_list *sg, *first = NULL;
	unsigned long i = 0;

	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!sg)
		goto nomem;

	first = sg;

	while (vmalloc_size > 0) {
		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
		uint64_t length = min(vmalloc_size, PAGE_SIZE);

		sg->entry[i].data = cpu_to_be64(data);
		sg->entry[i].length = cpu_to_be64(length);
		i++;

		/* Current node is full: finalise it and chain a fresh one */
		if (i >= SG_ENTRIES_PER_NODE) {
			struct opal_sg_list *next;

			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
			if (!next)
				goto nomem;

			/*
			 * Node length covers the used entries plus 16 bytes,
			 * presumably the length and next header fields --
			 * TODO confirm against struct opal_sg_list.
			 */
			sg->length = cpu_to_be64(
					i * sizeof(struct opal_sg_entry) + 16);
			i = 0;
			sg->next = cpu_to_be64(__pa(next));
			sg = next;
		}

		vmalloc_addr += length;
		vmalloc_size -= length;
	}

	/* Finalise the last (possibly partially filled or empty) node */
	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);

	return first;

nomem:
	pr_err("%s : Failed to allocate memory\n", __func__);
	opal_free_sg_list(first);
	return NULL;
}
933 
934 void opal_free_sg_list(struct opal_sg_list *sg)
935 {
936 	while (sg) {
937 		uint64_t next = be64_to_cpu(sg->next);
938 
939 		kfree(sg);
940 
941 		if (next)
942 			sg = __va(next);
943 		else
944 			sg = NULL;
945 	}
946 }
947 
948 int opal_error_code(int rc)
949 {
950 	switch (rc) {
951 	case OPAL_SUCCESS:		return 0;
952 
953 	case OPAL_PARAMETER:		return -EINVAL;
954 	case OPAL_ASYNC_COMPLETION:	return -EINPROGRESS;
955 	case OPAL_BUSY_EVENT:		return -EBUSY;
956 	case OPAL_NO_MEM:		return -ENOMEM;
957 
958 	case OPAL_UNSUPPORTED:		return -EIO;
959 	case OPAL_HARDWARE:		return -EIO;
960 	case OPAL_INTERNAL_ERROR:	return -EIO;
961 	default:
962 		pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
963 		return -EIO;
964 	}
965 }
966 
/* OPAL calls exported GPL-only so that modules can use them */
EXPORT_SYMBOL_GPL(opal_poll_events);
EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
EXPORT_SYMBOL_GPL(opal_i2c_request);
973