1 /*
2  * PowerNV OPAL high level interfaces
3  *
4  * Copyright 2011 IBM Corp.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11 
12 #define pr_fmt(fmt)	"opal: " fmt
13 
14 #include <linux/printk.h>
15 #include <linux/types.h>
16 #include <linux/of.h>
17 #include <linux/of_fdt.h>
18 #include <linux/of_platform.h>
19 #include <linux/interrupt.h>
20 #include <linux/notifier.h>
21 #include <linux/slab.h>
22 #include <linux/sched.h>
23 #include <linux/kobject.h>
24 #include <linux/delay.h>
25 #include <linux/memblock.h>
26 #include <linux/kthread.h>
27 #include <linux/freezer.h>
28 
29 #include <asm/machdep.h>
30 #include <asm/opal.h>
31 #include <asm/firmware.h>
32 #include <asm/mce.h>
33 
34 #include "powernv.h"
35 
/* Kobject for /sys/firmware/opal; created by opal_sysfs_init(). */
struct kobject *opal_kobj;

/*
 * Location of the OPAL firmware, read from the "ibm,opal" flat device-tree
 * node by early_init_dt_scan_opal().
 */
struct opal {
	u64 base;	/* "opal-base-address" property */
	u64 entry;	/* "opal-entry-address" property */
	u64 size;	/* "opal-runtime-size" property */
} opal;

/*
 * One machine-check recoverable range. end_addr is computed as
 * start_addr + len when the "mcheck-recoverable-ranges" property is parsed;
 * lookups treat the range as [start_addr, end_addr).
 */
struct mcheck_recoverable_range {
	u64 start_addr;
	u64 end_addr;
	u64 recover_addr;
};

/* Table of recoverable ranges and its entry count (0 when none were found). */
static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

/* The /ibm,opal device-tree node, looked up in opal_init(). */
struct device_node *opal_node;
/* Serialises console writes in opal_put_chars(). */
static DEFINE_SPINLOCK(opal_write_lock);
/* Virtual IRQ numbers requested for OPAL; a 0 slot means "not requested". */
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
/* Chain notified with the OPAL event bits by opal_do_notifier(). */
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
/* One notifier chain per OPAL message type, indexed by message type. */
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
/* Protects last_notified_mask. */
static DEFINE_SPINLOCK(opal_notifier_lock);
/* Event bits passed to the most recent notification. */
static uint64_t last_notified_mask = 0x0ul;
/* Non-zero while event notification is disabled (see opal_notifier_disable()). */
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
/* Poll period in ms from "ibm,heartbeat-ms"; 0 means no kopald thread. */
static uint32_t opal_heartbeat;
64 
65 static void opal_reinit_cores(void)
66 {
67 	/* Do the actual re-init, This will clobber all FPRs, VRs, etc...
68 	 *
69 	 * It will preserve non volatile GPRs and HSPRG0/1. It will
70 	 * also restore HIDs and other SPRs to their original value
71 	 * but it might clobber a bunch.
72 	 */
73 #ifdef __BIG_ENDIAN__
74 	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
75 #else
76 	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
77 #endif
78 }
79 
80 int __init early_init_dt_scan_opal(unsigned long node,
81 				   const char *uname, int depth, void *data)
82 {
83 	const void *basep, *entryp, *sizep;
84 	int basesz, entrysz, runtimesz;
85 
86 	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
87 		return 0;
88 
89 	basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
90 	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
91 	sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
92 
93 	if (!basep || !entryp || !sizep)
94 		return 1;
95 
96 	opal.base = of_read_number(basep, basesz/4);
97 	opal.entry = of_read_number(entryp, entrysz/4);
98 	opal.size = of_read_number(sizep, runtimesz/4);
99 
100 	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
101 		 opal.base, basep, basesz);
102 	pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n",
103 		 opal.entry, entryp, entrysz);
104 	pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
105 		 opal.size, sizep, runtimesz);
106 
107 	powerpc_firmware_features |= FW_FEATURE_OPAL;
108 	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
109 		powerpc_firmware_features |= FW_FEATURE_OPALv2;
110 		powerpc_firmware_features |= FW_FEATURE_OPALv3;
111 		pr_info("OPAL V3 detected !\n");
112 	} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
113 		powerpc_firmware_features |= FW_FEATURE_OPALv2;
114 		pr_info("OPAL V2 detected !\n");
115 	} else {
116 		pr_info("OPAL V1 detected !\n");
117 	}
118 
119 	/* Reinit all cores with the right endian */
120 	opal_reinit_cores();
121 
122 	/* Restore some bits */
123 	if (cur_cpu_spec->cpu_restore)
124 		cur_cpu_spec->cpu_restore();
125 
126 	return 1;
127 }
128 
129 int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
130 				   const char *uname, int depth, void *data)
131 {
132 	int i, psize, size;
133 	const __be32 *prop;
134 
135 	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
136 		return 0;
137 
138 	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
139 
140 	if (!prop)
141 		return 1;
142 
143 	pr_debug("Found machine check recoverable ranges.\n");
144 
145 	/*
146 	 * Calculate number of available entries.
147 	 *
148 	 * Each recoverable address range entry is (start address, len,
149 	 * recovery address), 2 cells each for start and recovery address,
150 	 * 1 cell for len, totalling 5 cells per entry.
151 	 */
152 	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
153 
154 	/* Sanity check */
155 	if (!mc_recoverable_range_len)
156 		return 1;
157 
158 	/* Size required to hold all the entries. */
159 	size = mc_recoverable_range_len *
160 			sizeof(struct mcheck_recoverable_range);
161 
162 	/*
163 	 * Allocate a buffer to hold the MC recoverable ranges. We would be
164 	 * accessing them in real mode, hence it needs to be within
165 	 * RMO region.
166 	 */
167 	mc_recoverable_range =__va(memblock_alloc_base(size, __alignof__(u64),
168 							ppc64_rma_size));
169 	memset(mc_recoverable_range, 0, size);
170 
171 	for (i = 0; i < mc_recoverable_range_len; i++) {
172 		mc_recoverable_range[i].start_addr =
173 					of_read_number(prop + (i * 5) + 0, 2);
174 		mc_recoverable_range[i].end_addr =
175 					mc_recoverable_range[i].start_addr +
176 					of_read_number(prop + (i * 5) + 2, 1);
177 		mc_recoverable_range[i].recover_addr =
178 					of_read_number(prop + (i * 5) + 3, 2);
179 
180 		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
181 				mc_recoverable_range[i].start_addr,
182 				mc_recoverable_range[i].end_addr,
183 				mc_recoverable_range[i].recover_addr);
184 	}
185 	return 1;
186 }
187 
188 static int __init opal_register_exception_handlers(void)
189 {
190 #ifdef __BIG_ENDIAN__
191 	u64 glue;
192 
193 	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
194 		return -ENODEV;
195 
196 	/* Hookup some exception handlers except machine check. We use the
197 	 * fwnmi area at 0x7000 to provide the glue space to OPAL
198 	 */
199 	glue = 0x7000;
200 
201 	/*
202 	 * Check if we are running on newer firmware that exports
203 	 * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
204 	 * the HMI interrupt and we catch it directly in Linux.
205 	 *
206 	 * For older firmware (i.e currently released POWER8 System Firmware
207 	 * as of today <= SV810_087), we fallback to old behavior and let OPAL
208 	 * patch the HMI vector and handle it inside OPAL firmware.
209 	 *
210 	 * For newer firmware (in development/yet to be released) we will
211 	 * start catching/handling HMI directly in Linux.
212 	 */
213 	if (!opal_check_token(OPAL_HANDLE_HMI)) {
214 		pr_info("Old firmware detected, OPAL handles HMIs.\n");
215 		opal_register_exception_handler(
216 				OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
217 				0, glue);
218 		glue += 128;
219 	}
220 
221 	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
222 #endif
223 
224 	return 0;
225 }
226 machine_early_initcall(powernv, opal_register_exception_handlers);
227 
228 int opal_notifier_register(struct notifier_block *nb)
229 {
230 	if (!nb) {
231 		pr_warning("%s: Invalid argument (%p)\n",
232 			   __func__, nb);
233 		return -EINVAL;
234 	}
235 
236 	atomic_notifier_chain_register(&opal_notifier_head, nb);
237 	return 0;
238 }
239 EXPORT_SYMBOL_GPL(opal_notifier_register);
240 
241 int opal_notifier_unregister(struct notifier_block *nb)
242 {
243 	if (!nb) {
244 		pr_warning("%s: Invalid argument (%p)\n",
245 			   __func__, nb);
246 		return -EINVAL;
247 	}
248 
249 	atomic_notifier_chain_unregister(&opal_notifier_head, nb);
250 	return 0;
251 }
252 EXPORT_SYMBOL_GPL(opal_notifier_unregister);
253 
254 static void opal_do_notifier(uint64_t events)
255 {
256 	unsigned long flags;
257 	uint64_t changed_mask;
258 
259 	if (atomic_read(&opal_notifier_hold))
260 		return;
261 
262 	spin_lock_irqsave(&opal_notifier_lock, flags);
263 	changed_mask = last_notified_mask ^ events;
264 	last_notified_mask = events;
265 	spin_unlock_irqrestore(&opal_notifier_lock, flags);
266 
267 	/*
268 	 * We feed with the event bits and changed bits for
269 	 * enough information to the callback.
270 	 */
271 	atomic_notifier_call_chain(&opal_notifier_head,
272 				   events, (void *)changed_mask);
273 }
274 
275 void opal_notifier_update_evt(uint64_t evt_mask,
276 			      uint64_t evt_val)
277 {
278 	unsigned long flags;
279 
280 	spin_lock_irqsave(&opal_notifier_lock, flags);
281 	last_notified_mask &= ~evt_mask;
282 	last_notified_mask |= evt_val;
283 	spin_unlock_irqrestore(&opal_notifier_lock, flags);
284 }
285 
286 void opal_notifier_enable(void)
287 {
288 	int64_t rc;
289 	__be64 evt = 0;
290 
291 	atomic_set(&opal_notifier_hold, 0);
292 
293 	/* Process pending events */
294 	rc = opal_poll_events(&evt);
295 	if (rc == OPAL_SUCCESS && evt)
296 		opal_do_notifier(be64_to_cpu(evt));
297 }
298 
299 void opal_notifier_disable(void)
300 {
301 	atomic_set(&opal_notifier_hold, 1);
302 }
303 
304 /*
305  * Opal message notifier based on message type. Allow subscribers to get
306  * notified for specific messgae type.
307  */
308 int opal_message_notifier_register(enum OpalMessageType msg_type,
309 					struct notifier_block *nb)
310 {
311 	if (!nb) {
312 		pr_warning("%s: Invalid argument (%p)\n",
313 			   __func__, nb);
314 		return -EINVAL;
315 	}
316 	if (msg_type > OPAL_MSG_TYPE_MAX) {
317 		pr_warning("%s: Invalid message type argument (%d)\n",
318 			   __func__, msg_type);
319 		return -EINVAL;
320 	}
321 	return atomic_notifier_chain_register(
322 				&opal_msg_notifier_head[msg_type], nb);
323 }
324 
325 static void opal_message_do_notify(uint32_t msg_type, void *msg)
326 {
327 	/* notify subscribers */
328 	atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
329 					msg_type, msg);
330 }
331 
332 static void opal_handle_message(void)
333 {
334 	s64 ret;
335 	/*
336 	 * TODO: pre-allocate a message buffer depending on opal-msg-size
337 	 * value in /proc/device-tree.
338 	 */
339 	static struct opal_msg msg;
340 	u32 type;
341 
342 	ret = opal_get_msg(__pa(&msg), sizeof(msg));
343 	/* No opal message pending. */
344 	if (ret == OPAL_RESOURCE)
345 		return;
346 
347 	/* check for errors. */
348 	if (ret) {
349 		pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
350 				__func__, ret);
351 		return;
352 	}
353 
354 	type = be32_to_cpu(msg.msg_type);
355 
356 	/* Sanity check */
357 	if (type > OPAL_MSG_TYPE_MAX) {
358 		pr_warning("%s: Unknown message type: %u\n", __func__, type);
359 		return;
360 	}
361 	opal_message_do_notify(type, (void *)&msg);
362 }
363 
364 static int opal_message_notify(struct notifier_block *nb,
365 			  unsigned long events, void *change)
366 {
367 	if (events & OPAL_EVENT_MSG_PENDING)
368 		opal_handle_message();
369 	return 0;
370 }
371 
372 static struct notifier_block opal_message_nb = {
373 	.notifier_call	= opal_message_notify,
374 	.next		= NULL,
375 	.priority	= 0,
376 };
377 
378 static int __init opal_message_init(void)
379 {
380 	int ret, i;
381 
382 	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
383 		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
384 
385 	ret = opal_notifier_register(&opal_message_nb);
386 	if (ret) {
387 		pr_err("%s: Can't register OPAL event notifier (%d)\n",
388 		       __func__, ret);
389 		return ret;
390 	}
391 	return 0;
392 }
393 machine_early_initcall(powernv, opal_message_init);
394 
395 int opal_get_chars(uint32_t vtermno, char *buf, int count)
396 {
397 	s64 rc;
398 	__be64 evt, len;
399 
400 	if (!opal.entry)
401 		return -ENODEV;
402 	opal_poll_events(&evt);
403 	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
404 		return 0;
405 	len = cpu_to_be64(count);
406 	rc = opal_console_read(vtermno, &len, buf);
407 	if (rc == OPAL_SUCCESS)
408 		return be64_to_cpu(len);
409 	return 0;
410 }
411 
/*
 * Write @total_len bytes from @data to OPAL console @vtermno.
 *
 * Returns -ENODEV when no OPAL entry point is known and -EAGAIN when OPAL
 * (v2 or later) reports less than @total_len bytes of buffer space.
 * Otherwise returns the number of bytes accounted for: the bytes OPAL
 * accepted, or @total_len when the data is dropped because the console is
 * closed or a write returns an unexpected error.
 *
 * The space check and the write loop run under opal_write_lock with
 * interrupts disabled.
 */
int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
	int written = 0;
	__be64 olen;
	s64 len, rc;
	unsigned long flags;
	__be64 evt;

	if (!opal.entry)
		return -ENODEV;

	/* We want put_chars to be atomic to avoid mangling of hvsi
	 * packets. To do that, we first test for room and return
	 * -EAGAIN if there isn't enough.
	 *
	 * Unfortunately, opal_console_write_buffer_space() doesn't
	 * appear to work on opal v1, so we just assume there is
	 * enough room and be done with it
	 */
	spin_lock_irqsave(&opal_write_lock, flags);
	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
		rc = opal_console_write_buffer_space(vtermno, &olen);
		len = be64_to_cpu(olen);
		if (rc || len < total_len) {
			spin_unlock_irqrestore(&opal_write_lock, flags);
			/* Closed -> drop characters */
			if (rc)
				return total_len;
			/* Not enough room: poll OPAL, then ask the caller to retry */
			opal_poll_events(NULL);
			return -EAGAIN;
		}
	}

	/* We still try to handle partial completions, though they
	 * should no longer happen.
	 */
	rc = OPAL_BUSY;	/* seed value so the loop body runs at least once */
	while(total_len > 0 && (rc == OPAL_BUSY ||
				rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
		/* olen carries the requested length in and the written length out */
		olen = cpu_to_be64(total_len);
		rc = opal_console_write(vtermno, &olen, data);
		len = be64_to_cpu(olen);

		/* Closed or other error drop */
		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
		    rc != OPAL_BUSY_EVENT) {
			/* Report everything as consumed so the caller does not retry */
			written = total_len;
			break;
		}
		/* Only a successful write advances through the data */
		if (rc == OPAL_SUCCESS) {
			total_len -= len;
			data += len;
			written += len;
		}
		/* This is a bit nasty but we need that for the console to
		 * flush when there aren't any interrupts. We will clean
		 * things a bit later to limit that to synchronous path
		 * such as the kernel console and xmon/udbg
		 */
		do
			opal_poll_events(&evt);
		while(rc == OPAL_SUCCESS &&
			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
	}
	spin_unlock_irqrestore(&opal_write_lock, flags);
	return written;
}
479 
480 static int opal_recover_mce(struct pt_regs *regs,
481 					struct machine_check_event *evt)
482 {
483 	int recovered = 0;
484 	uint64_t ea = get_mce_fault_addr(evt);
485 
486 	if (!(regs->msr & MSR_RI)) {
487 		/* If MSR_RI isn't set, we cannot recover */
488 		recovered = 0;
489 	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
490 		/* Platform corrected itself */
491 		recovered = 1;
492 	} else if (ea && !is_kernel_addr(ea)) {
493 		/*
494 		 * Faulting address is not in kernel text. We should be fine.
495 		 * We need to find which process uses this address.
496 		 * For now, kill the task if we have received exception when
497 		 * in userspace.
498 		 *
499 		 * TODO: Queue up this address for hwpoisioning later.
500 		 */
501 		if (user_mode(regs) && !is_global_init(current)) {
502 			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
503 			recovered = 1;
504 		} else
505 			recovered = 0;
506 	} else if (user_mode(regs) && !is_global_init(current) &&
507 		evt->severity == MCE_SEV_ERROR_SYNC) {
508 		/*
509 		 * If we have received a synchronous error when in userspace
510 		 * kill the task.
511 		 */
512 		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
513 		recovered = 1;
514 	}
515 	return recovered;
516 }
517 
518 int opal_machine_check(struct pt_regs *regs)
519 {
520 	struct machine_check_event evt;
521 
522 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
523 		return 0;
524 
525 	/* Print things out */
526 	if (evt.version != MCE_V1) {
527 		pr_err("Machine Check Exception, Unknown event version %d !\n",
528 		       evt.version);
529 		return 0;
530 	}
531 	machine_check_print_event_info(&evt);
532 
533 	if (opal_recover_mce(regs, &evt))
534 		return 1;
535 	return 0;
536 }
537 
538 /* Early hmi handler called in real mode. */
539 int opal_hmi_exception_early(struct pt_regs *regs)
540 {
541 	s64 rc;
542 
543 	/*
544 	 * call opal hmi handler. Pass paca address as token.
545 	 * The return value OPAL_SUCCESS is an indication that there is
546 	 * an HMI event generated waiting to pull by Linux.
547 	 */
548 	rc = opal_handle_hmi();
549 	if (rc == OPAL_SUCCESS) {
550 		local_paca->hmi_event_available = 1;
551 		return 1;
552 	}
553 	return 0;
554 }
555 
556 /* HMI exception handler called in virtual mode during check_irq_replay. */
557 int opal_handle_hmi_exception(struct pt_regs *regs)
558 {
559 	s64 rc;
560 	__be64 evt = 0;
561 
562 	/*
563 	 * Check if HMI event is available.
564 	 * if Yes, then call opal_poll_events to pull opal messages and
565 	 * process them.
566 	 */
567 	if (!local_paca->hmi_event_available)
568 		return 0;
569 
570 	local_paca->hmi_event_available = 0;
571 	rc = opal_poll_events(&evt);
572 	if (rc == OPAL_SUCCESS && evt)
573 		opal_do_notifier(be64_to_cpu(evt));
574 
575 	return 1;
576 }
577 
578 static uint64_t find_recovery_address(uint64_t nip)
579 {
580 	int i;
581 
582 	for (i = 0; i < mc_recoverable_range_len; i++)
583 		if ((nip >= mc_recoverable_range[i].start_addr) &&
584 		    (nip < mc_recoverable_range[i].end_addr))
585 		    return mc_recoverable_range[i].recover_addr;
586 	return 0;
587 }
588 
589 bool opal_mce_check_early_recovery(struct pt_regs *regs)
590 {
591 	uint64_t recover_addr = 0;
592 
593 	if (!opal.base || !opal.size)
594 		goto out;
595 
596 	if ((regs->nip >= opal.base) &&
597 			(regs->nip <= (opal.base + opal.size)))
598 		recover_addr = find_recovery_address(regs->nip);
599 
600 	/*
601 	 * Setup regs->nip to rfi into fixup address.
602 	 */
603 	if (recover_addr)
604 		regs->nip = recover_addr;
605 
606 out:
607 	return !!recover_addr;
608 }
609 
610 static irqreturn_t opal_interrupt(int irq, void *data)
611 {
612 	__be64 events;
613 
614 	opal_handle_interrupt(virq_to_hw(irq), &events);
615 
616 	opal_do_notifier(be64_to_cpu(events));
617 
618 	return IRQ_HANDLED;
619 }
620 
621 static int opal_sysfs_init(void)
622 {
623 	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
624 	if (!opal_kobj) {
625 		pr_warn("kobject_create_and_add opal failed\n");
626 		return -ENOMEM;
627 	}
628 
629 	return 0;
630 }
631 
632 static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
633 			       struct bin_attribute *bin_attr,
634 			       char *buf, loff_t off, size_t count)
635 {
636 	return memory_read_from_buffer(buf, count, &off, bin_attr->private,
637 				       bin_attr->size);
638 }
639 
640 static BIN_ATTR_RO(symbol_map, 0);
641 
642 static void opal_export_symmap(void)
643 {
644 	const __be64 *syms;
645 	unsigned int size;
646 	struct device_node *fw;
647 	int rc;
648 
649 	fw = of_find_node_by_path("/ibm,opal/firmware");
650 	if (!fw)
651 		return;
652 	syms = of_get_property(fw, "symbol-map", &size);
653 	if (!syms || size != 2 * sizeof(__be64))
654 		return;
655 
656 	/* Setup attributes */
657 	bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
658 	bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
659 
660 	rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
661 	if (rc)
662 		pr_warn("Error %d creating OPAL symbols file\n", rc);
663 }
664 
665 static void __init opal_dump_region_init(void)
666 {
667 	void *addr;
668 	uint64_t size;
669 	int rc;
670 
671 	/* Register kernel log buffer */
672 	addr = log_buf_addr_get();
673 	if (addr == NULL)
674 		return;
675 
676 	size = log_buf_len_get();
677 	if (size == 0)
678 		return;
679 
680 	rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
681 				       __pa(addr), size);
682 	/* Don't warn if this is just an older OPAL that doesn't
683 	 * know about that call
684 	 */
685 	if (rc && rc != OPAL_UNSUPPORTED)
686 		pr_warn("DUMP: Failed to register kernel log buffer. "
687 			"rc = %d\n", rc);
688 }
689 
690 static void opal_ipmi_init(struct device_node *opal_node)
691 {
692 	struct device_node *np;
693 
694 	for_each_child_of_node(opal_node, np)
695 		if (of_device_is_compatible(np, "ibm,opal-ipmi"))
696 			of_platform_device_create(np, NULL, NULL);
697 }
698 
699 static void opal_i2c_create_devs(void)
700 {
701 	struct device_node *np;
702 
703 	for_each_compatible_node(np, NULL, "ibm,opal-i2c")
704 		of_platform_device_create(np, NULL, NULL);
705 }
706 
707 static void __init opal_irq_init(struct device_node *dn)
708 {
709 	const __be32 *irqs;
710 	int i, irqlen;
711 
712 	/* Get interrupt property */
713 	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
714 	opal_irq_count = irqs ? (irqlen / 4) : 0;
715 	pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count);
716 	if (!opal_irq_count)
717 		return;
718 
719 	/* Install interrupt handlers */
720 	opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
721 	for (i = 0; irqs && i < opal_irq_count; i++, irqs++) {
722 		unsigned int irq, virq;
723 		int rc;
724 
725 		/* Get hardware and virtual IRQ */
726 		irq = be32_to_cpup(irqs);
727 		virq = irq_create_mapping(NULL, irq);
728 		if (virq == NO_IRQ) {
729 			pr_warn("Failed to map irq 0x%x\n", irq);
730 			continue;
731 		}
732 
733 		/* Install interrupt handler */
734 		rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
735 		if (rc) {
736 			irq_dispose_mapping(virq);
737 			pr_warn("Error %d requesting irq %d (0x%x)\n",
738 				 rc, virq, irq);
739 			continue;
740 		}
741 
742 		/* Cache IRQ */
743 		opal_irqs[i] = virq;
744 	}
745 }
746 
747 static int kopald(void *unused)
748 {
749 	set_freezable();
750 	do {
751 		try_to_freeze();
752 		opal_poll_events(NULL);
753 		msleep_interruptible(opal_heartbeat);
754 	} while (!kthread_should_stop());
755 
756 	return 0;
757 }
758 
759 static void opal_init_heartbeat(void)
760 {
761 	/* Old firwmware, we assume the HVC heartbeat is sufficient */
762 	if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
763 				 &opal_heartbeat) != 0)
764 		opal_heartbeat = 0;
765 
766 	if (opal_heartbeat)
767 		kthread_run(kopald, NULL, "kopald");
768 }
769 
770 static int __init opal_init(void)
771 {
772 	struct device_node *np, *consoles;
773 	int rc;
774 
775 	opal_node = of_find_node_by_path("/ibm,opal");
776 	if (!opal_node) {
777 		pr_warn("Device node not found\n");
778 		return -ENODEV;
779 	}
780 
781 	/* Register OPAL consoles if any ports */
782 	if (firmware_has_feature(FW_FEATURE_OPALv2))
783 		consoles = of_find_node_by_path("/ibm,opal/consoles");
784 	else
785 		consoles = of_node_get(opal_node);
786 	if (consoles) {
787 		for_each_child_of_node(consoles, np) {
788 			if (strcmp(np->name, "serial"))
789 				continue;
790 			of_platform_device_create(np, NULL, NULL);
791 		}
792 		of_node_put(consoles);
793 	}
794 
795 	/* Create i2c platform devices */
796 	opal_i2c_create_devs();
797 
798 	/* Setup a heatbeat thread if requested by OPAL */
799 	opal_init_heartbeat();
800 
801 	/* Find all OPAL interrupts and request them */
802 	opal_irq_init(opal_node);
803 
804 	/* Create "opal" kobject under /sys/firmware */
805 	rc = opal_sysfs_init();
806 	if (rc == 0) {
807 		/* Export symbol map to userspace */
808 		opal_export_symmap();
809 		/* Setup dump region interface */
810 		opal_dump_region_init();
811 		/* Setup error log interface */
812 		rc = opal_elog_init();
813 		/* Setup code update interface */
814 		opal_flash_init();
815 		/* Setup platform dump extract interface */
816 		opal_platform_dump_init();
817 		/* Setup system parameters interface */
818 		opal_sys_param_init();
819 		/* Setup message log interface. */
820 		opal_msglog_init();
821 	}
822 
823 	/* Initialize OPAL IPMI backend */
824 	opal_ipmi_init(opal_node);
825 
826 	return 0;
827 }
828 machine_subsys_initcall(powernv, opal_init);
829 
830 void opal_shutdown(void)
831 {
832 	unsigned int i;
833 	long rc = OPAL_BUSY;
834 
835 	/* First free interrupts, which will also mask them */
836 	for (i = 0; i < opal_irq_count; i++) {
837 		if (opal_irqs[i])
838 			free_irq(opal_irqs[i], NULL);
839 		opal_irqs[i] = 0;
840 	}
841 
842 	/*
843 	 * Then sync with OPAL which ensure anything that can
844 	 * potentially write to our memory has completed such
845 	 * as an ongoing dump retrieval
846 	 */
847 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
848 		rc = opal_sync_host_reboot();
849 		if (rc == OPAL_BUSY)
850 			opal_poll_events(NULL);
851 		else
852 			mdelay(10);
853 	}
854 
855 	/* Unregister memory dump region */
856 	opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
857 }
858 
/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
/* OPAL IPMI send/receive calls, exported GPL-only. */
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
863 
864 /* Convert a region of vmalloc memory to an opal sg list */
865 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
866 					     unsigned long vmalloc_size)
867 {
868 	struct opal_sg_list *sg, *first = NULL;
869 	unsigned long i = 0;
870 
871 	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
872 	if (!sg)
873 		goto nomem;
874 
875 	first = sg;
876 
877 	while (vmalloc_size > 0) {
878 		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
879 		uint64_t length = min(vmalloc_size, PAGE_SIZE);
880 
881 		sg->entry[i].data = cpu_to_be64(data);
882 		sg->entry[i].length = cpu_to_be64(length);
883 		i++;
884 
885 		if (i >= SG_ENTRIES_PER_NODE) {
886 			struct opal_sg_list *next;
887 
888 			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
889 			if (!next)
890 				goto nomem;
891 
892 			sg->length = cpu_to_be64(
893 					i * sizeof(struct opal_sg_entry) + 16);
894 			i = 0;
895 			sg->next = cpu_to_be64(__pa(next));
896 			sg = next;
897 		}
898 
899 		vmalloc_addr += length;
900 		vmalloc_size -= length;
901 	}
902 
903 	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
904 
905 	return first;
906 
907 nomem:
908 	pr_err("%s : Failed to allocate memory\n", __func__);
909 	opal_free_sg_list(first);
910 	return NULL;
911 }
912 
913 void opal_free_sg_list(struct opal_sg_list *sg)
914 {
915 	while (sg) {
916 		uint64_t next = be64_to_cpu(sg->next);
917 
918 		kfree(sg);
919 
920 		if (next)
921 			sg = __va(next);
922 		else
923 			sg = NULL;
924 	}
925 }
926 
/* OPAL calls exported GPL-only: event polling, RTC, TPO and i2c requests. */
EXPORT_SYMBOL_GPL(opal_poll_events);
EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
EXPORT_SYMBOL_GPL(opal_i2c_request);
933