/*
 * PowerNV OPAL high level interfaces
 *
 * Copyright 2011 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt)	"opal: " fmt

#include <linux/printk.h>
#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>
#include <linux/kthread.h>
#include <linux/freezer.h>

#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>

#include "powernv.h"

/* /sys/firmware/opal */
struct kobject *opal_kobj;

struct opal {
	u64 base;
	u64 entry;
	u64 size;
} opal;

struct mcheck_recoverable_range {
	u64 start_addr;
	u64 end_addr;
	u64 recover_addr;
};

static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static DEFINE_SPINLOCK(opal_notifier_lock);
static uint64_t last_notified_mask = 0x0ul;
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
static uint32_t opal_heartbeat;
static void opal_reinit_cores(void)
{
	/* Do the actual re-init. This will clobber all FPRs, VRs, etc...
	 *
	 * It will preserve non-volatile GPRs and HSPRG0/1. It will
	 * also restore HIDs and other SPRs to their original value
	 * but it might clobber a bunch.
	 */
#ifdef __BIG_ENDIAN__
	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
#else
	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
#endif
}

int __init early_init_dt_scan_opal(unsigned long node,
				   const char *uname, int depth, void *data)
{
	const void *basep, *entryp, *sizep;
	int basesz, entrysz, runtimesz;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
	sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);

	if (!basep || !entryp || !sizep)
		return 1;

	opal.base = of_read_number(basep, basesz/4);
	opal.entry = of_read_number(entryp, entrysz/4);
	opal.size = of_read_number(sizep, runtimesz/4);

	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
		 opal.base, basep, basesz);
	pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
		 opal.entry, entryp, entrysz);
	pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%d)\n",
		 opal.size, sizep, runtimesz);

	powerpc_firmware_features |= FW_FEATURE_OPAL;
	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
		powerpc_firmware_features |= FW_FEATURE_OPALv2;
		powerpc_firmware_features |= FW_FEATURE_OPALv3;
		pr_info("OPAL V3 detected!\n");
	} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
		powerpc_firmware_features |= FW_FEATURE_OPALv2;
		pr_info("OPAL V2 detected!\n");
	} else {
		pr_info("OPAL V1 detected!\n");
	}

	/* Reinit all cores with the right endianness */
	opal_reinit_cores();

	/* Restore some bits */
	if (cur_cpu_spec->cpu_restore)
		cur_cpu_spec->cpu_restore();

	return 1;
}
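
/*
 * For reference, a minimal flattened device-tree fragment this scanner
 * accepts (the addresses are made-up values for illustration; each
 * property here is two cells, matching the of_read_number(..., size/4)
 * decoding above):
 *
 *	ibm,opal {
 *		compatible = "ibm,opal-v3", "ibm,opal-v2";
 *		opal-base-address = <0x0 0x30000000>;
 *		opal-entry-address = <0x0 0x30002780>;
 *		opal-runtime-size = <0x0 0x01000000>;
 *	};
 */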

int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
				   const char *uname, int depth, void *data)
{
	int i, psize, size;
	const __be32 *prop;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);

	if (!prop)
		return 1;

	pr_debug("Found machine check recoverable ranges.\n");

	/*
	 * Calculate number of available entries.
	 *
	 * Each recoverable address range entry is (start address, len,
	 * recovery address), 2 cells each for start and recovery address,
	 * 1 cell for len, totalling 5 cells per entry.
	 */
	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);

	/* Sanity check */
	if (!mc_recoverable_range_len)
		return 1;

	/* Size required to hold all the entries. */
	size = mc_recoverable_range_len *
			sizeof(struct mcheck_recoverable_range);

	/*
	 * Allocate a buffer to hold the MC recoverable ranges. We will be
	 * accessing them in real mode, hence the buffer needs to be within
	 * the RMO region.
	 */
	mc_recoverable_range = __va(memblock_alloc_base(size, __alignof__(u64),
							ppc64_rma_size));
	memset(mc_recoverable_range, 0, size);

	for (i = 0; i < mc_recoverable_range_len; i++) {
		mc_recoverable_range[i].start_addr =
					of_read_number(prop + (i * 5) + 0, 2);
		mc_recoverable_range[i].end_addr =
					mc_recoverable_range[i].start_addr +
					of_read_number(prop + (i * 5) + 2, 1);
		mc_recoverable_range[i].recover_addr =
					of_read_number(prop + (i * 5) + 3, 2);

		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
				mc_recoverable_range[i].start_addr,
				mc_recoverable_range[i].end_addr,
				mc_recoverable_range[i].recover_addr);
	}
	return 1;
}
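
/*
 * Worked example (made-up values): a five-cell entry
 * <0x0 0x3000a000 0x100 0x0 0x3000f000> decodes, with the offsets used
 * above, to start_addr = 0x3000a000, end_addr = 0x3000a000 + 0x100 =
 * 0x3000a100 and recover_addr = 0x3000f000.
 */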

static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
	u64 glue;

	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
		return -ENODEV;

	/* Hook up some exception handlers, except machine check. We use the
	 * fwnmi area at 0x7000 to provide the glue space to OPAL.
	 */
	glue = 0x7000;

	/*
	 * Check if we are running on newer firmware that exports the
	 * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
	 * the HMI interrupt: we catch it directly in Linux.
	 *
	 * For older firmware (i.e. currently released POWER8 System Firmware
	 * as of today, <= SV810_087), we fall back to the old behavior and
	 * let OPAL patch the HMI vector and handle it inside OPAL firmware.
	 *
	 * For newer firmware (in development/yet to be released) we will
	 * start catching/handling HMIs directly in Linux.
	 */
	if (!opal_check_token(OPAL_HANDLE_HMI)) {
		pr_info("Old firmware detected, OPAL handles HMIs.\n");
		opal_register_exception_handler(
				OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
				0, glue);
		glue += 128;
	}

	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif

	return 0;
}
machine_early_initcall(powernv, opal_register_exception_handlers);

int opal_notifier_register(struct notifier_block *nb)
{
	if (!nb) {
		pr_warn("%s: Invalid argument (%p)\n",
			__func__, nb);
		return -EINVAL;
	}

	atomic_notifier_chain_register(&opal_notifier_head, nb);
	return 0;
}
EXPORT_SYMBOL_GPL(opal_notifier_register);

int opal_notifier_unregister(struct notifier_block *nb)
{
	if (!nb) {
		pr_warn("%s: Invalid argument (%p)\n",
			__func__, nb);
		return -EINVAL;
	}

	atomic_notifier_chain_unregister(&opal_notifier_head, nb);
	return 0;
}
EXPORT_SYMBOL_GPL(opal_notifier_unregister);
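
/*
 * A minimal usage sketch (callback and work item names are hypothetical).
 * The chain is invoked with the full event mask as 'events' and the
 * changed bits cast into the data pointer, as opal_do_notifier() below
 * does:
 *
 *	static int my_opal_event(struct notifier_block *nb,
 *				 unsigned long events, void *change)
 *	{
 *		if (events & OPAL_EVENT_ERROR_LOG_AVAIL)
 *			schedule_work(&my_elog_work);
 *		return 0;
 *	}
 *
 *	static struct notifier_block my_opal_nb = {
 *		.notifier_call = my_opal_event,
 *	};
 *
 *	rc = opal_notifier_register(&my_opal_nb);
 */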

void opal_do_notifier(uint64_t events)
{
	unsigned long flags;
	uint64_t changed_mask;

	if (atomic_read(&opal_notifier_hold))
		return;

	spin_lock_irqsave(&opal_notifier_lock, flags);
	changed_mask = last_notified_mask ^ events;
	last_notified_mask = events;
	spin_unlock_irqrestore(&opal_notifier_lock, flags);

	/*
	 * Pass both the event bits and the changed bits so the
	 * callbacks have enough information.
	 */
	atomic_notifier_call_chain(&opal_notifier_head,
				   events, (void *)changed_mask);
}

void opal_notifier_update_evt(uint64_t evt_mask,
			      uint64_t evt_val)
{
	unsigned long flags;

	spin_lock_irqsave(&opal_notifier_lock, flags);
	last_notified_mask &= ~evt_mask;
	last_notified_mask |= evt_val;
	spin_unlock_irqrestore(&opal_notifier_lock, flags);
}

void opal_notifier_enable(void)
{
	int64_t rc;
	__be64 evt = 0;

	atomic_set(&opal_notifier_hold, 0);

	/* Process pending events */
	rc = opal_poll_events(&evt);
	if (rc == OPAL_SUCCESS && evt)
		opal_do_notifier(be64_to_cpu(evt));
}

void opal_notifier_disable(void)
{
	atomic_set(&opal_notifier_hold, 1);
}

/*
 * OPAL message notifiers, keyed by message type. Subscribers are
 * notified only for the specific message type they register for.
 */
int opal_message_notifier_register(enum opal_msg_type msg_type,
					struct notifier_block *nb)
{
	if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
		pr_warn("%s: Invalid arguments, msg_type:%d\n",
			__func__, msg_type);
		return -EINVAL;
	}

	return atomic_notifier_chain_register(
				&opal_msg_notifier_head[msg_type], nb);
}

int opal_message_notifier_unregister(enum opal_msg_type msg_type,
				     struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(
			&opal_msg_notifier_head[msg_type], nb);
}
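
/*
 * Usage sketch (handler name is hypothetical): subscribe to asynchronous
 * completion messages; the callback receives the message type and a
 * struct opal_msg pointer whose fields are big-endian:
 *
 *	static int my_async_notify(struct notifier_block *nb,
 *				   unsigned long msg_type, void *msg)
 *	{
 *		struct opal_msg *m = msg;
 *
 *		pr_info("async token %llu completed\n",
 *			be64_to_cpu(m->params[0]));
 *		return 0;
 *	}
 *
 *	static struct notifier_block my_async_nb = {
 *		.notifier_call = my_async_notify,
 *	};
 *
 *	rc = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP,
 *					    &my_async_nb);
 */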

static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
	/* notify subscribers */
	atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
					msg_type, msg);
}

static void opal_handle_message(void)
{
	s64 ret;
	/*
	 * TODO: pre-allocate a message buffer depending on opal-msg-size
	 * value in /proc/device-tree.
	 */
	static struct opal_msg msg;
	u32 type;

	ret = opal_get_msg(__pa(&msg), sizeof(msg));
	/* No opal message pending. */
	if (ret == OPAL_RESOURCE)
		return;

	/* check for errors. */
	if (ret) {
		pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
			__func__, ret);
		return;
	}

	type = be32_to_cpu(msg.msg_type);

	/* Sanity check */
	if (type >= OPAL_MSG_TYPE_MAX) {
		pr_warn("%s: Unknown message type: %u\n", __func__, type);
		return;
	}
	opal_message_do_notify(type, (void *)&msg);
}

static int opal_message_notify(struct notifier_block *nb,
			  unsigned long events, void *change)
{
	if (events & OPAL_EVENT_MSG_PENDING)
		opal_handle_message();
	return 0;
}

static struct notifier_block opal_message_nb = {
	.notifier_call	= opal_message_notify,
	.next		= NULL,
	.priority	= 0,
};

static int __init opal_message_init(void)
{
	int ret, i;

	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);

	ret = opal_notifier_register(&opal_message_nb);
	if (ret) {
		pr_err("%s: Can't register OPAL event notifier (%d)\n",
		       __func__, ret);
		return ret;
	}
	return 0;
}

int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
	s64 rc;
	__be64 evt, len;

	if (!opal.entry)
		return -ENODEV;
	opal_poll_events(&evt);
	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
		return 0;
	len = cpu_to_be64(count);
	rc = opal_console_read(vtermno, &len, buf);
	if (rc == OPAL_SUCCESS)
		return be64_to_cpu(len);
	return 0;
}

int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
	int written = 0;
	__be64 olen;
	s64 len, rc;
	unsigned long flags;
	__be64 evt;

	if (!opal.entry)
		return -ENODEV;

	/* We want put_chars to be atomic to avoid mangling of hvsi
	 * packets. To do that, we first test for room and return
	 * -EAGAIN if there isn't enough.
	 *
	 * Unfortunately, opal_console_write_buffer_space() doesn't
	 * appear to work on OPAL v1, so we just assume there is
	 * enough room and be done with it.
	 */
	spin_lock_irqsave(&opal_write_lock, flags);
	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
		rc = opal_console_write_buffer_space(vtermno, &olen);
		len = be64_to_cpu(olen);
		if (rc || len < total_len) {
			spin_unlock_irqrestore(&opal_write_lock, flags);
			/* Closed -> drop characters */
			if (rc)
				return total_len;
			opal_poll_events(NULL);
			return -EAGAIN;
		}
	}

	/* We still try to handle partial completions, though they
	 * should no longer happen.
	 */
	rc = OPAL_BUSY;
	while (total_len > 0 && (rc == OPAL_BUSY ||
				 rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
		olen = cpu_to_be64(total_len);
		rc = opal_console_write(vtermno, &olen, data);
		len = be64_to_cpu(olen);

		/* Closed or other error -> drop characters */
		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
		    rc != OPAL_BUSY_EVENT) {
			written = total_len;
			break;
		}
		if (rc == OPAL_SUCCESS) {
			total_len -= len;
			data += len;
			written += len;
		}
		/* This is a bit nasty but we need it for the console to
		 * flush when there aren't any interrupts. We will clean
		 * things up a bit later to limit that to synchronous paths
		 * such as the kernel console and xmon/udbg.
		 */
		do {
			opal_poll_events(&evt);
		} while (rc == OPAL_SUCCESS &&
			 (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
	}
	spin_unlock_irqrestore(&opal_write_lock, flags);
	return written;
}
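
/*
 * For reference, these two routines are suitable as the raw console hooks
 * of an hvc backend; a sketch (the structure name here is illustrative,
 * the ops type is the standard hvc one):
 *
 *	static const struct hv_ops hvc_opal_raw_ops = {
 *		.get_chars = opal_get_chars,
 *		.put_chars = opal_put_chars,
 *	};
 */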

static int opal_recover_mce(struct pt_regs *regs,
					struct machine_check_event *evt)
{
	int recovered = 0;
	uint64_t ea = get_mce_fault_addr(evt);

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		recovered = 0;
	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
		/* Platform corrected itself */
		recovered = 1;
	} else if (ea && !is_kernel_addr(ea)) {
		/*
		 * Faulting address is not in kernel text. We should be fine.
		 * We need to find which process uses this address.
		 * For now, kill the task if we have received the exception
		 * while in userspace.
		 *
		 * TODO: Queue up this address for hwpoisoning later.
		 */
		if (user_mode(regs) && !is_global_init(current)) {
			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
			recovered = 1;
		} else
			recovered = 0;
	} else if (user_mode(regs) && !is_global_init(current) &&
		evt->severity == MCE_SEV_ERROR_SYNC) {
		/*
		 * If we have received a synchronous error when in userspace,
		 * kill the task.
		 */
		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}
	return recovered;
}

int opal_machine_check(struct pt_regs *regs)
{
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return 0;

	/* Print things out */
	if (evt.version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d!\n",
		       evt.version);
		return 0;
	}
	machine_check_print_event_info(&evt);

	if (opal_recover_mce(regs, &evt))
		return 1;
	return 0;
}

/* Early HMI handler called in real mode. */
int opal_hmi_exception_early(struct pt_regs *regs)
{
	s64 rc;

	/*
	 * Call the OPAL HMI handler. A return value of OPAL_SUCCESS
	 * indicates there is an HMI event generated and waiting to be
	 * pulled by Linux.
	 */
	rc = opal_handle_hmi();
	if (rc == OPAL_SUCCESS) {
		local_paca->hmi_event_available = 1;
		return 1;
	}
	return 0;
}

/* HMI exception handler called in virtual mode during check_irq_replay. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
	s64 rc;
	__be64 evt = 0;

	/*
	 * Check if an HMI event is available. If yes, call
	 * opal_poll_events() to pull opal messages and process them.
	 */
	if (!local_paca->hmi_event_available)
		return 0;

	local_paca->hmi_event_available = 0;
	rc = opal_poll_events(&evt);
	if (rc == OPAL_SUCCESS && evt) {
		opal_do_notifier(be64_to_cpu(evt));
		opal_handle_events(be64_to_cpu(evt));
	}

	return 1;
}

static uint64_t find_recovery_address(uint64_t nip)
{
	int i;

	for (i = 0; i < mc_recoverable_range_len; i++)
		if ((nip >= mc_recoverable_range[i].start_addr) &&
		    (nip < mc_recoverable_range[i].end_addr))
			return mc_recoverable_range[i].recover_addr;
	return 0;
}

bool opal_mce_check_early_recovery(struct pt_regs *regs)
{
	uint64_t recover_addr = 0;

	if (!opal.base || !opal.size)
		goto out;

	if ((regs->nip >= opal.base) &&
			(regs->nip <= (opal.base + opal.size)))
		recover_addr = find_recovery_address(regs->nip);

	/*
	 * Set up regs->nip to rfi into the fixup address.
	 */
	if (recover_addr)
		regs->nip = recover_addr;

out:
	return !!recover_addr;
}

static int opal_sysfs_init(void)
{
	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
	if (!opal_kobj) {
		pr_warn("kobject_create_and_add opal failed\n");
		return -ENOMEM;
	}

	return 0;
}

static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
			       struct bin_attribute *bin_attr,
			       char *buf, loff_t off, size_t count)
{
	return memory_read_from_buffer(buf, count, &off, bin_attr->private,
				       bin_attr->size);
}

static BIN_ATTR_RO(symbol_map, 0);

static void opal_export_symmap(void)
{
	const __be64 *syms;
	unsigned int size;
	struct device_node *fw;
	int rc;

	fw = of_find_node_by_path("/ibm,opal/firmware");
	if (!fw)
		return;
	syms = of_get_property(fw, "symbol-map", &size);
	if (!syms || size != 2 * sizeof(__be64))
		return;

	/* Setup attributes */
	bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
	bin_attr_symbol_map.size = be64_to_cpu(syms[1]);

	rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
	if (rc)
		pr_warn("Error %d creating OPAL symbols file\n", rc);
}
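
/*
 * Once created, the map can be read from userspace at
 * /sys/firmware/opal/symbol_map: the "opal" kobject created by
 * opal_sysfs_init() above, plus the "symbol_map" bin attribute.
 */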

static void __init opal_dump_region_init(void)
{
	void *addr;
	uint64_t size;
	int rc;

	if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
		return;

	/* Register kernel log buffer */
	addr = log_buf_addr_get();
	if (addr == NULL)
		return;

	size = log_buf_len_get();
	if (size == 0)
		return;

	rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
				       __pa(addr), size);
	/* Don't warn if this is just an older OPAL that doesn't
	 * know about that call
	 */
	if (rc && rc != OPAL_UNSUPPORTED)
		pr_warn("DUMP: Failed to register kernel log buffer. "
			"rc = %d\n", rc);
}

static void opal_flash_init(struct device_node *opal_node)
{
	struct device_node *np;

	for_each_child_of_node(opal_node, np)
		if (of_device_is_compatible(np, "ibm,opal-flash"))
			of_platform_device_create(np, NULL, NULL);
}

static void opal_ipmi_init(struct device_node *opal_node)
{
	struct device_node *np;

	for_each_child_of_node(opal_node, np)
		if (of_device_is_compatible(np, "ibm,opal-ipmi"))
			of_platform_device_create(np, NULL, NULL);
}

static void opal_i2c_create_devs(void)
{
	struct device_node *np;

	for_each_compatible_node(np, NULL, "ibm,opal-i2c")
		of_platform_device_create(np, NULL, NULL);
}

static int kopald(void *unused)
{
	__be64 events;

	set_freezable();
	do {
		try_to_freeze();
		opal_poll_events(&events);
		opal_handle_events(be64_to_cpu(events));
		msleep_interruptible(opal_heartbeat);
	} while (!kthread_should_stop());

	return 0;
}

static void opal_init_heartbeat(void)
{
	/* Old firmware, we assume the HVC heartbeat is sufficient */
	if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
				 &opal_heartbeat) != 0)
		opal_heartbeat = 0;

	if (opal_heartbeat)
		kthread_run(kopald, NULL, "kopald");
}
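
/*
 * For illustration, firmware requests the poller by exposing a property
 * like the following in the device tree (the value is a made-up example):
 *
 *	ibm,opal {
 *		ibm,heartbeat-ms = <2000>;	// poll every 2 seconds
 *	};
 */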

static int __init opal_init(void)
{
	struct device_node *np, *consoles;
	int rc;

	opal_node = of_find_node_by_path("/ibm,opal");
	if (!opal_node) {
		pr_warn("Device node not found\n");
		return -ENODEV;
	}

	/* Initialise OPAL events */
	opal_event_init();

	/* Register OPAL consoles if any ports */
	if (firmware_has_feature(FW_FEATURE_OPALv2))
		consoles = of_find_node_by_path("/ibm,opal/consoles");
	else
		consoles = of_node_get(opal_node);
	if (consoles) {
		for_each_child_of_node(consoles, np) {
			if (strcmp(np->name, "serial"))
				continue;
			of_platform_device_create(np, NULL, NULL);
		}
		of_node_put(consoles);
	}

	/* Initialise OPAL messaging system */
	opal_message_init();

	/* Initialise OPAL asynchronous completion interface */
	opal_async_comp_init();

	/* Initialise OPAL sensor interface */
	opal_sensor_init();

	/* Initialise OPAL hypervisor maintenance interrupt handling */
	opal_hmi_handler_init();

	/* Create i2c platform devices */
	opal_i2c_create_devs();

	/* Setup a heartbeat thread if requested by OPAL */
	opal_init_heartbeat();

	/* Create "opal" kobject under /sys/firmware */
	rc = opal_sysfs_init();
	if (rc == 0) {
		/* Export symbol map to userspace */
		opal_export_symmap();
		/* Setup dump region interface */
		opal_dump_region_init();
		/* Setup error log interface */
		rc = opal_elog_init();
		/* Setup code update interface */
		opal_flash_update_init();
		/* Setup platform dump extract interface */
		opal_platform_dump_init();
		/* Setup system parameters interface */
		opal_sys_param_init();
		/* Setup message log interface. */
		opal_msglog_init();
	}

	/* Initialize OPAL IPMI backend */
	opal_ipmi_init(opal_node);

	opal_flash_init(opal_node);

	return 0;
}
machine_subsys_initcall(powernv, opal_init);

void opal_shutdown(void)
{
	long rc = OPAL_BUSY;

	opal_event_shutdown();

	/*
	 * Then sync with OPAL, which ensures that anything that can
	 * potentially write to our memory, such as an ongoing dump
	 * retrieval, has completed.
	 */
	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		rc = opal_sync_host_reboot();
		if (rc == OPAL_BUSY)
			opal_poll_events(NULL);
		else
			mdelay(10);
	}

	/* Unregister memory dump region */
	if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
		opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
}

/* Export these so that test modules can use them */
EXPORT_SYMBOL_GPL(opal_invalid_call);
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
EXPORT_SYMBOL_GPL(opal_flash_read);
EXPORT_SYMBOL_GPL(opal_flash_write);
EXPORT_SYMBOL_GPL(opal_flash_erase);

/* Convert a region of vmalloc memory to an opal sg list */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
					     unsigned long vmalloc_size)
{
	struct opal_sg_list *sg, *first = NULL;
	unsigned long i = 0;

	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!sg)
		goto nomem;

	first = sg;

	while (vmalloc_size > 0) {
		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
		uint64_t length = min(vmalloc_size, PAGE_SIZE);

		sg->entry[i].data = cpu_to_be64(data);
		sg->entry[i].length = cpu_to_be64(length);
		i++;

		if (i >= SG_ENTRIES_PER_NODE) {
			struct opal_sg_list *next;

			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
			if (!next)
				goto nomem;

			sg->length = cpu_to_be64(
					i * sizeof(struct opal_sg_entry) + 16);
			i = 0;
			sg->next = cpu_to_be64(__pa(next));
			sg = next;
		}

		vmalloc_addr += length;
		vmalloc_size -= length;
	}

	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);

	return first;

nomem:
	pr_err("%s: Failed to allocate memory\n", __func__);
	opal_free_sg_list(first);
	return NULL;
}
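
/*
 * Usage sketch (the buffer name and the consuming OPAL call are
 * illustrative, not prescribed by this file):
 *
 *	struct opal_sg_list *list;
 *
 *	list = opal_vmalloc_to_sg_list(image_data, image_size);
 *	if (!list)
 *		return -ENOMEM;
 *	rc = opal_update_flash_64(__pa(list));
 *	opal_free_sg_list(list);
 */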

void opal_free_sg_list(struct opal_sg_list *sg)
{
	while (sg) {
		uint64_t next = be64_to_cpu(sg->next);

		kfree(sg);

		if (next)
			sg = __va(next);
		else
			sg = NULL;
	}
}

int opal_error_code(int rc)
{
	switch (rc) {
	case OPAL_SUCCESS:		return 0;

	case OPAL_PARAMETER:		return -EINVAL;
	case OPAL_ASYNC_COMPLETION:	return -EINPROGRESS;
	case OPAL_BUSY_EVENT:		return -EBUSY;
	case OPAL_NO_MEM:		return -ENOMEM;

	case OPAL_UNSUPPORTED:		return -EIO;
	case OPAL_HARDWARE:		return -EIO;
	case OPAL_INTERNAL_ERROR:	return -EIO;
	default:
		pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
		return -EIO;
	}
}
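
/*
 * Typical use (the surrounding call is illustrative): translate an OPAL
 * return token into a Linux errno before handing it back to callers:
 *
 *	rc = opal_sync_host_reboot();
 *	if (rc != OPAL_SUCCESS)
 *		return opal_error_code(rc);
 */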

EXPORT_SYMBOL_GPL(opal_poll_events);
EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
EXPORT_SYMBOL_GPL(opal_i2c_request);