1 /*
2  * PowerNV OPAL high level interfaces
3  *
4  * Copyright 2011 IBM Corp.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11 
12 #undef DEBUG
13 
14 #include <linux/types.h>
15 #include <linux/of.h>
16 #include <linux/of_fdt.h>
17 #include <linux/of_platform.h>
18 #include <linux/interrupt.h>
19 #include <linux/notifier.h>
20 #include <linux/slab.h>
21 #include <linux/sched.h>
22 #include <linux/kobject.h>
23 #include <linux/delay.h>
24 #include <linux/memblock.h>
25 
26 #include <asm/machdep.h>
27 #include <asm/opal.h>
28 #include <asm/firmware.h>
29 #include <asm/mce.h>
30 
31 #include "powernv.h"
32 
/*
 * Kobject backing /sys/firmware/opal. Assigned by opal_sysfs_init();
 * NULL if kobject_create_and_add() failed there.
 */
struct kobject *opal_kobj;
35 
/*
 * OPAL firmware location, filled in from the "ibm,opal" flat device-tree
 * node by early_init_dt_scan_opal().
 */
struct opal {
	u64 base;	/* value of the "opal-base-address" property */
	u64 entry;	/* value of the "opal-entry-address" property */
	u64 size;	/* value of the "opal-runtime-size" property */
} opal;
41 
/*
 * One machine-check recoverable address range, as parsed from the
 * "mcheck-recoverable-ranges" device-tree property.
 */
struct mcheck_recoverable_range {
	u64 start_addr;		/* first address covered by the range */
	u64 end_addr;		/* start_addr + length; exclusive in the lookup */
	u64 recover_addr;	/* nip is redirected here on a match */
};
47 
/*
 * Table of recoverable ranges and its entry count, populated by
 * early_init_dt_scan_recoverable_ranges().
 */
static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

/* "/ibm,opal" device-tree node, looked up in opal_init(). */
struct device_node *opal_node;
/* Held across the whole of a console write in opal_put_chars(). */
static DEFINE_SPINLOCK(opal_write_lock);
/* Linux irq numbers recorded by opal_init(), and the array length. */
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
/* Chain called with the OPAL event mask by opal_do_notifier(). */
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
/* One notifier chain per OPAL message type, indexed by type. */
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
/* Protects last_notified_mask. */
static DEFINE_SPINLOCK(opal_notifier_lock);
/* Event mask most recently recorded for the notifier chain. */
static uint64_t last_notified_mask = 0x0ul;
/* Non-zero while opal_do_notifier() must not deliver events. */
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
60 
61 static void opal_reinit_cores(void)
62 {
63 	/* Do the actual re-init, This will clobber all FPRs, VRs, etc...
64 	 *
65 	 * It will preserve non volatile GPRs and HSPRG0/1. It will
66 	 * also restore HIDs and other SPRs to their original value
67 	 * but it might clobber a bunch.
68 	 */
69 #ifdef __BIG_ENDIAN__
70 	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
71 #else
72 	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
73 #endif
74 }
75 
76 int __init early_init_dt_scan_opal(unsigned long node,
77 				   const char *uname, int depth, void *data)
78 {
79 	const void *basep, *entryp, *sizep;
80 	int basesz, entrysz, runtimesz;
81 
82 	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
83 		return 0;
84 
85 	basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
86 	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
87 	sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
88 
89 	if (!basep || !entryp || !sizep)
90 		return 1;
91 
92 	opal.base = of_read_number(basep, basesz/4);
93 	opal.entry = of_read_number(entryp, entrysz/4);
94 	opal.size = of_read_number(sizep, runtimesz/4);
95 
96 	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
97 		 opal.base, basep, basesz);
98 	pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n",
99 		 opal.entry, entryp, entrysz);
100 	pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
101 		 opal.size, sizep, runtimesz);
102 
103 	powerpc_firmware_features |= FW_FEATURE_OPAL;
104 	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
105 		powerpc_firmware_features |= FW_FEATURE_OPALv2;
106 		powerpc_firmware_features |= FW_FEATURE_OPALv3;
107 		pr_info("OPAL V3 detected !\n");
108 	} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
109 		powerpc_firmware_features |= FW_FEATURE_OPALv2;
110 		pr_info("OPAL V2 detected !\n");
111 	} else {
112 		pr_info("OPAL V1 detected !\n");
113 	}
114 
115 	/* Reinit all cores with the right endian */
116 	opal_reinit_cores();
117 
118 	/* Restore some bits */
119 	if (cur_cpu_spec->cpu_restore)
120 		cur_cpu_spec->cpu_restore();
121 
122 	return 1;
123 }
124 
125 int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
126 				   const char *uname, int depth, void *data)
127 {
128 	int i, psize, size;
129 	const __be32 *prop;
130 
131 	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
132 		return 0;
133 
134 	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
135 
136 	if (!prop)
137 		return 1;
138 
139 	pr_debug("Found machine check recoverable ranges.\n");
140 
141 	/*
142 	 * Calculate number of available entries.
143 	 *
144 	 * Each recoverable address range entry is (start address, len,
145 	 * recovery address), 2 cells each for start and recovery address,
146 	 * 1 cell for len, totalling 5 cells per entry.
147 	 */
148 	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
149 
150 	/* Sanity check */
151 	if (!mc_recoverable_range_len)
152 		return 1;
153 
154 	/* Size required to hold all the entries. */
155 	size = mc_recoverable_range_len *
156 			sizeof(struct mcheck_recoverable_range);
157 
158 	/*
159 	 * Allocate a buffer to hold the MC recoverable ranges. We would be
160 	 * accessing them in real mode, hence it needs to be within
161 	 * RMO region.
162 	 */
163 	mc_recoverable_range =__va(memblock_alloc_base(size, __alignof__(u64),
164 							ppc64_rma_size));
165 	memset(mc_recoverable_range, 0, size);
166 
167 	for (i = 0; i < mc_recoverable_range_len; i++) {
168 		mc_recoverable_range[i].start_addr =
169 					of_read_number(prop + (i * 5) + 0, 2);
170 		mc_recoverable_range[i].end_addr =
171 					mc_recoverable_range[i].start_addr +
172 					of_read_number(prop + (i * 5) + 2, 1);
173 		mc_recoverable_range[i].recover_addr =
174 					of_read_number(prop + (i * 5) + 3, 2);
175 
176 		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
177 				mc_recoverable_range[i].start_addr,
178 				mc_recoverable_range[i].end_addr,
179 				mc_recoverable_range[i].recover_addr);
180 	}
181 	return 1;
182 }
183 
184 static int __init opal_register_exception_handlers(void)
185 {
186 #ifdef __BIG_ENDIAN__
187 	u64 glue;
188 
189 	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
190 		return -ENODEV;
191 
192 	/* Hookup some exception handlers except machine check. We use the
193 	 * fwnmi area at 0x7000 to provide the glue space to OPAL
194 	 */
195 	glue = 0x7000;
196 
197 	/*
198 	 * Check if we are running on newer firmware that exports
199 	 * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
200 	 * the HMI interrupt and we catch it directly in Linux.
201 	 *
202 	 * For older firmware (i.e currently released POWER8 System Firmware
203 	 * as of today <= SV810_087), we fallback to old behavior and let OPAL
204 	 * patch the HMI vector and handle it inside OPAL firmware.
205 	 *
206 	 * For newer firmware (in development/yet to be released) we will
207 	 * start catching/handling HMI directly in Linux.
208 	 */
209 	if (!opal_check_token(OPAL_HANDLE_HMI)) {
210 		pr_info("opal: Old firmware detected, OPAL handles HMIs.\n");
211 		opal_register_exception_handler(
212 				OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
213 				0, glue);
214 		glue += 128;
215 	}
216 
217 	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
218 #endif
219 
220 	return 0;
221 }
222 machine_early_initcall(powernv, opal_register_exception_handlers);
223 
224 int opal_notifier_register(struct notifier_block *nb)
225 {
226 	if (!nb) {
227 		pr_warning("%s: Invalid argument (%p)\n",
228 			   __func__, nb);
229 		return -EINVAL;
230 	}
231 
232 	atomic_notifier_chain_register(&opal_notifier_head, nb);
233 	return 0;
234 }
235 EXPORT_SYMBOL_GPL(opal_notifier_register);
236 
237 int opal_notifier_unregister(struct notifier_block *nb)
238 {
239 	if (!nb) {
240 		pr_warning("%s: Invalid argument (%p)\n",
241 			   __func__, nb);
242 		return -EINVAL;
243 	}
244 
245 	atomic_notifier_chain_unregister(&opal_notifier_head, nb);
246 	return 0;
247 }
248 EXPORT_SYMBOL_GPL(opal_notifier_unregister);
249 
250 static void opal_do_notifier(uint64_t events)
251 {
252 	unsigned long flags;
253 	uint64_t changed_mask;
254 
255 	if (atomic_read(&opal_notifier_hold))
256 		return;
257 
258 	spin_lock_irqsave(&opal_notifier_lock, flags);
259 	changed_mask = last_notified_mask ^ events;
260 	last_notified_mask = events;
261 	spin_unlock_irqrestore(&opal_notifier_lock, flags);
262 
263 	/*
264 	 * We feed with the event bits and changed bits for
265 	 * enough information to the callback.
266 	 */
267 	atomic_notifier_call_chain(&opal_notifier_head,
268 				   events, (void *)changed_mask);
269 }
270 
271 void opal_notifier_update_evt(uint64_t evt_mask,
272 			      uint64_t evt_val)
273 {
274 	unsigned long flags;
275 
276 	spin_lock_irqsave(&opal_notifier_lock, flags);
277 	last_notified_mask &= ~evt_mask;
278 	last_notified_mask |= evt_val;
279 	spin_unlock_irqrestore(&opal_notifier_lock, flags);
280 }
281 
282 void opal_notifier_enable(void)
283 {
284 	int64_t rc;
285 	__be64 evt = 0;
286 
287 	atomic_set(&opal_notifier_hold, 0);
288 
289 	/* Process pending events */
290 	rc = opal_poll_events(&evt);
291 	if (rc == OPAL_SUCCESS && evt)
292 		opal_do_notifier(be64_to_cpu(evt));
293 }
294 
/*
 * Hold off event notification: while opal_notifier_hold is non-zero,
 * opal_do_notifier() returns without calling the notifier chain.
 * Undone by opal_notifier_enable().
 */
void opal_notifier_disable(void)
{
	atomic_set(&opal_notifier_hold, 1);
}
299 
300 /*
301  * Opal message notifier based on message type. Allow subscribers to get
302  * notified for specific messgae type.
303  */
304 int opal_message_notifier_register(enum OpalMessageType msg_type,
305 					struct notifier_block *nb)
306 {
307 	if (!nb) {
308 		pr_warning("%s: Invalid argument (%p)\n",
309 			   __func__, nb);
310 		return -EINVAL;
311 	}
312 	if (msg_type > OPAL_MSG_TYPE_MAX) {
313 		pr_warning("%s: Invalid message type argument (%d)\n",
314 			   __func__, msg_type);
315 		return -EINVAL;
316 	}
317 	return atomic_notifier_chain_register(
318 				&opal_msg_notifier_head[msg_type], nb);
319 }
320 
/*
 * Run the notifier chain registered for @msg_type, passing the type as
 * the notifier action and @msg as its data pointer. @msg_type indexes
 * opal_msg_notifier_head[] directly and is not range-checked here.
 */
static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
	/* notify subscribers */
	atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
					msg_type, msg);
}
327 
328 static void opal_handle_message(void)
329 {
330 	s64 ret;
331 	/*
332 	 * TODO: pre-allocate a message buffer depending on opal-msg-size
333 	 * value in /proc/device-tree.
334 	 */
335 	static struct opal_msg msg;
336 	u32 type;
337 
338 	ret = opal_get_msg(__pa(&msg), sizeof(msg));
339 	/* No opal message pending. */
340 	if (ret == OPAL_RESOURCE)
341 		return;
342 
343 	/* check for errors. */
344 	if (ret) {
345 		pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
346 				__func__, ret);
347 		return;
348 	}
349 
350 	type = be32_to_cpu(msg.msg_type);
351 
352 	/* Sanity check */
353 	if (type > OPAL_MSG_TYPE_MAX) {
354 		pr_warning("%s: Unknown message type: %u\n", __func__, type);
355 		return;
356 	}
357 	opal_message_do_notify(type, (void *)&msg);
358 }
359 
360 static int opal_message_notify(struct notifier_block *nb,
361 			  unsigned long events, void *change)
362 {
363 	if (events & OPAL_EVENT_MSG_PENDING)
364 		opal_handle_message();
365 	return 0;
366 }
367 
/*
 * Notifier block that hooks opal_message_notify() into the OPAL event
 * chain; registered by opal_message_init().
 */
static struct notifier_block opal_message_nb = {
	.notifier_call	= opal_message_notify,
	.next		= NULL,
	.priority	= 0,
};
373 
374 static int __init opal_message_init(void)
375 {
376 	int ret, i;
377 
378 	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
379 		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
380 
381 	ret = opal_notifier_register(&opal_message_nb);
382 	if (ret) {
383 		pr_err("%s: Can't register OPAL event notifier (%d)\n",
384 		       __func__, ret);
385 		return ret;
386 	}
387 	return 0;
388 }
389 machine_early_initcall(powernv, opal_message_init);
390 
391 int opal_get_chars(uint32_t vtermno, char *buf, int count)
392 {
393 	s64 rc;
394 	__be64 evt, len;
395 
396 	if (!opal.entry)
397 		return -ENODEV;
398 	opal_poll_events(&evt);
399 	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
400 		return 0;
401 	len = cpu_to_be64(count);
402 	rc = opal_console_read(vtermno, &len, buf);
403 	if (rc == OPAL_SUCCESS)
404 		return be64_to_cpu(len);
405 	return 0;
406 }
407 
408 int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
409 {
410 	int written = 0;
411 	__be64 olen;
412 	s64 len, rc;
413 	unsigned long flags;
414 	__be64 evt;
415 
416 	if (!opal.entry)
417 		return -ENODEV;
418 
419 	/* We want put_chars to be atomic to avoid mangling of hvsi
420 	 * packets. To do that, we first test for room and return
421 	 * -EAGAIN if there isn't enough.
422 	 *
423 	 * Unfortunately, opal_console_write_buffer_space() doesn't
424 	 * appear to work on opal v1, so we just assume there is
425 	 * enough room and be done with it
426 	 */
427 	spin_lock_irqsave(&opal_write_lock, flags);
428 	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
429 		rc = opal_console_write_buffer_space(vtermno, &olen);
430 		len = be64_to_cpu(olen);
431 		if (rc || len < total_len) {
432 			spin_unlock_irqrestore(&opal_write_lock, flags);
433 			/* Closed -> drop characters */
434 			if (rc)
435 				return total_len;
436 			opal_poll_events(NULL);
437 			return -EAGAIN;
438 		}
439 	}
440 
441 	/* We still try to handle partial completions, though they
442 	 * should no longer happen.
443 	 */
444 	rc = OPAL_BUSY;
445 	while(total_len > 0 && (rc == OPAL_BUSY ||
446 				rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
447 		olen = cpu_to_be64(total_len);
448 		rc = opal_console_write(vtermno, &olen, data);
449 		len = be64_to_cpu(olen);
450 
451 		/* Closed or other error drop */
452 		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
453 		    rc != OPAL_BUSY_EVENT) {
454 			written = total_len;
455 			break;
456 		}
457 		if (rc == OPAL_SUCCESS) {
458 			total_len -= len;
459 			data += len;
460 			written += len;
461 		}
462 		/* This is a bit nasty but we need that for the console to
463 		 * flush when there aren't any interrupts. We will clean
464 		 * things a bit later to limit that to synchronous path
465 		 * such as the kernel console and xmon/udbg
466 		 */
467 		do
468 			opal_poll_events(&evt);
469 		while(rc == OPAL_SUCCESS &&
470 			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
471 	}
472 	spin_unlock_irqrestore(&opal_write_lock, flags);
473 	return written;
474 }
475 
476 static int opal_recover_mce(struct pt_regs *regs,
477 					struct machine_check_event *evt)
478 {
479 	int recovered = 0;
480 	uint64_t ea = get_mce_fault_addr(evt);
481 
482 	if (!(regs->msr & MSR_RI)) {
483 		/* If MSR_RI isn't set, we cannot recover */
484 		recovered = 0;
485 	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
486 		/* Platform corrected itself */
487 		recovered = 1;
488 	} else if (ea && !is_kernel_addr(ea)) {
489 		/*
490 		 * Faulting address is not in kernel text. We should be fine.
491 		 * We need to find which process uses this address.
492 		 * For now, kill the task if we have received exception when
493 		 * in userspace.
494 		 *
495 		 * TODO: Queue up this address for hwpoisioning later.
496 		 */
497 		if (user_mode(regs) && !is_global_init(current)) {
498 			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
499 			recovered = 1;
500 		} else
501 			recovered = 0;
502 	} else if (user_mode(regs) && !is_global_init(current) &&
503 		evt->severity == MCE_SEV_ERROR_SYNC) {
504 		/*
505 		 * If we have received a synchronous error when in userspace
506 		 * kill the task.
507 		 */
508 		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
509 		recovered = 1;
510 	}
511 	return recovered;
512 }
513 
514 int opal_machine_check(struct pt_regs *regs)
515 {
516 	struct machine_check_event evt;
517 
518 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
519 		return 0;
520 
521 	/* Print things out */
522 	if (evt.version != MCE_V1) {
523 		pr_err("Machine Check Exception, Unknown event version %d !\n",
524 		       evt.version);
525 		return 0;
526 	}
527 	machine_check_print_event_info(&evt);
528 
529 	if (opal_recover_mce(regs, &evt))
530 		return 1;
531 	return 0;
532 }
533 
534 /* Early hmi handler called in real mode. */
535 int opal_hmi_exception_early(struct pt_regs *regs)
536 {
537 	s64 rc;
538 
539 	/*
540 	 * call opal hmi handler. Pass paca address as token.
541 	 * The return value OPAL_SUCCESS is an indication that there is
542 	 * an HMI event generated waiting to pull by Linux.
543 	 */
544 	rc = opal_handle_hmi();
545 	if (rc == OPAL_SUCCESS) {
546 		local_paca->hmi_event_available = 1;
547 		return 1;
548 	}
549 	return 0;
550 }
551 
552 /* HMI exception handler called in virtual mode during check_irq_replay. */
553 int opal_handle_hmi_exception(struct pt_regs *regs)
554 {
555 	s64 rc;
556 	__be64 evt = 0;
557 
558 	/*
559 	 * Check if HMI event is available.
560 	 * if Yes, then call opal_poll_events to pull opal messages and
561 	 * process them.
562 	 */
563 	if (!local_paca->hmi_event_available)
564 		return 0;
565 
566 	local_paca->hmi_event_available = 0;
567 	rc = opal_poll_events(&evt);
568 	if (rc == OPAL_SUCCESS && evt)
569 		opal_do_notifier(be64_to_cpu(evt));
570 
571 	return 1;
572 }
573 
574 static uint64_t find_recovery_address(uint64_t nip)
575 {
576 	int i;
577 
578 	for (i = 0; i < mc_recoverable_range_len; i++)
579 		if ((nip >= mc_recoverable_range[i].start_addr) &&
580 		    (nip < mc_recoverable_range[i].end_addr))
581 		    return mc_recoverable_range[i].recover_addr;
582 	return 0;
583 }
584 
585 bool opal_mce_check_early_recovery(struct pt_regs *regs)
586 {
587 	uint64_t recover_addr = 0;
588 
589 	if (!opal.base || !opal.size)
590 		goto out;
591 
592 	if ((regs->nip >= opal.base) &&
593 			(regs->nip <= (opal.base + opal.size)))
594 		recover_addr = find_recovery_address(regs->nip);
595 
596 	/*
597 	 * Setup regs->nip to rfi into fixup address.
598 	 */
599 	if (recover_addr)
600 		regs->nip = recover_addr;
601 
602 out:
603 	return !!recover_addr;
604 }
605 
606 static irqreturn_t opal_interrupt(int irq, void *data)
607 {
608 	__be64 events;
609 
610 	opal_handle_interrupt(virq_to_hw(irq), &events);
611 
612 	opal_do_notifier(be64_to_cpu(events));
613 
614 	return IRQ_HANDLED;
615 }
616 
617 static int opal_sysfs_init(void)
618 {
619 	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
620 	if (!opal_kobj) {
621 		pr_warn("kobject_create_and_add opal failed\n");
622 		return -ENOMEM;
623 	}
624 
625 	return 0;
626 }
627 
628 static void __init opal_dump_region_init(void)
629 {
630 	void *addr;
631 	uint64_t size;
632 	int rc;
633 
634 	/* Register kernel log buffer */
635 	addr = log_buf_addr_get();
636 	size = log_buf_len_get();
637 	rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
638 				       __pa(addr), size);
639 	/* Don't warn if this is just an older OPAL that doesn't
640 	 * know about that call
641 	 */
642 	if (rc && rc != OPAL_UNSUPPORTED)
643 		pr_warn("DUMP: Failed to register kernel log buffer. "
644 			"rc = %d\n", rc);
645 }
646 
647 static void opal_ipmi_init(struct device_node *opal_node)
648 {
649 	struct device_node *np;
650 
651 	for_each_child_of_node(opal_node, np)
652 		if (of_device_is_compatible(np, "ibm,opal-ipmi"))
653 			of_platform_device_create(np, NULL, NULL);
654 }
655 
656 static int __init opal_init(void)
657 {
658 	struct device_node *np, *consoles;
659 	const __be32 *irqs;
660 	int rc, i, irqlen;
661 
662 	opal_node = of_find_node_by_path("/ibm,opal");
663 	if (!opal_node) {
664 		pr_warn("opal: Node not found\n");
665 		return -ENODEV;
666 	}
667 
668 	/* Register OPAL consoles if any ports */
669 	if (firmware_has_feature(FW_FEATURE_OPALv2))
670 		consoles = of_find_node_by_path("/ibm,opal/consoles");
671 	else
672 		consoles = of_node_get(opal_node);
673 	if (consoles) {
674 		for_each_child_of_node(consoles, np) {
675 			if (strcmp(np->name, "serial"))
676 				continue;
677 			of_platform_device_create(np, NULL, NULL);
678 		}
679 		of_node_put(consoles);
680 	}
681 
682 	/* Find all OPAL interrupts and request them */
683 	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
684 	pr_debug("opal: Found %d interrupts reserved for OPAL\n",
685 		 irqs ? (irqlen / 4) : 0);
686 	opal_irq_count = irqlen / 4;
687 	opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
688 	for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) {
689 		unsigned int hwirq = be32_to_cpup(irqs);
690 		unsigned int irq = irq_create_mapping(NULL, hwirq);
691 		if (irq == NO_IRQ) {
692 			pr_warning("opal: Failed to map irq 0x%x\n", hwirq);
693 			continue;
694 		}
695 		rc = request_irq(irq, opal_interrupt, 0, "opal", NULL);
696 		if (rc)
697 			pr_warning("opal: Error %d requesting irq %d"
698 				   " (0x%x)\n", rc, irq, hwirq);
699 		opal_irqs[i] = irq;
700 	}
701 
702 	/* Create "opal" kobject under /sys/firmware */
703 	rc = opal_sysfs_init();
704 	if (rc == 0) {
705 		/* Setup dump region interface */
706 		opal_dump_region_init();
707 		/* Setup error log interface */
708 		rc = opal_elog_init();
709 		/* Setup code update interface */
710 		opal_flash_init();
711 		/* Setup platform dump extract interface */
712 		opal_platform_dump_init();
713 		/* Setup system parameters interface */
714 		opal_sys_param_init();
715 		/* Setup message log interface. */
716 		opal_msglog_init();
717 	}
718 
719 	opal_ipmi_init(opal_node);
720 
721 	return 0;
722 }
723 machine_subsys_initcall(powernv, opal_init);
724 
725 void opal_shutdown(void)
726 {
727 	unsigned int i;
728 	long rc = OPAL_BUSY;
729 
730 	/* First free interrupts, which will also mask them */
731 	for (i = 0; i < opal_irq_count; i++) {
732 		if (opal_irqs[i])
733 			free_irq(opal_irqs[i], NULL);
734 		opal_irqs[i] = 0;
735 	}
736 
737 	/*
738 	 * Then sync with OPAL which ensure anything that can
739 	 * potentially write to our memory has completed such
740 	 * as an ongoing dump retrieval
741 	 */
742 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
743 		rc = opal_sync_host_reboot();
744 		if (rc == OPAL_BUSY)
745 			opal_poll_events(NULL);
746 		else
747 			mdelay(10);
748 	}
749 
750 	/* Unregister memory dump region */
751 	opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
752 }
753 
/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
/* OPAL IPMI send/receive calls, exported to GPL modules. */
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
758 
759 /* Convert a region of vmalloc memory to an opal sg list */
760 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
761 					     unsigned long vmalloc_size)
762 {
763 	struct opal_sg_list *sg, *first = NULL;
764 	unsigned long i = 0;
765 
766 	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
767 	if (!sg)
768 		goto nomem;
769 
770 	first = sg;
771 
772 	while (vmalloc_size > 0) {
773 		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
774 		uint64_t length = min(vmalloc_size, PAGE_SIZE);
775 
776 		sg->entry[i].data = cpu_to_be64(data);
777 		sg->entry[i].length = cpu_to_be64(length);
778 		i++;
779 
780 		if (i >= SG_ENTRIES_PER_NODE) {
781 			struct opal_sg_list *next;
782 
783 			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
784 			if (!next)
785 				goto nomem;
786 
787 			sg->length = cpu_to_be64(
788 					i * sizeof(struct opal_sg_entry) + 16);
789 			i = 0;
790 			sg->next = cpu_to_be64(__pa(next));
791 			sg = next;
792 		}
793 
794 		vmalloc_addr += length;
795 		vmalloc_size -= length;
796 	}
797 
798 	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
799 
800 	return first;
801 
802 nomem:
803 	pr_err("%s : Failed to allocate memory\n", __func__);
804 	opal_free_sg_list(first);
805 	return NULL;
806 }
807 
808 void opal_free_sg_list(struct opal_sg_list *sg)
809 {
810 	while (sg) {
811 		uint64_t next = be64_to_cpu(sg->next);
812 
813 		kfree(sg);
814 
815 		if (next)
816 			sg = __va(next);
817 		else
818 			sg = NULL;
819 	}
820 }
821 
/* OPAL calls (defined outside this file) exported to GPL modules. */
EXPORT_SYMBOL_GPL(opal_poll_events);
EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
827