1 /* 2 * PowerNV OPAL high level interfaces 3 * 4 * Copyright 2011 IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #define pr_fmt(fmt) "opal: " fmt 13 14 #include <linux/printk.h> 15 #include <linux/types.h> 16 #include <linux/of.h> 17 #include <linux/of_fdt.h> 18 #include <linux/of_platform.h> 19 #include <linux/of_address.h> 20 #include <linux/interrupt.h> 21 #include <linux/notifier.h> 22 #include <linux/slab.h> 23 #include <linux/sched.h> 24 #include <linux/kobject.h> 25 #include <linux/delay.h> 26 #include <linux/memblock.h> 27 #include <linux/kthread.h> 28 #include <linux/freezer.h> 29 #include <linux/kmsg_dump.h> 30 #include <linux/console.h> 31 #include <linux/sched/debug.h> 32 33 #include <asm/machdep.h> 34 #include <asm/opal.h> 35 #include <asm/firmware.h> 36 #include <asm/mce.h> 37 #include <asm/imc-pmu.h> 38 #include <asm/bug.h> 39 40 #include "powernv.h" 41 42 /* /sys/firmware/opal */ 43 struct kobject *opal_kobj; 44 45 struct opal { 46 u64 base; 47 u64 entry; 48 u64 size; 49 } opal; 50 51 struct mcheck_recoverable_range { 52 u64 start_addr; 53 u64 end_addr; 54 u64 recover_addr; 55 }; 56 57 static struct mcheck_recoverable_range *mc_recoverable_range; 58 static int mc_recoverable_range_len; 59 60 struct device_node *opal_node; 61 static DEFINE_SPINLOCK(opal_write_lock); 62 static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; 63 static uint32_t opal_heartbeat; 64 static struct task_struct *kopald_tsk; 65 66 void opal_configure_cores(void) 67 { 68 u64 reinit_flags = 0; 69 70 /* Do the actual re-init, This will clobber all FPRs, VRs, etc... 71 * 72 * It will preserve non volatile GPRs and HSPRG0/1. It will 73 * also restore HIDs and other SPRs to their original value 74 * but it might clobber a bunch. 75 */ 76 #ifdef __BIG_ENDIAN__ 77 reinit_flags |= OPAL_REINIT_CPUS_HILE_BE; 78 #else 79 reinit_flags |= OPAL_REINIT_CPUS_HILE_LE; 80 #endif 81 82 /* 83 * POWER9 always support running hash: 84 * ie. Host hash supports hash guests 85 * Host radix supports hash/radix guests 86 */ 87 if (early_cpu_has_feature(CPU_FTR_ARCH_300)) { 88 reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH; 89 if (early_radix_enabled()) 90 reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX; 91 } 92 93 opal_reinit_cpus(reinit_flags); 94 95 /* Restore some bits */ 96 if (cur_cpu_spec->cpu_restore) 97 cur_cpu_spec->cpu_restore(); 98 } 99 100 int __init early_init_dt_scan_opal(unsigned long node, 101 const char *uname, int depth, void *data) 102 { 103 const void *basep, *entryp, *sizep; 104 int basesz, entrysz, runtimesz; 105 106 if (depth != 1 || strcmp(uname, "ibm,opal") != 0) 107 return 0; 108 109 basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz); 110 entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz); 111 sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz); 112 113 if (!basep || !entryp || !sizep) 114 return 1; 115 116 opal.base = of_read_number(basep, basesz/4); 117 opal.entry = of_read_number(entryp, entrysz/4); 118 opal.size = of_read_number(sizep, runtimesz/4); 119 120 pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n", 121 opal.base, basep, basesz); 122 pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n", 123 opal.entry, entryp, entrysz); 124 pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n", 125 opal.size, sizep, runtimesz); 126 127 if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { 128 powerpc_firmware_features |= FW_FEATURE_OPAL; 129 pr_debug("OPAL detected !\n"); 130 } else { 131 panic("OPAL != V3 detected, no longer supported.\n"); 132 } 133 134 return 1; 135 } 136 137 int __init early_init_dt_scan_recoverable_ranges(unsigned long node, 138 const char *uname, int depth, void *data) 139 { 140 int i, psize, size; 141 const __be32 *prop; 142 143 if (depth != 1 || strcmp(uname, "ibm,opal") != 0) 144 return 0; 145 146 prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize); 147 148 if (!prop) 149 return 1; 150 151 pr_debug("Found machine check recoverable ranges.\n"); 152 153 /* 154 * Calculate number of available entries. 155 * 156 * Each recoverable address range entry is (start address, len, 157 * recovery address), 2 cells each for start and recovery address, 158 * 1 cell for len, totalling 5 cells per entry. 159 */ 160 mc_recoverable_range_len = psize / (sizeof(*prop) * 5); 161 162 /* Sanity check */ 163 if (!mc_recoverable_range_len) 164 return 1; 165 166 /* Size required to hold all the entries. */ 167 size = mc_recoverable_range_len * 168 sizeof(struct mcheck_recoverable_range); 169 170 /* 171 * Allocate a buffer to hold the MC recoverable ranges. 172 */ 173 mc_recoverable_range = memblock_alloc(size, __alignof__(u64)); 174 if (!mc_recoverable_range) 175 panic("%s: Failed to allocate %u bytes align=0x%lx\n", 176 __func__, size, __alignof__(u64)); 177 178 for (i = 0; i < mc_recoverable_range_len; i++) { 179 mc_recoverable_range[i].start_addr = 180 of_read_number(prop + (i * 5) + 0, 2); 181 mc_recoverable_range[i].end_addr = 182 mc_recoverable_range[i].start_addr + 183 of_read_number(prop + (i * 5) + 2, 1); 184 mc_recoverable_range[i].recover_addr = 185 of_read_number(prop + (i * 5) + 3, 2); 186 187 pr_debug("Machine check recoverable range: %llx..%llx: %llx\n", 188 mc_recoverable_range[i].start_addr, 189 mc_recoverable_range[i].end_addr, 190 mc_recoverable_range[i].recover_addr); 191 } 192 return 1; 193 } 194 195 static int __init opal_register_exception_handlers(void) 196 { 197 #ifdef __BIG_ENDIAN__ 198 u64 glue; 199 200 if (!(powerpc_firmware_features & FW_FEATURE_OPAL)) 201 return -ENODEV; 202 203 /* Hookup some exception handlers except machine check. We use the 204 * fwnmi area at 0x7000 to provide the glue space to OPAL 205 */ 206 glue = 0x7000; 207 208 /* 209 * Only ancient OPAL firmware requires this. 210 * Specifically, firmware from FW810.00 (released June 2014) 211 * through FW810.20 (Released October 2014). 212 * 213 * Check if we are running on newer (post Oct 2014) firmware that 214 * exports the OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to 215 * patch the HMI interrupt and we catch it directly in Linux. 216 * 217 * For older firmware (i.e < FW810.20), we fallback to old behavior and 218 * let OPAL patch the HMI vector and handle it inside OPAL firmware. 219 * 220 * For newer firmware we catch/handle the HMI directly in Linux. 221 */ 222 if (!opal_check_token(OPAL_HANDLE_HMI)) { 223 pr_info("Old firmware detected, OPAL handles HMIs.\n"); 224 opal_register_exception_handler( 225 OPAL_HYPERVISOR_MAINTENANCE_HANDLER, 226 0, glue); 227 glue += 128; 228 } 229 230 /* 231 * Only applicable to ancient firmware, all modern 232 * (post March 2015/skiboot 5.0) firmware will just return 233 * OPAL_UNSUPPORTED. 234 */ 235 opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); 236 #endif 237 238 return 0; 239 } 240 machine_early_initcall(powernv, opal_register_exception_handlers); 241 242 /* 243 * Opal message notifier based on message type. Allow subscribers to get 244 * notified for specific messgae type. 245 */ 246 int opal_message_notifier_register(enum opal_msg_type msg_type, 247 struct notifier_block *nb) 248 { 249 if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) { 250 pr_warn("%s: Invalid arguments, msg_type:%d\n", 251 __func__, msg_type); 252 return -EINVAL; 253 } 254 255 return atomic_notifier_chain_register( 256 &opal_msg_notifier_head[msg_type], nb); 257 } 258 EXPORT_SYMBOL_GPL(opal_message_notifier_register); 259 260 int opal_message_notifier_unregister(enum opal_msg_type msg_type, 261 struct notifier_block *nb) 262 { 263 return atomic_notifier_chain_unregister( 264 &opal_msg_notifier_head[msg_type], nb); 265 } 266 EXPORT_SYMBOL_GPL(opal_message_notifier_unregister); 267 268 static void opal_message_do_notify(uint32_t msg_type, void *msg) 269 { 270 /* notify subscribers */ 271 atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type], 272 msg_type, msg); 273 } 274 275 static void opal_handle_message(void) 276 { 277 s64 ret; 278 /* 279 * TODO: pre-allocate a message buffer depending on opal-msg-size 280 * value in /proc/device-tree. 281 */ 282 static struct opal_msg msg; 283 u32 type; 284 285 ret = opal_get_msg(__pa(&msg), sizeof(msg)); 286 /* No opal message pending. */ 287 if (ret == OPAL_RESOURCE) 288 return; 289 290 /* check for errors. */ 291 if (ret) { 292 pr_warn("%s: Failed to retrieve opal message, err=%lld\n", 293 __func__, ret); 294 return; 295 } 296 297 type = be32_to_cpu(msg.msg_type); 298 299 /* Sanity check */ 300 if (type >= OPAL_MSG_TYPE_MAX) { 301 pr_warn_once("%s: Unknown message type: %u\n", __func__, type); 302 return; 303 } 304 opal_message_do_notify(type, (void *)&msg); 305 } 306 307 static irqreturn_t opal_message_notify(int irq, void *data) 308 { 309 opal_handle_message(); 310 return IRQ_HANDLED; 311 } 312 313 static int __init opal_message_init(void) 314 { 315 int ret, i, irq; 316 317 for (i = 0; i < OPAL_MSG_TYPE_MAX; i++) 318 ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]); 319 320 irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING)); 321 if (!irq) { 322 pr_err("%s: Can't register OPAL event irq (%d)\n", 323 __func__, irq); 324 return irq; 325 } 326 327 ret = request_irq(irq, opal_message_notify, 328 IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL); 329 if (ret) { 330 pr_err("%s: Can't request OPAL event irq (%d)\n", 331 __func__, ret); 332 return ret; 333 } 334 335 return 0; 336 } 337 338 int opal_get_chars(uint32_t vtermno, char *buf, int count) 339 { 340 s64 rc; 341 __be64 evt, len; 342 343 if (!opal.entry) 344 return -ENODEV; 345 opal_poll_events(&evt); 346 if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0) 347 return 0; 348 len = cpu_to_be64(count); 349 rc = opal_console_read(vtermno, &len, buf); 350 if (rc == OPAL_SUCCESS) 351 return be64_to_cpu(len); 352 return 0; 353 } 354 355 static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic) 356 { 357 unsigned long flags = 0 /* shut up gcc */; 358 int written; 359 __be64 olen; 360 s64 rc; 361 362 if (!opal.entry) 363 return -ENODEV; 364 365 if (atomic) 366 spin_lock_irqsave(&opal_write_lock, flags); 367 rc = opal_console_write_buffer_space(vtermno, &olen); 368 if (rc || be64_to_cpu(olen) < total_len) { 369 /* Closed -> drop characters */ 370 if (rc) 371 written = total_len; 372 else 373 written = -EAGAIN; 374 goto out; 375 } 376 377 /* Should not get a partial write here because space is available. */ 378 olen = cpu_to_be64(total_len); 379 rc = opal_console_write(vtermno, &olen, data); 380 if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 381 if (rc == OPAL_BUSY_EVENT) 382 opal_poll_events(NULL); 383 written = -EAGAIN; 384 goto out; 385 } 386 387 /* Closed or other error drop */ 388 if (rc != OPAL_SUCCESS) { 389 written = opal_error_code(rc); 390 goto out; 391 } 392 393 written = be64_to_cpu(olen); 394 if (written < total_len) { 395 if (atomic) { 396 /* Should not happen */ 397 pr_warn("atomic console write returned partial " 398 "len=%d written=%d\n", total_len, written); 399 } 400 if (!written) 401 written = -EAGAIN; 402 } 403 404 out: 405 if (atomic) 406 spin_unlock_irqrestore(&opal_write_lock, flags); 407 408 return written; 409 } 410 411 int opal_put_chars(uint32_t vtermno, const char *data, int total_len) 412 { 413 return __opal_put_chars(vtermno, data, total_len, false); 414 } 415 416 /* 417 * opal_put_chars_atomic will not perform partial-writes. Data will be 418 * atomically written to the terminal or not at all. This is not strictly 419 * true at the moment because console space can race with OPAL's console 420 * writes. 421 */ 422 int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len) 423 { 424 return __opal_put_chars(vtermno, data, total_len, true); 425 } 426 427 static s64 __opal_flush_console(uint32_t vtermno) 428 { 429 s64 rc; 430 431 if (!opal_check_token(OPAL_CONSOLE_FLUSH)) { 432 __be64 evt; 433 434 /* 435 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, 436 * the console can still be flushed by calling the polling 437 * function while it has OPAL_EVENT_CONSOLE_OUTPUT events. 438 */ 439 WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n"); 440 441 opal_poll_events(&evt); 442 if (!(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT)) 443 return OPAL_SUCCESS; 444 return OPAL_BUSY; 445 446 } else { 447 rc = opal_console_flush(vtermno); 448 if (rc == OPAL_BUSY_EVENT) { 449 opal_poll_events(NULL); 450 rc = OPAL_BUSY; 451 } 452 return rc; 453 } 454 455 } 456 457 /* 458 * opal_flush_console spins until the console is flushed 459 */ 460 int opal_flush_console(uint32_t vtermno) 461 { 462 for (;;) { 463 s64 rc = __opal_flush_console(vtermno); 464 465 if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) { 466 mdelay(1); 467 continue; 468 } 469 470 return opal_error_code(rc); 471 } 472 } 473 474 /* 475 * opal_flush_chars is an hvc interface that sleeps until the console is 476 * flushed if wait, otherwise it will return -EBUSY if the console has data, 477 * -EAGAIN if it has data and some of it was flushed. 478 */ 479 int opal_flush_chars(uint32_t vtermno, bool wait) 480 { 481 for (;;) { 482 s64 rc = __opal_flush_console(vtermno); 483 484 if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) { 485 if (wait) { 486 msleep(OPAL_BUSY_DELAY_MS); 487 continue; 488 } 489 if (rc == OPAL_PARTIAL) 490 return -EAGAIN; 491 } 492 493 return opal_error_code(rc); 494 } 495 } 496 497 static int opal_recover_mce(struct pt_regs *regs, 498 struct machine_check_event *evt) 499 { 500 int recovered = 0; 501 502 if (!(regs->msr & MSR_RI)) { 503 /* If MSR_RI isn't set, we cannot recover */ 504 pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); 505 recovered = 0; 506 } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { 507 /* Platform corrected itself */ 508 recovered = 1; 509 } else if (evt->severity == MCE_SEV_FATAL) { 510 /* Fatal machine check */ 511 pr_err("Machine check interrupt is fatal\n"); 512 recovered = 0; 513 } 514 515 if (!recovered && evt->sync_error) { 516 /* 517 * Try to kill processes if we get a synchronous machine check 518 * (e.g., one caused by execution of this instruction). This 519 * will devolve into a panic if we try to kill init or are in 520 * an interrupt etc. 521 * 522 * TODO: Queue up this address for hwpoisioning later. 523 * TODO: This is not quite right for d-side machine 524 * checks ->nip is not necessarily the important 525 * address. 526 */ 527 if ((user_mode(regs))) { 528 _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); 529 recovered = 1; 530 } else if (die_will_crash()) { 531 /* 532 * die() would kill the kernel, so better to go via 533 * the platform reboot code that will log the 534 * machine check. 535 */ 536 recovered = 0; 537 } else { 538 die("Machine check", regs, SIGBUS); 539 recovered = 1; 540 } 541 } 542 543 return recovered; 544 } 545 546 void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) 547 { 548 panic_flush_kmsg_start(); 549 550 pr_emerg("Hardware platform error: %s\n", msg); 551 if (regs) 552 show_regs(regs); 553 smp_send_stop(); 554 555 panic_flush_kmsg_end(); 556 557 /* 558 * Don't bother to shut things down because this will 559 * xstop the system. 560 */ 561 if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg) 562 == OPAL_UNSUPPORTED) { 563 pr_emerg("Reboot type %d not supported for %s\n", 564 OPAL_REBOOT_PLATFORM_ERROR, msg); 565 } 566 567 /* 568 * We reached here. There can be three possibilities: 569 * 1. We are running on a firmware level that do not support 570 * opal_cec_reboot2() 571 * 2. We are running on a firmware level that do not support 572 * OPAL_REBOOT_PLATFORM_ERROR reboot type. 573 * 3. We are running on FSP based system that does not need 574 * opal to trigger checkstop explicitly for error analysis. 575 * The FSP PRD component would have already got notified 576 * about this error through other channels. 577 * 4. We are running on a newer skiboot that by default does 578 * not cause a checkstop, drops us back to the kernel to 579 * extract context and state at the time of the error. 580 */ 581 582 panic(msg); 583 } 584 585 int opal_machine_check(struct pt_regs *regs) 586 { 587 struct machine_check_event evt; 588 589 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 590 return 0; 591 592 /* Print things out */ 593 if (evt.version != MCE_V1) { 594 pr_err("Machine Check Exception, Unknown event version %d !\n", 595 evt.version); 596 return 0; 597 } 598 machine_check_print_event_info(&evt, user_mode(regs), false); 599 600 if (opal_recover_mce(regs, &evt)) 601 return 1; 602 603 pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception"); 604 } 605 606 /* Early hmi handler called in real mode. */ 607 int opal_hmi_exception_early(struct pt_regs *regs) 608 { 609 s64 rc; 610 611 /* 612 * call opal hmi handler. Pass paca address as token. 613 * The return value OPAL_SUCCESS is an indication that there is 614 * an HMI event generated waiting to pull by Linux. 615 */ 616 rc = opal_handle_hmi(); 617 if (rc == OPAL_SUCCESS) { 618 local_paca->hmi_event_available = 1; 619 return 1; 620 } 621 return 0; 622 } 623 624 int opal_hmi_exception_early2(struct pt_regs *regs) 625 { 626 s64 rc; 627 __be64 out_flags; 628 629 /* 630 * call opal hmi handler. 631 * Check 64-bit flag mask to find out if an event was generated, 632 * and whether TB is still valid or not etc. 633 */ 634 rc = opal_handle_hmi2(&out_flags); 635 if (rc != OPAL_SUCCESS) 636 return 0; 637 638 if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT) 639 local_paca->hmi_event_available = 1; 640 if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL) 641 tb_invalid = true; 642 return 1; 643 } 644 645 /* HMI exception handler called in virtual mode during check_irq_replay. */ 646 int opal_handle_hmi_exception(struct pt_regs *regs) 647 { 648 /* 649 * Check if HMI event is available. 650 * if Yes, then wake kopald to process them. 651 */ 652 if (!local_paca->hmi_event_available) 653 return 0; 654 655 local_paca->hmi_event_available = 0; 656 opal_wake_poller(); 657 658 return 1; 659 } 660 661 static uint64_t find_recovery_address(uint64_t nip) 662 { 663 int i; 664 665 for (i = 0; i < mc_recoverable_range_len; i++) 666 if ((nip >= mc_recoverable_range[i].start_addr) && 667 (nip < mc_recoverable_range[i].end_addr)) 668 return mc_recoverable_range[i].recover_addr; 669 return 0; 670 } 671 672 bool opal_mce_check_early_recovery(struct pt_regs *regs) 673 { 674 uint64_t recover_addr = 0; 675 676 if (!opal.base || !opal.size) 677 goto out; 678 679 if ((regs->nip >= opal.base) && 680 (regs->nip < (opal.base + opal.size))) 681 recover_addr = find_recovery_address(regs->nip); 682 683 /* 684 * Setup regs->nip to rfi into fixup address. 685 */ 686 if (recover_addr) 687 regs->nip = recover_addr; 688 689 out: 690 return !!recover_addr; 691 } 692 693 static int opal_sysfs_init(void) 694 { 695 opal_kobj = kobject_create_and_add("opal", firmware_kobj); 696 if (!opal_kobj) { 697 pr_warn("kobject_create_and_add opal failed\n"); 698 return -ENOMEM; 699 } 700 701 return 0; 702 } 703 704 static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj, 705 struct bin_attribute *bin_attr, 706 char *buf, loff_t off, size_t count) 707 { 708 return memory_read_from_buffer(buf, count, &off, bin_attr->private, 709 bin_attr->size); 710 } 711 712 static BIN_ATTR_RO(symbol_map, 0); 713 714 static void opal_export_symmap(void) 715 { 716 const __be64 *syms; 717 unsigned int size; 718 struct device_node *fw; 719 int rc; 720 721 fw = of_find_node_by_path("/ibm,opal/firmware"); 722 if (!fw) 723 return; 724 syms = of_get_property(fw, "symbol-map", &size); 725 if (!syms || size != 2 * sizeof(__be64)) 726 return; 727 728 /* Setup attributes */ 729 bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0])); 730 bin_attr_symbol_map.size = be64_to_cpu(syms[1]); 731 732 rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map); 733 if (rc) 734 pr_warn("Error %d creating OPAL symbols file\n", rc); 735 } 736 737 static ssize_t export_attr_read(struct file *fp, struct kobject *kobj, 738 struct bin_attribute *bin_attr, char *buf, 739 loff_t off, size_t count) 740 { 741 return memory_read_from_buffer(buf, count, &off, bin_attr->private, 742 bin_attr->size); 743 } 744 745 /* 746 * opal_export_attrs: creates a sysfs node for each property listed in 747 * the device-tree under /ibm,opal/firmware/exports/ 748 * All new sysfs nodes are created under /opal/exports/. 749 * This allows for reserved memory regions (e.g. HDAT) to be read. 750 * The new sysfs nodes are only readable by root. 751 */ 752 static void opal_export_attrs(void) 753 { 754 struct bin_attribute *attr; 755 struct device_node *np; 756 struct property *prop; 757 struct kobject *kobj; 758 u64 vals[2]; 759 int rc; 760 761 np = of_find_node_by_path("/ibm,opal/firmware/exports"); 762 if (!np) 763 return; 764 765 /* Create new 'exports' directory - /sys/firmware/opal/exports */ 766 kobj = kobject_create_and_add("exports", opal_kobj); 767 if (!kobj) { 768 pr_warn("kobject_create_and_add() of exports failed\n"); 769 return; 770 } 771 772 for_each_property_of_node(np, prop) { 773 if (!strcmp(prop->name, "name") || !strcmp(prop->name, "phandle")) 774 continue; 775 776 if (of_property_read_u64_array(np, prop->name, &vals[0], 2)) 777 continue; 778 779 attr = kzalloc(sizeof(*attr), GFP_KERNEL); 780 781 if (attr == NULL) { 782 pr_warn("Failed kmalloc for bin_attribute!"); 783 continue; 784 } 785 786 sysfs_bin_attr_init(attr); 787 attr->attr.name = kstrdup(prop->name, GFP_KERNEL); 788 attr->attr.mode = 0400; 789 attr->read = export_attr_read; 790 attr->private = __va(vals[0]); 791 attr->size = vals[1]; 792 793 if (attr->attr.name == NULL) { 794 pr_warn("Failed kstrdup for bin_attribute attr.name"); 795 kfree(attr); 796 continue; 797 } 798 799 rc = sysfs_create_bin_file(kobj, attr); 800 if (rc) { 801 pr_warn("Error %d creating OPAL sysfs exports/%s file\n", 802 rc, prop->name); 803 kfree(attr->attr.name); 804 kfree(attr); 805 } 806 } 807 808 of_node_put(np); 809 } 810 811 static void __init opal_dump_region_init(void) 812 { 813 void *addr; 814 uint64_t size; 815 int rc; 816 817 if (!opal_check_token(OPAL_REGISTER_DUMP_REGION)) 818 return; 819 820 /* Register kernel log buffer */ 821 addr = log_buf_addr_get(); 822 if (addr == NULL) 823 return; 824 825 size = log_buf_len_get(); 826 if (size == 0) 827 return; 828 829 rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF, 830 __pa(addr), size); 831 /* Don't warn if this is just an older OPAL that doesn't 832 * know about that call 833 */ 834 if (rc && rc != OPAL_UNSUPPORTED) 835 pr_warn("DUMP: Failed to register kernel log buffer. " 836 "rc = %d\n", rc); 837 } 838 839 static void opal_pdev_init(const char *compatible) 840 { 841 struct device_node *np; 842 843 for_each_compatible_node(np, NULL, compatible) 844 of_platform_device_create(np, NULL, NULL); 845 } 846 847 static void __init opal_imc_init_dev(void) 848 { 849 struct device_node *np; 850 851 np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT); 852 if (np) 853 of_platform_device_create(np, NULL, NULL); 854 } 855 856 static int kopald(void *unused) 857 { 858 unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1; 859 860 set_freezable(); 861 do { 862 try_to_freeze(); 863 864 opal_handle_events(); 865 866 set_current_state(TASK_INTERRUPTIBLE); 867 if (opal_have_pending_events()) 868 __set_current_state(TASK_RUNNING); 869 else 870 schedule_timeout(timeout); 871 872 } while (!kthread_should_stop()); 873 874 return 0; 875 } 876 877 void opal_wake_poller(void) 878 { 879 if (kopald_tsk) 880 wake_up_process(kopald_tsk); 881 } 882 883 static void opal_init_heartbeat(void) 884 { 885 /* Old firwmware, we assume the HVC heartbeat is sufficient */ 886 if (of_property_read_u32(opal_node, "ibm,heartbeat-ms", 887 &opal_heartbeat) != 0) 888 opal_heartbeat = 0; 889 890 if (opal_heartbeat) 891 kopald_tsk = kthread_run(kopald, NULL, "kopald"); 892 } 893 894 static int __init opal_init(void) 895 { 896 struct device_node *np, *consoles, *leds; 897 int rc; 898 899 opal_node = of_find_node_by_path("/ibm,opal"); 900 if (!opal_node) { 901 pr_warn("Device node not found\n"); 902 return -ENODEV; 903 } 904 905 /* Register OPAL consoles if any ports */ 906 consoles = of_find_node_by_path("/ibm,opal/consoles"); 907 if (consoles) { 908 for_each_child_of_node(consoles, np) { 909 if (!of_node_name_eq(np, "serial")) 910 continue; 911 of_platform_device_create(np, NULL, NULL); 912 } 913 of_node_put(consoles); 914 } 915 916 /* Initialise OPAL messaging system */ 917 opal_message_init(); 918 919 /* Initialise OPAL asynchronous completion interface */ 920 opal_async_comp_init(); 921 922 /* Initialise OPAL sensor interface */ 923 opal_sensor_init(); 924 925 /* Initialise OPAL hypervisor maintainence interrupt handling */ 926 opal_hmi_handler_init(); 927 928 /* Create i2c platform devices */ 929 opal_pdev_init("ibm,opal-i2c"); 930 931 /* Handle non-volatile memory devices */ 932 opal_pdev_init("pmem-region"); 933 934 /* Setup a heatbeat thread if requested by OPAL */ 935 opal_init_heartbeat(); 936 937 /* Detect In-Memory Collection counters and create devices*/ 938 opal_imc_init_dev(); 939 940 /* Create leds platform devices */ 941 leds = of_find_node_by_path("/ibm,opal/leds"); 942 if (leds) { 943 of_platform_device_create(leds, "opal_leds", NULL); 944 of_node_put(leds); 945 } 946 947 /* Initialise OPAL message log interface */ 948 opal_msglog_init(); 949 950 /* Create "opal" kobject under /sys/firmware */ 951 rc = opal_sysfs_init(); 952 if (rc == 0) { 953 /* Export symbol map to userspace */ 954 opal_export_symmap(); 955 /* Setup dump region interface */ 956 opal_dump_region_init(); 957 /* Setup error log interface */ 958 rc = opal_elog_init(); 959 /* Setup code update interface */ 960 opal_flash_update_init(); 961 /* Setup platform dump extract interface */ 962 opal_platform_dump_init(); 963 /* Setup system parameters interface */ 964 opal_sys_param_init(); 965 /* Setup message log sysfs interface. */ 966 opal_msglog_sysfs_init(); 967 } 968 969 /* Export all properties */ 970 opal_export_attrs(); 971 972 /* Initialize platform devices: IPMI backend, PRD & flash interface */ 973 opal_pdev_init("ibm,opal-ipmi"); 974 opal_pdev_init("ibm,opal-flash"); 975 opal_pdev_init("ibm,opal-prd"); 976 977 /* Initialise platform device: oppanel interface */ 978 opal_pdev_init("ibm,opal-oppanel"); 979 980 /* Initialise OPAL kmsg dumper for flushing console on panic */ 981 opal_kmsg_init(); 982 983 /* Initialise OPAL powercap interface */ 984 opal_powercap_init(); 985 986 /* Initialise OPAL Power-Shifting-Ratio interface */ 987 opal_psr_init(); 988 989 /* Initialise OPAL sensor groups */ 990 opal_sensor_groups_init(); 991 992 /* Initialise OPAL Power control interface */ 993 opal_power_control_init(); 994 995 return 0; 996 } 997 machine_subsys_initcall(powernv, opal_init); 998 999 void opal_shutdown(void) 1000 { 1001 long rc = OPAL_BUSY; 1002 1003 opal_event_shutdown(); 1004 1005 /* 1006 * Then sync with OPAL which ensure anything that can 1007 * potentially write to our memory has completed such 1008 * as an ongoing dump retrieval 1009 */ 1010 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 1011 rc = opal_sync_host_reboot(); 1012 if (rc == OPAL_BUSY) 1013 opal_poll_events(NULL); 1014 else 1015 mdelay(10); 1016 } 1017 1018 /* Unregister memory dump region */ 1019 if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION)) 1020 opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF); 1021 } 1022 1023 /* Export this so that test modules can use it */ 1024 EXPORT_SYMBOL_GPL(opal_invalid_call); 1025 EXPORT_SYMBOL_GPL(opal_xscom_read); 1026 EXPORT_SYMBOL_GPL(opal_xscom_write); 1027 EXPORT_SYMBOL_GPL(opal_ipmi_send); 1028 EXPORT_SYMBOL_GPL(opal_ipmi_recv); 1029 EXPORT_SYMBOL_GPL(opal_flash_read); 1030 EXPORT_SYMBOL_GPL(opal_flash_write); 1031 EXPORT_SYMBOL_GPL(opal_flash_erase); 1032 EXPORT_SYMBOL_GPL(opal_prd_msg); 1033 EXPORT_SYMBOL_GPL(opal_check_token); 1034 1035 /* Convert a region of vmalloc memory to an opal sg list */ 1036 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, 1037 unsigned long vmalloc_size) 1038 { 1039 struct opal_sg_list *sg, *first = NULL; 1040 unsigned long i = 0; 1041 1042 sg = kzalloc(PAGE_SIZE, GFP_KERNEL); 1043 if (!sg) 1044 goto nomem; 1045 1046 first = sg; 1047 1048 while (vmalloc_size > 0) { 1049 uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT; 1050 uint64_t length = min(vmalloc_size, PAGE_SIZE); 1051 1052 sg->entry[i].data = cpu_to_be64(data); 1053 sg->entry[i].length = cpu_to_be64(length); 1054 i++; 1055 1056 if (i >= SG_ENTRIES_PER_NODE) { 1057 struct opal_sg_list *next; 1058 1059 next = kzalloc(PAGE_SIZE, GFP_KERNEL); 1060 if (!next) 1061 goto nomem; 1062 1063 sg->length = cpu_to_be64( 1064 i * sizeof(struct opal_sg_entry) + 16); 1065 i = 0; 1066 sg->next = cpu_to_be64(__pa(next)); 1067 sg = next; 1068 } 1069 1070 vmalloc_addr += length; 1071 vmalloc_size -= length; 1072 } 1073 1074 sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16); 1075 1076 return first; 1077 1078 nomem: 1079 pr_err("%s : Failed to allocate memory\n", __func__); 1080 opal_free_sg_list(first); 1081 return NULL; 1082 } 1083 1084 void opal_free_sg_list(struct opal_sg_list *sg) 1085 { 1086 while (sg) { 1087 uint64_t next = be64_to_cpu(sg->next); 1088 1089 kfree(sg); 1090 1091 if (next) 1092 sg = __va(next); 1093 else 1094 sg = NULL; 1095 } 1096 } 1097 1098 int opal_error_code(int rc) 1099 { 1100 switch (rc) { 1101 case OPAL_SUCCESS: return 0; 1102 1103 case OPAL_PARAMETER: return -EINVAL; 1104 case OPAL_ASYNC_COMPLETION: return -EINPROGRESS; 1105 case OPAL_BUSY: 1106 case OPAL_BUSY_EVENT: return -EBUSY; 1107 case OPAL_NO_MEM: return -ENOMEM; 1108 case OPAL_PERMISSION: return -EPERM; 1109 1110 case OPAL_UNSUPPORTED: return -EIO; 1111 case OPAL_HARDWARE: return -EIO; 1112 case OPAL_INTERNAL_ERROR: return -EIO; 1113 case OPAL_TIMEOUT: return -ETIMEDOUT; 1114 default: 1115 pr_err("%s: unexpected OPAL error %d\n", __func__, rc); 1116 return -EIO; 1117 } 1118 } 1119 1120 void powernv_set_nmmu_ptcr(unsigned long ptcr) 1121 { 1122 int rc; 1123 1124 if (firmware_has_feature(FW_FEATURE_OPAL)) { 1125 rc = opal_nmmu_set_ptcr(-1UL, ptcr); 1126 if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) 1127 pr_warn("%s: Unable to set nest mmu ptcr\n", __func__); 1128 } 1129 } 1130 1131 EXPORT_SYMBOL_GPL(opal_poll_events); 1132 EXPORT_SYMBOL_GPL(opal_rtc_read); 1133 EXPORT_SYMBOL_GPL(opal_rtc_write); 1134 EXPORT_SYMBOL_GPL(opal_tpo_read); 1135 EXPORT_SYMBOL_GPL(opal_tpo_write); 1136 EXPORT_SYMBOL_GPL(opal_i2c_request); 1137 /* Export these symbols for PowerNV LED class driver */ 1138 EXPORT_SYMBOL_GPL(opal_leds_get_ind); 1139 EXPORT_SYMBOL_GPL(opal_leds_set_ind); 1140 /* Export this symbol for PowerNV Operator Panel class driver */ 1141 EXPORT_SYMBOL_GPL(opal_write_oppanel_async); 1142 /* Export this for KVM */ 1143 EXPORT_SYMBOL_GPL(opal_int_set_mfrr); 1144 EXPORT_SYMBOL_GPL(opal_int_eoi); 1145 EXPORT_SYMBOL_GPL(opal_error_code); 1146 /* Export the below symbol for NX compression */ 1147 EXPORT_SYMBOL(opal_nx_coproc_init); 1148