1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * PowerNV OPAL high level interfaces 4 * 5 * Copyright 2011 IBM Corp. 6 */ 7 8 #define pr_fmt(fmt) "opal: " fmt 9 10 #include <linux/printk.h> 11 #include <linux/types.h> 12 #include <linux/of.h> 13 #include <linux/of_fdt.h> 14 #include <linux/of_platform.h> 15 #include <linux/of_address.h> 16 #include <linux/interrupt.h> 17 #include <linux/notifier.h> 18 #include <linux/slab.h> 19 #include <linux/sched.h> 20 #include <linux/kobject.h> 21 #include <linux/delay.h> 22 #include <linux/memblock.h> 23 #include <linux/kthread.h> 24 #include <linux/freezer.h> 25 #include <linux/kmsg_dump.h> 26 #include <linux/console.h> 27 #include <linux/sched/debug.h> 28 29 #include <asm/machdep.h> 30 #include <asm/opal.h> 31 #include <asm/firmware.h> 32 #include <asm/mce.h> 33 #include <asm/imc-pmu.h> 34 #include <asm/bug.h> 35 36 #include "powernv.h" 37 38 /* /sys/firmware/opal */ 39 struct kobject *opal_kobj; 40 41 struct opal { 42 u64 base; 43 u64 entry; 44 u64 size; 45 } opal; 46 47 struct mcheck_recoverable_range { 48 u64 start_addr; 49 u64 end_addr; 50 u64 recover_addr; 51 }; 52 53 static struct mcheck_recoverable_range *mc_recoverable_range; 54 static int mc_recoverable_range_len; 55 56 struct device_node *opal_node; 57 static DEFINE_SPINLOCK(opal_write_lock); 58 static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; 59 static uint32_t opal_heartbeat; 60 static struct task_struct *kopald_tsk; 61 static struct opal_msg *opal_msg; 62 static u32 opal_msg_size __ro_after_init; 63 64 void opal_configure_cores(void) 65 { 66 u64 reinit_flags = 0; 67 68 /* Do the actual re-init, This will clobber all FPRs, VRs, etc... 69 * 70 * It will preserve non volatile GPRs and HSPRG0/1. It will 71 * also restore HIDs and other SPRs to their original value 72 * but it might clobber a bunch. 73 */ 74 #ifdef __BIG_ENDIAN__ 75 reinit_flags |= OPAL_REINIT_CPUS_HILE_BE; 76 #else 77 reinit_flags |= OPAL_REINIT_CPUS_HILE_LE; 78 #endif 79 80 /* 81 * POWER9 always support running hash: 82 * ie. Host hash supports hash guests 83 * Host radix supports hash/radix guests 84 */ 85 if (early_cpu_has_feature(CPU_FTR_ARCH_300)) { 86 reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH; 87 if (early_radix_enabled()) 88 reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX; 89 } 90 91 opal_reinit_cpus(reinit_flags); 92 93 /* Restore some bits */ 94 if (cur_cpu_spec->cpu_restore) 95 cur_cpu_spec->cpu_restore(); 96 } 97 98 int __init early_init_dt_scan_opal(unsigned long node, 99 const char *uname, int depth, void *data) 100 { 101 const void *basep, *entryp, *sizep; 102 int basesz, entrysz, runtimesz; 103 104 if (depth != 1 || strcmp(uname, "ibm,opal") != 0) 105 return 0; 106 107 basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz); 108 entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz); 109 sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz); 110 111 if (!basep || !entryp || !sizep) 112 return 1; 113 114 opal.base = of_read_number(basep, basesz/4); 115 opal.entry = of_read_number(entryp, entrysz/4); 116 opal.size = of_read_number(sizep, runtimesz/4); 117 118 pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n", 119 opal.base, basep, basesz); 120 pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n", 121 opal.entry, entryp, entrysz); 122 pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n", 123 opal.size, sizep, runtimesz); 124 125 if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { 126 powerpc_firmware_features |= FW_FEATURE_OPAL; 127 pr_debug("OPAL detected !\n"); 128 } else { 129 panic("OPAL != V3 detected, no longer supported.\n"); 130 } 131 132 return 1; 133 } 134 135 int __init early_init_dt_scan_recoverable_ranges(unsigned long node, 136 const char *uname, int depth, void *data) 137 { 138 int i, psize, size; 139 const __be32 *prop; 140 141 if (depth != 1 || strcmp(uname, "ibm,opal") != 0) 142 return 0; 143 144 prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize); 145 146 if (!prop) 147 return 1; 148 149 pr_debug("Found machine check recoverable ranges.\n"); 150 151 /* 152 * Calculate number of available entries. 153 * 154 * Each recoverable address range entry is (start address, len, 155 * recovery address), 2 cells each for start and recovery address, 156 * 1 cell for len, totalling 5 cells per entry. 157 */ 158 mc_recoverable_range_len = psize / (sizeof(*prop) * 5); 159 160 /* Sanity check */ 161 if (!mc_recoverable_range_len) 162 return 1; 163 164 /* Size required to hold all the entries. */ 165 size = mc_recoverable_range_len * 166 sizeof(struct mcheck_recoverable_range); 167 168 /* 169 * Allocate a buffer to hold the MC recoverable ranges. 170 */ 171 mc_recoverable_range = memblock_alloc(size, __alignof__(u64)); 172 if (!mc_recoverable_range) 173 panic("%s: Failed to allocate %u bytes align=0x%lx\n", 174 __func__, size, __alignof__(u64)); 175 176 for (i = 0; i < mc_recoverable_range_len; i++) { 177 mc_recoverable_range[i].start_addr = 178 of_read_number(prop + (i * 5) + 0, 2); 179 mc_recoverable_range[i].end_addr = 180 mc_recoverable_range[i].start_addr + 181 of_read_number(prop + (i * 5) + 2, 1); 182 mc_recoverable_range[i].recover_addr = 183 of_read_number(prop + (i * 5) + 3, 2); 184 185 pr_debug("Machine check recoverable range: %llx..%llx: %llx\n", 186 mc_recoverable_range[i].start_addr, 187 mc_recoverable_range[i].end_addr, 188 mc_recoverable_range[i].recover_addr); 189 } 190 return 1; 191 } 192 193 static int __init opal_register_exception_handlers(void) 194 { 195 #ifdef __BIG_ENDIAN__ 196 u64 glue; 197 198 if (!(powerpc_firmware_features & FW_FEATURE_OPAL)) 199 return -ENODEV; 200 201 /* Hookup some exception handlers except machine check. We use the 202 * fwnmi area at 0x7000 to provide the glue space to OPAL 203 */ 204 glue = 0x7000; 205 206 /* 207 * Only ancient OPAL firmware requires this. 208 * Specifically, firmware from FW810.00 (released June 2014) 209 * through FW810.20 (Released October 2014). 210 * 211 * Check if we are running on newer (post Oct 2014) firmware that 212 * exports the OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to 213 * patch the HMI interrupt and we catch it directly in Linux. 214 * 215 * For older firmware (i.e < FW810.20), we fallback to old behavior and 216 * let OPAL patch the HMI vector and handle it inside OPAL firmware. 217 * 218 * For newer firmware we catch/handle the HMI directly in Linux. 219 */ 220 if (!opal_check_token(OPAL_HANDLE_HMI)) { 221 pr_info("Old firmware detected, OPAL handles HMIs.\n"); 222 opal_register_exception_handler( 223 OPAL_HYPERVISOR_MAINTENANCE_HANDLER, 224 0, glue); 225 glue += 128; 226 } 227 228 /* 229 * Only applicable to ancient firmware, all modern 230 * (post March 2015/skiboot 5.0) firmware will just return 231 * OPAL_UNSUPPORTED. 232 */ 233 opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); 234 #endif 235 236 return 0; 237 } 238 machine_early_initcall(powernv, opal_register_exception_handlers); 239 240 /* 241 * Opal message notifier based on message type. Allow subscribers to get 242 * notified for specific messgae type. 243 */ 244 int opal_message_notifier_register(enum opal_msg_type msg_type, 245 struct notifier_block *nb) 246 { 247 if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) { 248 pr_warn("%s: Invalid arguments, msg_type:%d\n", 249 __func__, msg_type); 250 return -EINVAL; 251 } 252 253 return atomic_notifier_chain_register( 254 &opal_msg_notifier_head[msg_type], nb); 255 } 256 EXPORT_SYMBOL_GPL(opal_message_notifier_register); 257 258 int opal_message_notifier_unregister(enum opal_msg_type msg_type, 259 struct notifier_block *nb) 260 { 261 return atomic_notifier_chain_unregister( 262 &opal_msg_notifier_head[msg_type], nb); 263 } 264 EXPORT_SYMBOL_GPL(opal_message_notifier_unregister); 265 266 static void opal_message_do_notify(uint32_t msg_type, void *msg) 267 { 268 /* notify subscribers */ 269 atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type], 270 msg_type, msg); 271 } 272 273 static void opal_handle_message(void) 274 { 275 s64 ret; 276 u32 type; 277 278 ret = opal_get_msg(__pa(opal_msg), opal_msg_size); 279 /* No opal message pending. */ 280 if (ret == OPAL_RESOURCE) 281 return; 282 283 /* check for errors. */ 284 if (ret) { 285 pr_warn("%s: Failed to retrieve opal message, err=%lld\n", 286 __func__, ret); 287 return; 288 } 289 290 type = be32_to_cpu(opal_msg->msg_type); 291 292 /* Sanity check */ 293 if (type >= OPAL_MSG_TYPE_MAX) { 294 pr_warn_once("%s: Unknown message type: %u\n", __func__, type); 295 return; 296 } 297 opal_message_do_notify(type, (void *)opal_msg); 298 } 299 300 static irqreturn_t opal_message_notify(int irq, void *data) 301 { 302 opal_handle_message(); 303 return IRQ_HANDLED; 304 } 305 306 static int __init opal_message_init(struct device_node *opal_node) 307 { 308 int ret, i, irq; 309 310 ret = of_property_read_u32(opal_node, "opal-msg-size", &opal_msg_size); 311 if (ret) { 312 pr_notice("Failed to read opal-msg-size property\n"); 313 opal_msg_size = sizeof(struct opal_msg); 314 } 315 316 opal_msg = kmalloc(opal_msg_size, GFP_KERNEL); 317 if (!opal_msg) { 318 opal_msg_size = sizeof(struct opal_msg); 319 /* Try to allocate fixed message size */ 320 opal_msg = kmalloc(opal_msg_size, GFP_KERNEL); 321 BUG_ON(opal_msg == NULL); 322 } 323 324 for (i = 0; i < OPAL_MSG_TYPE_MAX; i++) 325 ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]); 326 327 irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING)); 328 if (!irq) { 329 pr_err("%s: Can't register OPAL event irq (%d)\n", 330 __func__, irq); 331 return irq; 332 } 333 334 ret = request_irq(irq, opal_message_notify, 335 IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL); 336 if (ret) { 337 pr_err("%s: Can't request OPAL event irq (%d)\n", 338 __func__, ret); 339 return ret; 340 } 341 342 return 0; 343 } 344 345 int opal_get_chars(uint32_t vtermno, char *buf, int count) 346 { 347 s64 rc; 348 __be64 evt, len; 349 350 if (!opal.entry) 351 return -ENODEV; 352 opal_poll_events(&evt); 353 if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0) 354 return 0; 355 len = cpu_to_be64(count); 356 rc = opal_console_read(vtermno, &len, buf); 357 if (rc == OPAL_SUCCESS) 358 return be64_to_cpu(len); 359 return 0; 360 } 361 362 static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic) 363 { 364 unsigned long flags = 0 /* shut up gcc */; 365 int written; 366 __be64 olen; 367 s64 rc; 368 369 if (!opal.entry) 370 return -ENODEV; 371 372 if (atomic) 373 spin_lock_irqsave(&opal_write_lock, flags); 374 rc = opal_console_write_buffer_space(vtermno, &olen); 375 if (rc || be64_to_cpu(olen) < total_len) { 376 /* Closed -> drop characters */ 377 if (rc) 378 written = total_len; 379 else 380 written = -EAGAIN; 381 goto out; 382 } 383 384 /* Should not get a partial write here because space is available. */ 385 olen = cpu_to_be64(total_len); 386 rc = opal_console_write(vtermno, &olen, data); 387 if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 388 if (rc == OPAL_BUSY_EVENT) 389 opal_poll_events(NULL); 390 written = -EAGAIN; 391 goto out; 392 } 393 394 /* Closed or other error drop */ 395 if (rc != OPAL_SUCCESS) { 396 written = opal_error_code(rc); 397 goto out; 398 } 399 400 written = be64_to_cpu(olen); 401 if (written < total_len) { 402 if (atomic) { 403 /* Should not happen */ 404 pr_warn("atomic console write returned partial " 405 "len=%d written=%d\n", total_len, written); 406 } 407 if (!written) 408 written = -EAGAIN; 409 } 410 411 out: 412 if (atomic) 413 spin_unlock_irqrestore(&opal_write_lock, flags); 414 415 return written; 416 } 417 418 int opal_put_chars(uint32_t vtermno, const char *data, int total_len) 419 { 420 return __opal_put_chars(vtermno, data, total_len, false); 421 } 422 423 /* 424 * opal_put_chars_atomic will not perform partial-writes. Data will be 425 * atomically written to the terminal or not at all. This is not strictly 426 * true at the moment because console space can race with OPAL's console 427 * writes. 428 */ 429 int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len) 430 { 431 return __opal_put_chars(vtermno, data, total_len, true); 432 } 433 434 static s64 __opal_flush_console(uint32_t vtermno) 435 { 436 s64 rc; 437 438 if (!opal_check_token(OPAL_CONSOLE_FLUSH)) { 439 __be64 evt; 440 441 /* 442 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, 443 * the console can still be flushed by calling the polling 444 * function while it has OPAL_EVENT_CONSOLE_OUTPUT events. 445 */ 446 WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n"); 447 448 opal_poll_events(&evt); 449 if (!(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT)) 450 return OPAL_SUCCESS; 451 return OPAL_BUSY; 452 453 } else { 454 rc = opal_console_flush(vtermno); 455 if (rc == OPAL_BUSY_EVENT) { 456 opal_poll_events(NULL); 457 rc = OPAL_BUSY; 458 } 459 return rc; 460 } 461 462 } 463 464 /* 465 * opal_flush_console spins until the console is flushed 466 */ 467 int opal_flush_console(uint32_t vtermno) 468 { 469 for (;;) { 470 s64 rc = __opal_flush_console(vtermno); 471 472 if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) { 473 mdelay(1); 474 continue; 475 } 476 477 return opal_error_code(rc); 478 } 479 } 480 481 /* 482 * opal_flush_chars is an hvc interface that sleeps until the console is 483 * flushed if wait, otherwise it will return -EBUSY if the console has data, 484 * -EAGAIN if it has data and some of it was flushed. 485 */ 486 int opal_flush_chars(uint32_t vtermno, bool wait) 487 { 488 for (;;) { 489 s64 rc = __opal_flush_console(vtermno); 490 491 if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) { 492 if (wait) { 493 msleep(OPAL_BUSY_DELAY_MS); 494 continue; 495 } 496 if (rc == OPAL_PARTIAL) 497 return -EAGAIN; 498 } 499 500 return opal_error_code(rc); 501 } 502 } 503 504 static int opal_recover_mce(struct pt_regs *regs, 505 struct machine_check_event *evt) 506 { 507 int recovered = 0; 508 509 if (!(regs->msr & MSR_RI)) { 510 /* If MSR_RI isn't set, we cannot recover */ 511 pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); 512 recovered = 0; 513 } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { 514 /* Platform corrected itself */ 515 recovered = 1; 516 } else if (evt->severity == MCE_SEV_FATAL) { 517 /* Fatal machine check */ 518 pr_err("Machine check interrupt is fatal\n"); 519 recovered = 0; 520 } 521 522 if (!recovered && evt->sync_error) { 523 /* 524 * Try to kill processes if we get a synchronous machine check 525 * (e.g., one caused by execution of this instruction). This 526 * will devolve into a panic if we try to kill init or are in 527 * an interrupt etc. 528 * 529 * TODO: Queue up this address for hwpoisioning later. 530 * TODO: This is not quite right for d-side machine 531 * checks ->nip is not necessarily the important 532 * address. 533 */ 534 if ((user_mode(regs))) { 535 _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); 536 recovered = 1; 537 } else if (die_will_crash()) { 538 /* 539 * die() would kill the kernel, so better to go via 540 * the platform reboot code that will log the 541 * machine check. 542 */ 543 recovered = 0; 544 } else { 545 die("Machine check", regs, SIGBUS); 546 recovered = 1; 547 } 548 } 549 550 return recovered; 551 } 552 553 void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) 554 { 555 panic_flush_kmsg_start(); 556 557 pr_emerg("Hardware platform error: %s\n", msg); 558 if (regs) 559 show_regs(regs); 560 smp_send_stop(); 561 562 panic_flush_kmsg_end(); 563 564 /* 565 * Don't bother to shut things down because this will 566 * xstop the system. 567 */ 568 if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg) 569 == OPAL_UNSUPPORTED) { 570 pr_emerg("Reboot type %d not supported for %s\n", 571 OPAL_REBOOT_PLATFORM_ERROR, msg); 572 } 573 574 /* 575 * We reached here. There can be three possibilities: 576 * 1. We are running on a firmware level that do not support 577 * opal_cec_reboot2() 578 * 2. We are running on a firmware level that do not support 579 * OPAL_REBOOT_PLATFORM_ERROR reboot type. 580 * 3. We are running on FSP based system that does not need 581 * opal to trigger checkstop explicitly for error analysis. 582 * The FSP PRD component would have already got notified 583 * about this error through other channels. 584 * 4. We are running on a newer skiboot that by default does 585 * not cause a checkstop, drops us back to the kernel to 586 * extract context and state at the time of the error. 587 */ 588 589 panic(msg); 590 } 591 592 int opal_machine_check(struct pt_regs *regs) 593 { 594 struct machine_check_event evt; 595 596 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 597 return 0; 598 599 /* Print things out */ 600 if (evt.version != MCE_V1) { 601 pr_err("Machine Check Exception, Unknown event version %d !\n", 602 evt.version); 603 return 0; 604 } 605 machine_check_print_event_info(&evt, user_mode(regs), false); 606 607 if (opal_recover_mce(regs, &evt)) 608 return 1; 609 610 pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception"); 611 } 612 613 /* Early hmi handler called in real mode. */ 614 int opal_hmi_exception_early(struct pt_regs *regs) 615 { 616 s64 rc; 617 618 /* 619 * call opal hmi handler. Pass paca address as token. 620 * The return value OPAL_SUCCESS is an indication that there is 621 * an HMI event generated waiting to pull by Linux. 622 */ 623 rc = opal_handle_hmi(); 624 if (rc == OPAL_SUCCESS) { 625 local_paca->hmi_event_available = 1; 626 return 1; 627 } 628 return 0; 629 } 630 631 int opal_hmi_exception_early2(struct pt_regs *regs) 632 { 633 s64 rc; 634 __be64 out_flags; 635 636 /* 637 * call opal hmi handler. 638 * Check 64-bit flag mask to find out if an event was generated, 639 * and whether TB is still valid or not etc. 640 */ 641 rc = opal_handle_hmi2(&out_flags); 642 if (rc != OPAL_SUCCESS) 643 return 0; 644 645 if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT) 646 local_paca->hmi_event_available = 1; 647 if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL) 648 tb_invalid = true; 649 return 1; 650 } 651 652 /* HMI exception handler called in virtual mode during check_irq_replay. */ 653 int opal_handle_hmi_exception(struct pt_regs *regs) 654 { 655 /* 656 * Check if HMI event is available. 657 * if Yes, then wake kopald to process them. 658 */ 659 if (!local_paca->hmi_event_available) 660 return 0; 661 662 local_paca->hmi_event_available = 0; 663 opal_wake_poller(); 664 665 return 1; 666 } 667 668 static uint64_t find_recovery_address(uint64_t nip) 669 { 670 int i; 671 672 for (i = 0; i < mc_recoverable_range_len; i++) 673 if ((nip >= mc_recoverable_range[i].start_addr) && 674 (nip < mc_recoverable_range[i].end_addr)) 675 return mc_recoverable_range[i].recover_addr; 676 return 0; 677 } 678 679 bool opal_mce_check_early_recovery(struct pt_regs *regs) 680 { 681 uint64_t recover_addr = 0; 682 683 if (!opal.base || !opal.size) 684 goto out; 685 686 if ((regs->nip >= opal.base) && 687 (regs->nip < (opal.base + opal.size))) 688 recover_addr = find_recovery_address(regs->nip); 689 690 /* 691 * Setup regs->nip to rfi into fixup address. 692 */ 693 if (recover_addr) 694 regs->nip = recover_addr; 695 696 out: 697 return !!recover_addr; 698 } 699 700 static int opal_sysfs_init(void) 701 { 702 opal_kobj = kobject_create_and_add("opal", firmware_kobj); 703 if (!opal_kobj) { 704 pr_warn("kobject_create_and_add opal failed\n"); 705 return -ENOMEM; 706 } 707 708 return 0; 709 } 710 711 static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj, 712 struct bin_attribute *bin_attr, 713 char *buf, loff_t off, size_t count) 714 { 715 return memory_read_from_buffer(buf, count, &off, bin_attr->private, 716 bin_attr->size); 717 } 718 719 static struct bin_attribute symbol_map_attr = { 720 .attr = {.name = "symbol_map", .mode = 0400}, 721 .read = symbol_map_read 722 }; 723 724 static void opal_export_symmap(void) 725 { 726 const __be64 *syms; 727 unsigned int size; 728 struct device_node *fw; 729 int rc; 730 731 fw = of_find_node_by_path("/ibm,opal/firmware"); 732 if (!fw) 733 return; 734 syms = of_get_property(fw, "symbol-map", &size); 735 if (!syms || size != 2 * sizeof(__be64)) 736 return; 737 738 /* Setup attributes */ 739 symbol_map_attr.private = __va(be64_to_cpu(syms[0])); 740 symbol_map_attr.size = be64_to_cpu(syms[1]); 741 742 rc = sysfs_create_bin_file(opal_kobj, &symbol_map_attr); 743 if (rc) 744 pr_warn("Error %d creating OPAL symbols file\n", rc); 745 } 746 747 static ssize_t export_attr_read(struct file *fp, struct kobject *kobj, 748 struct bin_attribute *bin_attr, char *buf, 749 loff_t off, size_t count) 750 { 751 return memory_read_from_buffer(buf, count, &off, bin_attr->private, 752 bin_attr->size); 753 } 754 755 /* 756 * opal_export_attrs: creates a sysfs node for each property listed in 757 * the device-tree under /ibm,opal/firmware/exports/ 758 * All new sysfs nodes are created under /opal/exports/. 759 * This allows for reserved memory regions (e.g. HDAT) to be read. 760 * The new sysfs nodes are only readable by root. 761 */ 762 static void opal_export_attrs(void) 763 { 764 struct bin_attribute *attr; 765 struct device_node *np; 766 struct property *prop; 767 struct kobject *kobj; 768 u64 vals[2]; 769 int rc; 770 771 np = of_find_node_by_path("/ibm,opal/firmware/exports"); 772 if (!np) 773 return; 774 775 /* Create new 'exports' directory - /sys/firmware/opal/exports */ 776 kobj = kobject_create_and_add("exports", opal_kobj); 777 if (!kobj) { 778 pr_warn("kobject_create_and_add() of exports failed\n"); 779 return; 780 } 781 782 for_each_property_of_node(np, prop) { 783 if (!strcmp(prop->name, "name") || !strcmp(prop->name, "phandle")) 784 continue; 785 786 if (of_property_read_u64_array(np, prop->name, &vals[0], 2)) 787 continue; 788 789 attr = kzalloc(sizeof(*attr), GFP_KERNEL); 790 791 if (attr == NULL) { 792 pr_warn("Failed kmalloc for bin_attribute!"); 793 continue; 794 } 795 796 sysfs_bin_attr_init(attr); 797 attr->attr.name = kstrdup(prop->name, GFP_KERNEL); 798 attr->attr.mode = 0400; 799 attr->read = export_attr_read; 800 attr->private = __va(vals[0]); 801 attr->size = vals[1]; 802 803 if (attr->attr.name == NULL) { 804 pr_warn("Failed kstrdup for bin_attribute attr.name"); 805 kfree(attr); 806 continue; 807 } 808 809 rc = sysfs_create_bin_file(kobj, attr); 810 if (rc) { 811 pr_warn("Error %d creating OPAL sysfs exports/%s file\n", 812 rc, prop->name); 813 kfree(attr->attr.name); 814 kfree(attr); 815 } 816 } 817 818 of_node_put(np); 819 } 820 821 static void __init opal_dump_region_init(void) 822 { 823 void *addr; 824 uint64_t size; 825 int rc; 826 827 if (!opal_check_token(OPAL_REGISTER_DUMP_REGION)) 828 return; 829 830 /* Register kernel log buffer */ 831 addr = log_buf_addr_get(); 832 if (addr == NULL) 833 return; 834 835 size = log_buf_len_get(); 836 if (size == 0) 837 return; 838 839 rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF, 840 __pa(addr), size); 841 /* Don't warn if this is just an older OPAL that doesn't 842 * know about that call 843 */ 844 if (rc && rc != OPAL_UNSUPPORTED) 845 pr_warn("DUMP: Failed to register kernel log buffer. " 846 "rc = %d\n", rc); 847 } 848 849 static void opal_pdev_init(const char *compatible) 850 { 851 struct device_node *np; 852 853 for_each_compatible_node(np, NULL, compatible) 854 of_platform_device_create(np, NULL, NULL); 855 } 856 857 static void __init opal_imc_init_dev(void) 858 { 859 struct device_node *np; 860 861 np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT); 862 if (np) 863 of_platform_device_create(np, NULL, NULL); 864 } 865 866 static int kopald(void *unused) 867 { 868 unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1; 869 870 set_freezable(); 871 do { 872 try_to_freeze(); 873 874 opal_handle_events(); 875 876 set_current_state(TASK_INTERRUPTIBLE); 877 if (opal_have_pending_events()) 878 __set_current_state(TASK_RUNNING); 879 else 880 schedule_timeout(timeout); 881 882 } while (!kthread_should_stop()); 883 884 return 0; 885 } 886 887 void opal_wake_poller(void) 888 { 889 if (kopald_tsk) 890 wake_up_process(kopald_tsk); 891 } 892 893 static void opal_init_heartbeat(void) 894 { 895 /* Old firwmware, we assume the HVC heartbeat is sufficient */ 896 if (of_property_read_u32(opal_node, "ibm,heartbeat-ms", 897 &opal_heartbeat) != 0) 898 opal_heartbeat = 0; 899 900 if (opal_heartbeat) 901 kopald_tsk = kthread_run(kopald, NULL, "kopald"); 902 } 903 904 static int __init opal_init(void) 905 { 906 struct device_node *np, *consoles, *leds; 907 int rc; 908 909 opal_node = of_find_node_by_path("/ibm,opal"); 910 if (!opal_node) { 911 pr_warn("Device node not found\n"); 912 return -ENODEV; 913 } 914 915 /* Register OPAL consoles if any ports */ 916 consoles = of_find_node_by_path("/ibm,opal/consoles"); 917 if (consoles) { 918 for_each_child_of_node(consoles, np) { 919 if (!of_node_name_eq(np, "serial")) 920 continue; 921 of_platform_device_create(np, NULL, NULL); 922 } 923 of_node_put(consoles); 924 } 925 926 /* Initialise OPAL messaging system */ 927 opal_message_init(opal_node); 928 929 /* Initialise OPAL asynchronous completion interface */ 930 opal_async_comp_init(); 931 932 /* Initialise OPAL sensor interface */ 933 opal_sensor_init(); 934 935 /* Initialise OPAL hypervisor maintainence interrupt handling */ 936 opal_hmi_handler_init(); 937 938 /* Create i2c platform devices */ 939 opal_pdev_init("ibm,opal-i2c"); 940 941 /* Handle non-volatile memory devices */ 942 opal_pdev_init("pmem-region"); 943 944 /* Setup a heatbeat thread if requested by OPAL */ 945 opal_init_heartbeat(); 946 947 /* Detect In-Memory Collection counters and create devices*/ 948 opal_imc_init_dev(); 949 950 /* Create leds platform devices */ 951 leds = of_find_node_by_path("/ibm,opal/leds"); 952 if (leds) { 953 of_platform_device_create(leds, "opal_leds", NULL); 954 of_node_put(leds); 955 } 956 957 /* Initialise OPAL message log interface */ 958 opal_msglog_init(); 959 960 /* Create "opal" kobject under /sys/firmware */ 961 rc = opal_sysfs_init(); 962 if (rc == 0) { 963 /* Export symbol map to userspace */ 964 opal_export_symmap(); 965 /* Setup dump region interface */ 966 opal_dump_region_init(); 967 /* Setup error log interface */ 968 rc = opal_elog_init(); 969 /* Setup code update interface */ 970 opal_flash_update_init(); 971 /* Setup platform dump extract interface */ 972 opal_platform_dump_init(); 973 /* Setup system parameters interface */ 974 opal_sys_param_init(); 975 /* Setup message log sysfs interface. */ 976 opal_msglog_sysfs_init(); 977 } 978 979 /* Export all properties */ 980 opal_export_attrs(); 981 982 /* Initialize platform devices: IPMI backend, PRD & flash interface */ 983 opal_pdev_init("ibm,opal-ipmi"); 984 opal_pdev_init("ibm,opal-flash"); 985 opal_pdev_init("ibm,opal-prd"); 986 987 /* Initialise platform device: oppanel interface */ 988 opal_pdev_init("ibm,opal-oppanel"); 989 990 /* Initialise OPAL kmsg dumper for flushing console on panic */ 991 opal_kmsg_init(); 992 993 /* Initialise OPAL powercap interface */ 994 opal_powercap_init(); 995 996 /* Initialise OPAL Power-Shifting-Ratio interface */ 997 opal_psr_init(); 998 999 /* Initialise OPAL sensor groups */ 1000 opal_sensor_groups_init(); 1001 1002 /* Initialise OPAL Power control interface */ 1003 opal_power_control_init(); 1004 1005 return 0; 1006 } 1007 machine_subsys_initcall(powernv, opal_init); 1008 1009 void opal_shutdown(void) 1010 { 1011 long rc = OPAL_BUSY; 1012 1013 opal_event_shutdown(); 1014 1015 /* 1016 * Then sync with OPAL which ensure anything that can 1017 * potentially write to our memory has completed such 1018 * as an ongoing dump retrieval 1019 */ 1020 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 1021 rc = opal_sync_host_reboot(); 1022 if (rc == OPAL_BUSY) 1023 opal_poll_events(NULL); 1024 else 1025 mdelay(10); 1026 } 1027 1028 /* Unregister memory dump region */ 1029 if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION)) 1030 opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF); 1031 } 1032 1033 /* Export this so that test modules can use it */ 1034 EXPORT_SYMBOL_GPL(opal_invalid_call); 1035 EXPORT_SYMBOL_GPL(opal_xscom_read); 1036 EXPORT_SYMBOL_GPL(opal_xscom_write); 1037 EXPORT_SYMBOL_GPL(opal_ipmi_send); 1038 EXPORT_SYMBOL_GPL(opal_ipmi_recv); 1039 EXPORT_SYMBOL_GPL(opal_flash_read); 1040 EXPORT_SYMBOL_GPL(opal_flash_write); 1041 EXPORT_SYMBOL_GPL(opal_flash_erase); 1042 EXPORT_SYMBOL_GPL(opal_prd_msg); 1043 EXPORT_SYMBOL_GPL(opal_check_token); 1044 1045 /* Convert a region of vmalloc memory to an opal sg list */ 1046 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, 1047 unsigned long vmalloc_size) 1048 { 1049 struct opal_sg_list *sg, *first = NULL; 1050 unsigned long i = 0; 1051 1052 sg = kzalloc(PAGE_SIZE, GFP_KERNEL); 1053 if (!sg) 1054 goto nomem; 1055 1056 first = sg; 1057 1058 while (vmalloc_size > 0) { 1059 uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT; 1060 uint64_t length = min(vmalloc_size, PAGE_SIZE); 1061 1062 sg->entry[i].data = cpu_to_be64(data); 1063 sg->entry[i].length = cpu_to_be64(length); 1064 i++; 1065 1066 if (i >= SG_ENTRIES_PER_NODE) { 1067 struct opal_sg_list *next; 1068 1069 next = kzalloc(PAGE_SIZE, GFP_KERNEL); 1070 if (!next) 1071 goto nomem; 1072 1073 sg->length = cpu_to_be64( 1074 i * sizeof(struct opal_sg_entry) + 16); 1075 i = 0; 1076 sg->next = cpu_to_be64(__pa(next)); 1077 sg = next; 1078 } 1079 1080 vmalloc_addr += length; 1081 vmalloc_size -= length; 1082 } 1083 1084 sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16); 1085 1086 return first; 1087 1088 nomem: 1089 pr_err("%s : Failed to allocate memory\n", __func__); 1090 opal_free_sg_list(first); 1091 return NULL; 1092 } 1093 1094 void opal_free_sg_list(struct opal_sg_list *sg) 1095 { 1096 while (sg) { 1097 uint64_t next = be64_to_cpu(sg->next); 1098 1099 kfree(sg); 1100 1101 if (next) 1102 sg = __va(next); 1103 else 1104 sg = NULL; 1105 } 1106 } 1107 1108 int opal_error_code(int rc) 1109 { 1110 switch (rc) { 1111 case OPAL_SUCCESS: return 0; 1112 1113 case OPAL_PARAMETER: return -EINVAL; 1114 case OPAL_ASYNC_COMPLETION: return -EINPROGRESS; 1115 case OPAL_BUSY: 1116 case OPAL_BUSY_EVENT: return -EBUSY; 1117 case OPAL_NO_MEM: return -ENOMEM; 1118 case OPAL_PERMISSION: return -EPERM; 1119 1120 case OPAL_UNSUPPORTED: return -EIO; 1121 case OPAL_HARDWARE: return -EIO; 1122 case OPAL_INTERNAL_ERROR: return -EIO; 1123 case OPAL_TIMEOUT: return -ETIMEDOUT; 1124 default: 1125 pr_err("%s: unexpected OPAL error %d\n", __func__, rc); 1126 return -EIO; 1127 } 1128 } 1129 1130 void powernv_set_nmmu_ptcr(unsigned long ptcr) 1131 { 1132 int rc; 1133 1134 if (firmware_has_feature(FW_FEATURE_OPAL)) { 1135 rc = opal_nmmu_set_ptcr(-1UL, ptcr); 1136 if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) 1137 pr_warn("%s: Unable to set nest mmu ptcr\n", __func__); 1138 } 1139 } 1140 1141 EXPORT_SYMBOL_GPL(opal_poll_events); 1142 EXPORT_SYMBOL_GPL(opal_rtc_read); 1143 EXPORT_SYMBOL_GPL(opal_rtc_write); 1144 EXPORT_SYMBOL_GPL(opal_tpo_read); 1145 EXPORT_SYMBOL_GPL(opal_tpo_write); 1146 EXPORT_SYMBOL_GPL(opal_i2c_request); 1147 /* Export these symbols for PowerNV LED class driver */ 1148 EXPORT_SYMBOL_GPL(opal_leds_get_ind); 1149 EXPORT_SYMBOL_GPL(opal_leds_set_ind); 1150 /* Export this symbol for PowerNV Operator Panel class driver */ 1151 EXPORT_SYMBOL_GPL(opal_write_oppanel_async); 1152 /* Export this for KVM */ 1153 EXPORT_SYMBOL_GPL(opal_int_set_mfrr); 1154 EXPORT_SYMBOL_GPL(opal_int_eoi); 1155 EXPORT_SYMBOL_GPL(opal_error_code); 1156 /* Export the below symbol for NX compression */ 1157 EXPORT_SYMBOL(opal_nx_coproc_init); 1158