1 /* 2 * PowerNV OPAL high level interfaces 3 * 4 * Copyright 2011 IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #undef DEBUG 13 14 #include <linux/types.h> 15 #include <linux/of.h> 16 #include <linux/of_fdt.h> 17 #include <linux/of_platform.h> 18 #include <linux/interrupt.h> 19 #include <linux/notifier.h> 20 #include <linux/slab.h> 21 #include <linux/sched.h> 22 #include <linux/kobject.h> 23 #include <linux/delay.h> 24 #include <linux/memblock.h> 25 26 #include <asm/machdep.h> 27 #include <asm/opal.h> 28 #include <asm/firmware.h> 29 #include <asm/mce.h> 30 31 #include "powernv.h" 32 33 /* /sys/firmware/opal */ 34 struct kobject *opal_kobj; 35 36 struct opal { 37 u64 base; 38 u64 entry; 39 u64 size; 40 } opal; 41 42 struct mcheck_recoverable_range { 43 u64 start_addr; 44 u64 end_addr; 45 u64 recover_addr; 46 }; 47 48 static struct mcheck_recoverable_range *mc_recoverable_range; 49 static int mc_recoverable_range_len; 50 51 struct device_node *opal_node; 52 static DEFINE_SPINLOCK(opal_write_lock); 53 extern u64 opal_mc_secondary_handler[]; 54 static unsigned int *opal_irqs; 55 static unsigned int opal_irq_count; 56 static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); 57 static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; 58 static DEFINE_SPINLOCK(opal_notifier_lock); 59 static uint64_t last_notified_mask = 0x0ul; 60 static atomic_t opal_notifier_hold = ATOMIC_INIT(0); 61 62 static void opal_reinit_cores(void) 63 { 64 /* Do the actual re-init, This will clobber all FPRs, VRs, etc... 65 * 66 * It will preserve non volatile GPRs and HSPRG0/1. It will 67 * also restore HIDs and other SPRs to their original value 68 * but it might clobber a bunch. 69 */ 70 #ifdef __BIG_ENDIAN__ 71 opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); 72 #else 73 opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE); 74 #endif 75 } 76 77 int __init early_init_dt_scan_opal(unsigned long node, 78 const char *uname, int depth, void *data) 79 { 80 const void *basep, *entryp, *sizep; 81 int basesz, entrysz, runtimesz; 82 83 if (depth != 1 || strcmp(uname, "ibm,opal") != 0) 84 return 0; 85 86 basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz); 87 entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz); 88 sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz); 89 90 if (!basep || !entryp || !sizep) 91 return 1; 92 93 opal.base = of_read_number(basep, basesz/4); 94 opal.entry = of_read_number(entryp, entrysz/4); 95 opal.size = of_read_number(sizep, runtimesz/4); 96 97 pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n", 98 opal.base, basep, basesz); 99 pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n", 100 opal.entry, entryp, entrysz); 101 pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n", 102 opal.size, sizep, runtimesz); 103 104 powerpc_firmware_features |= FW_FEATURE_OPAL; 105 if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { 106 powerpc_firmware_features |= FW_FEATURE_OPALv2; 107 powerpc_firmware_features |= FW_FEATURE_OPALv3; 108 printk("OPAL V3 detected !\n"); 109 } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { 110 powerpc_firmware_features |= FW_FEATURE_OPALv2; 111 printk("OPAL V2 detected !\n"); 112 } else { 113 printk("OPAL V1 detected !\n"); 114 } 115 116 /* Reinit all cores with the right endian */ 117 opal_reinit_cores(); 118 119 /* Restore some bits */ 120 if (cur_cpu_spec->cpu_restore) 121 cur_cpu_spec->cpu_restore(); 122 123 return 1; 124 } 125 126 int __init early_init_dt_scan_recoverable_ranges(unsigned long node, 127 const char *uname, int depth, void *data) 128 { 129 int i, psize, size; 130 const __be32 *prop; 131 132 if (depth != 1 || strcmp(uname, "ibm,opal") != 0) 133 return 0; 134 135 prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize); 136 137 if (!prop) 138 return 1; 139 140 pr_debug("Found machine check recoverable ranges.\n"); 141 142 /* 143 * Calculate number of available entries. 144 * 145 * Each recoverable address range entry is (start address, len, 146 * recovery address), 2 cells each for start and recovery address, 147 * 1 cell for len, totalling 5 cells per entry. 148 */ 149 mc_recoverable_range_len = psize / (sizeof(*prop) * 5); 150 151 /* Sanity check */ 152 if (!mc_recoverable_range_len) 153 return 1; 154 155 /* Size required to hold all the entries. */ 156 size = mc_recoverable_range_len * 157 sizeof(struct mcheck_recoverable_range); 158 159 /* 160 * Allocate a buffer to hold the MC recoverable ranges. We would be 161 * accessing them in real mode, hence it needs to be within 162 * RMO region. 163 */ 164 mc_recoverable_range =__va(memblock_alloc_base(size, __alignof__(u64), 165 ppc64_rma_size)); 166 memset(mc_recoverable_range, 0, size); 167 168 for (i = 0; i < mc_recoverable_range_len; i++) { 169 mc_recoverable_range[i].start_addr = 170 of_read_number(prop + (i * 5) + 0, 2); 171 mc_recoverable_range[i].end_addr = 172 mc_recoverable_range[i].start_addr + 173 of_read_number(prop + (i * 5) + 2, 1); 174 mc_recoverable_range[i].recover_addr = 175 of_read_number(prop + (i * 5) + 3, 2); 176 177 pr_debug("Machine check recoverable range: %llx..%llx: %llx\n", 178 mc_recoverable_range[i].start_addr, 179 mc_recoverable_range[i].end_addr, 180 mc_recoverable_range[i].recover_addr); 181 } 182 return 1; 183 } 184 185 static int __init opal_register_exception_handlers(void) 186 { 187 #ifdef __BIG_ENDIAN__ 188 u64 glue; 189 190 if (!(powerpc_firmware_features & FW_FEATURE_OPAL)) 191 return -ENODEV; 192 193 /* Hookup some exception handlers except machine check. We use the 194 * fwnmi area at 0x7000 to provide the glue space to OPAL 195 */ 196 glue = 0x7000; 197 opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER, 198 0, glue); 199 glue += 128; 200 opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); 201 #endif 202 203 return 0; 204 } 205 machine_early_initcall(powernv, opal_register_exception_handlers); 206 207 int opal_notifier_register(struct notifier_block *nb) 208 { 209 if (!nb) { 210 pr_warning("%s: Invalid argument (%p)\n", 211 __func__, nb); 212 return -EINVAL; 213 } 214 215 atomic_notifier_chain_register(&opal_notifier_head, nb); 216 return 0; 217 } 218 EXPORT_SYMBOL_GPL(opal_notifier_register); 219 220 int opal_notifier_unregister(struct notifier_block *nb) 221 { 222 if (!nb) { 223 pr_warning("%s: Invalid argument (%p)\n", 224 __func__, nb); 225 return -EINVAL; 226 } 227 228 atomic_notifier_chain_unregister(&opal_notifier_head, nb); 229 return 0; 230 } 231 EXPORT_SYMBOL_GPL(opal_notifier_unregister); 232 233 static void opal_do_notifier(uint64_t events) 234 { 235 unsigned long flags; 236 uint64_t changed_mask; 237 238 if (atomic_read(&opal_notifier_hold)) 239 return; 240 241 spin_lock_irqsave(&opal_notifier_lock, flags); 242 changed_mask = last_notified_mask ^ events; 243 last_notified_mask = events; 244 spin_unlock_irqrestore(&opal_notifier_lock, flags); 245 246 /* 247 * We feed with the event bits and changed bits for 248 * enough information to the callback. 249 */ 250 atomic_notifier_call_chain(&opal_notifier_head, 251 events, (void *)changed_mask); 252 } 253 254 void opal_notifier_update_evt(uint64_t evt_mask, 255 uint64_t evt_val) 256 { 257 unsigned long flags; 258 259 spin_lock_irqsave(&opal_notifier_lock, flags); 260 last_notified_mask &= ~evt_mask; 261 last_notified_mask |= evt_val; 262 spin_unlock_irqrestore(&opal_notifier_lock, flags); 263 } 264 265 void opal_notifier_enable(void) 266 { 267 int64_t rc; 268 __be64 evt = 0; 269 270 atomic_set(&opal_notifier_hold, 0); 271 272 /* Process pending events */ 273 rc = opal_poll_events(&evt); 274 if (rc == OPAL_SUCCESS && evt) 275 opal_do_notifier(be64_to_cpu(evt)); 276 } 277 278 void opal_notifier_disable(void) 279 { 280 atomic_set(&opal_notifier_hold, 1); 281 } 282 283 /* 284 * Opal message notifier based on message type. Allow subscribers to get 285 * notified for specific messgae type. 286 */ 287 int opal_message_notifier_register(enum OpalMessageType msg_type, 288 struct notifier_block *nb) 289 { 290 if (!nb) { 291 pr_warning("%s: Invalid argument (%p)\n", 292 __func__, nb); 293 return -EINVAL; 294 } 295 if (msg_type > OPAL_MSG_TYPE_MAX) { 296 pr_warning("%s: Invalid message type argument (%d)\n", 297 __func__, msg_type); 298 return -EINVAL; 299 } 300 return atomic_notifier_chain_register( 301 &opal_msg_notifier_head[msg_type], nb); 302 } 303 304 static void opal_message_do_notify(uint32_t msg_type, void *msg) 305 { 306 /* notify subscribers */ 307 atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type], 308 msg_type, msg); 309 } 310 311 static void opal_handle_message(void) 312 { 313 s64 ret; 314 /* 315 * TODO: pre-allocate a message buffer depending on opal-msg-size 316 * value in /proc/device-tree. 317 */ 318 static struct opal_msg msg; 319 u32 type; 320 321 ret = opal_get_msg(__pa(&msg), sizeof(msg)); 322 /* No opal message pending. */ 323 if (ret == OPAL_RESOURCE) 324 return; 325 326 /* check for errors. */ 327 if (ret) { 328 pr_warning("%s: Failed to retrive opal message, err=%lld\n", 329 __func__, ret); 330 return; 331 } 332 333 type = be32_to_cpu(msg.msg_type); 334 335 /* Sanity check */ 336 if (type > OPAL_MSG_TYPE_MAX) { 337 pr_warning("%s: Unknown message type: %u\n", __func__, type); 338 return; 339 } 340 opal_message_do_notify(type, (void *)&msg); 341 } 342 343 static int opal_message_notify(struct notifier_block *nb, 344 unsigned long events, void *change) 345 { 346 if (events & OPAL_EVENT_MSG_PENDING) 347 opal_handle_message(); 348 return 0; 349 } 350 351 static struct notifier_block opal_message_nb = { 352 .notifier_call = opal_message_notify, 353 .next = NULL, 354 .priority = 0, 355 }; 356 357 static int __init opal_message_init(void) 358 { 359 int ret, i; 360 361 for (i = 0; i < OPAL_MSG_TYPE_MAX; i++) 362 ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]); 363 364 ret = opal_notifier_register(&opal_message_nb); 365 if (ret) { 366 pr_err("%s: Can't register OPAL event notifier (%d)\n", 367 __func__, ret); 368 return ret; 369 } 370 return 0; 371 } 372 machine_early_initcall(powernv, opal_message_init); 373 374 int opal_get_chars(uint32_t vtermno, char *buf, int count) 375 { 376 s64 rc; 377 __be64 evt, len; 378 379 if (!opal.entry) 380 return -ENODEV; 381 opal_poll_events(&evt); 382 if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0) 383 return 0; 384 len = cpu_to_be64(count); 385 rc = opal_console_read(vtermno, &len, buf); 386 if (rc == OPAL_SUCCESS) 387 return be64_to_cpu(len); 388 return 0; 389 } 390 391 int opal_put_chars(uint32_t vtermno, const char *data, int total_len) 392 { 393 int written = 0; 394 __be64 olen; 395 s64 len, rc; 396 unsigned long flags; 397 __be64 evt; 398 399 if (!opal.entry) 400 return -ENODEV; 401 402 /* We want put_chars to be atomic to avoid mangling of hvsi 403 * packets. To do that, we first test for room and return 404 * -EAGAIN if there isn't enough. 405 * 406 * Unfortunately, opal_console_write_buffer_space() doesn't 407 * appear to work on opal v1, so we just assume there is 408 * enough room and be done with it 409 */ 410 spin_lock_irqsave(&opal_write_lock, flags); 411 if (firmware_has_feature(FW_FEATURE_OPALv2)) { 412 rc = opal_console_write_buffer_space(vtermno, &olen); 413 len = be64_to_cpu(olen); 414 if (rc || len < total_len) { 415 spin_unlock_irqrestore(&opal_write_lock, flags); 416 /* Closed -> drop characters */ 417 if (rc) 418 return total_len; 419 opal_poll_events(NULL); 420 return -EAGAIN; 421 } 422 } 423 424 /* We still try to handle partial completions, though they 425 * should no longer happen. 426 */ 427 rc = OPAL_BUSY; 428 while(total_len > 0 && (rc == OPAL_BUSY || 429 rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) { 430 olen = cpu_to_be64(total_len); 431 rc = opal_console_write(vtermno, &olen, data); 432 len = be64_to_cpu(olen); 433 434 /* Closed or other error drop */ 435 if (rc != OPAL_SUCCESS && rc != OPAL_BUSY && 436 rc != OPAL_BUSY_EVENT) { 437 written = total_len; 438 break; 439 } 440 if (rc == OPAL_SUCCESS) { 441 total_len -= len; 442 data += len; 443 written += len; 444 } 445 /* This is a bit nasty but we need that for the console to 446 * flush when there aren't any interrupts. We will clean 447 * things a bit later to limit that to synchronous path 448 * such as the kernel console and xmon/udbg 449 */ 450 do 451 opal_poll_events(&evt); 452 while(rc == OPAL_SUCCESS && 453 (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT)); 454 } 455 spin_unlock_irqrestore(&opal_write_lock, flags); 456 return written; 457 } 458 459 static int opal_recover_mce(struct pt_regs *regs, 460 struct machine_check_event *evt) 461 { 462 int recovered = 0; 463 uint64_t ea = get_mce_fault_addr(evt); 464 465 if (!(regs->msr & MSR_RI)) { 466 /* If MSR_RI isn't set, we cannot recover */ 467 recovered = 0; 468 } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { 469 /* Platform corrected itself */ 470 recovered = 1; 471 } else if (ea && !is_kernel_addr(ea)) { 472 /* 473 * Faulting address is not in kernel text. We should be fine. 474 * We need to find which process uses this address. 475 * For now, kill the task if we have received exception when 476 * in userspace. 477 * 478 * TODO: Queue up this address for hwpoisioning later. 479 */ 480 if (user_mode(regs) && !is_global_init(current)) { 481 _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); 482 recovered = 1; 483 } else 484 recovered = 0; 485 } else if (user_mode(regs) && !is_global_init(current) && 486 evt->severity == MCE_SEV_ERROR_SYNC) { 487 /* 488 * If we have received a synchronous error when in userspace 489 * kill the task. 490 */ 491 _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); 492 recovered = 1; 493 } 494 return recovered; 495 } 496 497 int opal_machine_check(struct pt_regs *regs) 498 { 499 struct machine_check_event evt; 500 501 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 502 return 0; 503 504 /* Print things out */ 505 if (evt.version != MCE_V1) { 506 pr_err("Machine Check Exception, Unknown event version %d !\n", 507 evt.version); 508 return 0; 509 } 510 machine_check_print_event_info(&evt); 511 512 if (opal_recover_mce(regs, &evt)) 513 return 1; 514 return 0; 515 } 516 517 static uint64_t find_recovery_address(uint64_t nip) 518 { 519 int i; 520 521 for (i = 0; i < mc_recoverable_range_len; i++) 522 if ((nip >= mc_recoverable_range[i].start_addr) && 523 (nip < mc_recoverable_range[i].end_addr)) 524 return mc_recoverable_range[i].recover_addr; 525 return 0; 526 } 527 528 bool opal_mce_check_early_recovery(struct pt_regs *regs) 529 { 530 uint64_t recover_addr = 0; 531 532 if (!opal.base || !opal.size) 533 goto out; 534 535 if ((regs->nip >= opal.base) && 536 (regs->nip <= (opal.base + opal.size))) 537 recover_addr = find_recovery_address(regs->nip); 538 539 /* 540 * Setup regs->nip to rfi into fixup address. 541 */ 542 if (recover_addr) 543 regs->nip = recover_addr; 544 545 out: 546 return !!recover_addr; 547 } 548 549 static irqreturn_t opal_interrupt(int irq, void *data) 550 { 551 __be64 events; 552 553 opal_handle_interrupt(virq_to_hw(irq), &events); 554 555 opal_do_notifier(be64_to_cpu(events)); 556 557 return IRQ_HANDLED; 558 } 559 560 static int opal_sysfs_init(void) 561 { 562 opal_kobj = kobject_create_and_add("opal", firmware_kobj); 563 if (!opal_kobj) { 564 pr_warn("kobject_create_and_add opal failed\n"); 565 return -ENOMEM; 566 } 567 568 return 0; 569 } 570 571 static int __init opal_init(void) 572 { 573 struct device_node *np, *consoles; 574 const __be32 *irqs; 575 int rc, i, irqlen; 576 577 opal_node = of_find_node_by_path("/ibm,opal"); 578 if (!opal_node) { 579 pr_warn("opal: Node not found\n"); 580 return -ENODEV; 581 } 582 583 /* Register OPAL consoles if any ports */ 584 if (firmware_has_feature(FW_FEATURE_OPALv2)) 585 consoles = of_find_node_by_path("/ibm,opal/consoles"); 586 else 587 consoles = of_node_get(opal_node); 588 if (consoles) { 589 for_each_child_of_node(consoles, np) { 590 if (strcmp(np->name, "serial")) 591 continue; 592 of_platform_device_create(np, NULL, NULL); 593 } 594 of_node_put(consoles); 595 } 596 597 /* Find all OPAL interrupts and request them */ 598 irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); 599 pr_debug("opal: Found %d interrupts reserved for OPAL\n", 600 irqs ? (irqlen / 4) : 0); 601 opal_irq_count = irqlen / 4; 602 opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); 603 for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) { 604 unsigned int hwirq = be32_to_cpup(irqs); 605 unsigned int irq = irq_create_mapping(NULL, hwirq); 606 if (irq == NO_IRQ) { 607 pr_warning("opal: Failed to map irq 0x%x\n", hwirq); 608 continue; 609 } 610 rc = request_irq(irq, opal_interrupt, 0, "opal", NULL); 611 if (rc) 612 pr_warning("opal: Error %d requesting irq %d" 613 " (0x%x)\n", rc, irq, hwirq); 614 opal_irqs[i] = irq; 615 } 616 617 /* Create "opal" kobject under /sys/firmware */ 618 rc = opal_sysfs_init(); 619 if (rc == 0) { 620 /* Setup error log interface */ 621 rc = opal_elog_init(); 622 /* Setup code update interface */ 623 opal_flash_init(); 624 /* Setup platform dump extract interface */ 625 opal_platform_dump_init(); 626 /* Setup system parameters interface */ 627 opal_sys_param_init(); 628 /* Setup message log interface. */ 629 opal_msglog_init(); 630 } 631 632 return 0; 633 } 634 machine_subsys_initcall(powernv, opal_init); 635 636 void opal_shutdown(void) 637 { 638 unsigned int i; 639 long rc = OPAL_BUSY; 640 641 /* First free interrupts, which will also mask them */ 642 for (i = 0; i < opal_irq_count; i++) { 643 if (opal_irqs[i]) 644 free_irq(opal_irqs[i], NULL); 645 opal_irqs[i] = 0; 646 } 647 648 /* 649 * Then sync with OPAL which ensure anything that can 650 * potentially write to our memory has completed such 651 * as an ongoing dump retrieval 652 */ 653 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 654 rc = opal_sync_host_reboot(); 655 if (rc == OPAL_BUSY) 656 opal_poll_events(NULL); 657 else 658 mdelay(10); 659 } 660 } 661 662 /* Export this so that test modules can use it */ 663 EXPORT_SYMBOL_GPL(opal_invalid_call); 664 665 /* Convert a region of vmalloc memory to an opal sg list */ 666 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, 667 unsigned long vmalloc_size) 668 { 669 struct opal_sg_list *sg, *first = NULL; 670 unsigned long i = 0; 671 672 sg = kzalloc(PAGE_SIZE, GFP_KERNEL); 673 if (!sg) 674 goto nomem; 675 676 first = sg; 677 678 while (vmalloc_size > 0) { 679 uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT; 680 uint64_t length = min(vmalloc_size, PAGE_SIZE); 681 682 sg->entry[i].data = cpu_to_be64(data); 683 sg->entry[i].length = cpu_to_be64(length); 684 i++; 685 686 if (i >= SG_ENTRIES_PER_NODE) { 687 struct opal_sg_list *next; 688 689 next = kzalloc(PAGE_SIZE, GFP_KERNEL); 690 if (!next) 691 goto nomem; 692 693 sg->length = cpu_to_be64( 694 i * sizeof(struct opal_sg_entry) + 16); 695 i = 0; 696 sg->next = cpu_to_be64(__pa(next)); 697 sg = next; 698 } 699 700 vmalloc_addr += length; 701 vmalloc_size -= length; 702 } 703 704 sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16); 705 706 return first; 707 708 nomem: 709 pr_err("%s : Failed to allocate memory\n", __func__); 710 opal_free_sg_list(first); 711 return NULL; 712 } 713 714 void opal_free_sg_list(struct opal_sg_list *sg) 715 { 716 while (sg) { 717 uint64_t next = be64_to_cpu(sg->next); 718 719 kfree(sg); 720 721 if (next) 722 sg = __va(next); 723 else 724 sg = NULL; 725 } 726 } 727