/*
 * PowerNV OPAL high level interfaces
 *
 * Copyright 2011 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>

#include "powernv.h"

/* /sys/firmware/opal */
struct kobject *opal_kobj;

struct opal {
	u64 base;
	u64 entry;
	u64 size;
} opal;

struct mcheck_recoverable_range {
	u64 start_addr;
	u64 end_addr;
	u64 recover_addr;
};

static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
extern u64 opal_mc_secondary_handler[];
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static DEFINE_SPINLOCK(opal_notifier_lock);
static uint64_t last_notified_mask = 0x0ul;
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);

static void opal_reinit_cores(void)
{
	/* Do the actual re-init. This will clobber all FPRs, VRs, etc...
	 *
	 * It will preserve non-volatile GPRs and HSPRG0/1. It will
	 * also restore HIDs and other SPRs to their original value
	 * but it might clobber a bunch.
	 */
#ifdef __BIG_ENDIAN__
	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
#else
	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
#endif
}

int __init early_init_dt_scan_opal(unsigned long node,
				   const char *uname, int depth, void *data)
{
	const void *basep, *entryp, *sizep;
	int basesz, entrysz, runtimesz;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
	sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);

	if (!basep || !entryp || !sizep)
		return 1;

	opal.base = of_read_number(basep, basesz / 4);
	opal.entry = of_read_number(entryp, entrysz / 4);
	opal.size = of_read_number(sizep, runtimesz / 4);

	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
		 opal.base, basep, basesz);
	pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
		 opal.entry, entryp, entrysz);
	pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%d)\n",
		 opal.size, sizep, runtimesz);

	powerpc_firmware_features |= FW_FEATURE_OPAL;
	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
		powerpc_firmware_features |= FW_FEATURE_OPALv2;
		powerpc_firmware_features |= FW_FEATURE_OPALv3;
		printk("OPAL V3 detected !\n");
	} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
		powerpc_firmware_features |= FW_FEATURE_OPALv2;
		printk("OPAL V2 detected !\n");
	} else {
		printk("OPAL V1 detected !\n");
	}

	/* Reinit all cores with the right endianness */
	opal_reinit_cores();

	/* Restore some bits */
	if (cur_cpu_spec->cpu_restore)
		cur_cpu_spec->cpu_restore();

	return 1;
}

int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
				   const char *uname, int depth, void *data)
{
	int i, psize, size;
	const __be32 *prop;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);

	if (!prop)
		return 1;

	pr_debug("Found machine check recoverable ranges.\n");

	/*
	 * Calculate the number of available entries.
	 *
	 * Each recoverable address range entry is (start address, len,
	 * recovery address): 2 cells each for the start and recovery
	 * addresses, 1 cell for the len, totalling 5 cells per entry.
	 */
	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);

	/* Sanity check */
	if (!mc_recoverable_range_len)
		return 1;

	/* Size required to hold all the entries. */
	size = mc_recoverable_range_len *
			sizeof(struct mcheck_recoverable_range);

	/*
	 * Allocate a buffer to hold the MC recoverable ranges. They are
	 * accessed in real mode, so the buffer must lie within the
	 * RMO region.
	 */
	mc_recoverable_range = __va(memblock_alloc_base(size, __alignof__(u64),
							ppc64_rma_size));
	memset(mc_recoverable_range, 0, size);

	for (i = 0; i < mc_recoverable_range_len; i++) {
		mc_recoverable_range[i].start_addr =
					of_read_number(prop + (i * 5) + 0, 2);
		mc_recoverable_range[i].end_addr =
					mc_recoverable_range[i].start_addr +
					of_read_number(prop + (i * 5) + 2, 1);
		mc_recoverable_range[i].recover_addr =
					of_read_number(prop + (i * 5) + 3, 2);

		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
			 mc_recoverable_range[i].start_addr,
			 mc_recoverable_range[i].end_addr,
			 mc_recoverable_range[i].recover_addr);
	}
	return 1;
}

static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
	u64 glue;

	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
		return -ENODEV;

	/* Hook up some exception handlers except machine check. We use the
	 * fwnmi area at 0x7000 to provide the glue space to OPAL.
	 */
	glue = 0x7000;
	opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
					0, glue);
	glue += 128;
	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif

	return 0;
}

early_initcall(opal_register_exception_handlers);

int opal_notifier_register(struct notifier_block *nb)
{
	if (!nb) {
		pr_warning("%s: Invalid argument (%p)\n",
			   __func__, nb);
		return -EINVAL;
	}

	atomic_notifier_chain_register(&opal_notifier_head, nb);
	return 0;
}
EXPORT_SYMBOL_GPL(opal_notifier_register);

int opal_notifier_unregister(struct notifier_block *nb)
{
	if (!nb) {
		pr_warning("%s: Invalid argument (%p)\n",
			   __func__, nb);
		return -EINVAL;
	}

	atomic_notifier_chain_unregister(&opal_notifier_head, nb);
	return 0;
}
EXPORT_SYMBOL_GPL(opal_notifier_unregister);

static void opal_do_notifier(uint64_t events)
{
	unsigned long flags;
	uint64_t changed_mask;

	if (atomic_read(&opal_notifier_hold))
		return;

	spin_lock_irqsave(&opal_notifier_lock, flags);
	changed_mask = last_notified_mask ^ events;
	last_notified_mask = events;
	spin_unlock_irqrestore(&opal_notifier_lock, flags);

	/*
	 * Pass both the event bits and the changed bits so the
	 * callbacks have enough information.
	 */
	atomic_notifier_call_chain(&opal_notifier_head,
				   events, (void *)changed_mask);
}

void opal_notifier_update_evt(uint64_t evt_mask,
			      uint64_t evt_val)
{
	unsigned long flags;

	spin_lock_irqsave(&opal_notifier_lock, flags);
	last_notified_mask &= ~evt_mask;
	last_notified_mask |= evt_val;
	spin_unlock_irqrestore(&opal_notifier_lock, flags);
}

void opal_notifier_enable(void)
{
	int64_t rc;
	__be64 evt = 0;

	atomic_set(&opal_notifier_hold, 0);

	/* Process pending events */
	rc = opal_poll_events(&evt);
	if (rc == OPAL_SUCCESS && evt)
		opal_do_notifier(be64_to_cpu(evt));
}

void opal_notifier_disable(void)
{
	atomic_set(&opal_notifier_hold, 1);
}

/*
 * OPAL message notifier based on message type. Allows subscribers to get
 * notified for a specific message type.
 */
int opal_message_notifier_register(enum OpalMessageType msg_type,
					struct notifier_block *nb)
{
	if (!nb) {
		pr_warning("%s: Invalid argument (%p)\n",
			   __func__, nb);
		return -EINVAL;
	}
	/* msg_type indexes opal_msg_notifier_head[], so it must be < MAX */
	if (msg_type >= OPAL_MSG_TYPE_MAX) {
		pr_warning("%s: Invalid message type argument (%d)\n",
			   __func__, msg_type);
		return -EINVAL;
	}
	return atomic_notifier_chain_register(
				&opal_msg_notifier_head[msg_type], nb);
}

static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
	/* notify subscribers */
	atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
					msg_type, msg);
}

static void opal_handle_message(void)
{
	s64 ret;
	/*
	 * TODO: pre-allocate a message buffer depending on the opal-msg-size
	 * value in /proc/device-tree.
	 */
	static struct opal_msg msg;
	u32 type;

	ret = opal_get_msg(__pa(&msg), sizeof(msg));
	/* No opal message pending. */
	if (ret == OPAL_RESOURCE)
		return;

	/* check for errors. */
	if (ret) {
		pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
				__func__, ret);
		return;
	}

	type = be32_to_cpu(msg.msg_type);

	/* Sanity check */
	if (type >= OPAL_MSG_TYPE_MAX) {
		pr_warning("%s: Unknown message type: %u\n", __func__, type);
		return;
	}
	opal_message_do_notify(type, (void *)&msg);
}

static int opal_message_notify(struct notifier_block *nb,
			       unsigned long events, void *change)
{
	if (events & OPAL_EVENT_MSG_PENDING)
		opal_handle_message();
	return 0;
}

static struct notifier_block opal_message_nb = {
	.notifier_call	= opal_message_notify,
	.next		= NULL,
	.priority	= 0,
};

static int __init opal_message_init(void)
{
	int ret, i;

	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);

	ret = opal_notifier_register(&opal_message_nb);
	if (ret) {
		pr_err("%s: Can't register OPAL event notifier (%d)\n",
		       __func__, ret);
		return ret;
	}
	return 0;
}
early_initcall(opal_message_init);

int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
	s64 rc;
	__be64 evt, len;

	if (!opal.entry)
		return -ENODEV;
	opal_poll_events(&evt);
	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
		return 0;
	len = cpu_to_be64(count);
	rc = opal_console_read(vtermno, &len, buf);
	if (rc == OPAL_SUCCESS)
		return be64_to_cpu(len);
	return 0;
}

int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
	int written = 0;
	__be64 olen;
	s64 len, rc;
	unsigned long flags;
	__be64 evt;

	if (!opal.entry)
		return -ENODEV;

	/* We want put_chars to be atomic to avoid mangling of hvsi
	 * packets. To do that, we first test for room and return
	 * -EAGAIN if there isn't enough.
	 *
	 * Unfortunately, opal_console_write_buffer_space() doesn't
	 * appear to work on opal v1, so we just assume there is
	 * enough room and are done with it.
	 */
	spin_lock_irqsave(&opal_write_lock, flags);
	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
		rc = opal_console_write_buffer_space(vtermno, &olen);
		len = be64_to_cpu(olen);
		if (rc || len < total_len) {
			spin_unlock_irqrestore(&opal_write_lock, flags);
			/* Closed -> drop characters */
			if (rc)
				return total_len;
			opal_poll_events(NULL);
			return -EAGAIN;
		}
	}

	/* We still try to handle partial completions, though they
	 * should no longer happen.
	 */
	rc = OPAL_BUSY;
	while (total_len > 0 && (rc == OPAL_BUSY ||
				 rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
		olen = cpu_to_be64(total_len);
		rc = opal_console_write(vtermno, &olen, data);
		len = be64_to_cpu(olen);

		/* Closed or other error: drop the characters */
		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
		    rc != OPAL_BUSY_EVENT) {
			written = total_len;
			break;
		}
		if (rc == OPAL_SUCCESS) {
			total_len -= len;
			data += len;
			written += len;
		}
		/* This is a bit nasty but we need it for the console to
		 * flush when there aren't any interrupts. We will clean
		 * things up a bit later to limit that to synchronous paths
		 * such as the kernel console and xmon/udbg.
		 */
		do
			opal_poll_events(&evt);
		while (rc == OPAL_SUCCESS &&
			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
	}
	spin_unlock_irqrestore(&opal_write_lock, flags);
	return written;
}

static int opal_recover_mce(struct pt_regs *regs,
					struct machine_check_event *evt)
{
	int recovered = 0;
	uint64_t ea = get_mce_fault_addr(evt);

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		recovered = 0;
	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
		/* Platform corrected itself */
		recovered = 1;
	} else if (ea && !is_kernel_addr(ea)) {
		/*
		 * The faulting address is not in kernel text. We should be
		 * fine. We need to find which process uses this address.
		 * For now, kill the task if we received the exception while
		 * in userspace.
		 *
		 * TODO: Queue up this address for hwpoisoning later.
		 */
		if (user_mode(regs) && !is_global_init(current)) {
			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
			recovered = 1;
		} else
			recovered = 0;
	} else if (user_mode(regs) && !is_global_init(current) &&
		   evt->severity == MCE_SEV_ERROR_SYNC) {
		/*
		 * If we received a synchronous error while in userspace,
		 * kill the task.
		 */
		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}
	return recovered;
}

int opal_machine_check(struct pt_regs *regs)
{
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return 0;

	/* Print things out */
	if (evt.version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt.version);
		return 0;
	}
	machine_check_print_event_info(&evt);

	if (opal_recover_mce(regs, &evt))
		return 1;
	return 0;
}

static uint64_t find_recovery_address(uint64_t nip)
{
	int i;

	for (i = 0; i < mc_recoverable_range_len; i++)
		if ((nip >= mc_recoverable_range[i].start_addr) &&
		    (nip < mc_recoverable_range[i].end_addr))
			return mc_recoverable_range[i].recover_addr;
	return 0;
}

bool opal_mce_check_early_recovery(struct pt_regs *regs)
{
	uint64_t recover_addr = 0;

	if (!opal.base || !opal.size)
		goto out;

	if ((regs->nip >= opal.base) &&
	    (regs->nip <= (opal.base + opal.size)))
		recover_addr = find_recovery_address(regs->nip);

	/*
	 * Setup regs->nip to rfi into fixup address.
	 */
	if (recover_addr)
		regs->nip = recover_addr;

out:
	return !!recover_addr;
}

static irqreturn_t opal_interrupt(int irq, void *data)
{
	__be64 events;

	opal_handle_interrupt(virq_to_hw(irq), &events);

	opal_do_notifier(be64_to_cpu(events));

	return IRQ_HANDLED;
}

static int opal_sysfs_init(void)
{
	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
	if (!opal_kobj) {
		pr_warn("kobject_create_and_add opal failed\n");
		return -ENOMEM;
	}

	return 0;
}

static int __init opal_init(void)
{
	struct device_node *np, *consoles;
	const __be32 *irqs;
	int rc, i, irqlen;

	opal_node = of_find_node_by_path("/ibm,opal");
	if (!opal_node) {
		pr_warn("opal: Node not found\n");
		return -ENODEV;
	}

	/* Register OPAL consoles if any ports */
	if (firmware_has_feature(FW_FEATURE_OPALv2))
		consoles = of_find_node_by_path("/ibm,opal/consoles");
	else
		consoles = of_node_get(opal_node);
	if (consoles) {
		for_each_child_of_node(consoles, np) {
			if (strcmp(np->name, "serial"))
				continue;
			of_platform_device_create(np, NULL, NULL);
		}
		of_node_put(consoles);
	}

	/* Find all OPAL interrupts and request them */
	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
	pr_debug("opal: Found %d interrupts reserved for OPAL\n",
		 irqs ? (irqlen / 4) : 0);
	/* irqlen is only valid when the property was found */
	opal_irq_count = irqs ? (irqlen / 4) : 0;
	opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
	for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) {
		unsigned int hwirq = be32_to_cpup(irqs);
		unsigned int irq = irq_create_mapping(NULL, hwirq);
		if (irq == NO_IRQ) {
			pr_warning("opal: Failed to map irq 0x%x\n", hwirq);
			continue;
		}
		rc = request_irq(irq, opal_interrupt, 0, "opal", NULL);
		if (rc)
			pr_warning("opal: Error %d requesting irq %d"
				   " (0x%x)\n", rc, irq, hwirq);
		opal_irqs[i] = irq;
	}

	/* Create "opal" kobject under /sys/firmware */
	rc = opal_sysfs_init();
	if (rc == 0) {
		/* Setup error log interface */
		rc = opal_elog_init();
		/* Setup code update interface */
		opal_flash_init();
		/* Setup platform dump extract interface */
		opal_platform_dump_init();
		/* Setup system parameters interface */
		opal_sys_param_init();
		/* Setup message log interface. */
		opal_msglog_init();
	}

	return 0;
}
subsys_initcall(opal_init);

void opal_shutdown(void)
{
	unsigned int i;
	long rc = OPAL_BUSY;

	/* First free interrupts, which will also mask them */
	for (i = 0; i < opal_irq_count; i++) {
		if (opal_irqs[i])
			free_irq(opal_irqs[i], NULL);
		opal_irqs[i] = 0;
	}

	/*
	 * Then sync with OPAL, which ensures that anything that can
	 * potentially write to our memory, such as an ongoing dump
	 * retrieval, has completed.
	 */
	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		rc = opal_sync_host_reboot();
		if (rc == OPAL_BUSY)
			opal_poll_events(NULL);
		else
			mdelay(10);
	}
}

/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);

/* Convert a region of vmalloc memory to an opal sg list */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
					     unsigned long vmalloc_size)
{
	struct opal_sg_list *sg, *first = NULL;
	unsigned long i = 0;

	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!sg)
		goto nomem;

	first = sg;

	while (vmalloc_size > 0) {
		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
		uint64_t length = min(vmalloc_size, PAGE_SIZE);

		sg->entry[i].data = cpu_to_be64(data);
		sg->entry[i].length = cpu_to_be64(length);
		i++;

		if (i >= SG_ENTRIES_PER_NODE) {
			struct opal_sg_list *next;

			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
			if (!next)
				goto nomem;

			/* Node length includes the 16 byte list header */
			sg->length = cpu_to_be64(
					i * sizeof(struct opal_sg_entry) + 16);
			i = 0;
			sg->next = cpu_to_be64(__pa(next));
			sg = next;
		}

		vmalloc_addr += length;
		vmalloc_size -= length;
	}

	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);

	return first;

nomem:
	pr_err("%s : Failed to allocate memory\n", __func__);
	opal_free_sg_list(first);
	return NULL;
}

void opal_free_sg_list(struct opal_sg_list *sg)
{
	while (sg) {
		uint64_t next = be64_to_cpu(sg->next);

		kfree(sg);

		if (next)
			sg = __va(next);
		else
			sg = NULL;
	}
}