/*
 * PowerNV OPAL high level interfaces
 *
 * Copyright 2011 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>

#include "powernv.h"

/* /sys/firmware/opal */
struct kobject *opal_kobj;

struct opal {
	u64 base;
	u64 entry;
	u64 size;
} opal;

struct mcheck_recoverable_range {
	u64 start_addr;
	u64 end_addr;
	u64 recover_addr;
};

static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
extern u64 opal_mc_secondary_handler[];
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static DEFINE_SPINLOCK(opal_notifier_lock);
static uint64_t last_notified_mask = 0x0ul;
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);

int __init early_init_dt_scan_opal(unsigned long node,
				   const char *uname, int depth, void *data)
{
	const void *basep, *entryp, *sizep;
	unsigned long basesz, entrysz, runtimesz;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
	sizep  = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);

	if (!basep || !entryp || !sizep)
		return 1;

	opal.base  = of_read_number(basep, basesz/4);
	opal.entry = of_read_number(entryp, entrysz/4);
	opal.size  = of_read_number(sizep, runtimesz/4);

	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%ld)\n",
		 opal.base, basep, basesz);
	pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%ld)\n",
		 opal.entry, entryp, entrysz);
	pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%ld)\n",
		 opal.size, sizep, runtimesz);

	powerpc_firmware_features |= FW_FEATURE_OPAL;
	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
		powerpc_firmware_features |= FW_FEATURE_OPALv2;
		powerpc_firmware_features |= FW_FEATURE_OPALv3;
		printk("OPAL V3 detected !\n");
	} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
		powerpc_firmware_features |= FW_FEATURE_OPALv2;
		printk("OPAL V2 detected !\n");
	} else {
		printk("OPAL V1 detected !\n");
	}

	return 1;
}
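/*
 * Illustrative only: the "mcheck-recoverable-ranges" property parsed by
 * the scanner below packs 5 cells per entry -- 2 cells of start address,
 * 1 cell of length, 2 cells of recovery address. A hypothetical device
 * tree fragment (addresses made up for illustration) would look like:
 *
 *	ibm,opal {
 *		mcheck-recoverable-ranges = <0x0 0x30002000 0x1000
 *					     0x0 0x30010000>;
 *	};
 */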
int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
				const char *uname, int depth, void *data)
{
	unsigned long i, psize, size;
	const __be32 *prop;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);

	if (!prop)
		return 1;

	pr_debug("Found machine check recoverable ranges.\n");

	/*
	 * Calculate the number of available entries.
	 *
	 * Each recoverable address range entry is (start address, len,
	 * recovery address), 2 cells each for start and recovery address,
	 * 1 cell for len, totalling 5 cells per entry.
	 */
	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);

	/* Sanity check */
	if (!mc_recoverable_range_len)
		return 1;

	/* Size required to hold all the entries. */
	size = mc_recoverable_range_len *
			sizeof(struct mcheck_recoverable_range);

	/*
	 * Allocate a buffer to hold the MC recoverable ranges. We will be
	 * accessing them in real mode, hence the buffer needs to be within
	 * the RMO region.
	 */
	mc_recoverable_range = __va(memblock_alloc_base(size, __alignof__(u64),
							ppc64_rma_size));
	memset(mc_recoverable_range, 0, size);

	for (i = 0; i < mc_recoverable_range_len; i++) {
		mc_recoverable_range[i].start_addr =
					of_read_number(prop + (i * 5) + 0, 2);
		mc_recoverable_range[i].end_addr =
					mc_recoverable_range[i].start_addr +
					of_read_number(prop + (i * 5) + 2, 1);
		mc_recoverable_range[i].recover_addr =
					of_read_number(prop + (i * 5) + 3, 2);

		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
				mc_recoverable_range[i].start_addr,
				mc_recoverable_range[i].end_addr,
				mc_recoverable_range[i].recover_addr);
	}
	return 1;
}

static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
	u64 glue;

	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
		return -ENODEV;

	/*
	 * Hook up some exception handlers, except machine check. We use
	 * the fwnmi area at 0x7000 to provide the glue space to OPAL.
	 */
	glue = 0x7000;
	opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
					0, glue);
	glue += 128;
	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif

	return 0;
}

early_initcall(opal_register_exception_handlers);

int opal_notifier_register(struct notifier_block *nb)
{
	if (!nb) {
		pr_warning("%s: Invalid argument (%p)\n",
			   __func__, nb);
		return -EINVAL;
	}

	atomic_notifier_chain_register(&opal_notifier_head, nb);
	return 0;
}
EXPORT_SYMBOL_GPL(opal_notifier_register);

int opal_notifier_unregister(struct notifier_block *nb)
{
	if (!nb) {
		pr_warning("%s: Invalid argument (%p)\n",
			   __func__, nb);
		return -EINVAL;
	}

	atomic_notifier_chain_unregister(&opal_notifier_head, nb);
	return 0;
}
EXPORT_SYMBOL_GPL(opal_notifier_unregister);
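/*
 * A minimal usage sketch (the callback below is hypothetical, not part of
 * this file): a subscriber registers a notifier_block and receives the raw
 * event mask, with the changed bits cast into the void pointer argument:
 *
 *	static int my_opal_event(struct notifier_block *nb,
 *				 unsigned long events, void *change)
 *	{
 *		if (events & OPAL_EVENT_MSG_PENDING)
 *			...handle it...;
 *		return 0;
 *	}
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_opal_event,
 *	};
 *
 *	opal_notifier_register(&my_nb);
 */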
static void opal_do_notifier(uint64_t events)
{
	unsigned long flags;
	uint64_t changed_mask;

	if (atomic_read(&opal_notifier_hold))
		return;

	spin_lock_irqsave(&opal_notifier_lock, flags);
	changed_mask = last_notified_mask ^ events;
	last_notified_mask = events;
	spin_unlock_irqrestore(&opal_notifier_lock, flags);

	/*
	 * Pass both the event bits and the changed bits so the callback
	 * has enough information to act on.
	 */
	atomic_notifier_call_chain(&opal_notifier_head,
				   events, (void *)changed_mask);
}

void opal_notifier_update_evt(uint64_t evt_mask,
			      uint64_t evt_val)
{
	unsigned long flags;

	spin_lock_irqsave(&opal_notifier_lock, flags);
	last_notified_mask &= ~evt_mask;
	last_notified_mask |= evt_val;
	spin_unlock_irqrestore(&opal_notifier_lock, flags);
}

void opal_notifier_enable(void)
{
	int64_t rc;
	uint64_t evt = 0;

	atomic_set(&opal_notifier_hold, 0);

	/* Process pending events */
	rc = opal_poll_events(&evt);
	if (rc == OPAL_SUCCESS && evt)
		opal_do_notifier(evt);
}

void opal_notifier_disable(void)
{
	atomic_set(&opal_notifier_hold, 1);
}

/*
 * OPAL message notifier based on message type. Allows subscribers to get
 * notified for a specific message type.
 */
int opal_message_notifier_register(enum OpalMessageType msg_type,
				   struct notifier_block *nb)
{
	if (!nb) {
		pr_warning("%s: Invalid argument (%p)\n",
			   __func__, nb);
		return -EINVAL;
	}
	if (msg_type >= OPAL_MSG_TYPE_MAX) {
		pr_warning("%s: Invalid message type argument (%d)\n",
			   __func__, msg_type);
		return -EINVAL;
	}
	return atomic_notifier_chain_register(
				&opal_msg_notifier_head[msg_type], nb);
}

static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
	/* notify subscribers */
	atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
				   msg_type, msg);
}

static void opal_handle_message(void)
{
	s64 ret;
	/*
	 * TODO: pre-allocate a message buffer depending on opal-msg-size
	 * value in /proc/device-tree.
	 */
	static struct opal_msg msg;
	u32 type;

	ret = opal_get_msg(__pa(&msg), sizeof(msg));
	/* No opal message pending. */
	if (ret == OPAL_RESOURCE)
		return;

	/* Check for errors. */
	if (ret) {
		pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
			   __func__, ret);
		return;
	}

	type = be32_to_cpu(msg.msg_type);

	/* Sanity check */
	if (type >= OPAL_MSG_TYPE_MAX) {
		pr_warning("%s: Unknown message type: %u\n", __func__, type);
		return;
	}
	opal_message_do_notify(type, (void *)&msg);
}

static int opal_message_notify(struct notifier_block *nb,
			       unsigned long events, void *change)
{
	if (events & OPAL_EVENT_MSG_PENDING)
		opal_handle_message();
	return 0;
}

static struct notifier_block opal_message_nb = {
	.notifier_call	= opal_message_notify,
	.next		= NULL,
	.priority	= 0,
};

static int __init opal_message_init(void)
{
	int ret, i;

	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);

	ret = opal_notifier_register(&opal_message_nb);
	if (ret) {
		pr_err("%s: Can't register OPAL event notifier (%d)\n",
		       __func__, ret);
		return ret;
	}
	return 0;
}
early_initcall(opal_message_init);
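/*
 * A sketch of per-type subscription (the handler shown is hypothetical;
 * the type constant comes from enum OpalMessageType):
 *
 *	static int my_msg_handler(struct notifier_block *nb,
 *				  unsigned long type, void *msg)
 *	{
 *		struct opal_msg *m = msg;
 *		...inspect m->params...;
 *		return 0;
 *	}
 *	static struct notifier_block my_msg_nb = {
 *		.notifier_call = my_msg_handler,
 *	};
 *
 *	opal_message_notifier_register(OPAL_MSG_EPOW, &my_msg_nb);
 */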
int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
	s64 rc;
	__be64 evt, len;

	if (!opal.entry)
		return -ENODEV;
	opal_poll_events(&evt);
	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
		return 0;
	len = cpu_to_be64(count);
	rc = opal_console_read(vtermno, &len, buf);
	if (rc == OPAL_SUCCESS)
		return be64_to_cpu(len);
	return 0;
}

int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
	int written = 0;
	__be64 olen;
	s64 len, rc;
	unsigned long flags;
	__be64 evt;

	if (!opal.entry)
		return -ENODEV;

	/* We want put_chars to be atomic to avoid mangling of hvsi
	 * packets. To do that, we first test for room and return
	 * -EAGAIN if there isn't enough.
	 *
	 * Unfortunately, opal_console_write_buffer_space() doesn't
	 * appear to work on opal v1, so we just assume there is
	 * enough room and carry on.
	 */
	spin_lock_irqsave(&opal_write_lock, flags);
	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
		rc = opal_console_write_buffer_space(vtermno, &olen);
		len = be64_to_cpu(olen);
		if (rc || len < total_len) {
			spin_unlock_irqrestore(&opal_write_lock, flags);
			/* Closed -> drop characters */
			if (rc)
				return total_len;
			opal_poll_events(NULL);
			return -EAGAIN;
		}
	}

	/* We still try to handle partial completions, though they
	 * should no longer happen.
	 */
	rc = OPAL_BUSY;
	while (total_len > 0 && (rc == OPAL_BUSY ||
				 rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
		olen = cpu_to_be64(total_len);
		rc = opal_console_write(vtermno, &olen, data);
		len = be64_to_cpu(olen);

		/* Closed or other error -> drop */
		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
		    rc != OPAL_BUSY_EVENT) {
			written = total_len;
			break;
		}
		if (rc == OPAL_SUCCESS) {
			total_len -= len;
			data += len;
			written += len;
		}
		/* This is a bit nasty but we need that for the console to
		 * flush when there aren't any interrupts. We will clean
		 * things up a bit later to limit that to synchronous paths
		 * such as the kernel console and xmon/udbg.
		 */
		do
			opal_poll_events(&evt);
		while (rc == OPAL_SUCCESS &&
			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
	}
	spin_unlock_irqrestore(&opal_write_lock, flags);
	return written;
}
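/*
 * Recovery policy for the machine check handling below: without MSR_RI
 * the interrupted state cannot be trusted, so we never recover; if the
 * platform reports the error as already corrected we carry on; a fault
 * on a userspace address, or a synchronous error taken while in
 * userspace, is contained by sending the offending task SIGBUS.
 */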
static int opal_recover_mce(struct pt_regs *regs,
			    struct machine_check_event *evt)
{
	int recovered = 0;
	uint64_t ea = get_mce_fault_addr(evt);

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		recovered = 0;
	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
		/* Platform corrected itself */
		recovered = 1;
	} else if (ea && !is_kernel_addr(ea)) {
		/*
		 * The faulting address is not in kernel text. We should be
		 * fine. We need to find which process uses this address.
		 * For now, kill the task if we received the exception while
		 * in userspace.
		 *
		 * TODO: Queue up this address for hwpoisoning later.
		 */
		if (user_mode(regs) && !is_global_init(current)) {
			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
			recovered = 1;
		} else
			recovered = 0;
	} else if (user_mode(regs) && !is_global_init(current) &&
		   evt->severity == MCE_SEV_ERROR_SYNC) {
		/*
		 * If we received a synchronous error while in userspace,
		 * kill the task.
		 */
		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}
	return recovered;
}

int opal_machine_check(struct pt_regs *regs)
{
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return 0;

	/* Print things out */
	if (evt.version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt.version);
		return 0;
	}
	machine_check_print_event_info(&evt);

	if (opal_recover_mce(regs, &evt))
		return 1;
	return 0;
}

static uint64_t find_recovery_address(uint64_t nip)
{
	int i;

	for (i = 0; i < mc_recoverable_range_len; i++)
		if ((nip >= mc_recoverable_range[i].start_addr) &&
		    (nip < mc_recoverable_range[i].end_addr))
			return mc_recoverable_range[i].recover_addr;
	return 0;
}

bool opal_mce_check_early_recovery(struct pt_regs *regs)
{
	uint64_t recover_addr = 0;

	if (!opal.base || !opal.size)
		goto out;

	if ((regs->nip >= opal.base) &&
	    (regs->nip <= (opal.base + opal.size)))
		recover_addr = find_recovery_address(regs->nip);

	/*
	 * Set up regs->nip to rfi into the fixup address.
	 */
	if (recover_addr)
		regs->nip = recover_addr;

out:
	return !!recover_addr;
}
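/*
 * Every interrupt listed in the "opal-interrupts" device-tree property is
 * routed here; OPAL hands back a mask of pending events, which we feed
 * straight into the notifier chain above.
 */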
static irqreturn_t opal_interrupt(int irq, void *data)
{
	__be64 events;

	opal_handle_interrupt(virq_to_hw(irq), &events);

	opal_do_notifier(be64_to_cpu(events));

	return IRQ_HANDLED;
}

static int opal_sysfs_init(void)
{
	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
	if (!opal_kobj) {
		pr_warn("kobject_create_and_add opal failed\n");
		return -ENOMEM;
	}

	return 0;
}

static int __init opal_init(void)
{
	struct device_node *np, *consoles;
	const __be32 *irqs;
	int rc, i, irqlen;

	opal_node = of_find_node_by_path("/ibm,opal");
	if (!opal_node) {
		pr_warn("opal: Node not found\n");
		return -ENODEV;
	}

	/* Register OPAL consoles if any ports */
	if (firmware_has_feature(FW_FEATURE_OPALv2))
		consoles = of_find_node_by_path("/ibm,opal/consoles");
	else
		consoles = of_node_get(opal_node);
	if (consoles) {
		for_each_child_of_node(consoles, np) {
			if (strcmp(np->name, "serial"))
				continue;
			of_platform_device_create(np, NULL, NULL);
		}
		of_node_put(consoles);
	}

	/* Find all OPAL interrupts and request them */
	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
	pr_debug("opal: Found %d interrupts reserved for OPAL\n",
		 irqs ? (irqlen / 4) : 0);
	opal_irq_count = irqs ? (irqlen / 4) : 0;
	opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
	for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) {
		unsigned int hwirq = be32_to_cpup(irqs);
		unsigned int irq = irq_create_mapping(NULL, hwirq);
		if (irq == NO_IRQ) {
			pr_warning("opal: Failed to map irq 0x%x\n", hwirq);
			continue;
		}
		rc = request_irq(irq, opal_interrupt, 0, "opal", NULL);
		if (rc)
			pr_warning("opal: Error %d requesting irq %d"
				   " (0x%x)\n", rc, irq, hwirq);
		opal_irqs[i] = irq;
	}

	/* Create "opal" kobject under /sys/firmware */
	rc = opal_sysfs_init();
	if (rc == 0) {
		/* Setup error log interface */
		rc = opal_elog_init();
		/* Setup code update interface */
		opal_flash_init();
		/* Setup platform dump extract interface */
		opal_platform_dump_init();
		/* Setup system parameters interface */
		opal_sys_param_init();
		/* Setup message log interface. */
		opal_msglog_init();
	}

	return 0;
}
subsys_initcall(opal_init);

void opal_shutdown(void)
{
	unsigned int i;
	long rc = OPAL_BUSY;

	/* First free interrupts, which will also mask them */
	for (i = 0; i < opal_irq_count; i++) {
		if (opal_irqs[i])
			free_irq(opal_irqs[i], NULL);
		opal_irqs[i] = 0;
	}

	/*
	 * Then sync with OPAL, which ensures that anything that can
	 * potentially write to our memory, such as an ongoing dump
	 * retrieval, has completed.
	 */
	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		rc = opal_sync_host_reboot();
		if (rc == OPAL_BUSY)
			opal_poll_events(NULL);
		else
			mdelay(10);
	}
}

/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);