// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2001 Dave Engebretsen IBM Corporation
 */

#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/of.h>
#include <linux/fs.h>
#include <linux/reboot.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/mce.h>

#include "pseries.h"

static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);

static int ras_check_exception_token;

static void mce_process_errlog_event(struct irq_work *work);
static struct irq_work mce_errlog_process_work = {
	.func = mce_process_errlog_event,
};

#define EPOW_SENSOR_TOKEN	9
#define EPOW_SENSOR_INDEX	0

/* EPOW events counter variable */
static int num_epow_events;

static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);

/* RTAS pseries MCE errorlog section. */
struct pseries_mc_errorlog {
	__be32	fru_id;
	__be32	proc_id;
	u8	error_type;
	/*
	 * sub_err_type (1 byte). Bit fields depend on error_type
	 *
	 * MSB0
	 * |
	 * V
	 * 01234567
	 * XXXXXXXX
	 *
	 * For error_type == MC_ERROR_TYPE_UE
	 *   XXXXXXXX
	 *   X		1: Permanent or Transient UE.
	 *    X		1: Effective address provided.
	 *     X	1: Logical address provided.
	 *      XX	2: Reserved.
	 *        XXX	3: Type of UE error.
	 *
	 * For error_type != MC_ERROR_TYPE_UE
	 *   XXXXXXXX
	 *   X		1: Effective address provided.
	 *    XXXXX	5: Reserved.
	 *         XX	2: Type of SLB/ERAT/TLB error.
	 */
	u8	sub_err_type;
	u8	reserved_1[6];
	__be64	effective_address;
	__be64	logical_address;
} __packed;

/* RTAS pseries MCE error types */
#define MC_ERROR_TYPE_UE		0x00
#define MC_ERROR_TYPE_SLB		0x01
#define MC_ERROR_TYPE_ERAT		0x02
#define MC_ERROR_TYPE_TLB		0x04
#define MC_ERROR_TYPE_D_CACHE		0x05
#define MC_ERROR_TYPE_I_CACHE		0x07

/* RTAS pseries MCE error sub types */
#define MC_ERROR_UE_INDETERMINATE		0
#define MC_ERROR_UE_IFETCH			1
#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH	2
#define MC_ERROR_UE_LOAD_STORE			3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE	4

#define MC_ERROR_SLB_PARITY		0
#define MC_ERROR_SLB_MULTIHIT		1
#define MC_ERROR_SLB_INDETERMINATE	2

#define MC_ERROR_ERAT_PARITY		1
#define MC_ERROR_ERAT_MULTIHIT		2
#define MC_ERROR_ERAT_INDETERMINATE	3

#define MC_ERROR_TLB_PARITY		1
#define MC_ERROR_TLB_MULTIHIT		2
#define MC_ERROR_TLB_INDETERMINATE	3

static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
{
	switch (mlog->error_type) {
	case MC_ERROR_TYPE_UE:
		return (mlog->sub_err_type & 0x07);
	case MC_ERROR_TYPE_SLB:
	case MC_ERROR_TYPE_ERAT:
	case MC_ERROR_TYPE_TLB:
		return (mlog->sub_err_type & 0x03);
	default:
		return 0;
	}
}

static
inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
{
	__be64 addr = 0;

	switch (mlog->error_type) {
	case MC_ERROR_TYPE_UE:
		if (mlog->sub_err_type & 0x40)
			addr = mlog->effective_address;
		break;
	case MC_ERROR_TYPE_SLB:
	case MC_ERROR_TYPE_ERAT:
	case MC_ERROR_TYPE_TLB:
		if (mlog->sub_err_type & 0x80)
			addr = mlog->effective_address;
		break;
	default:
		break;
	}
	return be64_to_cpu(addr);
}

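/*
 * A worked decode of sub_err_type, following the MSB0 bit layout
 * documented in struct pseries_mc_errorlog above (illustration only,
 * not an additional case from the platform spec): for a UE log with
 * sub_err_type == 0x42, bit 1 (0x40) says an effective address was
 * provided and the low three bits give the UE type:
 *
 *	sub_err_type & 0x40	-> effective_address is valid
 *	sub_err_type & 0x07	-> 2 == MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH
 *
 * For error_type != MC_ERROR_TYPE_UE the "effective address provided"
 * flag is instead bit 0 (0x80) and the sub type is in the low two bits
 * (0x03), which is exactly what the two helpers above test.
 */
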
/*
 * Enable the hotplug interrupt late because processing it may touch other
 * devices or systems (e.g. hugepages) that have not been initialized at the
 * subsys stage.
 */
int __init init_ras_hotplug_IRQ(void)
{
	struct device_node *np;

	/* Hotplug Events */
	np = of_find_node_by_path("/event-sources/hot-plug-events");
	if (np != NULL) {
		if (dlpar_workqueue_init() == 0)
			request_event_sources_irqs(np, ras_hotplug_interrupt,
						   "RAS_HOTPLUG");
		of_node_put(np);
	}

	return 0;
}
machine_late_initcall(pseries, init_ras_hotplug_IRQ);

/*
 * Initialize handlers for the set of interrupts caused by hardware errors
 * and power system events.
 */
static int __init init_ras_IRQ(void)
{
	struct device_node *np;

	ras_check_exception_token = rtas_token("check-exception");

	/* Internal Errors */
	np = of_find_node_by_path("/event-sources/internal-errors");
	if (np != NULL) {
		request_event_sources_irqs(np, ras_error_interrupt,
					   "RAS_ERROR");
		of_node_put(np);
	}

	/* EPOW Events */
	np = of_find_node_by_path("/event-sources/epow-events");
	if (np != NULL) {
		request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
		of_node_put(np);
	}

	return 0;
}
machine_subsys_initcall(pseries, init_ras_IRQ);

#define EPOW_SHUTDOWN_NORMAL				1
#define EPOW_SHUTDOWN_ON_UPS				2
#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3
#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4

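/*
 * For EPOW_SYSTEM_SHUTDOWN events the one-byte event modifier narrows
 * down the shutdown reason; only its low nibble is meaningful (the
 * caller below masks it) and the values map to the EPOW_SHUTDOWN_*
 * codes above.
 */
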
static void handle_system_shutdown(char event_modifier)
{
	switch (event_modifier) {
	case EPOW_SHUTDOWN_NORMAL:
		pr_emerg("Power off requested\n");
		orderly_poweroff(true);
		break;

	case EPOW_SHUTDOWN_ON_UPS:
		pr_emerg("Loss of system power detected. System is running on"
			 " UPS/battery. Check RTAS error log for details\n");
		orderly_poweroff(true);
		break;

	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
		pr_emerg("Loss of system critical functions detected. Check"
			 " RTAS error log for details\n");
		orderly_poweroff(true);
		break;

	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
		pr_emerg("High ambient temperature detected. Check RTAS"
			 " error log for details\n");
		orderly_poweroff(true);
		break;

	default:
		pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
		       event_modifier);
	}
}

struct epow_errorlog {
	unsigned char sensor_value;
	unsigned char event_modifier;
	unsigned char extended_modifier;
	unsigned char reserved;
	unsigned char platform_reason;
};

#define EPOW_RESET		0
#define EPOW_WARN_COOLING	1
#define EPOW_WARN_POWER		2
#define EPOW_SYSTEM_SHUTDOWN	3
#define EPOW_SYSTEM_HALT	4
#define EPOW_MAIN_ENCLOSURE	5
#define EPOW_POWER_OFF		7

static void rtas_parse_epow_errlog(struct rtas_error_log *log)
{
	struct pseries_errorlog *pseries_log;
	struct epow_errorlog *epow_log;
	char action_code;
	char modifier;

	pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
	if (pseries_log == NULL)
		return;

	epow_log = (struct epow_errorlog *)pseries_log->data;
	action_code = epow_log->sensor_value & 0xF;	/* bottom 4 bits */
	modifier = epow_log->event_modifier & 0xF;	/* bottom 4 bits */

	switch (action_code) {
	case EPOW_RESET:
		if (num_epow_events) {
			pr_info("Non-critical power/cooling issue cleared\n");
			num_epow_events--;
		}
		break;

	case EPOW_WARN_COOLING:
		pr_info("Non-critical cooling issue detected. Check RTAS error"
			" log for details\n");
		break;

	case EPOW_WARN_POWER:
		pr_info("Non-critical power issue detected. Check RTAS error"
			" log for details\n");
		break;

	case EPOW_SYSTEM_SHUTDOWN:
		handle_system_shutdown(modifier);
		break;

	case EPOW_SYSTEM_HALT:
		pr_emerg("Critical power/cooling issue detected. Check RTAS"
			 " error log for details. Powering off.\n");
		orderly_poweroff(true);
		break;

	case EPOW_MAIN_ENCLOSURE:
	case EPOW_POWER_OFF:
		pr_emerg("System about to lose power. Check RTAS error log"
			 " for details. Powering off immediately.\n");
		emergency_sync();
		kernel_power_off();
		break;

	default:
		pr_err("Unknown power/cooling event (action code = %d)\n",
		       action_code);
	}

	/* Increment the EPOW events counter */
	if (action_code != EPOW_RESET)
		num_epow_events++;
}

static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
{
	struct pseries_errorlog *pseries_log;
	struct pseries_hp_errorlog *hp_elog;

	spin_lock(&ras_log_buf_lock);

	rtas_call(ras_check_exception_token, 6, 1, NULL,
		  RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
		  RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
		  rtas_get_error_log_max());

	pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
					   PSERIES_ELOG_SECT_ID_HOTPLUG);
	if (pseries_log == NULL) {
		spin_unlock(&ras_log_buf_lock);
		return IRQ_HANDLED;
	}
	hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;

	/*
	 * Since PCI hotplug is not currently supported on pseries, put PCI
	 * hotplug events on the ras_log_buf to be handled by rtas_errd.
	 */
	if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
		queue_hotplug_event(hp_elog);
	else
		log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);

	spin_unlock(&ras_log_buf_lock);
	return IRQ_HANDLED;
}

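/*
 * The three event-source handlers in this file (hotplug above, EPOW and
 * internal errors below) share one pattern: call RTAS check-exception
 * (6 inputs, 1 output) with the external interrupt vector, the hardware
 * irq number, a mask selecting the event class, a time-critical flag,
 * and the physical address and size of ras_log_buf, then parse the
 * returned log under ras_log_buf_lock. This is a summary of the call
 * sites here, not the full check-exception ABI; PAPR is the
 * authoritative reference for the argument list.
 */
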
/* Handle environmental and power warning (EPOW) interrupts. */
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
{
	int status;
	int state;
	int critical;

	status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX,
				      &state);

	if (state > 3)
		critical = 1;	/* Time Critical */
	else
		critical = 0;

	spin_lock(&ras_log_buf_lock);

	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
			   virq_to_hw(irq),
			   RTAS_EPOW_WARNING,
			   critical, __pa(&ras_log_buf),
			   rtas_get_error_log_max());

	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);

	rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);

	spin_unlock(&ras_log_buf_lock);
	return IRQ_HANDLED;
}

/*
 * Handle hardware error interrupts.
 *
 * RTAS check-exception is called to collect data on the exception. If
 * the error is deemed recoverable, we log a warning and return.
 * For nonrecoverable errors, an error is logged and we stop all processing
 * as quickly as possible in order to prevent propagation of the failure.
 */
static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
{
	struct rtas_error_log *rtas_elog;
	int status;
	int fatal;

	spin_lock(&ras_log_buf_lock);

	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
			   virq_to_hw(irq),
			   RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
			   __pa(&ras_log_buf),
			   rtas_get_error_log_max());

	rtas_elog = (struct rtas_error_log *)ras_log_buf;

	if (status == 0 &&
	    rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
		fatal = 1;
	else
		fatal = 0;

	/* format and print the extended information */
	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);

	if (fatal) {
		pr_emerg("Fatal hardware error detected. Check RTAS error"
			 " log for details. Powering off immediately\n");
		emergency_sync();
		kernel_power_off();
	} else {
		pr_err("Recoverable hardware error detected\n");
	}

	spin_unlock(&ras_log_buf_lock);
	return IRQ_HANDLED;
}

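/*
 * The fatal test above relies on the RTAS_SEVERITY_* encoding in
 * asm/rtas.h increasing with seriousness (NO_ERROR up to FATAL), so
 * ">= RTAS_SEVERITY_ERROR_SYNC" accepts synchronous errors and anything
 * graver. If that encoding ever changed, the comparison would need to
 * be revisited.
 */
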
/*
 * Some versions of FWNMI place the buffer inside the 4kB page starting at
 * 0x7000. Other versions place it inside the rtas buffer. We check both.
 */
#define VALID_FWNMI_BUFFER(A) \
	((((A) >= 0x7000) && ((A) < 0x7ff0)) || \
	(((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16))))

static inline struct rtas_error_log *fwnmi_get_errlog(void)
{
	return (struct rtas_error_log *)local_paca->mce_data_buf;
}

/*
 * Get the error information for errors coming through the
 * FWNMI vectors. The pt_regs' r3 will be updated to reflect
 * the actual r3 if possible, and a ptr to the error log entry
 * will be returned if found.
 *
 * Use one buffer mce_data_buf per cpu to store RTAS error.
 *
 * The mce_data_buf does not have any locks or protection around it;
 * if a second machine check comes in, or a system reset is done
 * before we have logged the error, then we will get corruption in the
 * error log. This is preferable to holding off on calling
 * ibm,nmi-interlock, which would result in us checkstopping if a
 * second machine check did come in.
 */
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
{
	unsigned long *savep;
	struct rtas_error_log *h;

	/* Mask top two bits */
	regs->gpr[3] &= ~(0x3UL << 62);

	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
		return NULL;
	}

	savep = __va(regs->gpr[3]);
	regs->gpr[3] = be64_to_cpu(savep[0]);	/* restore original r3 */

	h = (struct rtas_error_log *)&savep[1];
	/* Use the per cpu buffer from paca to store rtas error log */
	memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
	if (!rtas_error_extended(h)) {
		memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
	} else {
		int len, error_log_length;

		error_log_length = 8 + rtas_error_extended_log_length(h);
		len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
		memcpy(local_paca->mce_data_buf, h, len);
	}

	return (struct rtas_error_log *)local_paca->mce_data_buf;
}

/* Call this when done with the data returned by fwnmi_get_errinfo().
 * It will release the saved data area for other CPUs in the
 * partition to receive FWNMI errors.
 */
static void fwnmi_release_errinfo(void)
{
	int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);

	if (ret != 0)
		printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
}

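/*
 * fwnmi_get_errinfo() and fwnmi_release_errinfo() come in pairs:
 * firmware holds further FWNMI deliveries to the partition until the
 * interlock is called, which is why every path below that grabs the
 * error info also releases it. (Inference from the comments and call
 * sites in this file; PAPR's ibm,nmi-interlock description is the
 * authority here.)
 */
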
int pSeries_system_reset_exception(struct pt_regs *regs)
{
#ifdef __LITTLE_ENDIAN__
	/*
	 * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
	 * to detect the bad SRR1 pattern here. Flip the NIP back to correct
	 * endian for reporting purposes. Unfortunately the MSR can't be fixed,
	 * so clear it. It will be missing MSR_RI so we won't try to recover.
	 */
	if ((be64_to_cpu(regs->msr) &
			(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
			 MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
		regs->nip = be64_to_cpu((__be64)regs->nip);
		regs->msr = 0;
	}
#endif

	if (fwnmi_active) {
		struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);

		if (errhdr) {
			/* XXX Should look at FWNMI information */
		}
		fwnmi_release_errinfo();
	}

	if (smp_handle_nmi_ipi(regs))
		return 1;

	return 0; /* need to perform reset */
}

#define VAL_TO_STRING(ar, val)	\
	(((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")

static void pseries_print_mce_info(struct pt_regs *regs,
				   struct rtas_error_log *errp)
{
	const char *level, *sevstr;
	struct pseries_errorlog *pseries_log;
	struct pseries_mc_errorlog *mce_log;
	u8 error_type, err_sub_type;
	u64 addr;
	u8 initiator = rtas_error_initiator(errp);
	int disposition = rtas_error_disposition(errp);

	static const char * const initiators[] = {
		[0] = "Unknown",
		[1] = "CPU",
		[2] = "PCI",
		[3] = "ISA",
		[4] = "Memory",
		[5] = "Power Mgmt",
	};
	static const char * const mc_err_types[] = {
		[0] = "UE",
		[1] = "SLB",
		[2] = "ERAT",
		[3] = "Unknown",
		[4] = "TLB",
		[5] = "D-Cache",
		[6] = "Unknown",
		[7] = "I-Cache",
	};
	static const char * const mc_ue_types[] = {
		[0] = "Indeterminate",
		[1] = "Instruction fetch",
		[2] = "Page table walk ifetch",
		[3] = "Load/Store",
		[4] = "Page table walk Load/Store",
	};

	/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
	static const char * const mc_slb_types[] = {
		[0] = "Parity",
		[1] = "Multihit",
		[2] = "Indeterminate",
	};

	/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
	static const char * const mc_soft_types[] = {
		[0] = "Unknown",
		[1] = "Parity",
		[2] = "Multihit",
		[3] = "Indeterminate",
	};

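	/*
	 * Worked example of the lookups below, using the MC_ERROR_*
	 * values defined earlier in this file: a TLB multihit log has
	 * error_type == MC_ERROR_TYPE_TLB (0x04) and a sub type of
	 * MC_ERROR_TLB_MULTIHIT (2), so VAL_TO_STRING() resolves to
	 * mc_err_types[4] ("TLB") and mc_soft_types[2] ("Multihit").
	 * Anything past the end of a table falls back to "Unknown".
	 */
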
	if (!rtas_error_extended(errp)) {
		pr_err("Machine check interrupt: Missing extended error log\n");
		return;
	}

	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (pseries_log == NULL)
		return;

	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;

	error_type = mce_log->error_type;
	err_sub_type = rtas_mc_error_sub_type(mce_log);

	switch (rtas_error_severity(errp)) {
	case RTAS_SEVERITY_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case RTAS_SEVERITY_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case RTAS_SEVERITY_ERROR:
	case RTAS_SEVERITY_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case RTAS_SEVERITY_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

#ifdef CONFIG_PPC_BOOK3S_64
	/* Display faulty slb contents for SLB errors. */
	if (error_type == MC_ERROR_TYPE_SLB)
		slb_dump_contents(local_paca->mce_faulty_slbs);
#endif

	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
	       disposition == RTAS_DISP_FULLY_RECOVERED ?
	       "Recovered" : "Not recovered");
	if (user_mode(regs)) {
		printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level,
		       regs->nip, current->pid, current->comm);
	} else {
		printk("%s NIP [%016lx]: %pS\n", level, regs->nip,
		       (void *)regs->nip);
	}
	printk("%s Initiator: %s\n", level,
	       VAL_TO_STRING(initiators, initiator));

	switch (error_type) {
	case MC_ERROR_TYPE_UE:
		printk("%s Error type: %s [%s]\n", level,
		       VAL_TO_STRING(mc_err_types, error_type),
		       VAL_TO_STRING(mc_ue_types, err_sub_type));
		break;
	case MC_ERROR_TYPE_SLB:
		printk("%s Error type: %s [%s]\n", level,
		       VAL_TO_STRING(mc_err_types, error_type),
		       VAL_TO_STRING(mc_slb_types, err_sub_type));
		break;
	case MC_ERROR_TYPE_ERAT:
	case MC_ERROR_TYPE_TLB:
		printk("%s Error type: %s [%s]\n", level,
		       VAL_TO_STRING(mc_err_types, error_type),
		       VAL_TO_STRING(mc_soft_types, err_sub_type));
		break;
	default:
		printk("%s Error type: %s\n", level,
		       VAL_TO_STRING(mc_err_types, error_type));
		break;
	}

	addr = rtas_mc_get_effective_addr(mce_log);
	if (addr)
		printk("%s Effective address: %016llx\n", level, addr);
}

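/*
 * Attempt recovery before the event is logged. For SLB and ERAT errors
 * that firmware reports as not recovered, mce_handle_error() below
 * flushes and reloads the SLB and then marks the event fully recovered
 * in the log itself, so later consumers see the corrected disposition.
 */
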
static int mce_handle_error(struct rtas_error_log *errp)
{
	struct pseries_errorlog *pseries_log;
	struct pseries_mc_errorlog *mce_log;
	int disposition = rtas_error_disposition(errp);
	u8 error_type;

	if (!rtas_error_extended(errp))
		goto out;

	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (pseries_log == NULL)
		goto out;

	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
	error_type = mce_log->error_type;

#ifdef CONFIG_PPC_BOOK3S_64
	if (disposition == RTAS_DISP_NOT_RECOVERED) {
		switch (error_type) {
		case MC_ERROR_TYPE_SLB:
		case MC_ERROR_TYPE_ERAT:
			/*
			 * Store the old slb contents in the paca before
			 * flushing, and print them when we go back to
			 * virtual mode. There is a chance we may hit an
			 * MCE again if there is a parity error on the SLB
			 * entry we are trying to read for saving, so limit
			 * the slb saving to a single level of recursion.
			 */
			if (local_paca->in_mce == 1)
				slb_save_contents(local_paca->mce_faulty_slbs);
			flush_and_reload_slb();
			disposition = RTAS_DISP_FULLY_RECOVERED;
			rtas_set_disposition_recovered(errp);
			break;
		default:
			break;
		}
	}
#endif

out:
	return disposition;
}

#ifdef CONFIG_MEMORY_FAILURE

static DEFINE_PER_CPU(int, rtas_ue_count);
static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);

#define UE_EFFECTIVE_ADDR_PROVIDED	0x40
#define UE_LOGICAL_ADDR_PROVIDED	0x20

static void pseries_hwpoison_work_fn(struct work_struct *work)
{
	unsigned long paddr;
	int index;

	while (__this_cpu_read(rtas_ue_count) > 0) {
		index = __this_cpu_read(rtas_ue_count) - 1;
		paddr = __this_cpu_read(rtas_ue_paddr[index]);
		memory_failure(paddr >> PAGE_SHIFT, 0);
		__this_cpu_dec(rtas_ue_count);
	}
}

static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);

static void queue_ue_paddr(unsigned long paddr)
{
	int index;

	index = __this_cpu_inc_return(rtas_ue_count) - 1;
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(rtas_ue_count);
		return;
	}
	this_cpu_write(rtas_ue_paddr[index], paddr);
	schedule_work(&hwpoison_work);
}

static void pseries_do_memory_failure(struct pt_regs *regs,
				      struct pseries_mc_errorlog *mce_log)
{
	unsigned long paddr;

	if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
		paddr = be64_to_cpu(mce_log->logical_address);
	} else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
		unsigned long pfn;

		pfn = addr_to_pfn(regs,
				  be64_to_cpu(mce_log->effective_address));
		if (pfn == ULONG_MAX)
			return;
		paddr = pfn << PAGE_SHIFT;
	} else {
		return;
	}
	queue_ue_paddr(paddr);
}

static void pseries_process_ue(struct pt_regs *regs,
			       struct rtas_error_log *errp)
{
	struct pseries_errorlog *pseries_log;
	struct pseries_mc_errorlog *mce_log;

	if (!rtas_error_extended(errp))
		return;

	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (!pseries_log)
		return;

	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;

	if (mce_log->error_type == MC_ERROR_TYPE_UE)
		pseries_do_memory_failure(regs, mce_log);
}
#else
static inline void pseries_process_ue(struct pt_regs *regs,
				      struct rtas_error_log *errp) { }
#endif /* CONFIG_MEMORY_FAILURE */

/*
 * Process MCE rtas errlog event.
 */
static void mce_process_errlog_event(struct irq_work *work)
{
	struct rtas_error_log *err;

	err = fwnmi_get_errlog();
	log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
}

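/*
 * mce_process_errlog_event() above deliberately runs as irq_work:
 * recover_mce() below only queues mce_errlog_process_work from the
 * machine check path, and the rtas event is written out here once the
 * CPU is back in a normal interrupt context (compare the real-mode
 * restriction noted in pseries_machine_check_realmode()).
 */
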
/*
 * See if we can recover from a machine check exception.
 * This is only called on power4 (or above) and only via
 * the Firmware Non-Maskable Interrupts (fwnmi) handler
 * which provides the error analysis for us.
 *
 * Return 1 if corrected (or delivered a signal).
 * Return 0 if there is nothing we can do.
 */
static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
{
	int recovered = 0;
	int disposition = rtas_error_disposition(err);

	pseries_print_mce_info(regs, err);

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
		recovered = 0;

	} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
		/* Platform corrected itself */
		recovered = 1;

	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
		/* Platform corrected itself but could be degraded */
		printk(KERN_ERR "MCE: limited recovery, system may "
		       "be degraded\n");
		recovered = 1;

	} else if (user_mode(regs) && !is_global_init(current) &&
		   rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {

		/*
		 * If we received a synchronous error when in userspace,
		 * kill the task. Firmware may report details of the fail
		 * asynchronously, so we can't rely on the target and type
		 * fields being valid here.
		 */
		printk(KERN_ERR "MCE: uncorrectable error, killing task "
		       "%s:%d\n", current->comm, current->pid);

		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}

	pseries_process_ue(regs, err);

	/* Queue irq work to log this rtas event later. */
	irq_work_queue(&mce_errlog_process_work);

	return recovered;
}

/*
 * Handle a machine check.
 *
 * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
 * should be present. If so, the handler which called us tells us if the
 * error was recovered (never true if RI=0).
 *
 * On hardware prior to Power 4 these exceptions were asynchronous which
 * means we can't tell exactly where they occurred and so we can't recover.
 */
int pSeries_machine_check_exception(struct pt_regs *regs)
{
	struct rtas_error_log *errp;

	if (fwnmi_active) {
		fwnmi_release_errinfo();
		errp = fwnmi_get_errlog();
		if (errp && recover_mce(regs, errp))
			return 1;
	}

	return 0;
}

long pseries_machine_check_realmode(struct pt_regs *regs)
{
	struct rtas_error_log *errp;
	int disposition;

	if (fwnmi_active) {
		errp = fwnmi_get_errinfo(regs);
		/*
		 * Call to fwnmi_release_errinfo() in real mode causes kernel
		 * to panic. Hence we will call it as soon as we go into
		 * virtual mode.
		 */
		disposition = mce_handle_error(errp);
		if (disposition == RTAS_DISP_FULLY_RECOVERED)
			return 1;
	}

	return 0;
}