/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
			mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}
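
/*
 * Illustrative sketch (not part of the original source): how a machine
 * check decoder might fill in a struct mce_error_info before handing it
 * to save_mce_event()/mce_set_error_info() below.  The enum names are
 * assumed to be the ones from asm/mce.h; the surrounding decode logic is
 * hypothetical.
 *
 *	struct mce_error_info mce_err = { 0 };
 *
 *	mce_err.error_type = MCE_ERROR_TYPE_SLB;
 *	mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
 *	mce_err.severity = MCE_SEV_WARNING;
 *	mce_err.initiator = MCE_INITIATOR_CPU;
 *	(the remaining fields - sync_error, error_class - are set the
 *	 same way before the event is saved)
 */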

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;
	mce->cpu = get_paca()->paca_index;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;
	mce->sync_error = mce_err->sync_error;
	mce->error_class = mce_err->error_class;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
	return;
}
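
/*
 * Illustrative sketch (not part of the original source): a platform's
 * early machine check handler, running in real mode, would decode the
 * hardware state into an mce_error_info and then record it with
 * save_mce_event().  The handler name and decode helper below are
 * hypothetical; only save_mce_event() and the ULONG_MAX convention for
 * an unknown physical address come from this file.
 *
 *	long example_platform_mce_early(struct pt_regs *regs)
 *	{
 *		struct mce_error_info mce_err = { 0 };
 *		uint64_t addr = 0, phys_addr = ULONG_MAX;
 *		long handled = example_decode_srr1(regs, &mce_err, &addr);
 *
 *		// phys_addr stays ULONG_MAX unless a real address is known
 *		save_mce_event(regs, handled, &mce_err, regs->nip,
 *			       addr, phys_addr);
 *		return handled;
 *	}
 */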

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handler routines and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}
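
/*
 * Illustrative sketch (not part of the original source): how a consumer
 * such as a platform handler or KVM might pull the event saved above.
 * Passing MCE_EVENT_RELEASE frees the per-cpu slot immediately; passing
 * false keeps it until release_mce_event() is called.
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_RELEASE)) {
 *		// evt is a private copy; the per-cpu slot is already free
 *	}
 *
 *	// or, keep the slot while the event is being inspected:
 *	if (get_mce_event(&evt, false)) {
 *		// ... use evt ...
 *		release_mce_event();
 *	}
 */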

/*
 * Queue up the MCE event so that it can be handled later.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = __this_cpu_inc_return(mce_ue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_ue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

	/* Queue work to process this event later. */
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event so that it can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}

/*
 * Process pending MCE UE events from the MCE UE event queue. This function
 * runs later from a work queue (mce_ue_event_work).
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh well.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		__this_cpu_dec(mce_ue_count);
	}
}

/*
 * Process pending MCE events from the MCE event queue. This function runs
 * later via irq_work (mce_event_process_work).
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		evt = this_cpu_ptr(&mce_event_queue[index]);
		machine_check_print_event_info(evt, false, false);
		__this_cpu_dec(mce_queue_count);
	}
}

void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};
	static const char *mc_error_class[] = {
		"Unknown",
		"Hardware error",
		"Probable Hardware error (some chance of software cause)",
		"Software error",
		"Probable Software error (some chance of hardware cause)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
	       level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
	       err_type, subtype, dar_str,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
		       level, evt->cpu, current->pid, current->comm,
		       in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
		       level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}

	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
		mc_error_class[evt->error_class] : "Unknown";
	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
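
/*
 * Illustrative sketch (not part of the original source): the two flags of
 * machine_check_print_event_info() select the reporting format.  Given an
 * event already fetched into "evt" (see get_mce_event() above), a caller
 * reporting a host-kernel MCE would use:
 *
 *	machine_check_print_event_info(&evt, false, false);
 *
 * Passing user_mode = true additionally prints the PID and comm of the
 * current task, and in_guest = true marks the reported NIP as a guest
 * address.
 */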

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
long machine_check_early(struct pt_regs *regs)
{
	long handled = 0;

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if platform is capable of handling machine check.
	 */
	if (ppc_md.machine_check_early)
		handled = ppc_md.machine_check_early(regs);
	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not an HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space.
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

/*
 * Return values:
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	__this_cpu_inc(irq_stat.hmi_exceptions);

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}
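
/*
 * Illustrative note (not part of the original source) on the "write-AND"
 * HMER update performed in hmi_handle_debugtrig() above: the written value
 * is ANDed into the register, so bits that are 1 in the written value are
 * preserved and bits that are 0 are cleared.  Writing ~HMER_DEBUG_TRIG
 * therefore clears only the debug trigger bit and leaves any other pending
 * HMI cause bits intact, e.g.:
 *
 *	HMER before:   HMER_DEBUG_TRIG | <some other cause bit>
 *	value written: ~HMER_DEBUG_TRIG
 *	HMER after:    <some other cause bit>
 *
 * The remaining causes are then noticed by the "hmer & mfspr(SPRN_HMEER)"
 * check, which makes the function return -1 so they get handled elsewhere.
 */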