/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>
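
/*
 * Machine check events are recorded in small per-CPU arrays because the
 * early handler runs in real mode, where the normal allocators and locks
 * are off limits.  mce_nest_count indexes mce_event[] like a stack, so a
 * machine check taken while an earlier one is still being handled gets
 * its own slot (up to MAX_MC_EVT deep).  The two queues below hold events
 * that must be processed later, from a less constrained context.
 */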

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
                      mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
        .func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static void mce_set_error_info(struct machine_check_event *mce,
                               struct mce_error_info *mce_err)
{
        mce->error_type = mce_err->error_type;
        switch (mce_err->error_type) {
        case MCE_ERROR_TYPE_UE:
                mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
                break;
        case MCE_ERROR_TYPE_SLB:
                mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
                break;
        case MCE_ERROR_TYPE_ERAT:
                mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
                break;
        case MCE_ERROR_TYPE_TLB:
                mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
                break;
        case MCE_ERROR_TYPE_USER:
                mce->u.user_error.user_error_type = mce_err->u.user_error_type;
                break;
        case MCE_ERROR_TYPE_RA:
                mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
                break;
        case MCE_ERROR_TYPE_LINK:
                mce->u.link_error.link_error_type = mce_err->u.link_error_type;
                break;
        case MCE_ERROR_TYPE_UNKNOWN:
        default:
                break;
        }
}

/*
 * Decode and save high level MCE information into the per-CPU buffer,
 * which is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
                    struct mce_error_info *mce_err,
                    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
        int index = __this_cpu_inc_return(mce_nest_count) - 1;
        struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

        /*
         * Return if we don't have enough space to log the MCE event.
         * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
         * the check below will stop buffer overrun.
         */
        if (index >= MAX_MC_EVT)
                return;

        /* Populate generic machine check info */
        mce->version = MCE_V1;
        mce->srr0 = nip;
        mce->srr1 = regs->msr;
        mce->gpr3 = regs->gpr[3];
        mce->in_use = 1;

        /* Mark it recovered if we have handled it and MSR(RI=1). */
        if (handled && (regs->msr & MSR_RI))
                mce->disposition = MCE_DISPOSITION_RECOVERED;
        else
                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

        mce->initiator = mce_err->initiator;
        mce->severity = mce_err->severity;

        /*
         * Populate the mce error_type and type-specific error_type.
         */
        mce_set_error_info(mce, mce_err);

        if (!addr)
                return;

        if (mce->error_type == MCE_ERROR_TYPE_TLB) {
                mce->u.tlb_error.effective_address_provided = true;
                mce->u.tlb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
                mce->u.slb_error.effective_address_provided = true;
                mce->u.slb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
                mce->u.erat_error.effective_address_provided = true;
                mce->u.erat_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
                mce->u.user_error.effective_address_provided = true;
                mce->u.user_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
                mce->u.ra_error.effective_address_provided = true;
                mce->u.ra_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
                mce->u.link_error.effective_address_provided = true;
                mce->u.link_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
                mce->u.ue_error.effective_address_provided = true;
                mce->u.ue_error.effective_address = addr;
                if (phys_addr != ULONG_MAX) {
                        mce->u.ue_error.physical_address_provided = true;
                        mce->u.ue_error.physical_address = phys_addr;
                        machine_check_ue_event(mce);
                }
        }
        return;
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once the event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform-specific machine check
 * handler routines and by KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
        int index = __this_cpu_read(mce_nest_count) - 1;
        struct machine_check_event *mc_evt;
        int ret = 0;

        /* Sanity check */
        if (index < 0)
                return ret;

        /* Check if we have MCE info to process. */
        if (index < MAX_MC_EVT) {
                mc_evt = this_cpu_ptr(&mce_event[index]);
                /* Copy the event structure and release the original */
                if (mce)
                        *mce = *mc_evt;
                if (release)
                        mc_evt->in_use = 0;
                ret = 1;
        }
        /* Decrement the count to free the slot. */
        if (release)
                __this_cpu_dec(mce_nest_count);

        return ret;
}

void release_mce_event(void)
{
        get_mce_event(NULL, true);
}
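
/*
 * Illustrative use of the two helpers above by a platform machine check
 * handler (a sketch only, not an actual caller in this file):
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, false)) {
 *		... decode and act on evt ...
 *		release_mce_event();
 *	}
 *
 * Passing release=true instead (as release_mce_event() does) frees the
 * per-CPU slot immediately.
 */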

/*
 * Queue up an MCE UE event so that it can be handled later, from a
 * context where memory_failure() may be called.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
        int index;

        index = __this_cpu_inc_return(mce_ue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_ue_count);
                return;
        }
        memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

        /* Queue work to process this event later. */
        schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event so that it can be handled later.
 */
void machine_check_queue_event(void)
{
        int index;
        struct machine_check_event evt;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return;

        index = __this_cpu_inc_return(mce_queue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_queue_count);
                return;
        }
        memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

        /* Queue irq work to process this event later. */
        irq_work_queue(&mce_event_process_work);
}
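
/*
 * The queueing functions above are called from machine check context,
 * where it is not safe to touch the console or the memory-failure
 * machinery.  The two functions below do the actual processing later:
 * UE events from a workqueue, so that memory_failure() can run in
 * process context, and the remaining events from irq_work, where
 * printing is safe.
 */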

/*
 * Process pending UE events from the MCE UE event queue.  This runs
 * later from a workqueue, where memory_failure() can safely be called.
 */
static void machine_process_ue_event(struct work_struct *work)
{
        int index;
        struct machine_check_event *evt;

        while (__this_cpu_read(mce_ue_count) > 0) {
                index = __this_cpu_read(mce_ue_count) - 1;
                evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
                /*
                 * This should probably be queued elsewhere, but
                 * oh well.
                 */
                if (evt->error_type == MCE_ERROR_TYPE_UE) {
                        if (evt->u.ue_error.physical_address_provided) {
                                unsigned long pfn;

                                pfn = evt->u.ue_error.physical_address >>
                                        PAGE_SHIFT;
                                memory_failure(pfn, 0);
                        } else
                                pr_warn("Failed to identify bad address from "
                                        "where the uncorrectable error (UE) "
                                        "was generated\n");
                }
#endif
                __this_cpu_dec(mce_ue_count);
        }
}

/*
 * Process pending MCE events from the MCE event queue.  This runs later
 * from irq_work context, where it is safe to print to the console.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
        int index;
        struct machine_check_event *evt;

        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

        /*
         * For now just print it to console.
         * TODO: log this error event to FSP or nvram.
         */
        while (__this_cpu_read(mce_queue_count) > 0) {
                index = __this_cpu_read(mce_queue_count) - 1;
                evt = this_cpu_ptr(&mce_event_queue[index]);
                machine_check_print_event_info(evt, false, false);
                __this_cpu_dec(mce_queue_count);
        }
}

void machine_check_print_event_info(struct machine_check_event *evt,
                                    bool user_mode, bool in_guest)
{
        const char *level, *sevstr, *subtype;
        static const char *mc_ue_types[] = {
                "Indeterminate",
                "Instruction fetch",
                "Page table walk ifetch",
                "Load/Store",
                "Page table walk Load/Store",
        };
        static const char *mc_slb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_erat_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_tlb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_user_types[] = {
                "Indeterminate",
                "tlbie(l) invalid",
        };
        static const char *mc_ra_types[] = {
                "Indeterminate",
                "Instruction fetch (bad)",
                "Instruction fetch (foreign)",
                "Page table walk ifetch (bad)",
                "Page table walk ifetch (foreign)",
                "Load (bad)",
                "Store (bad)",
                "Page table walk Load/Store (bad)",
                "Page table walk Load/Store (foreign)",
                "Load/Store (foreign)",
        };
        static const char *mc_link_types[] = {
                "Indeterminate",
                "Instruction fetch (timeout)",
                "Page table walk ifetch (timeout)",
                "Load (timeout)",
                "Store (timeout)",
                "Page table walk Load/Store (timeout)",
        };

        /* Print things out */
        if (evt->version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d !\n",
                       evt->version);
                return;
        }
        switch (evt->severity) {
        case MCE_SEV_NO_ERROR:
                level = KERN_INFO;
                sevstr = "Harmless";
                break;
        case MCE_SEV_WARNING:
                level = KERN_WARNING;
                sevstr = "";
                break;
        case MCE_SEV_ERROR_SYNC:
                level = KERN_ERR;
                sevstr = "Severe";
                break;
        case MCE_SEV_FATAL:
        default:
                level = KERN_ERR;
                sevstr = "Fatal";
                break;
        }

        printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
               evt->disposition == MCE_DISPOSITION_RECOVERED ?
               "Recovered" : "Not recovered");

        if (in_guest) {
                printk("%s Guest NIP: %016llx\n", level, evt->srr0);
        } else if (user_mode) {
                printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
                       evt->srr0, current->pid, current->comm);
        } else {
                printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
                       (void *)evt->srr0);
        }

        printk("%s Initiator: %s\n", level,
               evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
        switch (evt->error_type) {
        case MCE_ERROR_TYPE_UE:
                subtype = evt->u.ue_error.ue_error_type <
                        ARRAY_SIZE(mc_ue_types) ?
                        mc_ue_types[evt->u.ue_error.ue_error_type]
                        : "Unknown";
                printk("%s Error type: UE [%s]\n", level, subtype);
                if (evt->u.ue_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.ue_error.effective_address);
                if (evt->u.ue_error.physical_address_provided)
                        printk("%s Physical address: %016llx\n",
                               level, evt->u.ue_error.physical_address);
                break;
        case MCE_ERROR_TYPE_SLB:
                subtype = evt->u.slb_error.slb_error_type <
                        ARRAY_SIZE(mc_slb_types) ?
                        mc_slb_types[evt->u.slb_error.slb_error_type]
                        : "Unknown";
                printk("%s Error type: SLB [%s]\n", level, subtype);
                if (evt->u.slb_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.slb_error.effective_address);
                break;
        case MCE_ERROR_TYPE_ERAT:
                subtype = evt->u.erat_error.erat_error_type <
                        ARRAY_SIZE(mc_erat_types) ?
                        mc_erat_types[evt->u.erat_error.erat_error_type]
                        : "Unknown";
                printk("%s Error type: ERAT [%s]\n", level, subtype);
                if (evt->u.erat_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.erat_error.effective_address);
                break;
        case MCE_ERROR_TYPE_TLB:
                subtype = evt->u.tlb_error.tlb_error_type <
                        ARRAY_SIZE(mc_tlb_types) ?
                        mc_tlb_types[evt->u.tlb_error.tlb_error_type]
                        : "Unknown";
                printk("%s Error type: TLB [%s]\n", level, subtype);
                if (evt->u.tlb_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.tlb_error.effective_address);
                break;
        case MCE_ERROR_TYPE_USER:
                subtype = evt->u.user_error.user_error_type <
                        ARRAY_SIZE(mc_user_types) ?
                        mc_user_types[evt->u.user_error.user_error_type]
                        : "Unknown";
                printk("%s Error type: User [%s]\n", level, subtype);
                if (evt->u.user_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.user_error.effective_address);
                break;
        case MCE_ERROR_TYPE_RA:
                subtype = evt->u.ra_error.ra_error_type <
                        ARRAY_SIZE(mc_ra_types) ?
                        mc_ra_types[evt->u.ra_error.ra_error_type]
                        : "Unknown";
                printk("%s Error type: Real address [%s]\n", level, subtype);
                if (evt->u.ra_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.ra_error.effective_address);
                break;
        case MCE_ERROR_TYPE_LINK:
                subtype = evt->u.link_error.link_error_type <
                        ARRAY_SIZE(mc_link_types) ?
                        mc_link_types[evt->u.link_error.link_error_type]
                        : "Unknown";
                printk("%s Error type: Link [%s]\n", level, subtype);
                if (evt->u.link_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.link_error.effective_address);
                break;
        default:
        case MCE_ERROR_TYPE_UNKNOWN:
                printk("%s Error type: Unknown\n", level);
                break;
        }
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
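
/*
 * Illustrative console output from the function above (addresses and the
 * symbol name are made-up placeholders):
 *
 *	Severe Machine check interrupt [Recovered]
 *	 NIP [c000000000123450]: some_function+0x30/0x70
 *	 Initiator: CPU
 *	 Error type: SLB [Multihit]
 *	 Effective address: 00007fffdeadb000
 */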

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1 respectively.
 */
long machine_check_early(struct pt_regs *regs)
{
        long handled = 0;

        hv_nmi_check_nonrecoverable(regs);

        /*
         * See if platform is capable of handling machine check.
         */
        if (ppc_md.machine_check_early)
                handled = ppc_md.machine_check_early(regs);
        return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
        DTRIG_UNKNOWN,
        DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
        DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
        int pvr;
        struct device_node *cpun;
        struct property *prop = NULL;
        const char *str;

        /* First look in the device tree */
        preempt_disable();
        cpun = of_get_cpu_node(smp_processor_id(), NULL);
        if (cpun) {
                of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
                                            prop, str) {
                        if (strcmp(str, "bit17-vector-ci-load") == 0)
                                hmer_debug_trig_function = DTRIG_VECTOR_CI;
                        else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
                                hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                }
                of_node_put(cpun);
        }
        preempt_enable();

        /* If we found the property, don't look at PVR */
        if (prop)
                goto out;

        pvr = mfspr(SPRN_PVR);
        /* Check for POWER9 Nimbus (scale-out) */
        if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
                /* DD2.2 and later */
                if ((pvr & 0xfff) >= 0x202)
                        hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                /* DD2.0 and DD2.1 - used for vector CI load emulation */
                else if ((pvr & 0xfff) >= 0x200)
                        hmer_debug_trig_function = DTRIG_VECTOR_CI;
        }

 out:
        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                pr_debug("HMI debug trigger used for vector CI load\n");
                break;
        case DTRIG_SUSPEND_ESCAPE:
                pr_debug("HMI debug trigger used for TM suspend escape\n");
                break;
        default:
                break;
        }
        return 0;
}
__initcall(init_debug_trig_function);
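
/*
 * Worked example for the PVR check above (the PVR value is illustrative):
 * a PVR of 0x004e1202 has PVR_VER() == 0x004e (POWER9); in its low half,
 * (0x1202 & 0xe000) == 0, so it is a Nimbus part, and (0x1202 & 0xfff) ==
 * 0x202, i.e. DD2.2, so the debug trigger is used for TM suspend escape.
 */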

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not an HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
        unsigned long hmer = mfspr(SPRN_HMER);
        long ret = 0;

        /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
        if (!((hmer & HMER_DEBUG_TRIG)
              && hmer_debug_trig_function != DTRIG_UNKNOWN))
                return -1;

        hmer &= ~HMER_DEBUG_TRIG;
        /* HMER is a write-AND register */
        mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                /*
                 * Now to avoid problems with soft-disable we
                 * only do the emulation if we are coming from
                 * host user space
                 */
                if (regs && user_mode(regs))
                        ret = local_paca->hmi_p9_special_emu = 1;

                break;

        default:
                break;
        }

        /*
         * See if any other HMI causes remain to be handled
         */
        if (hmer & mfspr(SPRN_HMEER))
                return -1;

        return ret;
}

/*
 * Return values:
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_exception_realmode(struct pt_regs *regs)
{
        int ret;

        __this_cpu_inc(irq_stat.hmi_exceptions);

        ret = hmi_handle_debugtrig(regs);
        if (ret >= 0)
                return ret;

        wait_for_subcore_guest_exit();

        if (ppc_md.hmi_exception_early)
                ppc_md.hmi_exception_early(regs);

        wait_for_tb_resync();

        return 1;
}