1 /* 2 * Machine check exception handling. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * 18 * Copyright 2013 IBM Corporation 19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 20 */ 21 22 #undef DEBUG 23 #define pr_fmt(fmt) "mce: " fmt 24 25 #include <linux/hardirq.h> 26 #include <linux/types.h> 27 #include <linux/ptrace.h> 28 #include <linux/percpu.h> 29 #include <linux/export.h> 30 #include <linux/irq_work.h> 31 32 #include <asm/machdep.h> 33 #include <asm/mce.h> 34 35 static DEFINE_PER_CPU(int, mce_nest_count); 36 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); 37 38 /* Queue for delayed MCE events. */ 39 static DEFINE_PER_CPU(int, mce_queue_count); 40 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); 41 42 /* Queue for delayed MCE UE events. */ 43 static DEFINE_PER_CPU(int, mce_ue_count); 44 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], 45 mce_ue_event_queue); 46 47 static void machine_check_process_queued_event(struct irq_work *work); 48 void machine_check_ue_event(struct machine_check_event *evt); 49 static void machine_process_ue_event(struct work_struct *work); 50 51 static struct irq_work mce_event_process_work = { 52 .func = machine_check_process_queued_event, 53 }; 54 55 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); 56 57 static void mce_set_error_info(struct machine_check_event *mce, 58 struct mce_error_info *mce_err) 59 { 60 mce->error_type = mce_err->error_type; 61 switch (mce_err->error_type) { 62 case MCE_ERROR_TYPE_UE: 63 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; 64 break; 65 case MCE_ERROR_TYPE_SLB: 66 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; 67 break; 68 case MCE_ERROR_TYPE_ERAT: 69 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; 70 break; 71 case MCE_ERROR_TYPE_TLB: 72 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; 73 break; 74 case MCE_ERROR_TYPE_USER: 75 mce->u.user_error.user_error_type = mce_err->u.user_error_type; 76 break; 77 case MCE_ERROR_TYPE_RA: 78 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type; 79 break; 80 case MCE_ERROR_TYPE_LINK: 81 mce->u.link_error.link_error_type = mce_err->u.link_error_type; 82 break; 83 case MCE_ERROR_TYPE_UNKNOWN: 84 default: 85 break; 86 } 87 } 88 89 /* 90 * Decode and save high level MCE information into per cpu buffer which 91 * is an array of machine_check_event structure. 92 */ 93 void save_mce_event(struct pt_regs *regs, long handled, 94 struct mce_error_info *mce_err, 95 uint64_t nip, uint64_t addr, uint64_t phys_addr) 96 { 97 int index = __this_cpu_inc_return(mce_nest_count) - 1; 98 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); 99 100 /* 101 * Return if we don't have enough space to log mce event. 102 * mce_nest_count may go beyond MAX_MC_EVT but that's ok, 103 * the check below will stop buffer overrun. 104 */ 105 if (index >= MAX_MC_EVT) 106 return; 107 108 /* Populate generic machine check info */ 109 mce->version = MCE_V1; 110 mce->srr0 = nip; 111 mce->srr1 = regs->msr; 112 mce->gpr3 = regs->gpr[3]; 113 mce->in_use = 1; 114 115 /* Mark it recovered if we have handled it and MSR(RI=1). */ 116 if (handled && (regs->msr & MSR_RI)) 117 mce->disposition = MCE_DISPOSITION_RECOVERED; 118 else 119 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; 120 121 mce->initiator = mce_err->initiator; 122 mce->severity = mce_err->severity; 123 124 /* 125 * Populate the mce error_type and type-specific error_type. 126 */ 127 mce_set_error_info(mce, mce_err); 128 129 if (!addr) 130 return; 131 132 if (mce->error_type == MCE_ERROR_TYPE_TLB) { 133 mce->u.tlb_error.effective_address_provided = true; 134 mce->u.tlb_error.effective_address = addr; 135 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { 136 mce->u.slb_error.effective_address_provided = true; 137 mce->u.slb_error.effective_address = addr; 138 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { 139 mce->u.erat_error.effective_address_provided = true; 140 mce->u.erat_error.effective_address = addr; 141 } else if (mce->error_type == MCE_ERROR_TYPE_USER) { 142 mce->u.user_error.effective_address_provided = true; 143 mce->u.user_error.effective_address = addr; 144 } else if (mce->error_type == MCE_ERROR_TYPE_RA) { 145 mce->u.ra_error.effective_address_provided = true; 146 mce->u.ra_error.effective_address = addr; 147 } else if (mce->error_type == MCE_ERROR_TYPE_LINK) { 148 mce->u.link_error.effective_address_provided = true; 149 mce->u.link_error.effective_address = addr; 150 } else if (mce->error_type == MCE_ERROR_TYPE_UE) { 151 mce->u.ue_error.effective_address_provided = true; 152 mce->u.ue_error.effective_address = addr; 153 if (phys_addr != ULONG_MAX) { 154 mce->u.ue_error.physical_address_provided = true; 155 mce->u.ue_error.physical_address = phys_addr; 156 machine_check_ue_event(mce); 157 } 158 } 159 return; 160 } 161 162 /* 163 * get_mce_event: 164 * mce Pointer to machine_check_event structure to be filled. 165 * release Flag to indicate whether to free the event slot or not. 166 * 0 <= do not release the mce event. Caller will invoke 167 * release_mce_event() once event has been consumed. 168 * 1 <= release the slot. 169 * 170 * return 1 = success 171 * 0 = failure 172 * 173 * get_mce_event() will be called by platform specific machine check 174 * handle routine and in KVM. 175 * When we call get_mce_event(), we are still in interrupt context and 176 * preemption will not be scheduled until ret_from_expect() routine 177 * is called. 178 */ 179 int get_mce_event(struct machine_check_event *mce, bool release) 180 { 181 int index = __this_cpu_read(mce_nest_count) - 1; 182 struct machine_check_event *mc_evt; 183 int ret = 0; 184 185 /* Sanity check */ 186 if (index < 0) 187 return ret; 188 189 /* Check if we have MCE info to process. */ 190 if (index < MAX_MC_EVT) { 191 mc_evt = this_cpu_ptr(&mce_event[index]); 192 /* Copy the event structure and release the original */ 193 if (mce) 194 *mce = *mc_evt; 195 if (release) 196 mc_evt->in_use = 0; 197 ret = 1; 198 } 199 /* Decrement the count to free the slot. */ 200 if (release) 201 __this_cpu_dec(mce_nest_count); 202 203 return ret; 204 } 205 206 void release_mce_event(void) 207 { 208 get_mce_event(NULL, true); 209 } 210 211 212 /* 213 * Queue up the MCE event which then can be handled later. 214 */ 215 void machine_check_ue_event(struct machine_check_event *evt) 216 { 217 int index; 218 219 index = __this_cpu_inc_return(mce_ue_count) - 1; 220 /* If queue is full, just return for now. */ 221 if (index >= MAX_MC_EVT) { 222 __this_cpu_dec(mce_ue_count); 223 return; 224 } 225 memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt)); 226 227 /* Queue work to process this event later. */ 228 schedule_work(&mce_ue_event_work); 229 } 230 231 /* 232 * Queue up the MCE event which then can be handled later. 233 */ 234 void machine_check_queue_event(void) 235 { 236 int index; 237 struct machine_check_event evt; 238 239 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 240 return; 241 242 index = __this_cpu_inc_return(mce_queue_count) - 1; 243 /* If queue is full, just return for now. */ 244 if (index >= MAX_MC_EVT) { 245 __this_cpu_dec(mce_queue_count); 246 return; 247 } 248 memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); 249 250 /* Queue irq work to process this event later. */ 251 irq_work_queue(&mce_event_process_work); 252 } 253 /* 254 * process pending MCE event from the mce event queue. This function will be 255 * called during syscall exit. 256 */ 257 static void machine_process_ue_event(struct work_struct *work) 258 { 259 int index; 260 struct machine_check_event *evt; 261 262 while (__this_cpu_read(mce_ue_count) > 0) { 263 index = __this_cpu_read(mce_ue_count) - 1; 264 evt = this_cpu_ptr(&mce_ue_event_queue[index]); 265 #ifdef CONFIG_MEMORY_FAILURE 266 /* 267 * This should probably queued elsewhere, but 268 * oh! well 269 */ 270 if (evt->error_type == MCE_ERROR_TYPE_UE) { 271 if (evt->u.ue_error.physical_address_provided) { 272 unsigned long pfn; 273 274 pfn = evt->u.ue_error.physical_address >> 275 PAGE_SHIFT; 276 memory_failure(pfn, 0); 277 } else 278 pr_warn("Failed to identify bad address from " 279 "where the uncorrectable error (UE) " 280 "was generated\n"); 281 } 282 #endif 283 __this_cpu_dec(mce_ue_count); 284 } 285 } 286 /* 287 * process pending MCE event from the mce event queue. This function will be 288 * called during syscall exit. 289 */ 290 static void machine_check_process_queued_event(struct irq_work *work) 291 { 292 int index; 293 struct machine_check_event *evt; 294 295 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 296 297 /* 298 * For now just print it to console. 299 * TODO: log this error event to FSP or nvram. 300 */ 301 while (__this_cpu_read(mce_queue_count) > 0) { 302 index = __this_cpu_read(mce_queue_count) - 1; 303 evt = this_cpu_ptr(&mce_event_queue[index]); 304 machine_check_print_event_info(evt, false); 305 __this_cpu_dec(mce_queue_count); 306 } 307 } 308 309 void machine_check_print_event_info(struct machine_check_event *evt, 310 bool user_mode) 311 { 312 const char *level, *sevstr, *subtype; 313 static const char *mc_ue_types[] = { 314 "Indeterminate", 315 "Instruction fetch", 316 "Page table walk ifetch", 317 "Load/Store", 318 "Page table walk Load/Store", 319 }; 320 static const char *mc_slb_types[] = { 321 "Indeterminate", 322 "Parity", 323 "Multihit", 324 }; 325 static const char *mc_erat_types[] = { 326 "Indeterminate", 327 "Parity", 328 "Multihit", 329 }; 330 static const char *mc_tlb_types[] = { 331 "Indeterminate", 332 "Parity", 333 "Multihit", 334 }; 335 static const char *mc_user_types[] = { 336 "Indeterminate", 337 "tlbie(l) invalid", 338 }; 339 static const char *mc_ra_types[] = { 340 "Indeterminate", 341 "Instruction fetch (bad)", 342 "Instruction fetch (foreign)", 343 "Page table walk ifetch (bad)", 344 "Page table walk ifetch (foreign)", 345 "Load (bad)", 346 "Store (bad)", 347 "Page table walk Load/Store (bad)", 348 "Page table walk Load/Store (foreign)", 349 "Load/Store (foreign)", 350 }; 351 static const char *mc_link_types[] = { 352 "Indeterminate", 353 "Instruction fetch (timeout)", 354 "Page table walk ifetch (timeout)", 355 "Load (timeout)", 356 "Store (timeout)", 357 "Page table walk Load/Store (timeout)", 358 }; 359 360 /* Print things out */ 361 if (evt->version != MCE_V1) { 362 pr_err("Machine Check Exception, Unknown event version %d !\n", 363 evt->version); 364 return; 365 } 366 switch (evt->severity) { 367 case MCE_SEV_NO_ERROR: 368 level = KERN_INFO; 369 sevstr = "Harmless"; 370 break; 371 case MCE_SEV_WARNING: 372 level = KERN_WARNING; 373 sevstr = ""; 374 break; 375 case MCE_SEV_ERROR_SYNC: 376 level = KERN_ERR; 377 sevstr = "Severe"; 378 break; 379 case MCE_SEV_FATAL: 380 default: 381 level = KERN_ERR; 382 sevstr = "Fatal"; 383 break; 384 } 385 386 printk("%s%s Machine check interrupt [%s]\n", level, sevstr, 387 evt->disposition == MCE_DISPOSITION_RECOVERED ? 388 "Recovered" : "Not recovered"); 389 390 if (user_mode) { 391 printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, 392 evt->srr0, current->pid, current->comm); 393 } else { 394 printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, 395 (void *)evt->srr0); 396 } 397 398 printk("%s Initiator: %s\n", level, 399 evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); 400 switch (evt->error_type) { 401 case MCE_ERROR_TYPE_UE: 402 subtype = evt->u.ue_error.ue_error_type < 403 ARRAY_SIZE(mc_ue_types) ? 404 mc_ue_types[evt->u.ue_error.ue_error_type] 405 : "Unknown"; 406 printk("%s Error type: UE [%s]\n", level, subtype); 407 if (evt->u.ue_error.effective_address_provided) 408 printk("%s Effective address: %016llx\n", 409 level, evt->u.ue_error.effective_address); 410 if (evt->u.ue_error.physical_address_provided) 411 printk("%s Physical address: %016llx\n", 412 level, evt->u.ue_error.physical_address); 413 break; 414 case MCE_ERROR_TYPE_SLB: 415 subtype = evt->u.slb_error.slb_error_type < 416 ARRAY_SIZE(mc_slb_types) ? 417 mc_slb_types[evt->u.slb_error.slb_error_type] 418 : "Unknown"; 419 printk("%s Error type: SLB [%s]\n", level, subtype); 420 if (evt->u.slb_error.effective_address_provided) 421 printk("%s Effective address: %016llx\n", 422 level, evt->u.slb_error.effective_address); 423 break; 424 case MCE_ERROR_TYPE_ERAT: 425 subtype = evt->u.erat_error.erat_error_type < 426 ARRAY_SIZE(mc_erat_types) ? 427 mc_erat_types[evt->u.erat_error.erat_error_type] 428 : "Unknown"; 429 printk("%s Error type: ERAT [%s]\n", level, subtype); 430 if (evt->u.erat_error.effective_address_provided) 431 printk("%s Effective address: %016llx\n", 432 level, evt->u.erat_error.effective_address); 433 break; 434 case MCE_ERROR_TYPE_TLB: 435 subtype = evt->u.tlb_error.tlb_error_type < 436 ARRAY_SIZE(mc_tlb_types) ? 437 mc_tlb_types[evt->u.tlb_error.tlb_error_type] 438 : "Unknown"; 439 printk("%s Error type: TLB [%s]\n", level, subtype); 440 if (evt->u.tlb_error.effective_address_provided) 441 printk("%s Effective address: %016llx\n", 442 level, evt->u.tlb_error.effective_address); 443 break; 444 case MCE_ERROR_TYPE_USER: 445 subtype = evt->u.user_error.user_error_type < 446 ARRAY_SIZE(mc_user_types) ? 447 mc_user_types[evt->u.user_error.user_error_type] 448 : "Unknown"; 449 printk("%s Error type: User [%s]\n", level, subtype); 450 if (evt->u.user_error.effective_address_provided) 451 printk("%s Effective address: %016llx\n", 452 level, evt->u.user_error.effective_address); 453 break; 454 case MCE_ERROR_TYPE_RA: 455 subtype = evt->u.ra_error.ra_error_type < 456 ARRAY_SIZE(mc_ra_types) ? 457 mc_ra_types[evt->u.ra_error.ra_error_type] 458 : "Unknown"; 459 printk("%s Error type: Real address [%s]\n", level, subtype); 460 if (evt->u.ra_error.effective_address_provided) 461 printk("%s Effective address: %016llx\n", 462 level, evt->u.ra_error.effective_address); 463 break; 464 case MCE_ERROR_TYPE_LINK: 465 subtype = evt->u.link_error.link_error_type < 466 ARRAY_SIZE(mc_link_types) ? 467 mc_link_types[evt->u.link_error.link_error_type] 468 : "Unknown"; 469 printk("%s Error type: Link [%s]\n", level, subtype); 470 if (evt->u.link_error.effective_address_provided) 471 printk("%s Effective address: %016llx\n", 472 level, evt->u.link_error.effective_address); 473 break; 474 default: 475 case MCE_ERROR_TYPE_UNKNOWN: 476 printk("%s Error type: Unknown\n", level); 477 break; 478 } 479 } 480 EXPORT_SYMBOL_GPL(machine_check_print_event_info); 481 482 /* 483 * This function is called in real mode. Strictly no printk's please. 484 * 485 * regs->nip and regs->msr contains srr0 and ssr1. 486 */ 487 long machine_check_early(struct pt_regs *regs) 488 { 489 long handled = 0; 490 491 __this_cpu_inc(irq_stat.mce_exceptions); 492 493 if (cur_cpu_spec && cur_cpu_spec->machine_check_early) 494 handled = cur_cpu_spec->machine_check_early(regs); 495 return handled; 496 } 497 498 /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */ 499 static enum { 500 DTRIG_UNKNOWN, 501 DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */ 502 DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */ 503 } hmer_debug_trig_function; 504 505 static int init_debug_trig_function(void) 506 { 507 int pvr; 508 struct device_node *cpun; 509 struct property *prop = NULL; 510 const char *str; 511 512 /* First look in the device tree */ 513 preempt_disable(); 514 cpun = of_get_cpu_node(smp_processor_id(), NULL); 515 if (cpun) { 516 of_property_for_each_string(cpun, "ibm,hmi-special-triggers", 517 prop, str) { 518 if (strcmp(str, "bit17-vector-ci-load") == 0) 519 hmer_debug_trig_function = DTRIG_VECTOR_CI; 520 else if (strcmp(str, "bit17-tm-suspend-escape") == 0) 521 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; 522 } 523 of_node_put(cpun); 524 } 525 preempt_enable(); 526 527 /* If we found the property, don't look at PVR */ 528 if (prop) 529 goto out; 530 531 pvr = mfspr(SPRN_PVR); 532 /* Check for POWER9 Nimbus (scale-out) */ 533 if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { 534 /* DD2.2 and later */ 535 if ((pvr & 0xfff) >= 0x202) 536 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; 537 /* DD2.0 and DD2.1 - used for vector CI load emulation */ 538 else if ((pvr & 0xfff) >= 0x200) 539 hmer_debug_trig_function = DTRIG_VECTOR_CI; 540 } 541 542 out: 543 switch (hmer_debug_trig_function) { 544 case DTRIG_VECTOR_CI: 545 pr_debug("HMI debug trigger used for vector CI load\n"); 546 break; 547 case DTRIG_SUSPEND_ESCAPE: 548 pr_debug("HMI debug trigger used for TM suspend escape\n"); 549 break; 550 default: 551 break; 552 } 553 return 0; 554 } 555 __initcall(init_debug_trig_function); 556 557 /* 558 * Handle HMIs that occur as a result of a debug trigger. 559 * Return values: 560 * -1 means this is not a HMI cause that we know about 561 * 0 means no further handling is required 562 * 1 means further handling is required 563 */ 564 long hmi_handle_debugtrig(struct pt_regs *regs) 565 { 566 unsigned long hmer = mfspr(SPRN_HMER); 567 long ret = 0; 568 569 /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */ 570 if (!((hmer & HMER_DEBUG_TRIG) 571 && hmer_debug_trig_function != DTRIG_UNKNOWN)) 572 return -1; 573 574 hmer &= ~HMER_DEBUG_TRIG; 575 /* HMER is a write-AND register */ 576 mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG); 577 578 switch (hmer_debug_trig_function) { 579 case DTRIG_VECTOR_CI: 580 /* 581 * Now to avoid problems with soft-disable we 582 * only do the emulation if we are coming from 583 * host user space 584 */ 585 if (regs && user_mode(regs)) 586 ret = local_paca->hmi_p9_special_emu = 1; 587 588 break; 589 590 default: 591 break; 592 } 593 594 /* 595 * See if any other HMI causes remain to be handled 596 */ 597 if (hmer & mfspr(SPRN_HMEER)) 598 return -1; 599 600 return ret; 601 } 602 603 /* 604 * Return values: 605 */ 606 long hmi_exception_realmode(struct pt_regs *regs) 607 { 608 int ret; 609 610 __this_cpu_inc(irq_stat.hmi_exceptions); 611 612 ret = hmi_handle_debugtrig(regs); 613 if (ret >= 0) 614 return ret; 615 616 wait_for_subcore_guest_exit(); 617 618 if (ppc_md.hmi_exception_early) 619 ppc_md.hmi_exception_early(regs); 620 621 wait_for_tb_resync(); 622 623 return 1; 624 } 625