xref: /openbmc/linux/arch/powerpc/kernel/mce.c (revision 2fa49589)
1 /*
2  * Machine check exception handling.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright 2013 IBM Corporation
19  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20  */
21 
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce: " fmt
24 
25 #include <linux/hardirq.h>
26 #include <linux/types.h>
27 #include <linux/ptrace.h>
28 #include <linux/percpu.h>
29 #include <linux/export.h>
30 #include <linux/irq_work.h>
31 
32 #include <asm/machdep.h>
33 #include <asm/mce.h>
34 
35 static DEFINE_PER_CPU(int, mce_nest_count);
36 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
37 
38 /* Queue for delayed MCE events. */
39 static DEFINE_PER_CPU(int, mce_queue_count);
40 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
41 
42 /* Queue for delayed MCE UE events. */
43 static DEFINE_PER_CPU(int, mce_ue_count);
44 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
45 					mce_ue_event_queue);
46 
47 static void machine_check_process_queued_event(struct irq_work *work);
48 void machine_check_ue_event(struct machine_check_event *evt);
49 static void machine_process_ue_event(struct work_struct *work);
50 
51 static struct irq_work mce_event_process_work = {
52         .func = machine_check_process_queued_event,
53 };
54 
55 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
56 
57 static void mce_set_error_info(struct machine_check_event *mce,
58 			       struct mce_error_info *mce_err)
59 {
60 	mce->error_type = mce_err->error_type;
61 	switch (mce_err->error_type) {
62 	case MCE_ERROR_TYPE_UE:
63 		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
64 		break;
65 	case MCE_ERROR_TYPE_SLB:
66 		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
67 		break;
68 	case MCE_ERROR_TYPE_ERAT:
69 		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
70 		break;
71 	case MCE_ERROR_TYPE_TLB:
72 		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
73 		break;
74 	case MCE_ERROR_TYPE_USER:
75 		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
76 		break;
77 	case MCE_ERROR_TYPE_RA:
78 		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
79 		break;
80 	case MCE_ERROR_TYPE_LINK:
81 		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
82 		break;
83 	case MCE_ERROR_TYPE_UNKNOWN:
84 	default:
85 		break;
86 	}
87 }
88 
89 /*
90  * Decode and save high level MCE information into per cpu buffer which
91  * is an array of machine_check_event structure.
92  */
93 void save_mce_event(struct pt_regs *regs, long handled,
94 		    struct mce_error_info *mce_err,
95 		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
96 {
97 	int index = __this_cpu_inc_return(mce_nest_count) - 1;
98 	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
99 
100 	/*
101 	 * Return if we don't have enough space to log mce event.
102 	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
103 	 * the check below will stop buffer overrun.
104 	 */
105 	if (index >= MAX_MC_EVT)
106 		return;
107 
108 	/* Populate generic machine check info */
109 	mce->version = MCE_V1;
110 	mce->srr0 = nip;
111 	mce->srr1 = regs->msr;
112 	mce->gpr3 = regs->gpr[3];
113 	mce->in_use = 1;
114 
115 	/* Mark it recovered if we have handled it and MSR(RI=1). */
116 	if (handled && (regs->msr & MSR_RI))
117 		mce->disposition = MCE_DISPOSITION_RECOVERED;
118 	else
119 		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
120 
121 	mce->initiator = mce_err->initiator;
122 	mce->severity = mce_err->severity;
123 
124 	/*
125 	 * Populate the mce error_type and type-specific error_type.
126 	 */
127 	mce_set_error_info(mce, mce_err);
128 
129 	if (!addr)
130 		return;
131 
132 	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
133 		mce->u.tlb_error.effective_address_provided = true;
134 		mce->u.tlb_error.effective_address = addr;
135 	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
136 		mce->u.slb_error.effective_address_provided = true;
137 		mce->u.slb_error.effective_address = addr;
138 	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
139 		mce->u.erat_error.effective_address_provided = true;
140 		mce->u.erat_error.effective_address = addr;
141 	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
142 		mce->u.user_error.effective_address_provided = true;
143 		mce->u.user_error.effective_address = addr;
144 	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
145 		mce->u.ra_error.effective_address_provided = true;
146 		mce->u.ra_error.effective_address = addr;
147 	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
148 		mce->u.link_error.effective_address_provided = true;
149 		mce->u.link_error.effective_address = addr;
150 	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
151 		mce->u.ue_error.effective_address_provided = true;
152 		mce->u.ue_error.effective_address = addr;
153 		if (phys_addr != ULONG_MAX) {
154 			mce->u.ue_error.physical_address_provided = true;
155 			mce->u.ue_error.physical_address = phys_addr;
156 			machine_check_ue_event(mce);
157 		}
158 	}
159 	return;
160 }
161 
162 /*
163  * get_mce_event:
164  *	mce	Pointer to machine_check_event structure to be filled.
165  *	release Flag to indicate whether to free the event slot or not.
166  *		0 <= do not release the mce event. Caller will invoke
167  *		     release_mce_event() once event has been consumed.
168  *		1 <= release the slot.
169  *
170  *	return	1 = success
171  *		0 = failure
172  *
173  * get_mce_event() will be called by platform specific machine check
174  * handle routine and in KVM.
175  * When we call get_mce_event(), we are still in interrupt context and
176  * preemption will not be scheduled until ret_from_expect() routine
177  * is called.
178  */
179 int get_mce_event(struct machine_check_event *mce, bool release)
180 {
181 	int index = __this_cpu_read(mce_nest_count) - 1;
182 	struct machine_check_event *mc_evt;
183 	int ret = 0;
184 
185 	/* Sanity check */
186 	if (index < 0)
187 		return ret;
188 
189 	/* Check if we have MCE info to process. */
190 	if (index < MAX_MC_EVT) {
191 		mc_evt = this_cpu_ptr(&mce_event[index]);
192 		/* Copy the event structure and release the original */
193 		if (mce)
194 			*mce = *mc_evt;
195 		if (release)
196 			mc_evt->in_use = 0;
197 		ret = 1;
198 	}
199 	/* Decrement the count to free the slot. */
200 	if (release)
201 		__this_cpu_dec(mce_nest_count);
202 
203 	return ret;
204 }
205 
/*
 * Free the most recently saved MCE event slot without copying it out.
 */
void release_mce_event(void)
{
	/* No destination buffer: only the release side effect is wanted. */
	(void)get_mce_event(NULL, true);
}
210 
211 
212 /*
213  * Queue up the MCE event which then can be handled later.
214  */
215 void machine_check_ue_event(struct machine_check_event *evt)
216 {
217 	int index;
218 
219 	index = __this_cpu_inc_return(mce_ue_count) - 1;
220 	/* If queue is full, just return for now. */
221 	if (index >= MAX_MC_EVT) {
222 		__this_cpu_dec(mce_ue_count);
223 		return;
224 	}
225 	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
226 
227 	/* Queue work to process this event later. */
228 	schedule_work(&mce_ue_event_work);
229 }
230 
231 /*
232  * Queue up the MCE event which then can be handled later.
233  */
234 void machine_check_queue_event(void)
235 {
236 	int index;
237 	struct machine_check_event evt;
238 
239 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
240 		return;
241 
242 	index = __this_cpu_inc_return(mce_queue_count) - 1;
243 	/* If queue is full, just return for now. */
244 	if (index >= MAX_MC_EVT) {
245 		__this_cpu_dec(mce_queue_count);
246 		return;
247 	}
248 	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
249 
250 	/* Queue irq work to process this event later. */
251 	irq_work_queue(&mce_event_process_work);
252 }
253 /*
254  * process pending MCE event from the mce event queue. This function will be
255  * called during syscall exit.
256  */
257 static void machine_process_ue_event(struct work_struct *work)
258 {
259 	int index;
260 	struct machine_check_event *evt;
261 
262 	while (__this_cpu_read(mce_ue_count) > 0) {
263 		index = __this_cpu_read(mce_ue_count) - 1;
264 		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
265 #ifdef CONFIG_MEMORY_FAILURE
266 		/*
267 		 * This should probably queued elsewhere, but
268 		 * oh! well
269 		 */
270 		if (evt->error_type == MCE_ERROR_TYPE_UE) {
271 			if (evt->u.ue_error.physical_address_provided) {
272 				unsigned long pfn;
273 
274 				pfn = evt->u.ue_error.physical_address >>
275 					PAGE_SHIFT;
276 				memory_failure(pfn, 0);
277 			} else
278 				pr_warn("Failed to identify bad address from "
279 					"where the uncorrectable error (UE) "
280 					"was generated\n");
281 		}
282 #endif
283 		__this_cpu_dec(mce_ue_count);
284 	}
285 }
286 /*
287  * process pending MCE event from the mce event queue. This function will be
288  * called during syscall exit.
289  */
290 static void machine_check_process_queued_event(struct irq_work *work)
291 {
292 	int index;
293 	struct machine_check_event *evt;
294 
295 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
296 
297 	/*
298 	 * For now just print it to console.
299 	 * TODO: log this error event to FSP or nvram.
300 	 */
301 	while (__this_cpu_read(mce_queue_count) > 0) {
302 		index = __this_cpu_read(mce_queue_count) - 1;
303 		evt = this_cpu_ptr(&mce_event_queue[index]);
304 		machine_check_print_event_info(evt, false);
305 		__this_cpu_dec(mce_queue_count);
306 	}
307 }
308 
309 void machine_check_print_event_info(struct machine_check_event *evt,
310 				    bool user_mode)
311 {
312 	const char *level, *sevstr, *subtype;
313 	static const char *mc_ue_types[] = {
314 		"Indeterminate",
315 		"Instruction fetch",
316 		"Page table walk ifetch",
317 		"Load/Store",
318 		"Page table walk Load/Store",
319 	};
320 	static const char *mc_slb_types[] = {
321 		"Indeterminate",
322 		"Parity",
323 		"Multihit",
324 	};
325 	static const char *mc_erat_types[] = {
326 		"Indeterminate",
327 		"Parity",
328 		"Multihit",
329 	};
330 	static const char *mc_tlb_types[] = {
331 		"Indeterminate",
332 		"Parity",
333 		"Multihit",
334 	};
335 	static const char *mc_user_types[] = {
336 		"Indeterminate",
337 		"tlbie(l) invalid",
338 	};
339 	static const char *mc_ra_types[] = {
340 		"Indeterminate",
341 		"Instruction fetch (bad)",
342 		"Instruction fetch (foreign)",
343 		"Page table walk ifetch (bad)",
344 		"Page table walk ifetch (foreign)",
345 		"Load (bad)",
346 		"Store (bad)",
347 		"Page table walk Load/Store (bad)",
348 		"Page table walk Load/Store (foreign)",
349 		"Load/Store (foreign)",
350 	};
351 	static const char *mc_link_types[] = {
352 		"Indeterminate",
353 		"Instruction fetch (timeout)",
354 		"Page table walk ifetch (timeout)",
355 		"Load (timeout)",
356 		"Store (timeout)",
357 		"Page table walk Load/Store (timeout)",
358 	};
359 
360 	/* Print things out */
361 	if (evt->version != MCE_V1) {
362 		pr_err("Machine Check Exception, Unknown event version %d !\n",
363 		       evt->version);
364 		return;
365 	}
366 	switch (evt->severity) {
367 	case MCE_SEV_NO_ERROR:
368 		level = KERN_INFO;
369 		sevstr = "Harmless";
370 		break;
371 	case MCE_SEV_WARNING:
372 		level = KERN_WARNING;
373 		sevstr = "";
374 		break;
375 	case MCE_SEV_ERROR_SYNC:
376 		level = KERN_ERR;
377 		sevstr = "Severe";
378 		break;
379 	case MCE_SEV_FATAL:
380 	default:
381 		level = KERN_ERR;
382 		sevstr = "Fatal";
383 		break;
384 	}
385 
386 	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
387 	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
388 	       "Recovered" : "Not recovered");
389 
390 	if (user_mode) {
391 		printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
392 			evt->srr0, current->pid, current->comm);
393 	} else {
394 		printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
395 		       (void *)evt->srr0);
396 	}
397 
398 	printk("%s  Initiator: %s\n", level,
399 	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
400 	switch (evt->error_type) {
401 	case MCE_ERROR_TYPE_UE:
402 		subtype = evt->u.ue_error.ue_error_type <
403 			ARRAY_SIZE(mc_ue_types) ?
404 			mc_ue_types[evt->u.ue_error.ue_error_type]
405 			: "Unknown";
406 		printk("%s  Error type: UE [%s]\n", level, subtype);
407 		if (evt->u.ue_error.effective_address_provided)
408 			printk("%s    Effective address: %016llx\n",
409 			       level, evt->u.ue_error.effective_address);
410 		if (evt->u.ue_error.physical_address_provided)
411 			printk("%s    Physical address:  %016llx\n",
412 			       level, evt->u.ue_error.physical_address);
413 		break;
414 	case MCE_ERROR_TYPE_SLB:
415 		subtype = evt->u.slb_error.slb_error_type <
416 			ARRAY_SIZE(mc_slb_types) ?
417 			mc_slb_types[evt->u.slb_error.slb_error_type]
418 			: "Unknown";
419 		printk("%s  Error type: SLB [%s]\n", level, subtype);
420 		if (evt->u.slb_error.effective_address_provided)
421 			printk("%s    Effective address: %016llx\n",
422 			       level, evt->u.slb_error.effective_address);
423 		break;
424 	case MCE_ERROR_TYPE_ERAT:
425 		subtype = evt->u.erat_error.erat_error_type <
426 			ARRAY_SIZE(mc_erat_types) ?
427 			mc_erat_types[evt->u.erat_error.erat_error_type]
428 			: "Unknown";
429 		printk("%s  Error type: ERAT [%s]\n", level, subtype);
430 		if (evt->u.erat_error.effective_address_provided)
431 			printk("%s    Effective address: %016llx\n",
432 			       level, evt->u.erat_error.effective_address);
433 		break;
434 	case MCE_ERROR_TYPE_TLB:
435 		subtype = evt->u.tlb_error.tlb_error_type <
436 			ARRAY_SIZE(mc_tlb_types) ?
437 			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
438 			: "Unknown";
439 		printk("%s  Error type: TLB [%s]\n", level, subtype);
440 		if (evt->u.tlb_error.effective_address_provided)
441 			printk("%s    Effective address: %016llx\n",
442 			       level, evt->u.tlb_error.effective_address);
443 		break;
444 	case MCE_ERROR_TYPE_USER:
445 		subtype = evt->u.user_error.user_error_type <
446 			ARRAY_SIZE(mc_user_types) ?
447 			mc_user_types[evt->u.user_error.user_error_type]
448 			: "Unknown";
449 		printk("%s  Error type: User [%s]\n", level, subtype);
450 		if (evt->u.user_error.effective_address_provided)
451 			printk("%s    Effective address: %016llx\n",
452 			       level, evt->u.user_error.effective_address);
453 		break;
454 	case MCE_ERROR_TYPE_RA:
455 		subtype = evt->u.ra_error.ra_error_type <
456 			ARRAY_SIZE(mc_ra_types) ?
457 			mc_ra_types[evt->u.ra_error.ra_error_type]
458 			: "Unknown";
459 		printk("%s  Error type: Real address [%s]\n", level, subtype);
460 		if (evt->u.ra_error.effective_address_provided)
461 			printk("%s    Effective address: %016llx\n",
462 			       level, evt->u.ra_error.effective_address);
463 		break;
464 	case MCE_ERROR_TYPE_LINK:
465 		subtype = evt->u.link_error.link_error_type <
466 			ARRAY_SIZE(mc_link_types) ?
467 			mc_link_types[evt->u.link_error.link_error_type]
468 			: "Unknown";
469 		printk("%s  Error type: Link [%s]\n", level, subtype);
470 		if (evt->u.link_error.effective_address_provided)
471 			printk("%s    Effective address: %016llx\n",
472 			       level, evt->u.link_error.effective_address);
473 		break;
474 	default:
475 	case MCE_ERROR_TYPE_UNKNOWN:
476 		printk("%s  Error type: Unknown\n", level);
477 		break;
478 	}
479 }
480 EXPORT_SYMBOL_GPL(machine_check_print_event_info);
481 
482 /*
483  * This function is called in real mode. Strictly no printk's please.
484  *
485  * regs->nip and regs->msr contains srr0 and ssr1.
486  */
487 long machine_check_early(struct pt_regs *regs)
488 {
489 	long handled = 0;
490 
491 	/*
492 	 * See if platform is capable of handling machine check.
493 	 */
494 	if (ppc_md.machine_check_early)
495 		handled = ppc_md.machine_check_early(regs);
496 	return handled;
497 }
498 
499 /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
500 static enum {
501 	DTRIG_UNKNOWN,
502 	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
503 	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
504 } hmer_debug_trig_function;
505 
506 static int init_debug_trig_function(void)
507 {
508 	int pvr;
509 	struct device_node *cpun;
510 	struct property *prop = NULL;
511 	const char *str;
512 
513 	/* First look in the device tree */
514 	preempt_disable();
515 	cpun = of_get_cpu_node(smp_processor_id(), NULL);
516 	if (cpun) {
517 		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
518 					    prop, str) {
519 			if (strcmp(str, "bit17-vector-ci-load") == 0)
520 				hmer_debug_trig_function = DTRIG_VECTOR_CI;
521 			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
522 				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
523 		}
524 		of_node_put(cpun);
525 	}
526 	preempt_enable();
527 
528 	/* If we found the property, don't look at PVR */
529 	if (prop)
530 		goto out;
531 
532 	pvr = mfspr(SPRN_PVR);
533 	/* Check for POWER9 Nimbus (scale-out) */
534 	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
535 		/* DD2.2 and later */
536 		if ((pvr & 0xfff) >= 0x202)
537 			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
538 		/* DD2.0 and DD2.1 - used for vector CI load emulation */
539 		else if ((pvr & 0xfff) >= 0x200)
540 			hmer_debug_trig_function = DTRIG_VECTOR_CI;
541 	}
542 
543  out:
544 	switch (hmer_debug_trig_function) {
545 	case DTRIG_VECTOR_CI:
546 		pr_debug("HMI debug trigger used for vector CI load\n");
547 		break;
548 	case DTRIG_SUSPEND_ESCAPE:
549 		pr_debug("HMI debug trigger used for TM suspend escape\n");
550 		break;
551 	default:
552 		break;
553 	}
554 	return 0;
555 }
556 __initcall(init_debug_trig_function);
557 
558 /*
559  * Handle HMIs that occur as a result of a debug trigger.
560  * Return values:
561  * -1 means this is not a HMI cause that we know about
562  *  0 means no further handling is required
563  *  1 means further handling is required
564  */
565 long hmi_handle_debugtrig(struct pt_regs *regs)
566 {
567 	unsigned long hmer = mfspr(SPRN_HMER);
568 	long ret = 0;
569 
570 	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
571 	if (!((hmer & HMER_DEBUG_TRIG)
572 	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
573 		return -1;
574 
575 	hmer &= ~HMER_DEBUG_TRIG;
576 	/* HMER is a write-AND register */
577 	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
578 
579 	switch (hmer_debug_trig_function) {
580 	case DTRIG_VECTOR_CI:
581 		/*
582 		 * Now to avoid problems with soft-disable we
583 		 * only do the emulation if we are coming from
584 		 * host user space
585 		 */
586 		if (regs && user_mode(regs))
587 			ret = local_paca->hmi_p9_special_emu = 1;
588 
589 		break;
590 
591 	default:
592 		break;
593 	}
594 
595 	/*
596 	 * See if any other HMI causes remain to be handled
597 	 */
598 	if (hmer & mfspr(SPRN_HMEER))
599 		return -1;
600 
601 	return ret;
602 }
603 
604 /*
605  * Return values:
606  */
607 long hmi_exception_realmode(struct pt_regs *regs)
608 {
609 	int ret;
610 
611 	__this_cpu_inc(irq_stat.hmi_exceptions);
612 
613 	ret = hmi_handle_debugtrig(regs);
614 	if (ret >= 0)
615 		return ret;
616 
617 	wait_for_subcore_guest_exit();
618 
619 	if (ppc_md.hmi_exception_early)
620 		ppc_md.hmi_exception_early(regs);
621 
622 	wait_for_tb_resync();
623 
624 	return 1;
625 }
626