xref: /openbmc/linux/arch/powerpc/kernel/mce.c (revision 96ac6d43)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

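/*
 * Per-CPU machine check bookkeeping: mce_nest_count tracks how deeply
 * machine checks are nested on this CPU, and mce_event provides one
 * machine_check_event slot per nesting level (up to MAX_MC_EVT).
 */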
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
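
/*
 * Event flow: save_mce_event() records an event in the per-CPU buffer and,
 * for UE errors with a known physical address, also queues it via
 * machine_check_ue_event() (drained from a work queue).
 * machine_check_queue_event() later moves the recorded event onto a
 * per-CPU queue that is drained from irq_work context.
 */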

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high-level MCE information into the per-CPU buffer,
 * which is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce;

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Only take the per-CPU slot pointer once we know index is in range. */
	mce = this_cpu_ptr(&mce_event[index]);

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;
	mce->cpu = get_paca()->paca_index;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;
	mce->sync_error = mce_err->sync_error;
	mce->error_class = mce_err->error_class;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
	return;
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release Flag to indicate whether to free the event slot or not.
 *		0 = do not release the mce event. Caller will invoke
 *		    release_mce_event() once the event has been consumed.
 *		1 = release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by the platform-specific machine check
 * handler routines and by KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}
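
/*
 * Typical usage of get_mce_event() (illustrative): a consumer copies out
 * the most recent event and releases its slot in one call, e.g.
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_RELEASE))
 *		machine_check_print_event_info(&evt, false, false);
 */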

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}

/*
 * Queue up an MCE UE event so that it can be handled later.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = __this_cpu_inc_return(mce_ue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_ue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

	/* Queue work to process this event later. */
	schedule_work(&mce_ue_event_work);
}
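
/*
 * Events queued above are drained by machine_process_ue_event(); when
 * CONFIG_MEMORY_FAILURE is enabled, pages with a known physical address
 * are handed to memory_failure().
 */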

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}

/*
 * Process pending MCE UE events from the MCE UE event queue. This function
 * runs from a work queue, in process context.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh well.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else {
				pr_warn("Failed to identify bad address from where the uncorrectable error (UE) was generated\n");
			}
		}
#endif
		__this_cpu_dec(mce_ue_count);
	}
}
/*
 * Process pending MCE events from the MCE event queue. This function runs
 * from irq_work context, after machine_check_queue_event() has queued an
 * event.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		evt = this_cpu_ptr(&mce_event_queue[index]);
		machine_check_print_event_info(evt, false, false);
		__this_cpu_dec(mce_queue_count);
	}
}

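/*
 * Format a decoded machine check event for the console. The summary line
 * looks roughly like (illustrative, fields depend on the event):
 *
 *	MCE: CPU0: machine check (Severe) Host SLB Multihit DAR: ... [Recovered]
 *
 * followed by a NIP line and an error-class line.
 */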
void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};
	static const char *mc_error_class[] = {
		"Unknown",
		"Hardware error",
		"Probable Hardware error (some chance of software cause)",
		"Software error",
		"Probable Software error (some chance of hardware cause)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
		level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
		err_type, subtype, dar_str,
		evt->disposition == MCE_DISPOSITION_RECOVERED ?
		"Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
			level, evt->cpu, current->pid, current->comm,
			in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
			level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}

	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
		mc_error_class[evt->error_class] : "Unknown";
	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
long machine_check_early(struct pt_regs *regs)
{
	long handled = 0;

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if platform is capable of handling machine check.
	 */
	if (ppc_md.machine_check_early)
		handled = ppc_md.machine_check_early(regs);
	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
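	/*
	 * Because HMER is a write-AND register, writing ~HMER_DEBUG_TRIG
	 * above clears only the DEBUG_TRIG bit and leaves all other bits
	 * unchanged; the local copy in 'hmer' is masked the same way.
	 */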

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * To avoid problems with soft-disable we only do the
		 * emulation if we are coming from host user space.
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled.
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

/*
 * Return values:
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	__this_cpu_inc(irq_stat.hmi_exceptions);

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}