xref: /openbmc/linux/arch/powerpc/kernel/mce.c (revision 160b8e75)
1 /*
2  * Machine check exception handling.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright 2013 IBM Corporation
19  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20  */
21 
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce: " fmt
24 
25 #include <linux/hardirq.h>
26 #include <linux/types.h>
27 #include <linux/ptrace.h>
28 #include <linux/percpu.h>
29 #include <linux/export.h>
30 #include <linux/irq_work.h>
31 
32 #include <asm/machdep.h>
33 #include <asm/mce.h>
34 
35 static DEFINE_PER_CPU(int, mce_nest_count);
36 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
37 
38 /* Queue for delayed MCE events. */
39 static DEFINE_PER_CPU(int, mce_queue_count);
40 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
41 
42 /* Queue for delayed MCE UE events. */
43 static DEFINE_PER_CPU(int, mce_ue_count);
44 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
45 					mce_ue_event_queue);
46 
47 static void machine_check_process_queued_event(struct irq_work *work);
48 void machine_check_ue_event(struct machine_check_event *evt);
49 static void machine_process_ue_event(struct work_struct *work);
50 
51 static struct irq_work mce_event_process_work = {
52         .func = machine_check_process_queued_event,
53 };
54 
55 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
56 
57 static void mce_set_error_info(struct machine_check_event *mce,
58 			       struct mce_error_info *mce_err)
59 {
60 	mce->error_type = mce_err->error_type;
61 	switch (mce_err->error_type) {
62 	case MCE_ERROR_TYPE_UE:
63 		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
64 		break;
65 	case MCE_ERROR_TYPE_SLB:
66 		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
67 		break;
68 	case MCE_ERROR_TYPE_ERAT:
69 		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
70 		break;
71 	case MCE_ERROR_TYPE_TLB:
72 		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
73 		break;
74 	case MCE_ERROR_TYPE_USER:
75 		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
76 		break;
77 	case MCE_ERROR_TYPE_RA:
78 		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
79 		break;
80 	case MCE_ERROR_TYPE_LINK:
81 		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
82 		break;
83 	case MCE_ERROR_TYPE_UNKNOWN:
84 	default:
85 		break;
86 	}
87 }
88 
89 /*
90  * Decode and save high level MCE information into per cpu buffer which
91  * is an array of machine_check_event structure.
92  */
93 void save_mce_event(struct pt_regs *regs, long handled,
94 		    struct mce_error_info *mce_err,
95 		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
96 {
97 	int index = __this_cpu_inc_return(mce_nest_count) - 1;
98 	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
99 
100 	/*
101 	 * Return if we don't have enough space to log mce event.
102 	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
103 	 * the check below will stop buffer overrun.
104 	 */
105 	if (index >= MAX_MC_EVT)
106 		return;
107 
108 	/* Populate generic machine check info */
109 	mce->version = MCE_V1;
110 	mce->srr0 = nip;
111 	mce->srr1 = regs->msr;
112 	mce->gpr3 = regs->gpr[3];
113 	mce->in_use = 1;
114 
115 	/* Mark it recovered if we have handled it and MSR(RI=1). */
116 	if (handled && (regs->msr & MSR_RI))
117 		mce->disposition = MCE_DISPOSITION_RECOVERED;
118 	else
119 		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
120 
121 	mce->initiator = mce_err->initiator;
122 	mce->severity = mce_err->severity;
123 
124 	/*
125 	 * Populate the mce error_type and type-specific error_type.
126 	 */
127 	mce_set_error_info(mce, mce_err);
128 
129 	if (!addr)
130 		return;
131 
132 	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
133 		mce->u.tlb_error.effective_address_provided = true;
134 		mce->u.tlb_error.effective_address = addr;
135 	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
136 		mce->u.slb_error.effective_address_provided = true;
137 		mce->u.slb_error.effective_address = addr;
138 	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
139 		mce->u.erat_error.effective_address_provided = true;
140 		mce->u.erat_error.effective_address = addr;
141 	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
142 		mce->u.user_error.effective_address_provided = true;
143 		mce->u.user_error.effective_address = addr;
144 	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
145 		mce->u.ra_error.effective_address_provided = true;
146 		mce->u.ra_error.effective_address = addr;
147 	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
148 		mce->u.link_error.effective_address_provided = true;
149 		mce->u.link_error.effective_address = addr;
150 	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
151 		mce->u.ue_error.effective_address_provided = true;
152 		mce->u.ue_error.effective_address = addr;
153 		if (phys_addr != ULONG_MAX) {
154 			mce->u.ue_error.physical_address_provided = true;
155 			mce->u.ue_error.physical_address = phys_addr;
156 			machine_check_ue_event(mce);
157 		}
158 	}
159 	return;
160 }
161 
162 /*
163  * get_mce_event:
164  *	mce	Pointer to machine_check_event structure to be filled.
165  *	release Flag to indicate whether to free the event slot or not.
166  *		0 <= do not release the mce event. Caller will invoke
167  *		     release_mce_event() once event has been consumed.
168  *		1 <= release the slot.
169  *
170  *	return	1 = success
171  *		0 = failure
172  *
173  * get_mce_event() will be called by platform specific machine check
174  * handle routine and in KVM.
175  * When we call get_mce_event(), we are still in interrupt context and
176  * preemption will not be scheduled until ret_from_expect() routine
177  * is called.
178  */
179 int get_mce_event(struct machine_check_event *mce, bool release)
180 {
181 	int index = __this_cpu_read(mce_nest_count) - 1;
182 	struct machine_check_event *mc_evt;
183 	int ret = 0;
184 
185 	/* Sanity check */
186 	if (index < 0)
187 		return ret;
188 
189 	/* Check if we have MCE info to process. */
190 	if (index < MAX_MC_EVT) {
191 		mc_evt = this_cpu_ptr(&mce_event[index]);
192 		/* Copy the event structure and release the original */
193 		if (mce)
194 			*mce = *mc_evt;
195 		if (release)
196 			mc_evt->in_use = 0;
197 		ret = 1;
198 	}
199 	/* Decrement the count to free the slot. */
200 	if (release)
201 		__this_cpu_dec(mce_nest_count);
202 
203 	return ret;
204 }
205 
206 void release_mce_event(void)
207 {
208 	get_mce_event(NULL, true);
209 }
210 
211 
212 /*
213  * Queue up the MCE event which then can be handled later.
214  */
215 void machine_check_ue_event(struct machine_check_event *evt)
216 {
217 	int index;
218 
219 	index = __this_cpu_inc_return(mce_ue_count) - 1;
220 	/* If queue is full, just return for now. */
221 	if (index >= MAX_MC_EVT) {
222 		__this_cpu_dec(mce_ue_count);
223 		return;
224 	}
225 	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
226 
227 	/* Queue work to process this event later. */
228 	schedule_work(&mce_ue_event_work);
229 }
230 
231 /*
232  * Queue up the MCE event which then can be handled later.
233  */
234 void machine_check_queue_event(void)
235 {
236 	int index;
237 	struct machine_check_event evt;
238 
239 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
240 		return;
241 
242 	index = __this_cpu_inc_return(mce_queue_count) - 1;
243 	/* If queue is full, just return for now. */
244 	if (index >= MAX_MC_EVT) {
245 		__this_cpu_dec(mce_queue_count);
246 		return;
247 	}
248 	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
249 
250 	/* Queue irq work to process this event later. */
251 	irq_work_queue(&mce_event_process_work);
252 }
253 /*
254  * process pending MCE event from the mce event queue. This function will be
255  * called during syscall exit.
256  */
257 static void machine_process_ue_event(struct work_struct *work)
258 {
259 	int index;
260 	struct machine_check_event *evt;
261 
262 	while (__this_cpu_read(mce_ue_count) > 0) {
263 		index = __this_cpu_read(mce_ue_count) - 1;
264 		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
265 #ifdef CONFIG_MEMORY_FAILURE
266 		/*
267 		 * This should probably queued elsewhere, but
268 		 * oh! well
269 		 */
270 		if (evt->error_type == MCE_ERROR_TYPE_UE) {
271 			if (evt->u.ue_error.physical_address_provided) {
272 				unsigned long pfn;
273 
274 				pfn = evt->u.ue_error.physical_address >>
275 					PAGE_SHIFT;
276 				memory_failure(pfn, 0);
277 			} else
278 				pr_warn("Failed to identify bad address from "
279 					"where the uncorrectable error (UE) "
280 					"was generated\n");
281 		}
282 #endif
283 		__this_cpu_dec(mce_ue_count);
284 	}
285 }
286 /*
287  * process pending MCE event from the mce event queue. This function will be
288  * called during syscall exit.
289  */
290 static void machine_check_process_queued_event(struct irq_work *work)
291 {
292 	int index;
293 	struct machine_check_event *evt;
294 
295 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
296 
297 	/*
298 	 * For now just print it to console.
299 	 * TODO: log this error event to FSP or nvram.
300 	 */
301 	while (__this_cpu_read(mce_queue_count) > 0) {
302 		index = __this_cpu_read(mce_queue_count) - 1;
303 		evt = this_cpu_ptr(&mce_event_queue[index]);
304 		machine_check_print_event_info(evt, false);
305 		__this_cpu_dec(mce_queue_count);
306 	}
307 }
308 
309 void machine_check_print_event_info(struct machine_check_event *evt,
310 				    bool user_mode)
311 {
312 	const char *level, *sevstr, *subtype;
313 	static const char *mc_ue_types[] = {
314 		"Indeterminate",
315 		"Instruction fetch",
316 		"Page table walk ifetch",
317 		"Load/Store",
318 		"Page table walk Load/Store",
319 	};
320 	static const char *mc_slb_types[] = {
321 		"Indeterminate",
322 		"Parity",
323 		"Multihit",
324 	};
325 	static const char *mc_erat_types[] = {
326 		"Indeterminate",
327 		"Parity",
328 		"Multihit",
329 	};
330 	static const char *mc_tlb_types[] = {
331 		"Indeterminate",
332 		"Parity",
333 		"Multihit",
334 	};
335 	static const char *mc_user_types[] = {
336 		"Indeterminate",
337 		"tlbie(l) invalid",
338 	};
339 	static const char *mc_ra_types[] = {
340 		"Indeterminate",
341 		"Instruction fetch (bad)",
342 		"Instruction fetch (foreign)",
343 		"Page table walk ifetch (bad)",
344 		"Page table walk ifetch (foreign)",
345 		"Load (bad)",
346 		"Store (bad)",
347 		"Page table walk Load/Store (bad)",
348 		"Page table walk Load/Store (foreign)",
349 		"Load/Store (foreign)",
350 	};
351 	static const char *mc_link_types[] = {
352 		"Indeterminate",
353 		"Instruction fetch (timeout)",
354 		"Page table walk ifetch (timeout)",
355 		"Load (timeout)",
356 		"Store (timeout)",
357 		"Page table walk Load/Store (timeout)",
358 	};
359 
360 	/* Print things out */
361 	if (evt->version != MCE_V1) {
362 		pr_err("Machine Check Exception, Unknown event version %d !\n",
363 		       evt->version);
364 		return;
365 	}
366 	switch (evt->severity) {
367 	case MCE_SEV_NO_ERROR:
368 		level = KERN_INFO;
369 		sevstr = "Harmless";
370 		break;
371 	case MCE_SEV_WARNING:
372 		level = KERN_WARNING;
373 		sevstr = "";
374 		break;
375 	case MCE_SEV_ERROR_SYNC:
376 		level = KERN_ERR;
377 		sevstr = "Severe";
378 		break;
379 	case MCE_SEV_FATAL:
380 	default:
381 		level = KERN_ERR;
382 		sevstr = "Fatal";
383 		break;
384 	}
385 
386 	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
387 	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
388 	       "Recovered" : "Not recovered");
389 
390 	if (user_mode) {
391 		printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
392 			evt->srr0, current->pid, current->comm);
393 	} else {
394 		printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
395 		       (void *)evt->srr0);
396 	}
397 
398 	printk("%s  Initiator: %s\n", level,
399 	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
400 	switch (evt->error_type) {
401 	case MCE_ERROR_TYPE_UE:
402 		subtype = evt->u.ue_error.ue_error_type <
403 			ARRAY_SIZE(mc_ue_types) ?
404 			mc_ue_types[evt->u.ue_error.ue_error_type]
405 			: "Unknown";
406 		printk("%s  Error type: UE [%s]\n", level, subtype);
407 		if (evt->u.ue_error.effective_address_provided)
408 			printk("%s    Effective address: %016llx\n",
409 			       level, evt->u.ue_error.effective_address);
410 		if (evt->u.ue_error.physical_address_provided)
411 			printk("%s    Physical address:  %016llx\n",
412 			       level, evt->u.ue_error.physical_address);
413 		break;
414 	case MCE_ERROR_TYPE_SLB:
415 		subtype = evt->u.slb_error.slb_error_type <
416 			ARRAY_SIZE(mc_slb_types) ?
417 			mc_slb_types[evt->u.slb_error.slb_error_type]
418 			: "Unknown";
419 		printk("%s  Error type: SLB [%s]\n", level, subtype);
420 		if (evt->u.slb_error.effective_address_provided)
421 			printk("%s    Effective address: %016llx\n",
422 			       level, evt->u.slb_error.effective_address);
423 		break;
424 	case MCE_ERROR_TYPE_ERAT:
425 		subtype = evt->u.erat_error.erat_error_type <
426 			ARRAY_SIZE(mc_erat_types) ?
427 			mc_erat_types[evt->u.erat_error.erat_error_type]
428 			: "Unknown";
429 		printk("%s  Error type: ERAT [%s]\n", level, subtype);
430 		if (evt->u.erat_error.effective_address_provided)
431 			printk("%s    Effective address: %016llx\n",
432 			       level, evt->u.erat_error.effective_address);
433 		break;
434 	case MCE_ERROR_TYPE_TLB:
435 		subtype = evt->u.tlb_error.tlb_error_type <
436 			ARRAY_SIZE(mc_tlb_types) ?
437 			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
438 			: "Unknown";
439 		printk("%s  Error type: TLB [%s]\n", level, subtype);
440 		if (evt->u.tlb_error.effective_address_provided)
441 			printk("%s    Effective address: %016llx\n",
442 			       level, evt->u.tlb_error.effective_address);
443 		break;
444 	case MCE_ERROR_TYPE_USER:
445 		subtype = evt->u.user_error.user_error_type <
446 			ARRAY_SIZE(mc_user_types) ?
447 			mc_user_types[evt->u.user_error.user_error_type]
448 			: "Unknown";
449 		printk("%s  Error type: User [%s]\n", level, subtype);
450 		if (evt->u.user_error.effective_address_provided)
451 			printk("%s    Effective address: %016llx\n",
452 			       level, evt->u.user_error.effective_address);
453 		break;
454 	case MCE_ERROR_TYPE_RA:
455 		subtype = evt->u.ra_error.ra_error_type <
456 			ARRAY_SIZE(mc_ra_types) ?
457 			mc_ra_types[evt->u.ra_error.ra_error_type]
458 			: "Unknown";
459 		printk("%s  Error type: Real address [%s]\n", level, subtype);
460 		if (evt->u.ra_error.effective_address_provided)
461 			printk("%s    Effective address: %016llx\n",
462 			       level, evt->u.ra_error.effective_address);
463 		break;
464 	case MCE_ERROR_TYPE_LINK:
465 		subtype = evt->u.link_error.link_error_type <
466 			ARRAY_SIZE(mc_link_types) ?
467 			mc_link_types[evt->u.link_error.link_error_type]
468 			: "Unknown";
469 		printk("%s  Error type: Link [%s]\n", level, subtype);
470 		if (evt->u.link_error.effective_address_provided)
471 			printk("%s    Effective address: %016llx\n",
472 			       level, evt->u.link_error.effective_address);
473 		break;
474 	default:
475 	case MCE_ERROR_TYPE_UNKNOWN:
476 		printk("%s  Error type: Unknown\n", level);
477 		break;
478 	}
479 }
480 EXPORT_SYMBOL_GPL(machine_check_print_event_info);
481 
482 /*
483  * This function is called in real mode. Strictly no printk's please.
484  *
485  * regs->nip and regs->msr contains srr0 and ssr1.
486  */
487 long machine_check_early(struct pt_regs *regs)
488 {
489 	long handled = 0;
490 
491 	__this_cpu_inc(irq_stat.mce_exceptions);
492 
493 	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
494 		handled = cur_cpu_spec->machine_check_early(regs);
495 	return handled;
496 }
497 
/*
 * Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9.
 * The variable has static storage, so it reads as DTRIG_UNKNOWN until
 * init_debug_trig_function() selects something else.
 */
static enum {
	DTRIG_UNKNOWN = 0,
	/* need to emulate vector CI load instr */
	DTRIG_VECTOR_CI,
	/* need to escape from TM suspend mode */
	DTRIG_SUSPEND_ESCAPE,
} hmer_debug_trig_function;
504 
505 static int init_debug_trig_function(void)
506 {
507 	int pvr;
508 	struct device_node *cpun;
509 	struct property *prop = NULL;
510 	const char *str;
511 
512 	/* First look in the device tree */
513 	preempt_disable();
514 	cpun = of_get_cpu_node(smp_processor_id(), NULL);
515 	if (cpun) {
516 		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
517 					    prop, str) {
518 			if (strcmp(str, "bit17-vector-ci-load") == 0)
519 				hmer_debug_trig_function = DTRIG_VECTOR_CI;
520 			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
521 				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
522 		}
523 		of_node_put(cpun);
524 	}
525 	preempt_enable();
526 
527 	/* If we found the property, don't look at PVR */
528 	if (prop)
529 		goto out;
530 
531 	pvr = mfspr(SPRN_PVR);
532 	/* Check for POWER9 Nimbus (scale-out) */
533 	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
534 		/* DD2.2 and later */
535 		if ((pvr & 0xfff) >= 0x202)
536 			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
537 		/* DD2.0 and DD2.1 - used for vector CI load emulation */
538 		else if ((pvr & 0xfff) >= 0x200)
539 			hmer_debug_trig_function = DTRIG_VECTOR_CI;
540 	}
541 
542  out:
543 	switch (hmer_debug_trig_function) {
544 	case DTRIG_VECTOR_CI:
545 		pr_debug("HMI debug trigger used for vector CI load\n");
546 		break;
547 	case DTRIG_SUSPEND_ESCAPE:
548 		pr_debug("HMI debug trigger used for TM suspend escape\n");
549 		break;
550 	default:
551 		break;
552 	}
553 	return 0;
554 }
555 __initcall(init_debug_trig_function);
556 
557 /*
558  * Handle HMIs that occur as a result of a debug trigger.
559  * Return values:
560  * -1 means this is not a HMI cause that we know about
561  *  0 means no further handling is required
562  *  1 means further handling is required
563  */
564 long hmi_handle_debugtrig(struct pt_regs *regs)
565 {
566 	unsigned long hmer = mfspr(SPRN_HMER);
567 	long ret = 0;
568 
569 	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
570 	if (!((hmer & HMER_DEBUG_TRIG)
571 	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
572 		return -1;
573 
574 	hmer &= ~HMER_DEBUG_TRIG;
575 	/* HMER is a write-AND register */
576 	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
577 
578 	switch (hmer_debug_trig_function) {
579 	case DTRIG_VECTOR_CI:
580 		/*
581 		 * Now to avoid problems with soft-disable we
582 		 * only do the emulation if we are coming from
583 		 * host user space
584 		 */
585 		if (regs && user_mode(regs))
586 			ret = local_paca->hmi_p9_special_emu = 1;
587 
588 		break;
589 
590 	default:
591 		break;
592 	}
593 
594 	/*
595 	 * See if any other HMI causes remain to be handled
596 	 */
597 	if (hmer & mfspr(SPRN_HMEER))
598 		return -1;
599 
600 	return ret;
601 }
602 
603 /*
604  * Return values:
605  */
606 long hmi_exception_realmode(struct pt_regs *regs)
607 {
608 	int ret;
609 
610 	__this_cpu_inc(irq_stat.hmi_exceptions);
611 
612 	ret = hmi_handle_debugtrig(regs);
613 	if (ret >= 0)
614 		return ret;
615 
616 	wait_for_subcore_guest_exit();
617 
618 	if (ppc_md.hmi_exception_early)
619 		ppc_md.hmi_exception_early(regs);
620 
621 	wait_for_tb_resync();
622 
623 	return 1;
624 }
625