xref: /openbmc/linux/drivers/edac/mce_amd.c (revision 6774def6)
1 #include <linux/module.h>
2 #include <linux/slab.h>
3 
4 #include "mce_amd.h"
5 
/*
 * Per-family decoder callbacks. Allocated and filled in by mce_amd_init();
 * reset to NULL there when the CPU family is not recognized.
 */
static struct amd_decoder_ops *fam_ops;

/*
 * Mask passed to XEC() when extracting the extended error code from
 * MCi_STATUS. mce_amd_init() widens it for families 0x15 and 0x16.
 */
static u8 xec_mask	 = 0xf;

/* While false, amd_filter_mce() drops bank 4 errors with xec 0x5. */
static bool report_gart_errors;
/* Optional callback run by decode_mc4_mce() for DRAM ECC errors; may be NULL. */
static void (*nb_bus_decoder)(int node_id, struct mce *m);
12 
/*
 * Select whether NB GART TLB errors are reported: amd_filter_mce() drops
 * them for as long as the flag set here is false.
 */
void amd_report_gart_errors(bool v)
{
	report_gart_errors = v;
}
EXPORT_SYMBOL_GPL(amd_report_gart_errors);
18 
/*
 * Install @f as the callback decode_mc4_mce() invokes for DRAM ECC errors.
 * Any previously registered callback is overwritten without notice.
 */
void amd_register_ecc_decoder(void (*f)(int, struct mce *))
{
	nb_bus_decoder = f;
}
EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
24 
25 void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
26 {
27 	if (nb_bus_decoder) {
28 		WARN_ON(nb_bus_decoder != f);
29 
30 		nb_bus_decoder = NULL;
31 	}
32 }
33 EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
34 
35 /*
36  * string representation for the different MCA reported error types, see F3x48
37  * or MSR0000_0411.
38  */
39 
/* transaction type names */
static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };

/* cache level names */
static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };

/* memory transaction type names (nine entries) */
static const char * const rrrr_msgs[] = {
       "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
};

/* participating processor names; non-static and exported to other modules */
const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
EXPORT_SYMBOL_GPL(pp_msgs);

/* request timeout state */
static const char * const to_msgs[] = { "no timeout", "timed out" };

/* memory or i/o */
static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };

/* internal error type */
static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
63 
/*
 * Family 15h MC1 error descriptions. The table is packed, so f15h_mc1_mce()
 * indexes it with xec for 0x0..0xa, xec - 2 for 0xd and xec - 4 for
 * 0x10..0x15.
 */
static const char * const f15h_mc1_mce_desc[] = {
	"UC during a demand linefill from L2",
	"Parity error during data load from IC",
	"Parity error for IC valid bit",
	"Main tag parity error",
	"Parity error in prediction queue",
	"PFB data/address parity error",
	"Parity error in the branch status reg",
	"PFB promotion address error",
	"Tag error during probe/victimization",
	"Parity error for IC probe tag valid bit",
	"PFB non-cacheable bit parity error",
	"PFB valid bit parity error",			/* xec = 0xd */
	"Microcode Patch Buffer",			/* xec = 0x10 */
	"uop queue",
	"insn buffer",
	"predecode buffer",
	"fetch address FIFO",
	"dispatch uop queue"
};
84 
/*
 * Family 15h MC2 memory-error descriptions. f15h_mc2_mce() indexes this
 * packed table with xec - 0x4 for 0x4..0xc and xec - 0x7 for 0x10..0x14.
 */
static const char * const f15h_mc2_mce_desc[] = {
	"Fill ECC error on data fills",			/* xec = 0x4 */
	"Fill parity error on insn fills",
	"Prefetcher request FIFO parity error",
	"PRQ address parity error",
	"PRQ data parity error",
	"WCC Tag ECC error",
	"WCC Data ECC error",
	"WCB Data parity error",
	"VB Data ECC or parity error",
	"L2 Tag ECC error",				/* xec = 0x10 */
	"Hard L2 Tag ECC error",
	"Multiple hits on L2 tag",
	"XAB parity error",
	"PRB address parity error"
};
101 
/*
 * MC4 error descriptions. decode_mc4_mce() indexes this table directly with
 * xec for 0x0..0xe and with xec - 13 for 0x1c..0x1f.
 */
static const char * const mc4_mce_desc[] = {
	"DRAM ECC error detected on the NB",		/* xec = 0x0 */
	"CRC error detected on HT link",
	"Link-defined sync error packets detected on HT link",
	"HT Master abort",
	"HT Target abort",
	"Invalid GART PTE entry during GART table walk",
	"Unsupported atomic RMW received from an IO link",
	"Watchdog timeout due to lack of progress",
	"DRAM ECC error detected on the NB",		/* xec = 0x8 */
	"SVM DMA Exclusion Vector error",
	"HT data error detected on link",
	"Protocol error (link, L3, probe filter)",
	"NB internal arrays parity error",
	"DRAM addr/ctl signals parity error",
	"IO link transmission error",			/* xec = 0xe */
	"L3 data cache ECC error",			/* xec = 0x1c */
	"L3 cache tag error",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory"
};
123 
/*
 * MC5 error descriptions, indexed directly by xec (0x0..0xd) in
 * decode_mc5_mce(). Entries 0x0 and 0xc are printed as-is; the others get
 * " parity error" appended.
 */
static const char * const mc5_mce_desc[] = {
	"CPU Watchdog timer expire",			/* xec = 0x0 */
	"Wakeup array dest tag",
	"AG payload array",
	"EX payload array",
	"IDRF array",
	"Retire dispatch queue",
	"Mapper checkpoint array",
	"Physical register file EX0 port",
	"Physical register file EX1 port",
	"Physical register file AG0 port",
	"Physical register file AG1 port",
	"Flag register file",
	"DE error occurred",				/* xec = 0xc */
	"Retire status queue"
};
140 
141 static bool f12h_mc0_mce(u16 ec, u8 xec)
142 {
143 	bool ret = false;
144 
145 	if (MEM_ERROR(ec)) {
146 		u8 ll = LL(ec);
147 		ret = true;
148 
149 		if (ll == LL_L2)
150 			pr_cont("during L1 linefill from L2.\n");
151 		else if (ll == LL_L1)
152 			pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
153 		else
154 			ret = false;
155 	}
156 	return ret;
157 }
158 
159 static bool f10h_mc0_mce(u16 ec, u8 xec)
160 {
161 	if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
162 		pr_cont("during data scrub.\n");
163 		return true;
164 	}
165 	return f12h_mc0_mce(ec, xec);
166 }
167 
168 static bool k8_mc0_mce(u16 ec, u8 xec)
169 {
170 	if (BUS_ERROR(ec)) {
171 		pr_cont("during system linefill.\n");
172 		return true;
173 	}
174 
175 	return f10h_mc0_mce(ec, xec);
176 }
177 
178 static bool cat_mc0_mce(u16 ec, u8 xec)
179 {
180 	u8 r4	 = R4(ec);
181 	bool ret = true;
182 
183 	if (MEM_ERROR(ec)) {
184 
185 		if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
186 			return false;
187 
188 		switch (r4) {
189 		case R4_DRD:
190 		case R4_DWR:
191 			pr_cont("Data/Tag parity error due to %s.\n",
192 				(r4 == R4_DRD ? "load/hw prf" : "store"));
193 			break;
194 		case R4_EVICT:
195 			pr_cont("Copyback parity error on a tag miss.\n");
196 			break;
197 		case R4_SNOOP:
198 			pr_cont("Tag parity error during snoop.\n");
199 			break;
200 		default:
201 			ret = false;
202 		}
203 	} else if (BUS_ERROR(ec)) {
204 
205 		if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
206 			return false;
207 
208 		pr_cont("System read data error on a ");
209 
210 		switch (r4) {
211 		case R4_RD:
212 			pr_cont("TLB reload.\n");
213 			break;
214 		case R4_DWR:
215 			pr_cont("store.\n");
216 			break;
217 		case R4_DRD:
218 			pr_cont("load.\n");
219 			break;
220 		default:
221 			ret = false;
222 		}
223 	} else {
224 		ret = false;
225 	}
226 
227 	return ret;
228 }
229 
230 static bool f15h_mc0_mce(u16 ec, u8 xec)
231 {
232 	bool ret = true;
233 
234 	if (MEM_ERROR(ec)) {
235 
236 		switch (xec) {
237 		case 0x0:
238 			pr_cont("Data Array access error.\n");
239 			break;
240 
241 		case 0x1:
242 			pr_cont("UC error during a linefill from L2/NB.\n");
243 			break;
244 
245 		case 0x2:
246 		case 0x11:
247 			pr_cont("STQ access error.\n");
248 			break;
249 
250 		case 0x3:
251 			pr_cont("SCB access error.\n");
252 			break;
253 
254 		case 0x10:
255 			pr_cont("Tag error.\n");
256 			break;
257 
258 		case 0x12:
259 			pr_cont("LDQ access error.\n");
260 			break;
261 
262 		default:
263 			ret = false;
264 		}
265 	} else if (BUS_ERROR(ec)) {
266 
267 		if (!xec)
268 			pr_cont("System Read Data Error.\n");
269 		else
270 			pr_cont(" Internal error condition type %d.\n", xec);
271 	} else if (INT_ERROR(ec)) {
272 		if (xec <= 0x1f)
273 			pr_cont("Hardware Assert.\n");
274 		else
275 			ret = false;
276 
277 	} else
278 		ret = false;
279 
280 	return ret;
281 }
282 
283 static void decode_mc0_mce(struct mce *m)
284 {
285 	u16 ec = EC(m->status);
286 	u8 xec = XEC(m->status, xec_mask);
287 
288 	pr_emerg(HW_ERR "MC0 Error: ");
289 
290 	/* TLB error signatures are the same across families */
291 	if (TLB_ERROR(ec)) {
292 		if (TT(ec) == TT_DATA) {
293 			pr_cont("%s TLB %s.\n", LL_MSG(ec),
294 				((xec == 2) ? "locked miss"
295 					    : (xec ? "multimatch" : "parity")));
296 			return;
297 		}
298 	} else if (fam_ops->mc0_mce(ec, xec))
299 		;
300 	else
301 		pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
302 }
303 
304 static bool k8_mc1_mce(u16 ec, u8 xec)
305 {
306 	u8 ll	 = LL(ec);
307 	bool ret = true;
308 
309 	if (!MEM_ERROR(ec))
310 		return false;
311 
312 	if (ll == 0x2)
313 		pr_cont("during a linefill from L2.\n");
314 	else if (ll == 0x1) {
315 		switch (R4(ec)) {
316 		case R4_IRD:
317 			pr_cont("Parity error during data load.\n");
318 			break;
319 
320 		case R4_EVICT:
321 			pr_cont("Copyback Parity/Victim error.\n");
322 			break;
323 
324 		case R4_SNOOP:
325 			pr_cont("Tag Snoop error.\n");
326 			break;
327 
328 		default:
329 			ret = false;
330 			break;
331 		}
332 	} else
333 		ret = false;
334 
335 	return ret;
336 }
337 
338 static bool cat_mc1_mce(u16 ec, u8 xec)
339 {
340 	u8 r4    = R4(ec);
341 	bool ret = true;
342 
343 	if (!MEM_ERROR(ec))
344 		return false;
345 
346 	if (TT(ec) != TT_INSTR)
347 		return false;
348 
349 	if (r4 == R4_IRD)
350 		pr_cont("Data/tag array parity error for a tag hit.\n");
351 	else if (r4 == R4_SNOOP)
352 		pr_cont("Tag error during snoop/victimization.\n");
353 	else if (xec == 0x0)
354 		pr_cont("Tag parity error from victim castout.\n");
355 	else if (xec == 0x2)
356 		pr_cont("Microcode patch RAM parity error.\n");
357 	else
358 		ret = false;
359 
360 	return ret;
361 }
362 
363 static bool f15h_mc1_mce(u16 ec, u8 xec)
364 {
365 	bool ret = true;
366 
367 	if (!MEM_ERROR(ec))
368 		return false;
369 
370 	switch (xec) {
371 	case 0x0 ... 0xa:
372 		pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
373 		break;
374 
375 	case 0xd:
376 		pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
377 		break;
378 
379 	case 0x10:
380 		pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
381 		break;
382 
383 	case 0x11 ... 0x15:
384 		pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
385 		break;
386 
387 	default:
388 		ret = false;
389 	}
390 	return ret;
391 }
392 
393 static void decode_mc1_mce(struct mce *m)
394 {
395 	u16 ec = EC(m->status);
396 	u8 xec = XEC(m->status, xec_mask);
397 
398 	pr_emerg(HW_ERR "MC1 Error: ");
399 
400 	if (TLB_ERROR(ec))
401 		pr_cont("%s TLB %s.\n", LL_MSG(ec),
402 			(xec ? "multimatch" : "parity error"));
403 	else if (BUS_ERROR(ec)) {
404 		bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
405 
406 		pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
407 	} else if (INT_ERROR(ec)) {
408 		if (xec <= 0x3f)
409 			pr_cont("Hardware Assert.\n");
410 		else
411 			goto wrong_mc1_mce;
412 	} else if (fam_ops->mc1_mce(ec, xec))
413 		;
414 	else
415 		goto wrong_mc1_mce;
416 
417 	return;
418 
419 wrong_mc1_mce:
420 	pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
421 }
422 
423 static bool k8_mc2_mce(u16 ec, u8 xec)
424 {
425 	bool ret = true;
426 
427 	if (xec == 0x1)
428 		pr_cont(" in the write data buffers.\n");
429 	else if (xec == 0x3)
430 		pr_cont(" in the victim data buffers.\n");
431 	else if (xec == 0x2 && MEM_ERROR(ec))
432 		pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
433 	else if (xec == 0x0) {
434 		if (TLB_ERROR(ec))
435 			pr_cont(": %s error in a Page Descriptor Cache or "
436 				"Guest TLB.\n", TT_MSG(ec));
437 		else if (BUS_ERROR(ec))
438 			pr_cont(": %s/ECC error in data read from NB: %s.\n",
439 				R4_MSG(ec), PP_MSG(ec));
440 		else if (MEM_ERROR(ec)) {
441 			u8 r4 = R4(ec);
442 
443 			if (r4 >= 0x7)
444 				pr_cont(": %s error during data copyback.\n",
445 					R4_MSG(ec));
446 			else if (r4 <= 0x1)
447 				pr_cont(": %s parity/ECC error during data "
448 					"access from L2.\n", R4_MSG(ec));
449 			else
450 				ret = false;
451 		} else
452 			ret = false;
453 	} else
454 		ret = false;
455 
456 	return ret;
457 }
458 
459 static bool f15h_mc2_mce(u16 ec, u8 xec)
460 {
461 	bool ret = true;
462 
463 	if (TLB_ERROR(ec)) {
464 		if (xec == 0x0)
465 			pr_cont("Data parity TLB read error.\n");
466 		else if (xec == 0x1)
467 			pr_cont("Poison data provided for TLB fill.\n");
468 		else
469 			ret = false;
470 	} else if (BUS_ERROR(ec)) {
471 		if (xec > 2)
472 			ret = false;
473 
474 		pr_cont("Error during attempted NB data read.\n");
475 	} else if (MEM_ERROR(ec)) {
476 		switch (xec) {
477 		case 0x4 ... 0xc:
478 			pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
479 			break;
480 
481 		case 0x10 ... 0x14:
482 			pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
483 			break;
484 
485 		default:
486 			ret = false;
487 		}
488 	} else if (INT_ERROR(ec)) {
489 		if (xec <= 0x3f)
490 			pr_cont("Hardware Assert.\n");
491 		else
492 			ret = false;
493 	}
494 
495 	return ret;
496 }
497 
498 static bool f16h_mc2_mce(u16 ec, u8 xec)
499 {
500 	u8 r4 = R4(ec);
501 
502 	if (!MEM_ERROR(ec))
503 		return false;
504 
505 	switch (xec) {
506 	case 0x04 ... 0x05:
507 		pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
508 		break;
509 
510 	case 0x09 ... 0x0b:
511 	case 0x0d ... 0x0f:
512 		pr_cont("ECC error in L2 tag (%s).\n",
513 			((r4 == R4_GEN)   ? "BankReq" :
514 			((r4 == R4_SNOOP) ? "Prb"     : "Fill")));
515 		break;
516 
517 	case 0x10 ... 0x19:
518 	case 0x1b:
519 		pr_cont("ECC error in L2 data array (%s).\n",
520 			(((r4 == R4_RD) && !(xec & 0x3)) ? "Hit"  :
521 			((r4 == R4_GEN)   ? "Attr" :
522 			((r4 == R4_EVICT) ? "Vict" : "Fill"))));
523 		break;
524 
525 	case 0x1c ... 0x1d:
526 	case 0x1f:
527 		pr_cont("Parity error in L2 attribute bits (%s).\n",
528 			((r4 == R4_RD)  ? "Hit"  :
529 			((r4 == R4_GEN) ? "Attr" : "Fill")));
530 		break;
531 
532 	default:
533 		return false;
534 	}
535 
536 	return true;
537 }
538 
539 static void decode_mc2_mce(struct mce *m)
540 {
541 	u16 ec = EC(m->status);
542 	u8 xec = XEC(m->status, xec_mask);
543 
544 	pr_emerg(HW_ERR "MC2 Error: ");
545 
546 	if (!fam_ops->mc2_mce(ec, xec))
547 		pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
548 }
549 
550 static void decode_mc3_mce(struct mce *m)
551 {
552 	u16 ec = EC(m->status);
553 	u8 xec = XEC(m->status, xec_mask);
554 
555 	if (boot_cpu_data.x86 >= 0x14) {
556 		pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
557 			 " please report on LKML.\n");
558 		return;
559 	}
560 
561 	pr_emerg(HW_ERR "MC3 Error");
562 
563 	if (xec == 0x0) {
564 		u8 r4 = R4(ec);
565 
566 		if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
567 			goto wrong_mc3_mce;
568 
569 		pr_cont(" during %s.\n", R4_MSG(ec));
570 	} else
571 		goto wrong_mc3_mce;
572 
573 	return;
574 
575  wrong_mc3_mce:
576 	pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
577 }
578 
579 static void decode_mc4_mce(struct mce *m)
580 {
581 	struct cpuinfo_x86 *c = &boot_cpu_data;
582 	int node_id = amd_get_nb_id(m->extcpu);
583 	u16 ec = EC(m->status);
584 	u8 xec = XEC(m->status, 0x1f);
585 	u8 offset = 0;
586 
587 	pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
588 
589 	switch (xec) {
590 	case 0x0 ... 0xe:
591 
592 		/* special handling for DRAM ECCs */
593 		if (xec == 0x0 || xec == 0x8) {
594 			/* no ECCs on F11h */
595 			if (c->x86 == 0x11)
596 				goto wrong_mc4_mce;
597 
598 			pr_cont("%s.\n", mc4_mce_desc[xec]);
599 
600 			if (nb_bus_decoder)
601 				nb_bus_decoder(node_id, m);
602 			return;
603 		}
604 		break;
605 
606 	case 0xf:
607 		if (TLB_ERROR(ec))
608 			pr_cont("GART Table Walk data error.\n");
609 		else if (BUS_ERROR(ec))
610 			pr_cont("DMA Exclusion Vector Table Walk error.\n");
611 		else
612 			goto wrong_mc4_mce;
613 		return;
614 
615 	case 0x19:
616 		if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
617 			pr_cont("Compute Unit Data Error.\n");
618 		else
619 			goto wrong_mc4_mce;
620 		return;
621 
622 	case 0x1c ... 0x1f:
623 		offset = 13;
624 		break;
625 
626 	default:
627 		goto wrong_mc4_mce;
628 	}
629 
630 	pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
631 	return;
632 
633  wrong_mc4_mce:
634 	pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
635 }
636 
637 static void decode_mc5_mce(struct mce *m)
638 {
639 	struct cpuinfo_x86 *c = &boot_cpu_data;
640 	u16 ec = EC(m->status);
641 	u8 xec = XEC(m->status, xec_mask);
642 
643 	if (c->x86 == 0xf || c->x86 == 0x11)
644 		goto wrong_mc5_mce;
645 
646 	pr_emerg(HW_ERR "MC5 Error: ");
647 
648 	if (INT_ERROR(ec)) {
649 		if (xec <= 0x1f) {
650 			pr_cont("Hardware Assert.\n");
651 			return;
652 		} else
653 			goto wrong_mc5_mce;
654 	}
655 
656 	if (xec == 0x0 || xec == 0xc)
657 		pr_cont("%s.\n", mc5_mce_desc[xec]);
658 	else if (xec <= 0xd)
659 		pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
660 	else
661 		goto wrong_mc5_mce;
662 
663 	return;
664 
665  wrong_mc5_mce:
666 	pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
667 }
668 
669 static void decode_mc6_mce(struct mce *m)
670 {
671 	u8 xec = XEC(m->status, xec_mask);
672 
673 	pr_emerg(HW_ERR "MC6 Error: ");
674 
675 	switch (xec) {
676 	case 0x0:
677 		pr_cont("Hardware Assertion");
678 		break;
679 
680 	case 0x1:
681 		pr_cont("Free List");
682 		break;
683 
684 	case 0x2:
685 		pr_cont("Physical Register File");
686 		break;
687 
688 	case 0x3:
689 		pr_cont("Retire Queue");
690 		break;
691 
692 	case 0x4:
693 		pr_cont("Scheduler table");
694 		break;
695 
696 	case 0x5:
697 		pr_cont("Status Register File");
698 		break;
699 
700 	default:
701 		goto wrong_mc6_mce;
702 		break;
703 	}
704 
705 	pr_cont(" parity error.\n");
706 
707 	return;
708 
709  wrong_mc6_mce:
710 	pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
711 }
712 
713 static inline void amd_decode_err_code(u16 ec)
714 {
715 	if (INT_ERROR(ec)) {
716 		pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
717 		return;
718 	}
719 
720 	pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
721 
722 	if (BUS_ERROR(ec))
723 		pr_cont(", mem/io: %s", II_MSG(ec));
724 	else
725 		pr_cont(", tx: %s", TT_MSG(ec));
726 
727 	if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
728 		pr_cont(", mem-tx: %s", R4_MSG(ec));
729 
730 		if (BUS_ERROR(ec))
731 			pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
732 	}
733 
734 	pr_cont("\n");
735 }
736 
737 /*
738  * Filter out unwanted MCE signatures here.
739  */
740 static bool amd_filter_mce(struct mce *m)
741 {
742 	u8 xec = (m->status >> 16) & 0x1f;
743 
744 	/*
745 	 * NB GART TLB error reporting is disabled by default.
746 	 */
747 	if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
748 		return true;
749 
750 	return false;
751 }
752 
753 static const char *decode_error_status(struct mce *m)
754 {
755 	if (m->status & MCI_STATUS_UC) {
756 		if (m->status & MCI_STATUS_PCC)
757 			return "System Fatal error.";
758 		if (m->mcgstatus & MCG_STATUS_RIPV)
759 			return "Uncorrected, software restartable error.";
760 		return "Uncorrected, software containable error.";
761 	}
762 
763 	if (m->status & MCI_STATUS_DEFERRED)
764 		return "Deferred error.";
765 
766 	return "Corrected error, no action required.";
767 }
768 
769 int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
770 {
771 	struct mce *m = (struct mce *)data;
772 	struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
773 	int ecc;
774 
775 	if (amd_filter_mce(m))
776 		return NOTIFY_STOP;
777 
778 	pr_emerg(HW_ERR "%s\n", decode_error_status(m));
779 
780 	pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
781 		m->extcpu,
782 		c->x86, c->x86_model, c->x86_mask,
783 		m->bank,
784 		((m->status & MCI_STATUS_OVER)	? "Over"  : "-"),
785 		((m->status & MCI_STATUS_UC)	? "UE"	  : "CE"),
786 		((m->status & MCI_STATUS_MISCV)	? "MiscV" : "-"),
787 		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"),
788 		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"));
789 
790 	if (c->x86 == 0x15 || c->x86 == 0x16)
791 		pr_cont("|%s|%s",
792 			((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
793 			((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));
794 
795 	/* do the two bits[14:13] together */
796 	ecc = (m->status >> 45) & 0x3;
797 	if (ecc)
798 		pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
799 
800 	pr_cont("]: 0x%016llx\n", m->status);
801 
802 	if (m->status & MCI_STATUS_ADDRV)
803 		pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
804 
805 	if (!fam_ops)
806 		goto err_code;
807 
808 	switch (m->bank) {
809 	case 0:
810 		decode_mc0_mce(m);
811 		break;
812 
813 	case 1:
814 		decode_mc1_mce(m);
815 		break;
816 
817 	case 2:
818 		decode_mc2_mce(m);
819 		break;
820 
821 	case 3:
822 		decode_mc3_mce(m);
823 		break;
824 
825 	case 4:
826 		decode_mc4_mce(m);
827 		break;
828 
829 	case 5:
830 		decode_mc5_mce(m);
831 		break;
832 
833 	case 6:
834 		decode_mc6_mce(m);
835 		break;
836 
837 	default:
838 		break;
839 	}
840 
841  err_code:
842 	amd_decode_err_code(m->status & 0xffff);
843 
844 	return NOTIFY_STOP;
845 }
846 EXPORT_SYMBOL_GPL(amd_decode_mce);
847 
/*
 * Notifier block hooking amd_decode_mce() into the MCE decode chain; it is
 * registered in mce_amd_init() and unregistered in mce_amd_exit().
 */
static struct notifier_block amd_mce_dec_nb = {
	.notifier_call	= amd_decode_mce,
};
851 
852 static int __init mce_amd_init(void)
853 {
854 	struct cpuinfo_x86 *c = &boot_cpu_data;
855 
856 	if (c->x86_vendor != X86_VENDOR_AMD)
857 		return -ENODEV;
858 
859 	fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
860 	if (!fam_ops)
861 		return -ENOMEM;
862 
863 	switch (c->x86) {
864 	case 0xf:
865 		fam_ops->mc0_mce = k8_mc0_mce;
866 		fam_ops->mc1_mce = k8_mc1_mce;
867 		fam_ops->mc2_mce = k8_mc2_mce;
868 		break;
869 
870 	case 0x10:
871 		fam_ops->mc0_mce = f10h_mc0_mce;
872 		fam_ops->mc1_mce = k8_mc1_mce;
873 		fam_ops->mc2_mce = k8_mc2_mce;
874 		break;
875 
876 	case 0x11:
877 		fam_ops->mc0_mce = k8_mc0_mce;
878 		fam_ops->mc1_mce = k8_mc1_mce;
879 		fam_ops->mc2_mce = k8_mc2_mce;
880 		break;
881 
882 	case 0x12:
883 		fam_ops->mc0_mce = f12h_mc0_mce;
884 		fam_ops->mc1_mce = k8_mc1_mce;
885 		fam_ops->mc2_mce = k8_mc2_mce;
886 		break;
887 
888 	case 0x14:
889 		fam_ops->mc0_mce = cat_mc0_mce;
890 		fam_ops->mc1_mce = cat_mc1_mce;
891 		fam_ops->mc2_mce = k8_mc2_mce;
892 		break;
893 
894 	case 0x15:
895 		xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
896 
897 		fam_ops->mc0_mce = f15h_mc0_mce;
898 		fam_ops->mc1_mce = f15h_mc1_mce;
899 		fam_ops->mc2_mce = f15h_mc2_mce;
900 		break;
901 
902 	case 0x16:
903 		xec_mask = 0x1f;
904 		fam_ops->mc0_mce = cat_mc0_mce;
905 		fam_ops->mc1_mce = cat_mc1_mce;
906 		fam_ops->mc2_mce = f16h_mc2_mce;
907 		break;
908 
909 	default:
910 		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
911 		kfree(fam_ops);
912 		fam_ops = NULL;
913 	}
914 
915 	pr_info("MCE: In-kernel MCE decoding enabled.\n");
916 
917 	mce_register_decode_chain(&amd_mce_dec_nb);
918 
919 	return 0;
920 }
921 early_initcall(mce_amd_init);
922 
#ifdef MODULE
/*
 * Module teardown: leave the MCE decode chain first so amd_decode_mce() is
 * no longer invoked through it, then release the per-family callback table.
 */
static void __exit mce_amd_exit(void)
{
	mce_unregister_decode_chain(&amd_mce_dec_nb);
	kfree(fam_ops);
}

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif
935