xref: /openbmc/linux/drivers/edac/mce_amd.c (revision eb3fcf007fffe5830d815e713591f3e858f2a365)
1 #include <linux/module.h>
2 #include <linux/slab.h>
3 
4 #include "mce_amd.h"
5 
6 static struct amd_decoder_ops *fam_ops;
7 
8 static u8 xec_mask	 = 0xf;
9 
10 static bool report_gart_errors;
11 static void (*nb_bus_decoder)(int node_id, struct mce *m);
12 
13 void amd_report_gart_errors(bool v)
14 {
15 	report_gart_errors = v;
16 }
17 EXPORT_SYMBOL_GPL(amd_report_gart_errors);
18 
19 void amd_register_ecc_decoder(void (*f)(int, struct mce *))
20 {
21 	nb_bus_decoder = f;
22 }
23 EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
24 
25 void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
26 {
27 	if (nb_bus_decoder) {
28 		WARN_ON(nb_bus_decoder != f);
29 
30 		nb_bus_decoder = NULL;
31 	}
32 }
33 EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
34 
/*
 * String representations of the MCA error-code fields, see F3x48 or
 * MSR0000_0411. Each table is indexed by the raw field value.
 */

/* transaction type */
static const char * const tt_msgs[] = {
	[0] = "INSN",
	[1] = "DATA",
	[2] = "GEN",
	[3] = "RESV",
};

/* cache level */
static const char * const ll_msgs[] = {
	[0] = "RESV",
	[1] = "L1",
	[2] = "L2",
	[3] = "L3/GEN",
};

/* memory transaction type */
static const char * const rrrr_msgs[] = {
	[0] = "GEN",
	[1] = "RD",
	[2] = "WR",
	[3] = "DRD",
	[4] = "DWR",
	[5] = "IRD",
	[6] = "PRF",
	[7] = "EV",
	[8] = "SNP",
};
50 
/* participating processor; exported for use outside this file */
const char * const pp_msgs[] = {
	[0] = "SRC",
	[1] = "RES",
	[2] = "OBS",
	[3] = "GEN",
};
EXPORT_SYMBOL_GPL(pp_msgs);
54 
/* request timeout */
static const char * const to_msgs[] = {
	[0] = "no timeout",
	[1] = "timed out",
};

/* memory or i/o */
static const char * const ii_msgs[] = {
	[0] = "MEM",
	[1] = "RESV",
	[2] = "IO",
	[3] = "GEN",
};

/* internal error type */
static const char * const uu_msgs[] = {
	[0] = "RESV",
	[1] = "RESV",
	[2] = "HWA",
	[3] = "RESV",
};
63 
/*
 * MC1 descriptions used by f15h_mc1_mce(). The table is packed, so the
 * array index is not always the extended error code:
 *   xec 0x00-0x0a -> index xec
 *   xec 0x0d      -> index xec - 2
 *   xec 0x10-0x15 -> index xec - 4
 */
static const char * const f15h_mc1_mce_desc[] = {
	[0]  = "UC during a demand linefill from L2",	/* xec = 0x0 */
	[1]  = "Parity error during data load from IC",
	[2]  = "Parity error for IC valid bit",
	[3]  = "Main tag parity error",
	[4]  = "Parity error in prediction queue",
	[5]  = "PFB data/address parity error",
	[6]  = "Parity error in the branch status reg",
	[7]  = "PFB promotion address error",
	[8]  = "Tag error during probe/victimization",
	[9]  = "Parity error for IC probe tag valid bit",
	[10] = "PFB non-cacheable bit parity error",	/* xec = 0xa */
	[11] = "PFB valid bit parity error",		/* xec = 0xd */
	[12] = "Microcode Patch Buffer",		/* xec = 0x10 */
	[13] = "uop queue",
	[14] = "insn buffer",
	[15] = "predecode buffer",
	[16] = "fetch address FIFO",
	[17] = "dispatch uop queue",			/* xec = 0x15 */
};
84 
/*
 * MC2 memory-error descriptions used by f15h_mc2_mce(). Packed table:
 *   xec 0x04-0x0c -> index xec - 0x4
 *   xec 0x10-0x14 -> index xec - 0x7
 */
static const char * const f15h_mc2_mce_desc[] = {
	[0]  = "Fill ECC error on data fills",		/* xec = 0x4 */
	[1]  = "Fill parity error on insn fills",
	[2]  = "Prefetcher request FIFO parity error",
	[3]  = "PRQ address parity error",
	[4]  = "PRQ data parity error",
	[5]  = "WCC Tag ECC error",
	[6]  = "WCC Data ECC error",
	[7]  = "WCB Data parity error",
	[8]  = "VB Data ECC or parity error",		/* xec = 0xc */
	[9]  = "L2 Tag ECC error",			/* xec = 0x10 */
	[10] = "Hard L2 Tag ECC error",
	[11] = "Multiple hits on L2 tag",
	[12] = "XAB parity error",
	[13] = "PRB address parity error",		/* xec = 0x14 */
};
101 
/*
 * MC4 descriptions used by decode_mc4_mce(). Entries 0x00-0x0e are
 * indexed directly by the extended error code; xec 0x1c-0x1f map to the
 * trailing entries through an offset of 13.
 */
static const char * const mc4_mce_desc[] = {
	[0x00] = "DRAM ECC error detected on the NB",
	[0x01] = "CRC error detected on HT link",
	[0x02] = "Link-defined sync error packets detected on HT link",
	[0x03] = "HT Master abort",
	[0x04] = "HT Target abort",
	[0x05] = "Invalid GART PTE entry during GART table walk",
	[0x06] = "Unsupported atomic RMW received from an IO link",
	[0x07] = "Watchdog timeout due to lack of progress",
	[0x08] = "DRAM ECC error detected on the NB",
	[0x09] = "SVM DMA Exclusion Vector error",
	[0x0a] = "HT data error detected on link",
	[0x0b] = "Protocol error (link, L3, probe filter)",
	[0x0c] = "NB internal arrays parity error",
	[0x0d] = "DRAM addr/ctl signals parity error",
	[0x0e] = "IO link transmission error",
	[15]   = "L3 data cache ECC error",		/* xec = 0x1c */
	[16]   = "L3 cache tag error",
	[17]   = "L3 LRU parity bits error",
	[18]   = "ECC Error in the Probe Filter directory", /* xec = 0x1f */
};
123 
/*
 * MC5 descriptions used by decode_mc5_mce(), indexed directly by the
 * extended error code (0x0-0xd).
 */
static const char * const mc5_mce_desc[] = {
	[0x0] = "CPU Watchdog timer expire",
	[0x1] = "Wakeup array dest tag",
	[0x2] = "AG payload array",
	[0x3] = "EX payload array",
	[0x4] = "IDRF array",
	[0x5] = "Retire dispatch queue",
	[0x6] = "Mapper checkpoint array",
	[0x7] = "Physical register file EX0 port",
	[0x8] = "Physical register file EX1 port",
	[0x9] = "Physical register file AG0 port",
	[0xa] = "Physical register file AG1 port",
	[0xb] = "Flag register file",
	[0xc] = "DE error occurred",
	[0xd] = "Retire status queue",
};
140 
/*
 * MC6 descriptions used by decode_mc6_mce(), indexed directly by the
 * extended error code (0x0-0x5).
 */
static const char * const mc6_mce_desc[] = {
	[0x0] = "Hardware Assertion",
	[0x1] = "Free List",
	[0x2] = "Physical Register File",
	[0x3] = "Retire Queue",
	[0x4] = "Scheduler table",
	[0x5] = "Status Register File",
};
149 
150 static bool f12h_mc0_mce(u16 ec, u8 xec)
151 {
152 	bool ret = false;
153 
154 	if (MEM_ERROR(ec)) {
155 		u8 ll = LL(ec);
156 		ret = true;
157 
158 		if (ll == LL_L2)
159 			pr_cont("during L1 linefill from L2.\n");
160 		else if (ll == LL_L1)
161 			pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
162 		else
163 			ret = false;
164 	}
165 	return ret;
166 }
167 
168 static bool f10h_mc0_mce(u16 ec, u8 xec)
169 {
170 	if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
171 		pr_cont("during data scrub.\n");
172 		return true;
173 	}
174 	return f12h_mc0_mce(ec, xec);
175 }
176 
177 static bool k8_mc0_mce(u16 ec, u8 xec)
178 {
179 	if (BUS_ERROR(ec)) {
180 		pr_cont("during system linefill.\n");
181 		return true;
182 	}
183 
184 	return f10h_mc0_mce(ec, xec);
185 }
186 
187 static bool cat_mc0_mce(u16 ec, u8 xec)
188 {
189 	u8 r4	 = R4(ec);
190 	bool ret = true;
191 
192 	if (MEM_ERROR(ec)) {
193 
194 		if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
195 			return false;
196 
197 		switch (r4) {
198 		case R4_DRD:
199 		case R4_DWR:
200 			pr_cont("Data/Tag parity error due to %s.\n",
201 				(r4 == R4_DRD ? "load/hw prf" : "store"));
202 			break;
203 		case R4_EVICT:
204 			pr_cont("Copyback parity error on a tag miss.\n");
205 			break;
206 		case R4_SNOOP:
207 			pr_cont("Tag parity error during snoop.\n");
208 			break;
209 		default:
210 			ret = false;
211 		}
212 	} else if (BUS_ERROR(ec)) {
213 
214 		if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
215 			return false;
216 
217 		pr_cont("System read data error on a ");
218 
219 		switch (r4) {
220 		case R4_RD:
221 			pr_cont("TLB reload.\n");
222 			break;
223 		case R4_DWR:
224 			pr_cont("store.\n");
225 			break;
226 		case R4_DRD:
227 			pr_cont("load.\n");
228 			break;
229 		default:
230 			ret = false;
231 		}
232 	} else {
233 		ret = false;
234 	}
235 
236 	return ret;
237 }
238 
239 static bool f15h_mc0_mce(u16 ec, u8 xec)
240 {
241 	bool ret = true;
242 
243 	if (MEM_ERROR(ec)) {
244 
245 		switch (xec) {
246 		case 0x0:
247 			pr_cont("Data Array access error.\n");
248 			break;
249 
250 		case 0x1:
251 			pr_cont("UC error during a linefill from L2/NB.\n");
252 			break;
253 
254 		case 0x2:
255 		case 0x11:
256 			pr_cont("STQ access error.\n");
257 			break;
258 
259 		case 0x3:
260 			pr_cont("SCB access error.\n");
261 			break;
262 
263 		case 0x10:
264 			pr_cont("Tag error.\n");
265 			break;
266 
267 		case 0x12:
268 			pr_cont("LDQ access error.\n");
269 			break;
270 
271 		default:
272 			ret = false;
273 		}
274 	} else if (BUS_ERROR(ec)) {
275 
276 		if (!xec)
277 			pr_cont("System Read Data Error.\n");
278 		else
279 			pr_cont(" Internal error condition type %d.\n", xec);
280 	} else if (INT_ERROR(ec)) {
281 		if (xec <= 0x1f)
282 			pr_cont("Hardware Assert.\n");
283 		else
284 			ret = false;
285 
286 	} else
287 		ret = false;
288 
289 	return ret;
290 }
291 
292 static void decode_mc0_mce(struct mce *m)
293 {
294 	u16 ec = EC(m->status);
295 	u8 xec = XEC(m->status, xec_mask);
296 
297 	pr_emerg(HW_ERR "MC0 Error: ");
298 
299 	/* TLB error signatures are the same across families */
300 	if (TLB_ERROR(ec)) {
301 		if (TT(ec) == TT_DATA) {
302 			pr_cont("%s TLB %s.\n", LL_MSG(ec),
303 				((xec == 2) ? "locked miss"
304 					    : (xec ? "multimatch" : "parity")));
305 			return;
306 		}
307 	} else if (fam_ops->mc0_mce(ec, xec))
308 		;
309 	else
310 		pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
311 }
312 
313 static bool k8_mc1_mce(u16 ec, u8 xec)
314 {
315 	u8 ll	 = LL(ec);
316 	bool ret = true;
317 
318 	if (!MEM_ERROR(ec))
319 		return false;
320 
321 	if (ll == 0x2)
322 		pr_cont("during a linefill from L2.\n");
323 	else if (ll == 0x1) {
324 		switch (R4(ec)) {
325 		case R4_IRD:
326 			pr_cont("Parity error during data load.\n");
327 			break;
328 
329 		case R4_EVICT:
330 			pr_cont("Copyback Parity/Victim error.\n");
331 			break;
332 
333 		case R4_SNOOP:
334 			pr_cont("Tag Snoop error.\n");
335 			break;
336 
337 		default:
338 			ret = false;
339 			break;
340 		}
341 	} else
342 		ret = false;
343 
344 	return ret;
345 }
346 
347 static bool cat_mc1_mce(u16 ec, u8 xec)
348 {
349 	u8 r4    = R4(ec);
350 	bool ret = true;
351 
352 	if (!MEM_ERROR(ec))
353 		return false;
354 
355 	if (TT(ec) != TT_INSTR)
356 		return false;
357 
358 	if (r4 == R4_IRD)
359 		pr_cont("Data/tag array parity error for a tag hit.\n");
360 	else if (r4 == R4_SNOOP)
361 		pr_cont("Tag error during snoop/victimization.\n");
362 	else if (xec == 0x0)
363 		pr_cont("Tag parity error from victim castout.\n");
364 	else if (xec == 0x2)
365 		pr_cont("Microcode patch RAM parity error.\n");
366 	else
367 		ret = false;
368 
369 	return ret;
370 }
371 
372 static bool f15h_mc1_mce(u16 ec, u8 xec)
373 {
374 	bool ret = true;
375 
376 	if (!MEM_ERROR(ec))
377 		return false;
378 
379 	switch (xec) {
380 	case 0x0 ... 0xa:
381 		pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
382 		break;
383 
384 	case 0xd:
385 		pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
386 		break;
387 
388 	case 0x10:
389 		pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
390 		break;
391 
392 	case 0x11 ... 0x15:
393 		pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
394 		break;
395 
396 	default:
397 		ret = false;
398 	}
399 	return ret;
400 }
401 
402 static void decode_mc1_mce(struct mce *m)
403 {
404 	u16 ec = EC(m->status);
405 	u8 xec = XEC(m->status, xec_mask);
406 
407 	pr_emerg(HW_ERR "MC1 Error: ");
408 
409 	if (TLB_ERROR(ec))
410 		pr_cont("%s TLB %s.\n", LL_MSG(ec),
411 			(xec ? "multimatch" : "parity error"));
412 	else if (BUS_ERROR(ec)) {
413 		bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
414 
415 		pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
416 	} else if (INT_ERROR(ec)) {
417 		if (xec <= 0x3f)
418 			pr_cont("Hardware Assert.\n");
419 		else
420 			goto wrong_mc1_mce;
421 	} else if (fam_ops->mc1_mce(ec, xec))
422 		;
423 	else
424 		goto wrong_mc1_mce;
425 
426 	return;
427 
428 wrong_mc1_mce:
429 	pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
430 }
431 
432 static bool k8_mc2_mce(u16 ec, u8 xec)
433 {
434 	bool ret = true;
435 
436 	if (xec == 0x1)
437 		pr_cont(" in the write data buffers.\n");
438 	else if (xec == 0x3)
439 		pr_cont(" in the victim data buffers.\n");
440 	else if (xec == 0x2 && MEM_ERROR(ec))
441 		pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
442 	else if (xec == 0x0) {
443 		if (TLB_ERROR(ec))
444 			pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
445 				TT_MSG(ec));
446 		else if (BUS_ERROR(ec))
447 			pr_cont(": %s/ECC error in data read from NB: %s.\n",
448 				R4_MSG(ec), PP_MSG(ec));
449 		else if (MEM_ERROR(ec)) {
450 			u8 r4 = R4(ec);
451 
452 			if (r4 >= 0x7)
453 				pr_cont(": %s error during data copyback.\n",
454 					R4_MSG(ec));
455 			else if (r4 <= 0x1)
456 				pr_cont(": %s parity/ECC error during data "
457 					"access from L2.\n", R4_MSG(ec));
458 			else
459 				ret = false;
460 		} else
461 			ret = false;
462 	} else
463 		ret = false;
464 
465 	return ret;
466 }
467 
468 static bool f15h_mc2_mce(u16 ec, u8 xec)
469 {
470 	bool ret = true;
471 
472 	if (TLB_ERROR(ec)) {
473 		if (xec == 0x0)
474 			pr_cont("Data parity TLB read error.\n");
475 		else if (xec == 0x1)
476 			pr_cont("Poison data provided for TLB fill.\n");
477 		else
478 			ret = false;
479 	} else if (BUS_ERROR(ec)) {
480 		if (xec > 2)
481 			ret = false;
482 
483 		pr_cont("Error during attempted NB data read.\n");
484 	} else if (MEM_ERROR(ec)) {
485 		switch (xec) {
486 		case 0x4 ... 0xc:
487 			pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
488 			break;
489 
490 		case 0x10 ... 0x14:
491 			pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
492 			break;
493 
494 		default:
495 			ret = false;
496 		}
497 	} else if (INT_ERROR(ec)) {
498 		if (xec <= 0x3f)
499 			pr_cont("Hardware Assert.\n");
500 		else
501 			ret = false;
502 	}
503 
504 	return ret;
505 }
506 
507 static bool f16h_mc2_mce(u16 ec, u8 xec)
508 {
509 	u8 r4 = R4(ec);
510 
511 	if (!MEM_ERROR(ec))
512 		return false;
513 
514 	switch (xec) {
515 	case 0x04 ... 0x05:
516 		pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
517 		break;
518 
519 	case 0x09 ... 0x0b:
520 	case 0x0d ... 0x0f:
521 		pr_cont("ECC error in L2 tag (%s).\n",
522 			((r4 == R4_GEN)   ? "BankReq" :
523 			((r4 == R4_SNOOP) ? "Prb"     : "Fill")));
524 		break;
525 
526 	case 0x10 ... 0x19:
527 	case 0x1b:
528 		pr_cont("ECC error in L2 data array (%s).\n",
529 			(((r4 == R4_RD) && !(xec & 0x3)) ? "Hit"  :
530 			((r4 == R4_GEN)   ? "Attr" :
531 			((r4 == R4_EVICT) ? "Vict" : "Fill"))));
532 		break;
533 
534 	case 0x1c ... 0x1d:
535 	case 0x1f:
536 		pr_cont("Parity error in L2 attribute bits (%s).\n",
537 			((r4 == R4_RD)  ? "Hit"  :
538 			((r4 == R4_GEN) ? "Attr" : "Fill")));
539 		break;
540 
541 	default:
542 		return false;
543 	}
544 
545 	return true;
546 }
547 
548 static void decode_mc2_mce(struct mce *m)
549 {
550 	u16 ec = EC(m->status);
551 	u8 xec = XEC(m->status, xec_mask);
552 
553 	pr_emerg(HW_ERR "MC2 Error: ");
554 
555 	if (!fam_ops->mc2_mce(ec, xec))
556 		pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
557 }
558 
559 static void decode_mc3_mce(struct mce *m)
560 {
561 	u16 ec = EC(m->status);
562 	u8 xec = XEC(m->status, xec_mask);
563 
564 	if (boot_cpu_data.x86 >= 0x14) {
565 		pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
566 			 " please report on LKML.\n");
567 		return;
568 	}
569 
570 	pr_emerg(HW_ERR "MC3 Error");
571 
572 	if (xec == 0x0) {
573 		u8 r4 = R4(ec);
574 
575 		if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
576 			goto wrong_mc3_mce;
577 
578 		pr_cont(" during %s.\n", R4_MSG(ec));
579 	} else
580 		goto wrong_mc3_mce;
581 
582 	return;
583 
584  wrong_mc3_mce:
585 	pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
586 }
587 
588 static void decode_mc4_mce(struct mce *m)
589 {
590 	struct cpuinfo_x86 *c = &boot_cpu_data;
591 	int node_id = amd_get_nb_id(m->extcpu);
592 	u16 ec = EC(m->status);
593 	u8 xec = XEC(m->status, 0x1f);
594 	u8 offset = 0;
595 
596 	pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
597 
598 	switch (xec) {
599 	case 0x0 ... 0xe:
600 
601 		/* special handling for DRAM ECCs */
602 		if (xec == 0x0 || xec == 0x8) {
603 			/* no ECCs on F11h */
604 			if (c->x86 == 0x11)
605 				goto wrong_mc4_mce;
606 
607 			pr_cont("%s.\n", mc4_mce_desc[xec]);
608 
609 			if (nb_bus_decoder)
610 				nb_bus_decoder(node_id, m);
611 			return;
612 		}
613 		break;
614 
615 	case 0xf:
616 		if (TLB_ERROR(ec))
617 			pr_cont("GART Table Walk data error.\n");
618 		else if (BUS_ERROR(ec))
619 			pr_cont("DMA Exclusion Vector Table Walk error.\n");
620 		else
621 			goto wrong_mc4_mce;
622 		return;
623 
624 	case 0x19:
625 		if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
626 			pr_cont("Compute Unit Data Error.\n");
627 		else
628 			goto wrong_mc4_mce;
629 		return;
630 
631 	case 0x1c ... 0x1f:
632 		offset = 13;
633 		break;
634 
635 	default:
636 		goto wrong_mc4_mce;
637 	}
638 
639 	pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
640 	return;
641 
642  wrong_mc4_mce:
643 	pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
644 }
645 
646 static void decode_mc5_mce(struct mce *m)
647 {
648 	struct cpuinfo_x86 *c = &boot_cpu_data;
649 	u16 ec = EC(m->status);
650 	u8 xec = XEC(m->status, xec_mask);
651 
652 	if (c->x86 == 0xf || c->x86 == 0x11)
653 		goto wrong_mc5_mce;
654 
655 	pr_emerg(HW_ERR "MC5 Error: ");
656 
657 	if (INT_ERROR(ec)) {
658 		if (xec <= 0x1f) {
659 			pr_cont("Hardware Assert.\n");
660 			return;
661 		} else
662 			goto wrong_mc5_mce;
663 	}
664 
665 	if (xec == 0x0 || xec == 0xc)
666 		pr_cont("%s.\n", mc5_mce_desc[xec]);
667 	else if (xec <= 0xd)
668 		pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
669 	else
670 		goto wrong_mc5_mce;
671 
672 	return;
673 
674  wrong_mc5_mce:
675 	pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
676 }
677 
678 static void decode_mc6_mce(struct mce *m)
679 {
680 	u8 xec = XEC(m->status, xec_mask);
681 
682 	pr_emerg(HW_ERR "MC6 Error: ");
683 
684 	if (xec > 0x5)
685 		goto wrong_mc6_mce;
686 
687 	pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
688 	return;
689 
690  wrong_mc6_mce:
691 	pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
692 }
693 
694 static inline void amd_decode_err_code(u16 ec)
695 {
696 	if (INT_ERROR(ec)) {
697 		pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
698 		return;
699 	}
700 
701 	pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
702 
703 	if (BUS_ERROR(ec))
704 		pr_cont(", mem/io: %s", II_MSG(ec));
705 	else
706 		pr_cont(", tx: %s", TT_MSG(ec));
707 
708 	if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
709 		pr_cont(", mem-tx: %s", R4_MSG(ec));
710 
711 		if (BUS_ERROR(ec))
712 			pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
713 	}
714 
715 	pr_cont("\n");
716 }
717 
718 /*
719  * Filter out unwanted MCE signatures here.
720  */
721 static bool amd_filter_mce(struct mce *m)
722 {
723 	u8 xec = (m->status >> 16) & 0x1f;
724 
725 	/*
726 	 * NB GART TLB error reporting is disabled by default.
727 	 */
728 	if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
729 		return true;
730 
731 	return false;
732 }
733 
734 static const char *decode_error_status(struct mce *m)
735 {
736 	if (m->status & MCI_STATUS_UC) {
737 		if (m->status & MCI_STATUS_PCC)
738 			return "System Fatal error.";
739 		if (m->mcgstatus & MCG_STATUS_RIPV)
740 			return "Uncorrected, software restartable error.";
741 		return "Uncorrected, software containable error.";
742 	}
743 
744 	if (m->status & MCI_STATUS_DEFERRED)
745 		return "Deferred error.";
746 
747 	return "Corrected error, no action required.";
748 }
749 
750 int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
751 {
752 	struct mce *m = (struct mce *)data;
753 	struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
754 	int ecc;
755 
756 	if (amd_filter_mce(m))
757 		return NOTIFY_STOP;
758 
759 	pr_emerg(HW_ERR "%s\n", decode_error_status(m));
760 
761 	pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
762 		m->extcpu,
763 		c->x86, c->x86_model, c->x86_mask,
764 		m->bank,
765 		((m->status & MCI_STATUS_OVER)	? "Over"  : "-"),
766 		((m->status & MCI_STATUS_UC)	? "UE"	  :
767 		 (m->status & MCI_STATUS_DEFERRED) ? "-"  : "CE"),
768 		((m->status & MCI_STATUS_MISCV)	? "MiscV" : "-"),
769 		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"),
770 		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"));
771 
772 	if (c->x86 == 0x15 || c->x86 == 0x16)
773 		pr_cont("|%s|%s",
774 			((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
775 			((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));
776 
777 	/* do the two bits[14:13] together */
778 	ecc = (m->status >> 45) & 0x3;
779 	if (ecc)
780 		pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
781 
782 	pr_cont("]: 0x%016llx\n", m->status);
783 
784 	if (m->status & MCI_STATUS_ADDRV)
785 		pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
786 
787 	if (!fam_ops)
788 		goto err_code;
789 
790 	switch (m->bank) {
791 	case 0:
792 		decode_mc0_mce(m);
793 		break;
794 
795 	case 1:
796 		decode_mc1_mce(m);
797 		break;
798 
799 	case 2:
800 		decode_mc2_mce(m);
801 		break;
802 
803 	case 3:
804 		decode_mc3_mce(m);
805 		break;
806 
807 	case 4:
808 		decode_mc4_mce(m);
809 		break;
810 
811 	case 5:
812 		decode_mc5_mce(m);
813 		break;
814 
815 	case 6:
816 		decode_mc6_mce(m);
817 		break;
818 
819 	default:
820 		break;
821 	}
822 
823  err_code:
824 	amd_decode_err_code(m->status & 0xffff);
825 
826 	return NOTIFY_STOP;
827 }
828 EXPORT_SYMBOL_GPL(amd_decode_mce);
829 
830 static struct notifier_block amd_mce_dec_nb = {
831 	.notifier_call	= amd_decode_mce,
832 };
833 
834 static int __init mce_amd_init(void)
835 {
836 	struct cpuinfo_x86 *c = &boot_cpu_data;
837 
838 	if (c->x86_vendor != X86_VENDOR_AMD)
839 		return -ENODEV;
840 
841 	fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
842 	if (!fam_ops)
843 		return -ENOMEM;
844 
845 	switch (c->x86) {
846 	case 0xf:
847 		fam_ops->mc0_mce = k8_mc0_mce;
848 		fam_ops->mc1_mce = k8_mc1_mce;
849 		fam_ops->mc2_mce = k8_mc2_mce;
850 		break;
851 
852 	case 0x10:
853 		fam_ops->mc0_mce = f10h_mc0_mce;
854 		fam_ops->mc1_mce = k8_mc1_mce;
855 		fam_ops->mc2_mce = k8_mc2_mce;
856 		break;
857 
858 	case 0x11:
859 		fam_ops->mc0_mce = k8_mc0_mce;
860 		fam_ops->mc1_mce = k8_mc1_mce;
861 		fam_ops->mc2_mce = k8_mc2_mce;
862 		break;
863 
864 	case 0x12:
865 		fam_ops->mc0_mce = f12h_mc0_mce;
866 		fam_ops->mc1_mce = k8_mc1_mce;
867 		fam_ops->mc2_mce = k8_mc2_mce;
868 		break;
869 
870 	case 0x14:
871 		fam_ops->mc0_mce = cat_mc0_mce;
872 		fam_ops->mc1_mce = cat_mc1_mce;
873 		fam_ops->mc2_mce = k8_mc2_mce;
874 		break;
875 
876 	case 0x15:
877 		xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
878 
879 		fam_ops->mc0_mce = f15h_mc0_mce;
880 		fam_ops->mc1_mce = f15h_mc1_mce;
881 		fam_ops->mc2_mce = f15h_mc2_mce;
882 		break;
883 
884 	case 0x16:
885 		xec_mask = 0x1f;
886 		fam_ops->mc0_mce = cat_mc0_mce;
887 		fam_ops->mc1_mce = cat_mc1_mce;
888 		fam_ops->mc2_mce = f16h_mc2_mce;
889 		break;
890 
891 	default:
892 		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
893 		kfree(fam_ops);
894 		fam_ops = NULL;
895 	}
896 
897 	pr_info("MCE: In-kernel MCE decoding enabled.\n");
898 
899 	mce_register_decode_chain(&amd_mce_dec_nb);
900 
901 	return 0;
902 }
903 early_initcall(mce_amd_init);
904 
#ifdef MODULE
/*
 * Module unload: remove the decoder from the MCE decode chain first,
 * then release the family ops it was using.
 */
static void __exit mce_amd_exit(void)
{
	mce_unregister_decode_chain(&amd_mce_dec_nb);
	kfree(fam_ops);
}
module_exit(mce_amd_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
#endif
917