xref: /openbmc/linux/drivers/acpi/nfit/mce.c (revision 5d331b7f)
1 /*
2  * NFIT - Machine Check Handler
3  *
4  * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of version 2 of the GNU General Public License as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  */
15 #include <linux/notifier.h>
16 #include <linux/acpi.h>
17 #include <linux/nd.h>
18 #include <asm/mce.h>
19 #include "nfit.h"
20 
21 static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
22 			void *data)
23 {
24 	struct mce *mce = (struct mce *)data;
25 	struct acpi_nfit_desc *acpi_desc;
26 	struct nfit_spa *nfit_spa;
27 
28 	/* We only care about memory errors */
29 	if (!mce_is_memory_error(mce))
30 		return NOTIFY_DONE;
31 
32 	/*
33 	 * mce->addr contains the physical addr accessed that caused the
34 	 * machine check. We need to walk through the list of NFITs, and see
35 	 * if any of them matches that address, and only then start a scrub.
36 	 */
37 	mutex_lock(&acpi_desc_lock);
38 	list_for_each_entry(acpi_desc, &acpi_descs, list) {
39 		struct device *dev = acpi_desc->dev;
40 		int found_match = 0;
41 
42 		mutex_lock(&acpi_desc->init_mutex);
43 		list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
44 			struct acpi_nfit_system_address *spa = nfit_spa->spa;
45 
46 			if (nfit_spa_type(spa) != NFIT_SPA_PM)
47 				continue;
48 			/* find the spa that covers the mce addr */
49 			if (spa->address > mce->addr)
50 				continue;
51 			if ((spa->address + spa->length - 1) < mce->addr)
52 				continue;
53 			found_match = 1;
54 			dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
55 				spa->range_index, spa->address, spa->length);
56 			/*
57 			 * We can break at the first match because we're going
58 			 * to rescan all the SPA ranges. There shouldn't be any
59 			 * aliasing anyway.
60 			 */
61 			break;
62 		}
63 		mutex_unlock(&acpi_desc->init_mutex);
64 
65 		if (!found_match)
66 			continue;
67 
68 		/* If this fails due to an -ENOMEM, there is little we can do */
69 		nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
70 				ALIGN(mce->addr, L1_CACHE_BYTES),
71 				L1_CACHE_BYTES);
72 		nvdimm_region_notify(nfit_spa->nd_region,
73 				NVDIMM_REVALIDATE_POISON);
74 
75 		if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
76 			/*
77 			 * We can ignore an -EBUSY here because if an ARS is
78 			 * already in progress, just let that be the last
79 			 * authoritative one
80 			 */
81 			acpi_nfit_ars_rescan(acpi_desc, 0);
82 		}
83 		break;
84 	}
85 
86 	mutex_unlock(&acpi_desc_lock);
87 	return NOTIFY_DONE;
88 }
89 
90 static struct notifier_block nfit_mce_dec = {
91 	.notifier_call	= nfit_handle_mce,
92 	.priority	= MCE_PRIO_NFIT,
93 };
94 
95 void nfit_mce_register(void)
96 {
97 	mce_register_decode_chain(&nfit_mce_dec);
98 }
99 
100 void nfit_mce_unregister(void)
101 {
102 	mce_unregister_decode_chain(&nfit_mce_dec);
103 }
104