1 /* 2 * NFIT - Machine Check Handler 3 * 4 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of version 2 of the GNU General Public License as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 */ 15 #include <linux/notifier.h> 16 #include <linux/acpi.h> 17 #include <linux/nd.h> 18 #include <asm/mce.h> 19 #include "nfit.h" 20 21 static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, 22 void *data) 23 { 24 struct mce *mce = (struct mce *)data; 25 struct acpi_nfit_desc *acpi_desc; 26 struct nfit_spa *nfit_spa; 27 28 /* We only care about uncorrectable memory errors */ 29 if (!mce_is_memory_error(mce) || mce_is_correctable(mce)) 30 return NOTIFY_DONE; 31 32 /* Verify the address reported in the MCE is valid. */ 33 if (!mce_usable_address(mce)) 34 return NOTIFY_DONE; 35 36 /* 37 * mce->addr contains the physical addr accessed that caused the 38 * machine check. We need to walk through the list of NFITs, and see 39 * if any of them matches that address, and only then start a scrub. 40 */ 41 mutex_lock(&acpi_desc_lock); 42 list_for_each_entry(acpi_desc, &acpi_descs, list) { 43 struct device *dev = acpi_desc->dev; 44 int found_match = 0; 45 46 mutex_lock(&acpi_desc->init_mutex); 47 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { 48 struct acpi_nfit_system_address *spa = nfit_spa->spa; 49 50 if (nfit_spa_type(spa) != NFIT_SPA_PM) 51 continue; 52 /* find the spa that covers the mce addr */ 53 if (spa->address > mce->addr) 54 continue; 55 if ((spa->address + spa->length - 1) < mce->addr) 56 continue; 57 found_match = 1; 58 dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n", 59 spa->range_index, spa->address, spa->length); 60 /* 61 * We can break at the first match because we're going 62 * to rescan all the SPA ranges. There shouldn't be any 63 * aliasing anyway. 64 */ 65 break; 66 } 67 mutex_unlock(&acpi_desc->init_mutex); 68 69 if (!found_match) 70 continue; 71 72 /* If this fails due to an -ENOMEM, there is little we can do */ 73 nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus, 74 ALIGN(mce->addr, L1_CACHE_BYTES), 75 L1_CACHE_BYTES); 76 nvdimm_region_notify(nfit_spa->nd_region, 77 NVDIMM_REVALIDATE_POISON); 78 79 if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) { 80 /* 81 * We can ignore an -EBUSY here because if an ARS is 82 * already in progress, just let that be the last 83 * authoritative one 84 */ 85 acpi_nfit_ars_rescan(acpi_desc, 0); 86 } 87 break; 88 } 89 90 mutex_unlock(&acpi_desc_lock); 91 return NOTIFY_DONE; 92 } 93 94 static struct notifier_block nfit_mce_dec = { 95 .notifier_call = nfit_handle_mce, 96 .priority = MCE_PRIO_NFIT, 97 }; 98 99 void nfit_mce_register(void) 100 { 101 mce_register_decode_chain(&nfit_mce_dec); 102 } 103 104 void nfit_mce_unregister(void) 105 { 106 mce_unregister_decode_chain(&nfit_mce_dec); 107 } 108