1 /* 2 * OPAL asynchronus Memory error handling support in PowerNV. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * 18 * Copyright 2013 IBM Corporation 19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 20 */ 21 22 #undef DEBUG 23 24 #include <linux/kernel.h> 25 #include <linux/init.h> 26 #include <linux/of.h> 27 #include <linux/mm.h> 28 #include <linux/slab.h> 29 30 #include <asm/machdep.h> 31 #include <asm/opal.h> 32 #include <asm/cputable.h> 33 34 static int opal_mem_err_nb_init; 35 static LIST_HEAD(opal_memory_err_list); 36 static DEFINE_SPINLOCK(opal_mem_err_lock); 37 38 struct OpalMsgNode { 39 struct list_head list; 40 struct opal_msg msg; 41 }; 42 43 static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt) 44 { 45 uint64_t paddr_start, paddr_end; 46 47 pr_debug("%s: Retrieved memory error event, type: 0x%x\n", 48 __func__, merr_evt->type); 49 switch (merr_evt->type) { 50 case OPAL_MEM_ERR_TYPE_RESILIENCE: 51 paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start); 52 paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end); 53 break; 54 case OPAL_MEM_ERR_TYPE_DYN_DALLOC: 55 paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start); 56 paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end); 57 break; 58 default: 59 return; 60 } 61 62 for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) { 63 memory_failure(paddr_start >> PAGE_SHIFT, 0); 64 } 65 } 66 67 static void handle_memory_error(void) 68 { 69 unsigned long flags; 70 struct OpalMemoryErrorData *merr_evt; 71 struct OpalMsgNode *msg_node; 72 73 spin_lock_irqsave(&opal_mem_err_lock, flags); 74 while (!list_empty(&opal_memory_err_list)) { 75 msg_node = list_entry(opal_memory_err_list.next, 76 struct OpalMsgNode, list); 77 list_del(&msg_node->list); 78 spin_unlock_irqrestore(&opal_mem_err_lock, flags); 79 80 merr_evt = (struct OpalMemoryErrorData *) 81 &msg_node->msg.params[0]; 82 handle_memory_error_event(merr_evt); 83 kfree(msg_node); 84 spin_lock_irqsave(&opal_mem_err_lock, flags); 85 } 86 spin_unlock_irqrestore(&opal_mem_err_lock, flags); 87 } 88 89 static void mem_error_handler(struct work_struct *work) 90 { 91 handle_memory_error(); 92 } 93 94 static DECLARE_WORK(mem_error_work, mem_error_handler); 95 96 /* 97 * opal_memory_err_event - notifier handler that queues up the opal message 98 * to be preocessed later. 99 */ 100 static int opal_memory_err_event(struct notifier_block *nb, 101 unsigned long msg_type, void *msg) 102 { 103 unsigned long flags; 104 struct OpalMsgNode *msg_node; 105 106 if (msg_type != OPAL_MSG_MEM_ERR) 107 return 0; 108 109 msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); 110 if (!msg_node) { 111 pr_err("MEMORY_ERROR: out of memory, Opal message event not" 112 "handled\n"); 113 return -ENOMEM; 114 } 115 memcpy(&msg_node->msg, msg, sizeof(msg_node->msg)); 116 117 spin_lock_irqsave(&opal_mem_err_lock, flags); 118 list_add(&msg_node->list, &opal_memory_err_list); 119 spin_unlock_irqrestore(&opal_mem_err_lock, flags); 120 121 schedule_work(&mem_error_work); 122 return 0; 123 } 124 125 static struct notifier_block opal_mem_err_nb = { 126 .notifier_call = opal_memory_err_event, 127 .next = NULL, 128 .priority = 0, 129 }; 130 131 static int __init opal_mem_err_init(void) 132 { 133 int ret; 134 135 if (!opal_mem_err_nb_init) { 136 ret = opal_message_notifier_register( 137 OPAL_MSG_MEM_ERR, &opal_mem_err_nb); 138 if (ret) { 139 pr_err("%s: Can't register OPAL event notifier (%d)\n", 140 __func__, ret); 141 return ret; 142 } 143 opal_mem_err_nb_init = 1; 144 } 145 return 0; 146 } 147 machine_device_initcall(powernv, opal_mem_err_init); 148