175eb3d9bSMahesh Salgaonkar /* 2c28237f1SMarkus Elfring * OPAL asynchronus Memory error handling support in PowerNV. 375eb3d9bSMahesh Salgaonkar * 475eb3d9bSMahesh Salgaonkar * This program is free software; you can redistribute it and/or modify 575eb3d9bSMahesh Salgaonkar * it under the terms of the GNU General Public License as published by 675eb3d9bSMahesh Salgaonkar * the Free Software Foundation; either version 2 of the License, or 775eb3d9bSMahesh Salgaonkar * (at your option) any later version. 875eb3d9bSMahesh Salgaonkar * 975eb3d9bSMahesh Salgaonkar * This program is distributed in the hope that it will be useful, 1075eb3d9bSMahesh Salgaonkar * but WITHOUT ANY WARRANTY; without even the implied warranty of 1175eb3d9bSMahesh Salgaonkar * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1275eb3d9bSMahesh Salgaonkar * GNU General Public License for more details. 1375eb3d9bSMahesh Salgaonkar * 1475eb3d9bSMahesh Salgaonkar * You should have received a copy of the GNU General Public License 1575eb3d9bSMahesh Salgaonkar * along with this program; if not, write to the Free Software 1675eb3d9bSMahesh Salgaonkar * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 1775eb3d9bSMahesh Salgaonkar * 1875eb3d9bSMahesh Salgaonkar * Copyright 2013 IBM Corporation 1975eb3d9bSMahesh Salgaonkar * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 2075eb3d9bSMahesh Salgaonkar */ 2175eb3d9bSMahesh Salgaonkar 2275eb3d9bSMahesh Salgaonkar #undef DEBUG 2375eb3d9bSMahesh Salgaonkar 2475eb3d9bSMahesh Salgaonkar #include <linux/kernel.h> 2575eb3d9bSMahesh Salgaonkar #include <linux/init.h> 2675eb3d9bSMahesh Salgaonkar #include <linux/of.h> 2775eb3d9bSMahesh Salgaonkar #include <linux/mm.h> 2875eb3d9bSMahesh Salgaonkar #include <linux/slab.h> 2975eb3d9bSMahesh Salgaonkar 30b14726c5SMichael Ellerman #include <asm/machdep.h> 3175eb3d9bSMahesh Salgaonkar #include <asm/opal.h> 3275eb3d9bSMahesh Salgaonkar #include <asm/cputable.h> 3375eb3d9bSMahesh Salgaonkar 3475eb3d9bSMahesh Salgaonkar static int opal_mem_err_nb_init; 3575eb3d9bSMahesh Salgaonkar static LIST_HEAD(opal_memory_err_list); 3675eb3d9bSMahesh Salgaonkar static DEFINE_SPINLOCK(opal_mem_err_lock); 3775eb3d9bSMahesh Salgaonkar 3875eb3d9bSMahesh Salgaonkar struct OpalMsgNode { 3975eb3d9bSMahesh Salgaonkar struct list_head list; 4075eb3d9bSMahesh Salgaonkar struct opal_msg msg; 4175eb3d9bSMahesh Salgaonkar }; 4275eb3d9bSMahesh Salgaonkar 4375eb3d9bSMahesh Salgaonkar static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt) 4475eb3d9bSMahesh Salgaonkar { 4575eb3d9bSMahesh Salgaonkar uint64_t paddr_start, paddr_end; 4675eb3d9bSMahesh Salgaonkar 476e8a9279SColin Ian King pr_debug("%s: Retrieved memory error event, type: 0x%x\n", 4875eb3d9bSMahesh Salgaonkar __func__, merr_evt->type); 4975eb3d9bSMahesh Salgaonkar switch (merr_evt->type) { 5075eb3d9bSMahesh Salgaonkar case OPAL_MEM_ERR_TYPE_RESILIENCE: 51223ca9d8SAnton Blanchard paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start); 52223ca9d8SAnton Blanchard paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end); 5375eb3d9bSMahesh Salgaonkar break; 5475eb3d9bSMahesh Salgaonkar case OPAL_MEM_ERR_TYPE_DYN_DALLOC: 55223ca9d8SAnton Blanchard paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start); 56223ca9d8SAnton Blanchard paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end); 5775eb3d9bSMahesh Salgaonkar break; 5875eb3d9bSMahesh Salgaonkar default: 5975eb3d9bSMahesh Salgaonkar return; 6075eb3d9bSMahesh Salgaonkar } 6175eb3d9bSMahesh Salgaonkar 6275eb3d9bSMahesh Salgaonkar for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) { 6383b57531SEric W. Biederman memory_failure(paddr_start >> PAGE_SHIFT, 0); 6475eb3d9bSMahesh Salgaonkar } 6575eb3d9bSMahesh Salgaonkar } 6675eb3d9bSMahesh Salgaonkar 6775eb3d9bSMahesh Salgaonkar static void handle_memory_error(void) 6875eb3d9bSMahesh Salgaonkar { 6975eb3d9bSMahesh Salgaonkar unsigned long flags; 7075eb3d9bSMahesh Salgaonkar struct OpalMemoryErrorData *merr_evt; 7175eb3d9bSMahesh Salgaonkar struct OpalMsgNode *msg_node; 7275eb3d9bSMahesh Salgaonkar 7375eb3d9bSMahesh Salgaonkar spin_lock_irqsave(&opal_mem_err_lock, flags); 7475eb3d9bSMahesh Salgaonkar while (!list_empty(&opal_memory_err_list)) { 7575eb3d9bSMahesh Salgaonkar msg_node = list_entry(opal_memory_err_list.next, 7675eb3d9bSMahesh Salgaonkar struct OpalMsgNode, list); 7775eb3d9bSMahesh Salgaonkar list_del(&msg_node->list); 7875eb3d9bSMahesh Salgaonkar spin_unlock_irqrestore(&opal_mem_err_lock, flags); 7975eb3d9bSMahesh Salgaonkar 8075eb3d9bSMahesh Salgaonkar merr_evt = (struct OpalMemoryErrorData *) 8175eb3d9bSMahesh Salgaonkar &msg_node->msg.params[0]; 8275eb3d9bSMahesh Salgaonkar handle_memory_error_event(merr_evt); 8375eb3d9bSMahesh Salgaonkar kfree(msg_node); 8475eb3d9bSMahesh Salgaonkar spin_lock_irqsave(&opal_mem_err_lock, flags); 8575eb3d9bSMahesh Salgaonkar } 8675eb3d9bSMahesh Salgaonkar spin_unlock_irqrestore(&opal_mem_err_lock, flags); 8775eb3d9bSMahesh Salgaonkar } 8875eb3d9bSMahesh Salgaonkar 8975eb3d9bSMahesh Salgaonkar static void mem_error_handler(struct work_struct *work) 9075eb3d9bSMahesh Salgaonkar { 9175eb3d9bSMahesh Salgaonkar handle_memory_error(); 9275eb3d9bSMahesh Salgaonkar } 9375eb3d9bSMahesh Salgaonkar 9475eb3d9bSMahesh Salgaonkar static DECLARE_WORK(mem_error_work, mem_error_handler); 9575eb3d9bSMahesh Salgaonkar 9675eb3d9bSMahesh Salgaonkar /* 9775eb3d9bSMahesh Salgaonkar * opal_memory_err_event - notifier handler that queues up the opal message 9875eb3d9bSMahesh Salgaonkar * to be preocessed later. 9975eb3d9bSMahesh Salgaonkar */ 10075eb3d9bSMahesh Salgaonkar static int opal_memory_err_event(struct notifier_block *nb, 10175eb3d9bSMahesh Salgaonkar unsigned long msg_type, void *msg) 10275eb3d9bSMahesh Salgaonkar { 10375eb3d9bSMahesh Salgaonkar unsigned long flags; 10475eb3d9bSMahesh Salgaonkar struct OpalMsgNode *msg_node; 10575eb3d9bSMahesh Salgaonkar 10675eb3d9bSMahesh Salgaonkar if (msg_type != OPAL_MSG_MEM_ERR) 10775eb3d9bSMahesh Salgaonkar return 0; 10875eb3d9bSMahesh Salgaonkar 10975eb3d9bSMahesh Salgaonkar msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); 11075eb3d9bSMahesh Salgaonkar if (!msg_node) { 11175eb3d9bSMahesh Salgaonkar pr_err("MEMORY_ERROR: out of memory, Opal message event not" 11275eb3d9bSMahesh Salgaonkar "handled\n"); 11375eb3d9bSMahesh Salgaonkar return -ENOMEM; 11475eb3d9bSMahesh Salgaonkar } 115a0828cf5SMarkus Elfring memcpy(&msg_node->msg, msg, sizeof(msg_node->msg)); 11675eb3d9bSMahesh Salgaonkar 11775eb3d9bSMahesh Salgaonkar spin_lock_irqsave(&opal_mem_err_lock, flags); 11875eb3d9bSMahesh Salgaonkar list_add(&msg_node->list, &opal_memory_err_list); 11975eb3d9bSMahesh Salgaonkar spin_unlock_irqrestore(&opal_mem_err_lock, flags); 12075eb3d9bSMahesh Salgaonkar 12175eb3d9bSMahesh Salgaonkar schedule_work(&mem_error_work); 12275eb3d9bSMahesh Salgaonkar return 0; 12375eb3d9bSMahesh Salgaonkar } 12475eb3d9bSMahesh Salgaonkar 12575eb3d9bSMahesh Salgaonkar static struct notifier_block opal_mem_err_nb = { 12675eb3d9bSMahesh Salgaonkar .notifier_call = opal_memory_err_event, 12775eb3d9bSMahesh Salgaonkar .next = NULL, 12875eb3d9bSMahesh Salgaonkar .priority = 0, 12975eb3d9bSMahesh Salgaonkar }; 13075eb3d9bSMahesh Salgaonkar 13175eb3d9bSMahesh Salgaonkar static int __init opal_mem_err_init(void) 13275eb3d9bSMahesh Salgaonkar { 13375eb3d9bSMahesh Salgaonkar int ret; 13475eb3d9bSMahesh Salgaonkar 13575eb3d9bSMahesh Salgaonkar if (!opal_mem_err_nb_init) { 13675eb3d9bSMahesh Salgaonkar ret = opal_message_notifier_register( 13775eb3d9bSMahesh Salgaonkar OPAL_MSG_MEM_ERR, &opal_mem_err_nb); 13875eb3d9bSMahesh Salgaonkar if (ret) { 13975eb3d9bSMahesh Salgaonkar pr_err("%s: Can't register OPAL event notifier (%d)\n", 14075eb3d9bSMahesh Salgaonkar __func__, ret); 14175eb3d9bSMahesh Salgaonkar return ret; 14275eb3d9bSMahesh Salgaonkar } 14375eb3d9bSMahesh Salgaonkar opal_mem_err_nb_init = 1; 14475eb3d9bSMahesh Salgaonkar } 14575eb3d9bSMahesh Salgaonkar return 0; 14675eb3d9bSMahesh Salgaonkar } 14796e023e7SAlistair Popple machine_device_initcall(powernv, opal_mem_err_init); 148