11a59d1b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 275eb3d9bSMahesh Salgaonkar /* 3c28237f1SMarkus Elfring * OPAL asynchronus Memory error handling support in PowerNV. 475eb3d9bSMahesh Salgaonkar * 575eb3d9bSMahesh Salgaonkar * Copyright 2013 IBM Corporation 675eb3d9bSMahesh Salgaonkar * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 775eb3d9bSMahesh Salgaonkar */ 875eb3d9bSMahesh Salgaonkar 975eb3d9bSMahesh Salgaonkar #undef DEBUG 1075eb3d9bSMahesh Salgaonkar 1175eb3d9bSMahesh Salgaonkar #include <linux/kernel.h> 1275eb3d9bSMahesh Salgaonkar #include <linux/init.h> 1375eb3d9bSMahesh Salgaonkar #include <linux/of.h> 1475eb3d9bSMahesh Salgaonkar #include <linux/mm.h> 1575eb3d9bSMahesh Salgaonkar #include <linux/slab.h> 1675eb3d9bSMahesh Salgaonkar 17b14726c5SMichael Ellerman #include <asm/machdep.h> 1875eb3d9bSMahesh Salgaonkar #include <asm/opal.h> 1975eb3d9bSMahesh Salgaonkar #include <asm/cputable.h> 2075eb3d9bSMahesh Salgaonkar 2175eb3d9bSMahesh Salgaonkar static int opal_mem_err_nb_init; 2275eb3d9bSMahesh Salgaonkar static LIST_HEAD(opal_memory_err_list); 2375eb3d9bSMahesh Salgaonkar static DEFINE_SPINLOCK(opal_mem_err_lock); 2475eb3d9bSMahesh Salgaonkar 2575eb3d9bSMahesh Salgaonkar struct OpalMsgNode { 2675eb3d9bSMahesh Salgaonkar struct list_head list; 2775eb3d9bSMahesh Salgaonkar struct opal_msg msg; 2875eb3d9bSMahesh Salgaonkar }; 2975eb3d9bSMahesh Salgaonkar 3075eb3d9bSMahesh Salgaonkar static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt) 3175eb3d9bSMahesh Salgaonkar { 3275eb3d9bSMahesh Salgaonkar uint64_t paddr_start, paddr_end; 3375eb3d9bSMahesh Salgaonkar 346e8a9279SColin Ian King pr_debug("%s: Retrieved memory error event, type: 0x%x\n", 3575eb3d9bSMahesh Salgaonkar __func__, merr_evt->type); 3675eb3d9bSMahesh Salgaonkar switch (merr_evt->type) { 3775eb3d9bSMahesh Salgaonkar case OPAL_MEM_ERR_TYPE_RESILIENCE: 38223ca9d8SAnton Blanchard paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start); 39223ca9d8SAnton Blanchard paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end); 4075eb3d9bSMahesh Salgaonkar break; 4175eb3d9bSMahesh Salgaonkar case OPAL_MEM_ERR_TYPE_DYN_DALLOC: 42223ca9d8SAnton Blanchard paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start); 43223ca9d8SAnton Blanchard paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end); 4475eb3d9bSMahesh Salgaonkar break; 4575eb3d9bSMahesh Salgaonkar default: 4675eb3d9bSMahesh Salgaonkar return; 4775eb3d9bSMahesh Salgaonkar } 4875eb3d9bSMahesh Salgaonkar 4975eb3d9bSMahesh Salgaonkar for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) { 5083b57531SEric W. Biederman memory_failure(paddr_start >> PAGE_SHIFT, 0); 5175eb3d9bSMahesh Salgaonkar } 5275eb3d9bSMahesh Salgaonkar } 5375eb3d9bSMahesh Salgaonkar 5475eb3d9bSMahesh Salgaonkar static void handle_memory_error(void) 5575eb3d9bSMahesh Salgaonkar { 5675eb3d9bSMahesh Salgaonkar unsigned long flags; 5775eb3d9bSMahesh Salgaonkar struct OpalMemoryErrorData *merr_evt; 5875eb3d9bSMahesh Salgaonkar struct OpalMsgNode *msg_node; 5975eb3d9bSMahesh Salgaonkar 6075eb3d9bSMahesh Salgaonkar spin_lock_irqsave(&opal_mem_err_lock, flags); 6175eb3d9bSMahesh Salgaonkar while (!list_empty(&opal_memory_err_list)) { 6275eb3d9bSMahesh Salgaonkar msg_node = list_entry(opal_memory_err_list.next, 6375eb3d9bSMahesh Salgaonkar struct OpalMsgNode, list); 6475eb3d9bSMahesh Salgaonkar list_del(&msg_node->list); 6575eb3d9bSMahesh Salgaonkar spin_unlock_irqrestore(&opal_mem_err_lock, flags); 6675eb3d9bSMahesh Salgaonkar 6775eb3d9bSMahesh Salgaonkar merr_evt = (struct OpalMemoryErrorData *) 6875eb3d9bSMahesh Salgaonkar &msg_node->msg.params[0]; 6975eb3d9bSMahesh Salgaonkar handle_memory_error_event(merr_evt); 7075eb3d9bSMahesh Salgaonkar kfree(msg_node); 7175eb3d9bSMahesh Salgaonkar spin_lock_irqsave(&opal_mem_err_lock, flags); 7275eb3d9bSMahesh Salgaonkar } 7375eb3d9bSMahesh Salgaonkar spin_unlock_irqrestore(&opal_mem_err_lock, flags); 7475eb3d9bSMahesh Salgaonkar } 7575eb3d9bSMahesh Salgaonkar 7675eb3d9bSMahesh Salgaonkar static void mem_error_handler(struct work_struct *work) 7775eb3d9bSMahesh Salgaonkar { 7875eb3d9bSMahesh Salgaonkar handle_memory_error(); 7975eb3d9bSMahesh Salgaonkar } 8075eb3d9bSMahesh Salgaonkar 8175eb3d9bSMahesh Salgaonkar static DECLARE_WORK(mem_error_work, mem_error_handler); 8275eb3d9bSMahesh Salgaonkar 8375eb3d9bSMahesh Salgaonkar /* 8475eb3d9bSMahesh Salgaonkar * opal_memory_err_event - notifier handler that queues up the opal message 8575eb3d9bSMahesh Salgaonkar * to be preocessed later. 8675eb3d9bSMahesh Salgaonkar */ 8775eb3d9bSMahesh Salgaonkar static int opal_memory_err_event(struct notifier_block *nb, 8875eb3d9bSMahesh Salgaonkar unsigned long msg_type, void *msg) 8975eb3d9bSMahesh Salgaonkar { 9075eb3d9bSMahesh Salgaonkar unsigned long flags; 9175eb3d9bSMahesh Salgaonkar struct OpalMsgNode *msg_node; 9275eb3d9bSMahesh Salgaonkar 9375eb3d9bSMahesh Salgaonkar if (msg_type != OPAL_MSG_MEM_ERR) 9475eb3d9bSMahesh Salgaonkar return 0; 9575eb3d9bSMahesh Salgaonkar 9675eb3d9bSMahesh Salgaonkar msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); 9775eb3d9bSMahesh Salgaonkar if (!msg_node) { 9875eb3d9bSMahesh Salgaonkar pr_err("MEMORY_ERROR: out of memory, Opal message event not" 9975eb3d9bSMahesh Salgaonkar "handled\n"); 10075eb3d9bSMahesh Salgaonkar return -ENOMEM; 10175eb3d9bSMahesh Salgaonkar } 102a0828cf5SMarkus Elfring memcpy(&msg_node->msg, msg, sizeof(msg_node->msg)); 10375eb3d9bSMahesh Salgaonkar 10475eb3d9bSMahesh Salgaonkar spin_lock_irqsave(&opal_mem_err_lock, flags); 10575eb3d9bSMahesh Salgaonkar list_add(&msg_node->list, &opal_memory_err_list); 10675eb3d9bSMahesh Salgaonkar spin_unlock_irqrestore(&opal_mem_err_lock, flags); 10775eb3d9bSMahesh Salgaonkar 10875eb3d9bSMahesh Salgaonkar schedule_work(&mem_error_work); 10975eb3d9bSMahesh Salgaonkar return 0; 11075eb3d9bSMahesh Salgaonkar } 11175eb3d9bSMahesh Salgaonkar 11275eb3d9bSMahesh Salgaonkar static struct notifier_block opal_mem_err_nb = { 11375eb3d9bSMahesh Salgaonkar .notifier_call = opal_memory_err_event, 11475eb3d9bSMahesh Salgaonkar .next = NULL, 11575eb3d9bSMahesh Salgaonkar .priority = 0, 11675eb3d9bSMahesh Salgaonkar }; 11775eb3d9bSMahesh Salgaonkar 11875eb3d9bSMahesh Salgaonkar static int __init opal_mem_err_init(void) 11975eb3d9bSMahesh Salgaonkar { 12075eb3d9bSMahesh Salgaonkar int ret; 12175eb3d9bSMahesh Salgaonkar 12275eb3d9bSMahesh Salgaonkar if (!opal_mem_err_nb_init) { 12375eb3d9bSMahesh Salgaonkar ret = opal_message_notifier_register( 12475eb3d9bSMahesh Salgaonkar OPAL_MSG_MEM_ERR, &opal_mem_err_nb); 12575eb3d9bSMahesh Salgaonkar if (ret) { 12675eb3d9bSMahesh Salgaonkar pr_err("%s: Can't register OPAL event notifier (%d)\n", 12775eb3d9bSMahesh Salgaonkar __func__, ret); 12875eb3d9bSMahesh Salgaonkar return ret; 12975eb3d9bSMahesh Salgaonkar } 13075eb3d9bSMahesh Salgaonkar opal_mem_err_nb_init = 1; 13175eb3d9bSMahesh Salgaonkar } 13275eb3d9bSMahesh Salgaonkar return 0; 13375eb3d9bSMahesh Salgaonkar } 13496e023e7SAlistair Popple machine_device_initcall(powernv, opal_mem_err_init); 135