xref: /openbmc/linux/arch/powerpc/platforms/powernv/opal-memory-errors.c (revision 4f727ecefefbd180de10e25b3e74c03dce3f1e75)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
 * OPAL asynchronous Memory error handling support in PowerNV.
4  *
5  * Copyright 2013 IBM Corporation
6  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
7  */
8 
9 #undef DEBUG
10 
11 #include <linux/kernel.h>
12 #include <linux/init.h>
13 #include <linux/of.h>
14 #include <linux/mm.h>
15 #include <linux/slab.h>
16 
17 #include <asm/machdep.h>
18 #include <asm/opal.h>
19 #include <asm/cputable.h>
20 
/* Set to 1 by opal_mem_err_init() once the OPAL notifier has been registered. */
static int opal_mem_err_nb_init;
/* Queue of struct OpalMsgNode entries waiting to be handled. */
static LIST_HEAD(opal_memory_err_list);
/* Taken (with interrupts saved/disabled) around every access to opal_memory_err_list. */
static DEFINE_SPINLOCK(opal_mem_err_lock);
24 
/*
 * One queued OPAL message: a private copy of the message together with the
 * linkage that places it on opal_memory_err_list.
 */
struct OpalMsgNode {
	struct list_head list;	/* entry on opal_memory_err_list */
	struct opal_msg msg;	/* copy of the message handed to the notifier */
};
29 
30 static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt)
31 {
32 	uint64_t paddr_start, paddr_end;
33 
34 	pr_debug("%s: Retrieved memory error event, type: 0x%x\n",
35 		  __func__, merr_evt->type);
36 	switch (merr_evt->type) {
37 	case OPAL_MEM_ERR_TYPE_RESILIENCE:
38 		paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start);
39 		paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end);
40 		break;
41 	case OPAL_MEM_ERR_TYPE_DYN_DALLOC:
42 		paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start);
43 		paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end);
44 		break;
45 	default:
46 		return;
47 	}
48 
49 	for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) {
50 		memory_failure(paddr_start >> PAGE_SHIFT, 0);
51 	}
52 }
53 
54 static void handle_memory_error(void)
55 {
56 	unsigned long flags;
57 	struct OpalMemoryErrorData *merr_evt;
58 	struct OpalMsgNode *msg_node;
59 
60 	spin_lock_irqsave(&opal_mem_err_lock, flags);
61 	while (!list_empty(&opal_memory_err_list)) {
62 		 msg_node = list_entry(opal_memory_err_list.next,
63 					   struct OpalMsgNode, list);
64 		list_del(&msg_node->list);
65 		spin_unlock_irqrestore(&opal_mem_err_lock, flags);
66 
67 		merr_evt = (struct OpalMemoryErrorData *)
68 					&msg_node->msg.params[0];
69 		handle_memory_error_event(merr_evt);
70 		kfree(msg_node);
71 		spin_lock_irqsave(&opal_mem_err_lock, flags);
72 	}
73 	spin_unlock_irqrestore(&opal_mem_err_lock, flags);
74 }
75 
/*
 * mem_error_handler - work callback that processes queued memory errors.
 * @work: the work item being run (not used here).
 *
 * Simply calls handle_memory_error().
 */
static void mem_error_handler(struct work_struct *work)
{
	handle_memory_error();
}

/* Work item bound to mem_error_handler(); scheduled by opal_memory_err_event(). */
static DECLARE_WORK(mem_error_work, mem_error_handler);
82 
83 /*
84  * opal_memory_err_event - notifier handler that queues up the opal message
85  * to be preocessed later.
86  */
87 static int opal_memory_err_event(struct notifier_block *nb,
88 			  unsigned long msg_type, void *msg)
89 {
90 	unsigned long flags;
91 	struct OpalMsgNode *msg_node;
92 
93 	if (msg_type != OPAL_MSG_MEM_ERR)
94 		return 0;
95 
96 	msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
97 	if (!msg_node) {
98 		pr_err("MEMORY_ERROR: out of memory, Opal message event not"
99 		       "handled\n");
100 		return -ENOMEM;
101 	}
102 	memcpy(&msg_node->msg, msg, sizeof(msg_node->msg));
103 
104 	spin_lock_irqsave(&opal_mem_err_lock, flags);
105 	list_add(&msg_node->list, &opal_memory_err_list);
106 	spin_unlock_irqrestore(&opal_mem_err_lock, flags);
107 
108 	schedule_work(&mem_error_work);
109 	return 0;
110 }
111 
112 static struct notifier_block opal_mem_err_nb = {
113 	.notifier_call	= opal_memory_err_event,
114 	.next		= NULL,
115 	.priority	= 0,
116 };
117 
118 static int __init opal_mem_err_init(void)
119 {
120 	int ret;
121 
122 	if (!opal_mem_err_nb_init) {
123 		ret = opal_message_notifier_register(
124 					OPAL_MSG_MEM_ERR, &opal_mem_err_nb);
125 		if (ret) {
126 			pr_err("%s: Can't register OPAL event notifier (%d)\n",
127 			       __func__, ret);
128 			return ret;
129 		}
130 		opal_mem_err_nb_init = 1;
131 	}
132 	return 0;
133 }
134 machine_device_initcall(powernv, opal_mem_err_init);
135