xref: /openbmc/linux/arch/powerpc/platforms/cell/ras.c (revision 4dc7ccf7)
1 /*
2  * Copyright 2006-2008, IBM Corporation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version
7  * 2 of the License, or (at your option) any later version.
8  */
9 
10 #undef DEBUG
11 
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/smp.h>
16 #include <linux/reboot.h>
17 #include <linux/kexec.h>
18 #include <linux/crash_dump.h>
19 
20 #include <asm/kexec.h>
21 #include <asm/reg.h>
22 #include <asm/io.h>
23 #include <asm/prom.h>
24 #include <asm/machdep.h>
25 #include <asm/rtas.h>
26 #include <asm/cell-regs.h>
27 
28 #include "ras.h"
29 
30 
31 static void dump_fir(int cpu)
32 {
33 	struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
34 	struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
35 
36 	if (pregs == NULL)
37 		return;
38 
39 	/* Todo: do some nicer parsing of bits and based on them go down
40 	 * to other sub-units FIRs and not only IIC
41 	 */
42 	printk(KERN_ERR "Global Checkstop FIR    : 0x%016llx\n",
43 	       in_be64(&pregs->checkstop_fir));
44 	printk(KERN_ERR "Global Recoverable FIR  : 0x%016llx\n",
45 	       in_be64(&pregs->checkstop_fir));
46 	printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n",
47 	       in_be64(&pregs->spec_att_mchk_fir));
48 
49 	if (iregs == NULL)
50 		return;
51 	printk(KERN_ERR "IOC FIR                 : 0x%016llx\n",
52 	       in_be64(&iregs->ioc_fir));
53 
54 }
55 
56 void cbe_system_error_exception(struct pt_regs *regs)
57 {
58 	int cpu = smp_processor_id();
59 
60 	printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
61 	dump_fir(cpu);
62 	dump_stack();
63 }
64 
65 void cbe_maintenance_exception(struct pt_regs *regs)
66 {
67 	int cpu = smp_processor_id();
68 
69 	/*
70 	 * Nothing implemented for the maintenance interrupt at this point
71 	 */
72 
73 	printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu);
74 	dump_stack();
75 }
76 
77 void cbe_thermal_exception(struct pt_regs *regs)
78 {
79 	int cpu = smp_processor_id();
80 
81 	/*
82 	 * Nothing implemented for the thermal interrupt at this point
83 	 */
84 
85 	printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu);
86 	dump_stack();
87 }
88 
89 static int cbe_machine_check_handler(struct pt_regs *regs)
90 {
91 	int cpu = smp_processor_id();
92 
93 	printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu);
94 	dump_fir(cpu);
95 
96 	/* No recovery from this code now, lets continue */
97 	return 0;
98 }
99 
100 struct ptcal_area {
101 	struct list_head list;
102 	int nid;
103 	int order;
104 	struct page *pages;
105 };
106 
107 static LIST_HEAD(ptcal_list);
108 
109 static int ptcal_start_tok, ptcal_stop_tok;
110 
111 static int __init cbe_ptcal_enable_on_node(int nid, int order)
112 {
113 	struct ptcal_area *area;
114 	int ret = -ENOMEM;
115 	unsigned long addr;
116 
117 	if (is_kdump_kernel())
118 		rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
119 
120 	area = kmalloc(sizeof(*area), GFP_KERNEL);
121 	if (!area)
122 		goto out_err;
123 
124 	area->nid = nid;
125 	area->order = order;
126 	area->pages = alloc_pages_exact_node(area->nid, GFP_KERNEL|GFP_THISNODE,
127 						area->order);
128 
129 	if (!area->pages) {
130 		printk(KERN_WARNING "%s: no page on node %d\n",
131 			__func__, area->nid);
132 		goto out_free_area;
133 	}
134 
135 	/*
136 	 * We move the ptcal area to the middle of the allocated
137 	 * page, in order to avoid prefetches in memcpy and similar
138 	 * functions stepping on it.
139 	 */
140 	addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1);
141 	printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n",
142 			__func__, area->nid, addr);
143 
144 	ret = -EIO;
145 	if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid,
146 				(unsigned int)(addr >> 32),
147 				(unsigned int)(addr & 0xffffffff))) {
148 		printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n",
149 				__func__, nid);
150 		goto out_free_pages;
151 	}
152 
153 	list_add(&area->list, &ptcal_list);
154 
155 	return 0;
156 
157 out_free_pages:
158 	__free_pages(area->pages, area->order);
159 out_free_area:
160 	kfree(area);
161 out_err:
162 	return ret;
163 }
164 
165 static int __init cbe_ptcal_enable(void)
166 {
167 	const u32 *size;
168 	struct device_node *np;
169 	int order, found_mic = 0;
170 
171 	np = of_find_node_by_path("/rtas");
172 	if (!np)
173 		return -ENODEV;
174 
175 	size = of_get_property(np, "ibm,cbe-ptcal-size", NULL);
176 	if (!size)
177 		return -ENODEV;
178 
179 	pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size);
180 	order = get_order(*size);
181 	of_node_put(np);
182 
183 	/* support for malta device trees, with be@/mic@ nodes */
184 	for_each_node_by_type(np, "mic-tm") {
185 		cbe_ptcal_enable_on_node(of_node_to_nid(np), order);
186 		found_mic = 1;
187 	}
188 
189 	if (found_mic)
190 		return 0;
191 
192 	/* support for older device tree - use cpu nodes */
193 	for_each_node_by_type(np, "cpu") {
194 		const u32 *nid = of_get_property(np, "node-id", NULL);
195 		if (!nid) {
196 			printk(KERN_ERR "%s: node %s is missing node-id?\n",
197 					__func__, np->full_name);
198 			continue;
199 		}
200 		cbe_ptcal_enable_on_node(*nid, order);
201 		found_mic = 1;
202 	}
203 
204 	return found_mic ? 0 : -ENODEV;
205 }
206 
207 static int cbe_ptcal_disable(void)
208 {
209 	struct ptcal_area *area, *tmp;
210 	int ret = 0;
211 
212 	pr_debug("%s: disabling PTCAL\n", __func__);
213 
214 	list_for_each_entry_safe(area, tmp, &ptcal_list, list) {
215 		/* disable ptcal on this node */
216 		if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) {
217 			printk(KERN_ERR "%s: error disabling PTCAL "
218 					"on node %d!\n", __func__,
219 					area->nid);
220 			ret = -EIO;
221 			continue;
222 		}
223 
224 		/* ensure we can access the PTCAL area */
225 		memset(page_address(area->pages), 0,
226 				1 << (area->order + PAGE_SHIFT));
227 
228 		/* clean up */
229 		list_del(&area->list);
230 		__free_pages(area->pages, area->order);
231 		kfree(area);
232 	}
233 
234 	return ret;
235 }
236 
/*
 * Reboot notifier callback: disable PTCAL on all nodes.  None of the
 * notifier arguments are used.
 *
 * NOTE(review): the 0 / -EIO result of cbe_ptcal_disable() is returned
 * unchanged as the notifier result - confirm this is what the notifier
 * chain expects.
 */
static int cbe_ptcal_notify_reboot(struct notifier_block *nb,
		unsigned long code, void *data)
{
	int status = cbe_ptcal_disable();

	return status;
}
242 
/*
 * Crash shutdown hook registered by cbe_ptcal_init(): disable PTCAL on
 * all nodes.  The result of the disable is deliberately discarded.
 */
static void cbe_ptcal_crash_shutdown(void)
{
	(void)cbe_ptcal_disable();
}
247 
248 static struct notifier_block cbe_ptcal_reboot_notifier = {
249 	.notifier_call = cbe_ptcal_notify_reboot
250 };
251 
252 #ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
253 static int sysreset_hack;
254 
255 static int __init cbe_sysreset_init(void)
256 {
257 	struct cbe_pmd_regs __iomem *regs;
258 
259 	sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0");
260 	if (!sysreset_hack)
261 		return 0;
262 
263 	regs = cbe_get_cpu_pmd_regs(0);
264 	if (!regs)
265 		return 0;
266 
267 	/* Enable JTAG system-reset hack */
268 	out_be32(&regs->fir_mode_reg,
269 		in_be32(&regs->fir_mode_reg) |
270 		CBE_PMD_FIR_MODE_M8);
271 
272 	return 0;
273 }
274 device_initcall(cbe_sysreset_init);
275 
276 int cbe_sysreset_hack(void)
277 {
278 	struct cbe_pmd_regs __iomem *regs;
279 
280 	/*
281 	 * The BMC can inject user triggered system reset exceptions,
282 	 * but cannot set the system reset reason in srr1,
283 	 * so check an extra register here.
284 	 */
285 	if (sysreset_hack && (smp_processor_id() == 0)) {
286 		regs = cbe_get_cpu_pmd_regs(0);
287 		if (!regs)
288 			return 0;
289 		if (in_be64(&regs->ras_esc_0) & 0x0000ffff) {
290 			out_be64(&regs->ras_esc_0, 0);
291 			return 0;
292 		}
293 	}
294 	return 1;
295 }
296 #endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
297 
298 int __init cbe_ptcal_init(void)
299 {
300 	int ret;
301 	ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal");
302 	ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal");
303 
304 	if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE
305 			|| ptcal_stop_tok == RTAS_UNKNOWN_SERVICE)
306 		return -ENODEV;
307 
308 	ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier);
309 	if (ret)
310 		goto out1;
311 
312 	ret = crash_shutdown_register(&cbe_ptcal_crash_shutdown);
313 	if (ret)
314 		goto out2;
315 
316 	return cbe_ptcal_enable();
317 
318 out2:
319 	unregister_reboot_notifier(&cbe_ptcal_reboot_notifier);
320 out1:
321 	printk(KERN_ERR "Can't disable PTCAL, so not enabling\n");
322 	return ret;
323 }
324 
325 arch_initcall(cbe_ptcal_init);
326 
327 void __init cbe_ras_init(void)
328 {
329 	unsigned long hid0;
330 
331 	/*
332 	 * Enable System Error & thermal interrupts and wakeup conditions
333 	 */
334 
335 	hid0 = mfspr(SPRN_HID0);
336 	hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP |
337 		HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP;
338 	mtspr(SPRN_HID0, hid0);
339 	mb();
340 
341 	/*
342 	 * Install machine check handler. Leave setting of precise mode to
343 	 * what the firmware did for now
344 	 */
345 	ppc_md.machine_check_exception = cbe_machine_check_handler;
346 	mb();
347 
348 	/*
349 	 * For now, we assume that IOC_FIR is already set to forward some
350 	 * error conditions to the System Error handler. If that is not true
351 	 * then it will have to be fixed up here.
352 	 */
353 }
354