xref: /openbmc/linux/arch/mips/dec/ecc-berr.c (revision 2874c5fd)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Bus error event handling code for systems equipped with ECC
4  *	handling logic, i.e. DECstation/DECsystem 5000/200 (KN02),
5  *	5000/240 (KN03), 5000/260 (KN05) and DECsystem 5900 (KN03),
6  *	5900/260 (KN05) systems.
7  *
8  *	Copyright (c) 2003, 2005  Maciej W. Rozycki
9  */
10 
11 #include <linux/init.h>
12 #include <linux/interrupt.h>
13 #include <linux/kernel.h>
14 #include <linux/sched.h>
15 #include <linux/types.h>
16 
17 #include <asm/addrspace.h>
18 #include <asm/bootinfo.h>
19 #include <asm/cpu.h>
20 #include <asm/cpu-type.h>
21 #include <asm/irq_regs.h>
22 #include <asm/processor.h>
23 #include <asm/ptrace.h>
24 #include <asm/traps.h>
25 
26 #include <asm/dec/ecc.h>
27 #include <asm/dec/kn02.h>
28 #include <asm/dec/kn03.h>
29 #include <asm/dec/kn05.h>
30 
/*
 * Pointers to the error address (ERRADDR) and the check/syndrome (CHKSYN)
 * registers.  They are set once by dec_kn02_be_init() or
 * dec_kn03_be_init() and are read or written by the bus error code
 * afterwards.
 */
static volatile u32 *kn0x_erraddr;
static volatile u32 *kn0x_chksyn;
33 
dec_ecc_be_ack(void)34 static inline void dec_ecc_be_ack(void)
35 {
36 	*kn0x_erraddr = 0;			/* any write clears the IRQ */
37 	iob();
38 }
39 
dec_ecc_be_backend(struct pt_regs * regs,int is_fixup,int invoker)40 static int dec_ecc_be_backend(struct pt_regs *regs, int is_fixup, int invoker)
41 {
42 	static const char excstr[] = "exception";
43 	static const char intstr[] = "interrupt";
44 	static const char cpustr[] = "CPU";
45 	static const char dmastr[] = "DMA";
46 	static const char readstr[] = "read";
47 	static const char mreadstr[] = "memory read";
48 	static const char writestr[] = "write";
49 	static const char mwritstr[] = "partial memory write";
50 	static const char timestr[] = "timeout";
51 	static const char overstr[] = "overrun";
52 	static const char eccstr[] = "ECC error";
53 
54 	const char *kind, *agent, *cycle, *event;
55 	const char *status = "", *xbit = "", *fmt = "";
56 	unsigned long address;
57 	u16 syn = 0, sngl;
58 
59 	int i = 0;
60 
61 	u32 erraddr = *kn0x_erraddr;
62 	u32 chksyn = *kn0x_chksyn;
63 	int action = MIPS_BE_FATAL;
64 
65 	/* For non-ECC ack ASAP, so that any subsequent errors get caught. */
66 	if ((erraddr & (KN0X_EAR_VALID | KN0X_EAR_ECCERR)) == KN0X_EAR_VALID)
67 		dec_ecc_be_ack();
68 
69 	kind = invoker ? intstr : excstr;
70 
71 	if (!(erraddr & KN0X_EAR_VALID)) {
72 		/* No idea what happened. */
73 		printk(KERN_ALERT "Unidentified bus error %s\n", kind);
74 		return action;
75 	}
76 
77 	agent = (erraddr & KN0X_EAR_CPU) ? cpustr : dmastr;
78 
79 	if (erraddr & KN0X_EAR_ECCERR) {
80 		/* An ECC error on a CPU or DMA transaction. */
81 		cycle = (erraddr & KN0X_EAR_WRITE) ? mwritstr : mreadstr;
82 		event = eccstr;
83 	} else {
84 		/* A CPU timeout or a DMA overrun. */
85 		cycle = (erraddr & KN0X_EAR_WRITE) ? writestr : readstr;
86 		event = (erraddr & KN0X_EAR_CPU) ? timestr : overstr;
87 	}
88 
89 	address = erraddr & KN0X_EAR_ADDRESS;
90 	/* For ECC errors on reads adjust for MT pipelining. */
91 	if ((erraddr & (KN0X_EAR_WRITE | KN0X_EAR_ECCERR)) == KN0X_EAR_ECCERR)
92 		address = (address & ~0xfffLL) | ((address - 5) & 0xfffLL);
93 	address <<= 2;
94 
95 	/* Only CPU errors are fixable. */
96 	if (erraddr & KN0X_EAR_CPU && is_fixup)
97 		action = MIPS_BE_FIXUP;
98 
99 	if (erraddr & KN0X_EAR_ECCERR) {
100 		static const u8 data_sbit[32] = {
101 			0x4f, 0x4a, 0x52, 0x54, 0x57, 0x58, 0x5b, 0x5d,
102 			0x23, 0x25, 0x26, 0x29, 0x2a, 0x2c, 0x31, 0x34,
103 			0x0e, 0x0b, 0x13, 0x15, 0x16, 0x19, 0x1a, 0x1c,
104 			0x62, 0x64, 0x67, 0x68, 0x6b, 0x6d, 0x70, 0x75,
105 		};
106 		static const u8 data_mbit[25] = {
107 			0x07, 0x0d, 0x1f,
108 			0x2f, 0x32, 0x37, 0x38, 0x3b, 0x3d, 0x3e,
109 			0x43, 0x45, 0x46, 0x49, 0x4c, 0x51, 0x5e,
110 			0x61, 0x6e, 0x73, 0x76, 0x79, 0x7a, 0x7c, 0x7f,
111 		};
112 		static const char sbestr[] = "corrected single";
113 		static const char dbestr[] = "uncorrectable double";
114 		static const char mbestr[] = "uncorrectable multiple";
115 
116 		if (!(address & 0x4))
117 			syn = chksyn;			/* Low bank. */
118 		else
119 			syn = chksyn >> 16;		/* High bank. */
120 
121 		if (!(syn & KN0X_ESR_VLDLO)) {
122 			/* Ack now, no rewrite will happen. */
123 			dec_ecc_be_ack();
124 
125 			fmt = KERN_ALERT "%s" "invalid\n";
126 		} else {
127 			sngl = syn & KN0X_ESR_SNGLO;
128 			syn &= KN0X_ESR_SYNLO;
129 
130 			/*
131 			 * Multibit errors may be tagged incorrectly;
132 			 * check the syndrome explicitly.
133 			 */
134 			for (i = 0; i < 25; i++)
135 				if (syn == data_mbit[i])
136 					break;
137 
138 			if (i < 25) {
139 				status = mbestr;
140 			} else if (!sngl) {
141 				status = dbestr;
142 			} else {
143 				volatile u32 *ptr =
144 					(void *)CKSEG1ADDR(address);
145 
146 				*ptr = *ptr;		/* Rewrite. */
147 				iob();
148 
149 				status = sbestr;
150 				action = MIPS_BE_DISCARD;
151 			}
152 
153 			/* Ack now, now we've rewritten (or not). */
154 			dec_ecc_be_ack();
155 
156 			if (syn && syn == (syn & -syn)) {
157 				if (syn == 0x01) {
158 					fmt = KERN_ALERT "%s"
159 					      "%#04x -- %s bit error "
160 					      "at check bit C%s\n";
161 					xbit = "X";
162 				} else {
163 					fmt = KERN_ALERT "%s"
164 					      "%#04x -- %s bit error "
165 					      "at check bit C%s%u\n";
166 				}
167 				i = syn >> 2;
168 			} else {
169 				for (i = 0; i < 32; i++)
170 					if (syn == data_sbit[i])
171 						break;
172 				if (i < 32)
173 					fmt = KERN_ALERT "%s"
174 					      "%#04x -- %s bit error "
175 					      "at data bit D%s%u\n";
176 				else
177 					fmt = KERN_ALERT "%s"
178 					      "%#04x -- %s bit error\n";
179 			}
180 		}
181 	}
182 
183 	if (action != MIPS_BE_FIXUP)
184 		printk(KERN_ALERT "Bus error %s: %s %s %s at %#010lx\n",
185 			kind, agent, cycle, event, address);
186 
187 	if (action != MIPS_BE_FIXUP && erraddr & KN0X_EAR_ECCERR)
188 		printk(fmt, "  ECC syndrome ", syn, status, xbit, i);
189 
190 	return action;
191 }
192 
/*
 * Bus error exception entry point: hand the event to the common back
 * end, marking it as coming from an exception, and pass its verdict on.
 */
int dec_ecc_be_handler(struct pt_regs *regs, int is_fixup)
{
	const int from_interrupt = 0;

	return dec_ecc_be_backend(regs, is_fixup, from_interrupt);
}
197 
dec_ecc_be_interrupt(int irq,void * dev_id)198 irqreturn_t dec_ecc_be_interrupt(int irq, void *dev_id)
199 {
200 	struct pt_regs *regs = get_irq_regs();
201 
202 	int action = dec_ecc_be_backend(regs, 0, 1);
203 
204 	if (action == MIPS_BE_DISCARD)
205 		return IRQ_HANDLED;
206 
207 	/*
208 	 * FIXME: Find the affected processes and kill them, otherwise
209 	 * we must die.
210 	 *
211 	 * The interrupt is asynchronously delivered thus EPC and RA
212 	 * may be irrelevant, but are printed for a reference.
213 	 */
214 	printk(KERN_ALERT "Fatal bus interrupt, epc == %08lx, ra == %08lx\n",
215 	       regs->cp0_epc, regs->regs[31]);
216 	die("Unrecoverable bus error", regs);
217 }
218 
219 
220 /*
221  * Initialization differs a bit between KN02 and KN03/KN05, so we
222  * need two variants.  Once set up, all systems can be handled the
223  * same way.
224  */
dec_kn02_be_init(void)225 static inline void dec_kn02_be_init(void)
226 {
227 	volatile u32 *csr = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_CSR);
228 
229 	kn0x_erraddr = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_ERRADDR);
230 	kn0x_chksyn = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_CHKSYN);
231 
232 	/* Preset write-only bits of the Control Register cache. */
233 	cached_kn02_csr = *csr | KN02_CSR_LEDS;
234 
235 	/* Set normal ECC detection and generation. */
236 	cached_kn02_csr &= ~(KN02_CSR_DIAGCHK | KN02_CSR_DIAGGEN);
237 	/* Enable ECC correction. */
238 	cached_kn02_csr |= KN02_CSR_CORRECT;
239 	*csr = cached_kn02_csr;
240 	iob();
241 }
242 
dec_kn03_be_init(void)243 static inline void dec_kn03_be_init(void)
244 {
245 	volatile u32 *mcr = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_MCR);
246 	volatile u32 *mbcs = (void *)CKSEG1ADDR(KN4K_SLOT_BASE + KN4K_MB_CSR);
247 
248 	kn0x_erraddr = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_ERRADDR);
249 	kn0x_chksyn = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_CHKSYN);
250 
251 	/*
252 	 * Set normal ECC detection and generation, enable ECC correction.
253 	 * For KN05 we also need to make sure EE (?) is enabled in the MB.
254 	 * Otherwise DBE/IBE exceptions would be masked but bus error
255 	 * interrupts would still arrive, resulting in an inevitable crash
256 	 * if get_dbe() triggers one.
257 	 */
258 	*mcr = (*mcr & ~(KN03_MCR_DIAGCHK | KN03_MCR_DIAGGEN)) |
259 	       KN03_MCR_CORRECT;
260 	if (current_cpu_type() == CPU_R4400SC)
261 		*mbcs |= KN4K_MB_CSR_EE;
262 	fast_iob();
263 }
264 
dec_ecc_be_init(void)265 void __init dec_ecc_be_init(void)
266 {
267 	if (mips_machtype == MACH_DS5000_200)
268 		dec_kn02_be_init();
269 	else
270 		dec_kn03_be_init();
271 
272 	/* Clear any leftover errors from the firmware. */
273 	dec_ecc_be_ack();
274 }
275