xref: /openbmc/linux/arch/powerpc/kernel/mce_power.c (revision f3539c12)
1 /*
2  * Machine check exception handling CPU-side for power7 and power8
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright 2013 IBM Corporation
19  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20  */
21 
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce_power: " fmt
24 
25 #include <linux/types.h>
26 #include <linux/ptrace.h>
27 #include <asm/mmu.h>
28 #include <asm/mce.h>
29 #include <asm/machdep.h>
30 
31 static void flush_tlb_206(unsigned int num_sets, unsigned int action)
32 {
33 	unsigned long rb;
34 	unsigned int i;
35 
36 	switch (action) {
37 	case TLB_INVAL_SCOPE_GLOBAL:
38 		rb = TLBIEL_INVAL_SET;
39 		break;
40 	case TLB_INVAL_SCOPE_LPID:
41 		rb = TLBIEL_INVAL_SET_LPID;
42 		break;
43 	default:
44 		BUG();
45 		break;
46 	}
47 
48 	asm volatile("ptesync" : : : "memory");
49 	for (i = 0; i < num_sets; i++) {
50 		asm volatile("tlbiel %0" : : "r" (rb));
51 		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
52 	}
53 	asm volatile("ptesync" : : : "memory");
54 }
55 
56 /*
57  * Generic routines to flush TLB on POWER processors. These routines
58  * are used as flush_tlb hook in the cpu_spec.
59  *
60  * action => TLB_INVAL_SCOPE_GLOBAL:  Invalidate all TLBs.
61  *	     TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
62  */
63 void __flush_tlb_power7(unsigned int action)
64 {
65 	flush_tlb_206(POWER7_TLB_SETS, action);
66 }
67 
68 void __flush_tlb_power8(unsigned int action)
69 {
70 	flush_tlb_206(POWER8_TLB_SETS, action);
71 }
72 
73 void __flush_tlb_power9(unsigned int action)
74 {
75 	if (radix_enabled())
76 		flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
77 
78 	flush_tlb_206(POWER9_TLB_SETS_HASH, action);
79 }
80 
81 
82 /* flush SLBs and reload */
83 #ifdef CONFIG_PPC_STD_MMU_64
84 static void flush_and_reload_slb(void)
85 {
86 	struct slb_shadow *slb;
87 	unsigned long i, n;
88 
89 	/* Invalidate all SLBs */
90 	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
91 
92 #ifdef CONFIG_KVM_BOOK3S_HANDLER
93 	/*
94 	 * If machine check is hit when in guest or in transition, we will
95 	 * only flush the SLBs and continue.
96 	 */
97 	if (get_paca()->kvm_hstate.in_guest)
98 		return;
99 #endif
100 
101 	/* For host kernel, reload the SLBs from shadow SLB buffer. */
102 	slb = get_slb_shadow();
103 	if (!slb)
104 		return;
105 
106 	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
107 
108 	/* Load up the SLB entries from shadow SLB */
109 	for (i = 0; i < n; i++) {
110 		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
111 		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
112 
113 		rb = (rb & ~0xFFFul) | i;
114 		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
115 	}
116 }
117 #endif
118 
/*
 * Attempt recovery for a load/store (D-side) machine check.
 *
 * @dsisr:          DSISR value describing the error(s).
 * @slb_error_bits: mask of DSISR bits that denote SLB errors on this CPU.
 *
 * Returns 1 if every error bit set in the low 32 bits of @dsisr was
 * recovered, 0 otherwise.
 */
static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
{
	long handled = 1;

	/*
	 * Flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
	 * Clear each error bit only once it has actually been dealt with, so
	 * that the final check can tell whether anything was left unhandled.
	 */
#ifdef CONFIG_PPC_STD_MMU_64
	if (dsisr & slb_error_bits) {
		flush_and_reload_slb();
		/* reset error bits */
		dsisr &= ~(slb_error_bits);
	}
	/*
	 * Only treat the TLB multi-hit as recovered when a flush_tlb hook is
	 * available and was invoked. Without a hook the bit stays set and the
	 * error is reported as unhandled, matching the instruction-fetch path.
	 */
	if ((dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) &&
	    cur_cpu_spec && cur_cpu_spec->flush_tlb) {
		cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
		/* reset error bits */
		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
	}
#endif
	/* Any other errors we don't understand? */
	if (dsisr & 0xffffffffUL)
		handled = 0;

	return handled;
}
147 
148 static long mce_handle_derror_p7(uint64_t dsisr)
149 {
150 	return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
151 }
152 
/*
 * Attempt recovery for instruction-fetch machine check causes that are
 * decoded identically by the POWER7 and POWER8 handlers.
 *
 * Returns 1 when a recovery action was performed, 0 otherwise.
 */
static long mce_handle_common_ierror(uint64_t srr1)
{
	switch (P7_SRR1_MC_IFETCH(srr1)) {
#ifdef CONFIG_PPC_STD_MMU_64
	case P7_SRR1_MC_IFETCH_SLB_PARITY:
	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
		/* SLB errors: flush and reload the SLBs. */
		flush_and_reload_slb();
		return 1;
	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
		/* Without a flush_tlb hook nothing can be done here. */
		if (!cur_cpu_spec || !cur_cpu_spec->flush_tlb)
			return 0;
		cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
		return 1;
#endif
	default:
		/* No error indicated, or a cause not recovered here. */
		return 0;
	}
}
180 
/*
 * POWER7 instruction-fetch recovery: run the common handler, then also
 * flush and reload the SLBs for the POWER7-only "SLB both" cause.
 *
 * Returns 1 if the error was handled, 0 otherwise.
 */
static long mce_handle_ierror_p7(uint64_t srr1)
{
	long handled = mce_handle_common_ierror(srr1);

#ifdef CONFIG_PPC_STD_MMU_64
	if (P7_SRR1_MC_IFETCH(srr1) != P7_SRR1_MC_IFETCH_SLB_BOTH)
		return handled;

	flush_and_reload_slb();
	handled = 1;
#endif
	return handled;
}
195 
196 static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
197 {
198 	switch (P7_SRR1_MC_IFETCH(srr1)) {
199 	case P7_SRR1_MC_IFETCH_SLB_PARITY:
200 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
201 		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
202 		break;
203 	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
204 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
205 		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
206 		break;
207 	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
208 		mce_err->error_type = MCE_ERROR_TYPE_TLB;
209 		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
210 		break;
211 	case P7_SRR1_MC_IFETCH_UE:
212 	case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
213 		mce_err->error_type = MCE_ERROR_TYPE_UE;
214 		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
215 		break;
216 	case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
217 		mce_err->error_type = MCE_ERROR_TYPE_UE;
218 		mce_err->u.ue_error_type =
219 				MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
220 		break;
221 	}
222 }
223 
224 static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
225 {
226 	mce_get_common_ierror(mce_err, srr1);
227 	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
228 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
229 		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
230 	}
231 }
232 
233 static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
234 {
235 	if (dsisr & P7_DSISR_MC_UE) {
236 		mce_err->error_type = MCE_ERROR_TYPE_UE;
237 		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
238 	} else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
239 		mce_err->error_type = MCE_ERROR_TYPE_UE;
240 		mce_err->u.ue_error_type =
241 				MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
242 	} else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
243 		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
244 		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
245 	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
246 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
247 		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
248 	} else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
249 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
250 		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
251 	} else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
252 		mce_err->error_type = MCE_ERROR_TYPE_TLB;
253 		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
254 	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
255 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
256 		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
257 	}
258 }
259 
260 static long mce_handle_ue_error(struct pt_regs *regs)
261 {
262 	long handled = 0;
263 
264 	/*
265 	 * On specific SCOM read via MMIO we may get a machine check
266 	 * exception with SRR0 pointing inside opal. If that is the
267 	 * case OPAL may have recovery address to re-read SCOM data in
268 	 * different way and hence we can recover from this MC.
269 	 */
270 
271 	if (ppc_md.mce_check_early_recovery) {
272 		if (ppc_md.mce_check_early_recovery(regs))
273 			handled = 1;
274 	}
275 	return handled;
276 }
277 
278 long __machine_check_early_realmode_p7(struct pt_regs *regs)
279 {
280 	uint64_t srr1, nip, addr;
281 	long handled = 1;
282 	struct mce_error_info mce_error_info = { 0 };
283 
284 	srr1 = regs->msr;
285 	nip = regs->nip;
286 
287 	/*
288 	 * Handle memory errors depending whether this was a load/store or
289 	 * ifetch exception. Also, populate the mce error_type and
290 	 * type-specific error_type from either SRR1 or DSISR, depending
291 	 * whether this was a load/store or ifetch exception
292 	 */
293 	if (P7_SRR1_MC_LOADSTORE(srr1)) {
294 		handled = mce_handle_derror_p7(regs->dsisr);
295 		mce_get_derror_p7(&mce_error_info, regs->dsisr);
296 		addr = regs->dar;
297 	} else {
298 		handled = mce_handle_ierror_p7(srr1);
299 		mce_get_ierror_p7(&mce_error_info, srr1);
300 		addr = regs->nip;
301 	}
302 
303 	/* Handle UE error. */
304 	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
305 		handled = mce_handle_ue_error(regs);
306 
307 	save_mce_event(regs, handled, &mce_error_info, nip, addr);
308 	return handled;
309 }
310 
311 static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
312 {
313 	mce_get_common_ierror(mce_err, srr1);
314 	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
315 		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
316 		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
317 	}
318 }
319 
320 static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
321 {
322 	mce_get_derror_p7(mce_err, dsisr);
323 	if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
324 		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
325 		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
326 	}
327 }
328 
/*
 * POWER8 instruction-fetch recovery: run the common handler, then also
 * flush and reload the SLBs for the POWER8-only ERAT multi-hit cause.
 *
 * Returns 1 if the error was handled, 0 otherwise.
 */
static long mce_handle_ierror_p8(uint64_t srr1)
{
	long handled = mce_handle_common_ierror(srr1);

#ifdef CONFIG_PPC_STD_MMU_64
	if (P7_SRR1_MC_IFETCH(srr1) != P8_SRR1_MC_IFETCH_ERAT_MULTIHIT)
		return handled;

	flush_and_reload_slb();
	handled = 1;
#endif
	return handled;
}
343 
344 static long mce_handle_derror_p8(uint64_t dsisr)
345 {
346 	return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
347 }
348 
349 long __machine_check_early_realmode_p8(struct pt_regs *regs)
350 {
351 	uint64_t srr1, nip, addr;
352 	long handled = 1;
353 	struct mce_error_info mce_error_info = { 0 };
354 
355 	srr1 = regs->msr;
356 	nip = regs->nip;
357 
358 	if (P7_SRR1_MC_LOADSTORE(srr1)) {
359 		handled = mce_handle_derror_p8(regs->dsisr);
360 		mce_get_derror_p8(&mce_error_info, regs->dsisr);
361 		addr = regs->dar;
362 	} else {
363 		handled = mce_handle_ierror_p8(srr1);
364 		mce_get_ierror_p8(&mce_error_info, srr1);
365 		addr = regs->nip;
366 	}
367 
368 	/* Handle UE error. */
369 	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
370 		handled = mce_handle_ue_error(regs);
371 
372 	save_mce_event(regs, handled, &mce_error_info, nip, addr);
373 	return handled;
374 }
375