xref: /openbmc/linux/arch/powerpc/kernel/mce_power.c (revision 7051924f771722c6dd235e693742cda6488ac700)
1 /*
2  * Machine check exception handling CPU-side for power7 and power8
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright 2013 IBM Corporation
19  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20  */
21 
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce_power: " fmt
24 
25 #include <linux/types.h>
26 #include <linux/ptrace.h>
27 #include <asm/mmu.h>
28 #include <asm/mce.h>
29 #include <asm/machdep.h>
30 
31 /* flush SLBs and reload */
32 static void flush_and_reload_slb(void)
33 {
34 	struct slb_shadow *slb;
35 	unsigned long i, n;
36 
37 	/* Invalidate all SLBs */
38 	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
39 
40 #ifdef CONFIG_KVM_BOOK3S_HANDLER
41 	/*
42 	 * If machine check is hit when in guest or in transition, we will
43 	 * only flush the SLBs and continue.
44 	 */
45 	if (get_paca()->kvm_hstate.in_guest)
46 		return;
47 #endif
48 
49 	/* For host kernel, reload the SLBs from shadow SLB buffer. */
50 	slb = get_slb_shadow();
51 	if (!slb)
52 		return;
53 
54 	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
55 
56 	/* Load up the SLB entries from shadow SLB */
57 	for (i = 0; i < n; i++) {
58 		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
59 		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
60 
61 		rb = (rb & ~0xFFFul) | i;
62 		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
63 	}
64 }
65 
66 static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
67 {
68 	long handled = 1;
69 
70 	/*
71 	 * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
72 	 * reset the error bits whenever we handle them so that at the end
73 	 * we can check whether we handled all of them or not.
74 	 * */
75 	if (dsisr & slb_error_bits) {
76 		flush_and_reload_slb();
77 		/* reset error bits */
78 		dsisr &= ~(slb_error_bits);
79 	}
80 	if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
81 		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
82 			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
83 		/* reset error bits */
84 		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
85 	}
86 	/* Any other errors we don't understand? */
87 	if (dsisr & 0xffffffffUL)
88 		handled = 0;
89 
90 	return handled;
91 }
92 
93 static long mce_handle_derror_p7(uint64_t dsisr)
94 {
95 	return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
96 }
97 
98 static long mce_handle_common_ierror(uint64_t srr1)
99 {
100 	long handled = 0;
101 
102 	switch (P7_SRR1_MC_IFETCH(srr1)) {
103 	case 0:
104 		break;
105 	case P7_SRR1_MC_IFETCH_SLB_PARITY:
106 	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
107 		/* flush and reload SLBs for SLB errors. */
108 		flush_and_reload_slb();
109 		handled = 1;
110 		break;
111 	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
112 		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
113 			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
114 			handled = 1;
115 		}
116 		break;
117 	default:
118 		break;
119 	}
120 
121 	return handled;
122 }
123 
124 static long mce_handle_ierror_p7(uint64_t srr1)
125 {
126 	long handled = 0;
127 
128 	handled = mce_handle_common_ierror(srr1);
129 
130 	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
131 		flush_and_reload_slb();
132 		handled = 1;
133 	}
134 	return handled;
135 }
136 
137 static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
138 {
139 	switch (P7_SRR1_MC_IFETCH(srr1)) {
140 	case P7_SRR1_MC_IFETCH_SLB_PARITY:
141 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
142 		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
143 		break;
144 	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
145 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
146 		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
147 		break;
148 	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
149 		mce_err->error_type = MCE_ERROR_TYPE_TLB;
150 		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
151 		break;
152 	case P7_SRR1_MC_IFETCH_UE:
153 	case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
154 		mce_err->error_type = MCE_ERROR_TYPE_UE;
155 		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
156 		break;
157 	case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
158 		mce_err->error_type = MCE_ERROR_TYPE_UE;
159 		mce_err->u.ue_error_type =
160 				MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
161 		break;
162 	}
163 }
164 
165 static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
166 {
167 	mce_get_common_ierror(mce_err, srr1);
168 	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
169 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
170 		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
171 	}
172 }
173 
174 static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
175 {
176 	if (dsisr & P7_DSISR_MC_UE) {
177 		mce_err->error_type = MCE_ERROR_TYPE_UE;
178 		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
179 	} else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
180 		mce_err->error_type = MCE_ERROR_TYPE_UE;
181 		mce_err->u.ue_error_type =
182 				MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
183 	} else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
184 		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
185 		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
186 	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
187 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
188 		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
189 	} else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
190 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
191 		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
192 	} else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
193 		mce_err->error_type = MCE_ERROR_TYPE_TLB;
194 		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
195 	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
196 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
197 		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
198 	}
199 }
200 
201 static long mce_handle_ue_error(struct pt_regs *regs)
202 {
203 	long handled = 0;
204 
205 	/*
206 	 * On specific SCOM read via MMIO we may get a machine check
207 	 * exception with SRR0 pointing inside opal. If that is the
208 	 * case OPAL may have recovery address to re-read SCOM data in
209 	 * different way and hence we can recover from this MC.
210 	 */
211 
212 	if (ppc_md.mce_check_early_recovery) {
213 		if (ppc_md.mce_check_early_recovery(regs))
214 			handled = 1;
215 	}
216 	return handled;
217 }
218 
219 long __machine_check_early_realmode_p7(struct pt_regs *regs)
220 {
221 	uint64_t srr1, nip, addr;
222 	long handled = 1;
223 	struct mce_error_info mce_error_info = { 0 };
224 
225 	srr1 = regs->msr;
226 	nip = regs->nip;
227 
228 	/*
229 	 * Handle memory errors depending whether this was a load/store or
230 	 * ifetch exception. Also, populate the mce error_type and
231 	 * type-specific error_type from either SRR1 or DSISR, depending
232 	 * whether this was a load/store or ifetch exception
233 	 */
234 	if (P7_SRR1_MC_LOADSTORE(srr1)) {
235 		handled = mce_handle_derror_p7(regs->dsisr);
236 		mce_get_derror_p7(&mce_error_info, regs->dsisr);
237 		addr = regs->dar;
238 	} else {
239 		handled = mce_handle_ierror_p7(srr1);
240 		mce_get_ierror_p7(&mce_error_info, srr1);
241 		addr = regs->nip;
242 	}
243 
244 	/* Handle UE error. */
245 	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
246 		handled = mce_handle_ue_error(regs);
247 
248 	save_mce_event(regs, handled, &mce_error_info, nip, addr);
249 	return handled;
250 }
251 
252 static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
253 {
254 	mce_get_common_ierror(mce_err, srr1);
255 	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
256 		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
257 		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
258 	}
259 }
260 
261 static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
262 {
263 	mce_get_derror_p7(mce_err, dsisr);
264 	if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
265 		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
266 		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
267 	}
268 }
269 
270 static long mce_handle_ierror_p8(uint64_t srr1)
271 {
272 	long handled = 0;
273 
274 	handled = mce_handle_common_ierror(srr1);
275 
276 	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
277 		flush_and_reload_slb();
278 		handled = 1;
279 	}
280 	return handled;
281 }
282 
283 static long mce_handle_derror_p8(uint64_t dsisr)
284 {
285 	return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
286 }
287 
288 long __machine_check_early_realmode_p8(struct pt_regs *regs)
289 {
290 	uint64_t srr1, nip, addr;
291 	long handled = 1;
292 	struct mce_error_info mce_error_info = { 0 };
293 
294 	srr1 = regs->msr;
295 	nip = regs->nip;
296 
297 	if (P7_SRR1_MC_LOADSTORE(srr1)) {
298 		handled = mce_handle_derror_p8(regs->dsisr);
299 		mce_get_derror_p8(&mce_error_info, regs->dsisr);
300 		addr = regs->dar;
301 	} else {
302 		handled = mce_handle_ierror_p8(srr1);
303 		mce_get_ierror_p8(&mce_error_info, srr1);
304 		addr = regs->nip;
305 	}
306 
307 	/* Handle UE error. */
308 	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
309 		handled = mce_handle_ue_error(regs);
310 
311 	save_mce_event(regs, handled, &mce_error_info, nip, addr);
312 	return handled;
313 }
314