xref: /openbmc/linux/drivers/edac/qcom_edac.c (revision 97e6f135)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2018, The Linux Foundation. All rights reserved.
4  */
5 
6 #include <linux/edac.h>
7 #include <linux/interrupt.h>
8 #include <linux/kernel.h>
9 #include <linux/of.h>
10 #include <linux/platform_device.h>
11 #include <linux/regmap.h>
12 #include <linux/soc/qcom/llcc-qcom.h>
13 
14 #include "edac_mc.h"
15 #include "edac_device.h"
16 
17 #define EDAC_LLCC                       "qcom_llcc"
18 
19 #define LLCC_ERP_PANIC_ON_UE            1
20 
21 #define TRP_SYN_REG_CNT                 6
22 #define DRP_SYN_REG_CNT                 8
23 
24 #define LLCC_LB_CNT_MASK                GENMASK(31, 28)
25 #define LLCC_LB_CNT_SHIFT               28
26 
27 /* Mask and shift macros */
28 #define ECC_DB_ERR_COUNT_MASK           GENMASK(4, 0)
29 #define ECC_DB_ERR_WAYS_MASK            GENMASK(31, 16)
30 #define ECC_DB_ERR_WAYS_SHIFT           BIT(4)
31 
32 #define ECC_SB_ERR_COUNT_MASK           GENMASK(23, 16)
33 #define ECC_SB_ERR_COUNT_SHIFT          BIT(4)
34 #define ECC_SB_ERR_WAYS_MASK            GENMASK(15, 0)
35 
36 #define SB_ECC_ERROR                    BIT(0)
37 #define DB_ECC_ERROR                    BIT(1)
38 
39 #define DRP_TRP_INT_CLEAR               GENMASK(1, 0)
40 #define DRP_TRP_CNT_CLEAR               GENMASK(1, 0)
41 
42 #define SB_ERROR_THRESHOLD              0x1
43 #define SB_ERROR_THRESHOLD_SHIFT        24
44 #define SB_DB_TRP_INTERRUPT_ENABLE      0x3
45 #define TRP0_INTERRUPT_ENABLE           0x1
46 #define DRP0_INTERRUPT_ENABLE           BIT(6)
47 #define SB_DB_DRP_INTERRUPT_ENABLE      0x3
48 
49 #define ECC_POLL_MSEC			5000
50 
51 enum {
52 	LLCC_DRAM_CE = 0,
53 	LLCC_DRAM_UE,
54 	LLCC_TRAM_CE,
55 	LLCC_TRAM_UE,
56 };
57 
58 static const struct llcc_edac_reg_data edac_reg_data[] = {
59 	[LLCC_DRAM_CE] = {
60 		.name = "DRAM Single-bit",
61 		.reg_cnt = DRP_SYN_REG_CNT,
62 		.count_mask = ECC_SB_ERR_COUNT_MASK,
63 		.ways_mask = ECC_SB_ERR_WAYS_MASK,
64 		.count_shift = ECC_SB_ERR_COUNT_SHIFT,
65 	},
66 	[LLCC_DRAM_UE] = {
67 		.name = "DRAM Double-bit",
68 		.reg_cnt = DRP_SYN_REG_CNT,
69 		.count_mask = ECC_DB_ERR_COUNT_MASK,
70 		.ways_mask = ECC_DB_ERR_WAYS_MASK,
71 		.ways_shift = ECC_DB_ERR_WAYS_SHIFT,
72 	},
73 	[LLCC_TRAM_CE] = {
74 		.name = "TRAM Single-bit",
75 		.reg_cnt = TRP_SYN_REG_CNT,
76 		.count_mask = ECC_SB_ERR_COUNT_MASK,
77 		.ways_mask = ECC_SB_ERR_WAYS_MASK,
78 		.count_shift = ECC_SB_ERR_COUNT_SHIFT,
79 	},
80 	[LLCC_TRAM_UE] = {
81 		.name = "TRAM Double-bit",
82 		.reg_cnt = TRP_SYN_REG_CNT,
83 		.count_mask = ECC_DB_ERR_COUNT_MASK,
84 		.ways_mask = ECC_DB_ERR_WAYS_MASK,
85 		.ways_shift = ECC_DB_ERR_WAYS_SHIFT,
86 	},
87 };
88 
89 static int qcom_llcc_core_setup(struct llcc_drv_data *drv, struct regmap *llcc_bcast_regmap)
90 {
91 	u32 sb_err_threshold;
92 	int ret;
93 
94 	/*
95 	 * Configure interrupt enable registers such that Tag, Data RAM related
96 	 * interrupts are propagated to interrupt controller for servicing
97 	 */
98 	ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_2_enable,
99 				 TRP0_INTERRUPT_ENABLE,
100 				 TRP0_INTERRUPT_ENABLE);
101 	if (ret)
102 		return ret;
103 
104 	ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->trp_interrupt_0_enable,
105 				 SB_DB_TRP_INTERRUPT_ENABLE,
106 				 SB_DB_TRP_INTERRUPT_ENABLE);
107 	if (ret)
108 		return ret;
109 
110 	sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT);
111 	ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_ecc_error_cfg,
112 			   sb_err_threshold);
113 	if (ret)
114 		return ret;
115 
116 	ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_2_enable,
117 				 DRP0_INTERRUPT_ENABLE,
118 				 DRP0_INTERRUPT_ENABLE);
119 	if (ret)
120 		return ret;
121 
122 	ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_interrupt_enable,
123 			   SB_DB_DRP_INTERRUPT_ENABLE);
124 	return ret;
125 }
126 
127 /* Clear the error interrupt and counter registers */
128 static int
129 qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv)
130 {
131 	int ret;
132 
133 	switch (err_type) {
134 	case LLCC_DRAM_CE:
135 	case LLCC_DRAM_UE:
136 		ret = regmap_write(drv->bcast_regmap,
137 				   drv->edac_reg_offset->drp_interrupt_clear,
138 				   DRP_TRP_INT_CLEAR);
139 		if (ret)
140 			return ret;
141 
142 		ret = regmap_write(drv->bcast_regmap,
143 				   drv->edac_reg_offset->drp_ecc_error_cntr_clear,
144 				   DRP_TRP_CNT_CLEAR);
145 		if (ret)
146 			return ret;
147 		break;
148 	case LLCC_TRAM_CE:
149 	case LLCC_TRAM_UE:
150 		ret = regmap_write(drv->bcast_regmap,
151 				   drv->edac_reg_offset->trp_interrupt_0_clear,
152 				   DRP_TRP_INT_CLEAR);
153 		if (ret)
154 			return ret;
155 
156 		ret = regmap_write(drv->bcast_regmap,
157 				   drv->edac_reg_offset->trp_ecc_error_cntr_clear,
158 				   DRP_TRP_CNT_CLEAR);
159 		if (ret)
160 			return ret;
161 		break;
162 	default:
163 		ret = -EINVAL;
164 		edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n",
165 			    err_type);
166 	}
167 	return ret;
168 }
169 
170 struct qcom_llcc_syn_regs {
171 	u32 synd_reg;
172 	u32 count_status_reg;
173 	u32 ways_status_reg;
174 };
175 
176 static void get_reg_offsets(struct llcc_drv_data *drv, int err_type,
177 			    struct qcom_llcc_syn_regs *syn_regs)
178 {
179 	const struct llcc_edac_reg_offset *edac_reg_offset = drv->edac_reg_offset;
180 
181 	switch (err_type) {
182 	case LLCC_DRAM_CE:
183 		syn_regs->synd_reg = edac_reg_offset->drp_ecc_sb_err_syn0;
184 		syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1;
185 		syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0;
186 		break;
187 	case LLCC_DRAM_UE:
188 		syn_regs->synd_reg = edac_reg_offset->drp_ecc_db_err_syn0;
189 		syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1;
190 		syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0;
191 		break;
192 	case LLCC_TRAM_CE:
193 		syn_regs->synd_reg = edac_reg_offset->trp_ecc_sb_err_syn0;
194 		syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1;
195 		syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0;
196 		break;
197 	case LLCC_TRAM_UE:
198 		syn_regs->synd_reg = edac_reg_offset->trp_ecc_db_err_syn0;
199 		syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1;
200 		syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0;
201 		break;
202 	}
203 }
204 
205 /* Dump Syndrome registers data for Tag RAM, Data RAM bit errors*/
206 static int
207 dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
208 {
209 	struct llcc_edac_reg_data reg_data = edac_reg_data[err_type];
210 	struct qcom_llcc_syn_regs regs = { };
211 	int err_cnt, err_ways, ret, i;
212 	u32 synd_reg, synd_val;
213 
214 	get_reg_offsets(drv, err_type, &regs);
215 
216 	for (i = 0; i < reg_data.reg_cnt; i++) {
217 		synd_reg = regs.synd_reg + (i * 4);
218 		ret = regmap_read(drv->regmaps[bank], synd_reg,
219 				  &synd_val);
220 		if (ret)
221 			goto clear;
222 
223 		edac_printk(KERN_CRIT, EDAC_LLCC, "%s: ECC_SYN%d: 0x%8x\n",
224 			    reg_data.name, i, synd_val);
225 	}
226 
227 	ret = regmap_read(drv->regmaps[bank], regs.count_status_reg,
228 			  &err_cnt);
229 	if (ret)
230 		goto clear;
231 
232 	err_cnt &= reg_data.count_mask;
233 	err_cnt >>= reg_data.count_shift;
234 	edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n",
235 		    reg_data.name, err_cnt);
236 
237 	ret = regmap_read(drv->regmaps[bank], regs.ways_status_reg,
238 			  &err_ways);
239 	if (ret)
240 		goto clear;
241 
242 	err_ways &= reg_data.ways_mask;
243 	err_ways >>= reg_data.ways_shift;
244 
245 	edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error ways: 0x%4x\n",
246 		    reg_data.name, err_ways);
247 
248 clear:
249 	return qcom_llcc_clear_error_status(err_type, drv);
250 }
251 
252 static int
253 dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank)
254 {
255 	struct llcc_drv_data *drv = edev_ctl->dev->platform_data;
256 	int ret;
257 
258 	ret = dump_syn_reg_values(drv, bank, err_type);
259 	if (ret)
260 		return ret;
261 
262 	switch (err_type) {
263 	case LLCC_DRAM_CE:
264 		edac_device_handle_ce(edev_ctl, 0, bank,
265 				      "LLCC Data RAM correctable Error");
266 		break;
267 	case LLCC_DRAM_UE:
268 		edac_device_handle_ue(edev_ctl, 0, bank,
269 				      "LLCC Data RAM uncorrectable Error");
270 		break;
271 	case LLCC_TRAM_CE:
272 		edac_device_handle_ce(edev_ctl, 0, bank,
273 				      "LLCC Tag RAM correctable Error");
274 		break;
275 	case LLCC_TRAM_UE:
276 		edac_device_handle_ue(edev_ctl, 0, bank,
277 				      "LLCC Tag RAM uncorrectable Error");
278 		break;
279 	default:
280 		ret = -EINVAL;
281 		edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n",
282 			    err_type);
283 	}
284 
285 	return ret;
286 }
287 
288 static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl)
289 {
290 	struct edac_device_ctl_info *edac_dev_ctl = edev_ctl;
291 	struct llcc_drv_data *drv = edac_dev_ctl->dev->platform_data;
292 	irqreturn_t irq_rc = IRQ_NONE;
293 	u32 drp_error, trp_error, i;
294 	int ret;
295 
296 	/* Iterate over the banks and look for Tag RAM or Data RAM errors */
297 	for (i = 0; i < drv->num_banks; i++) {
298 		ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->drp_interrupt_status,
299 				  &drp_error);
300 
301 		if (!ret && (drp_error & SB_ECC_ERROR)) {
302 			edac_printk(KERN_CRIT, EDAC_LLCC,
303 				    "Single Bit Error detected in Data RAM\n");
304 			ret = dump_syn_reg(edev_ctl, LLCC_DRAM_CE, i);
305 		} else if (!ret && (drp_error & DB_ECC_ERROR)) {
306 			edac_printk(KERN_CRIT, EDAC_LLCC,
307 				    "Double Bit Error detected in Data RAM\n");
308 			ret = dump_syn_reg(edev_ctl, LLCC_DRAM_UE, i);
309 		}
310 		if (!ret)
311 			irq_rc = IRQ_HANDLED;
312 
313 		ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->trp_interrupt_0_status,
314 				  &trp_error);
315 
316 		if (!ret && (trp_error & SB_ECC_ERROR)) {
317 			edac_printk(KERN_CRIT, EDAC_LLCC,
318 				    "Single Bit Error detected in Tag RAM\n");
319 			ret = dump_syn_reg(edev_ctl, LLCC_TRAM_CE, i);
320 		} else if (!ret && (trp_error & DB_ECC_ERROR)) {
321 			edac_printk(KERN_CRIT, EDAC_LLCC,
322 				    "Double Bit Error detected in Tag RAM\n");
323 			ret = dump_syn_reg(edev_ctl, LLCC_TRAM_UE, i);
324 		}
325 		if (!ret)
326 			irq_rc = IRQ_HANDLED;
327 	}
328 
329 	return irq_rc;
330 }
331 
332 static void llcc_ecc_check(struct edac_device_ctl_info *edev_ctl)
333 {
334 	llcc_ecc_irq_handler(0, edev_ctl);
335 }
336 
337 static int qcom_llcc_edac_probe(struct platform_device *pdev)
338 {
339 	struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
340 	struct edac_device_ctl_info *edev_ctl;
341 	struct device *dev = &pdev->dev;
342 	int ecc_irq;
343 	int rc;
344 
345 	rc = qcom_llcc_core_setup(llcc_driv_data, llcc_driv_data->bcast_regmap);
346 	if (rc)
347 		return rc;
348 
349 	/* Allocate edac control info */
350 	edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank",
351 					      llcc_driv_data->num_banks, 1,
352 					      NULL, 0,
353 					      edac_device_alloc_index());
354 
355 	if (!edev_ctl)
356 		return -ENOMEM;
357 
358 	edev_ctl->dev = dev;
359 	edev_ctl->mod_name = dev_name(dev);
360 	edev_ctl->dev_name = dev_name(dev);
361 	edev_ctl->ctl_name = "llcc";
362 	edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
363 
364 	/* Check if LLCC driver has passed ECC IRQ */
365 	ecc_irq = llcc_driv_data->ecc_irq;
366 	if (ecc_irq > 0) {
367 		/* Use interrupt mode if IRQ is available */
368 		rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
369 			      IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);
370 		if (!rc) {
371 			edac_op_state = EDAC_OPSTATE_INT;
372 			goto irq_done;
373 		}
374 	}
375 
376 	/* Fall back to polling mode otherwise */
377 	edev_ctl->poll_msec = ECC_POLL_MSEC;
378 	edev_ctl->edac_check = llcc_ecc_check;
379 	edac_op_state = EDAC_OPSTATE_POLL;
380 
381 irq_done:
382 	rc = edac_device_add_device(edev_ctl);
383 	if (rc) {
384 		edac_device_free_ctl_info(edev_ctl);
385 		return rc;
386 	}
387 
388 	platform_set_drvdata(pdev, edev_ctl);
389 
390 	return rc;
391 }
392 
393 static int qcom_llcc_edac_remove(struct platform_device *pdev)
394 {
395 	struct edac_device_ctl_info *edev_ctl = dev_get_drvdata(&pdev->dev);
396 
397 	edac_device_del_device(edev_ctl->dev);
398 	edac_device_free_ctl_info(edev_ctl);
399 
400 	return 0;
401 }
402 
403 static const struct platform_device_id qcom_llcc_edac_id_table[] = {
404 	{ .name = "qcom_llcc_edac" },
405 	{}
406 };
407 MODULE_DEVICE_TABLE(platform, qcom_llcc_edac_id_table);
408 
409 static struct platform_driver qcom_llcc_edac_driver = {
410 	.probe = qcom_llcc_edac_probe,
411 	.remove = qcom_llcc_edac_remove,
412 	.driver = {
413 		.name = "qcom_llcc_edac",
414 	},
415 	.id_table = qcom_llcc_edac_id_table,
416 };
417 module_platform_driver(qcom_llcc_edac_driver);
418 
419 MODULE_DESCRIPTION("QCOM EDAC driver");
420 MODULE_LICENSE("GPL v2");
421