xref: /openbmc/linux/drivers/edac/qcom_edac.c (revision 7f8256ae)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2018, The Linux Foundation. All rights reserved.
4  */
5 
6 #include <linux/edac.h>
7 #include <linux/interrupt.h>
8 #include <linux/kernel.h>
9 #include <linux/of.h>
10 #include <linux/platform_device.h>
11 #include <linux/regmap.h>
12 #include <linux/soc/qcom/llcc-qcom.h>
13 
14 #include "edac_mc.h"
15 #include "edac_device.h"
16 
/* Prefix passed to edac_printk() for every message this driver logs */
#define EDAC_LLCC                       "qcom_llcc"

/* Stored in edac_device_ctl_info::panic_on_ue during probe */
#define LLCC_ERP_PANIC_ON_UE            1

/*
 * Number of syndrome registers dumped per error for the Tag RAM (TRP) and
 * the Data RAM (DRP); they are read at consecutive 4-byte offsets.
 */
#define TRP_SYN_REG_CNT                 6
#define DRP_SYN_REG_CNT                 8

#define LLCC_COMMON_STATUS0             0x0003000c
#define LLCC_LB_CNT_MASK                GENMASK(31, 28)
#define LLCC_LB_CNT_SHIFT               28

/* Offsets of the first single-bit and double-bit syndrome registers */
#define TRP_ECC_SB_ERR_SYN0             0x0002304c
#define TRP_ECC_DB_ERR_SYN0             0x00020370
#define DRP_ECC_SB_ERR_SYN0             0x0004204c
#define DRP_ECC_DB_ERR_SYN0             0x00042070

/* Error status register offsets (STATUS1: error count, STATUS0: ways) */
#define TRP_ECC_ERROR_STATUS1           0x00020348
#define TRP_ECC_ERROR_STATUS0           0x00020344
#define DRP_ECC_ERROR_STATUS1           0x00042048
#define DRP_ECC_ERROR_STATUS0           0x00042044

/* TRP, DRP interrupt status, interrupt clear and counter clear offsets */
#define DRP_INTERRUPT_STATUS            0x00041000
#define TRP_INTERRUPT_0_STATUS          0x00020480
#define DRP_INTERRUPT_CLEAR             0x00041008
#define DRP_ECC_ERROR_CNTR_CLEAR        0x00040004
#define TRP_INTERRUPT_0_CLEAR           0x00020484
#define TRP_ECC_ERROR_CNTR_CLEAR        0x00020440

/*
 * Field masks for the error status registers. Each *_SHIFT is the bit
 * position of the lowest bit of the mask it belongs to.
 */
#define ECC_DB_ERR_COUNT_MASK           GENMASK(4, 0)
#define ECC_DB_ERR_WAYS_MASK            GENMASK(31, 16)
#define ECC_DB_ERR_WAYS_SHIFT           16

#define ECC_SB_ERR_COUNT_MASK           GENMASK(23, 16)
#define ECC_SB_ERR_COUNT_SHIFT          16
#define ECC_SB_ERR_WAYS_MASK            GENMASK(15, 0)

/* Bits tested in the DRP/TRP interrupt status registers */
#define SB_ECC_ERROR                    BIT(0)
#define DB_ECC_ERROR                    BIT(1)

/* Values written to the interrupt clear and error counter clear registers */
#define DRP_TRP_INT_CLEAR               GENMASK(1, 0)
#define DRP_TRP_CNT_CLEAR               GENMASK(1, 0)

/* Config register offsets */
#define DRP_ECC_ERROR_CFG               0x00040000

/* Common, Tag RAM and Data RAM interrupt enable register offsets */
#define CMN_INTERRUPT_0_ENABLE          0x0003001c
#define CMN_INTERRUPT_2_ENABLE          0x0003003c
#define TRP_INTERRUPT_0_ENABLE          0x00020488
#define DRP_INTERRUPT_ENABLE            0x0004100c

/* Values programmed into the config and interrupt enable registers */
#define SB_ERROR_THRESHOLD              0x1
#define SB_ERROR_THRESHOLD_SHIFT        24
#define SB_DB_TRP_INTERRUPT_ENABLE      0x3
#define TRP0_INTERRUPT_ENABLE           0x1
#define DRP0_INTERRUPT_ENABLE           BIT(6)
#define SB_DB_DRP_INTERRUPT_ENABLE      0x3

/* Polling period used when no ECC interrupt could be registered */
#define ECC_POLL_MSEC			5000
80 
/*
 * Error categories handled by this driver. Each value is also the index of
 * the matching descriptor in edac_reg_data[].
 */
enum {
	LLCC_DRAM_CE = 0,	/* Data RAM, single-bit (correctable) */
	LLCC_DRAM_UE = 1,	/* Data RAM, double-bit (uncorrectable) */
	LLCC_TRAM_CE = 2,	/* Tag RAM, single-bit (correctable) */
	LLCC_TRAM_UE = 3,	/* Tag RAM, double-bit (uncorrectable) */
};
87 
88 static const struct llcc_edac_reg_data edac_reg_data[] = {
89 	[LLCC_DRAM_CE] = {
90 		.name = "DRAM Single-bit",
91 		.synd_reg = DRP_ECC_SB_ERR_SYN0,
92 		.count_status_reg = DRP_ECC_ERROR_STATUS1,
93 		.ways_status_reg = DRP_ECC_ERROR_STATUS0,
94 		.reg_cnt = DRP_SYN_REG_CNT,
95 		.count_mask = ECC_SB_ERR_COUNT_MASK,
96 		.ways_mask = ECC_SB_ERR_WAYS_MASK,
97 		.count_shift = ECC_SB_ERR_COUNT_SHIFT,
98 	},
99 	[LLCC_DRAM_UE] = {
100 		.name = "DRAM Double-bit",
101 		.synd_reg = DRP_ECC_DB_ERR_SYN0,
102 		.count_status_reg = DRP_ECC_ERROR_STATUS1,
103 		.ways_status_reg = DRP_ECC_ERROR_STATUS0,
104 		.reg_cnt = DRP_SYN_REG_CNT,
105 		.count_mask = ECC_DB_ERR_COUNT_MASK,
106 		.ways_mask = ECC_DB_ERR_WAYS_MASK,
107 		.ways_shift = ECC_DB_ERR_WAYS_SHIFT,
108 	},
109 	[LLCC_TRAM_CE] = {
110 		.name = "TRAM Single-bit",
111 		.synd_reg = TRP_ECC_SB_ERR_SYN0,
112 		.count_status_reg = TRP_ECC_ERROR_STATUS1,
113 		.ways_status_reg = TRP_ECC_ERROR_STATUS0,
114 		.reg_cnt = TRP_SYN_REG_CNT,
115 		.count_mask = ECC_SB_ERR_COUNT_MASK,
116 		.ways_mask = ECC_SB_ERR_WAYS_MASK,
117 		.count_shift = ECC_SB_ERR_COUNT_SHIFT,
118 	},
119 	[LLCC_TRAM_UE] = {
120 		.name = "TRAM Double-bit",
121 		.synd_reg = TRP_ECC_DB_ERR_SYN0,
122 		.count_status_reg = TRP_ECC_ERROR_STATUS1,
123 		.ways_status_reg = TRP_ECC_ERROR_STATUS0,
124 		.reg_cnt = TRP_SYN_REG_CNT,
125 		.count_mask = ECC_DB_ERR_COUNT_MASK,
126 		.ways_mask = ECC_DB_ERR_WAYS_MASK,
127 		.ways_shift = ECC_DB_ERR_WAYS_SHIFT,
128 	},
129 };
130 
131 static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap)
132 {
133 	u32 sb_err_threshold;
134 	int ret;
135 
136 	/*
137 	 * Configure interrupt enable registers such that Tag, Data RAM related
138 	 * interrupts are propagated to interrupt controller for servicing
139 	 */
140 	ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
141 				 TRP0_INTERRUPT_ENABLE,
142 				 TRP0_INTERRUPT_ENABLE);
143 	if (ret)
144 		return ret;
145 
146 	ret = regmap_update_bits(llcc_bcast_regmap, TRP_INTERRUPT_0_ENABLE,
147 				 SB_DB_TRP_INTERRUPT_ENABLE,
148 				 SB_DB_TRP_INTERRUPT_ENABLE);
149 	if (ret)
150 		return ret;
151 
152 	sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT);
153 	ret = regmap_write(llcc_bcast_regmap, DRP_ECC_ERROR_CFG,
154 			   sb_err_threshold);
155 	if (ret)
156 		return ret;
157 
158 	ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
159 				 DRP0_INTERRUPT_ENABLE,
160 				 DRP0_INTERRUPT_ENABLE);
161 	if (ret)
162 		return ret;
163 
164 	ret = regmap_write(llcc_bcast_regmap, DRP_INTERRUPT_ENABLE,
165 			   SB_DB_DRP_INTERRUPT_ENABLE);
166 	return ret;
167 }
168 
169 /* Clear the error interrupt and counter registers */
170 static int
171 qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv)
172 {
173 	int ret = 0;
174 
175 	switch (err_type) {
176 	case LLCC_DRAM_CE:
177 	case LLCC_DRAM_UE:
178 		ret = regmap_write(drv->bcast_regmap, DRP_INTERRUPT_CLEAR,
179 				   DRP_TRP_INT_CLEAR);
180 		if (ret)
181 			return ret;
182 
183 		ret = regmap_write(drv->bcast_regmap, DRP_ECC_ERROR_CNTR_CLEAR,
184 				   DRP_TRP_CNT_CLEAR);
185 		if (ret)
186 			return ret;
187 		break;
188 	case LLCC_TRAM_CE:
189 	case LLCC_TRAM_UE:
190 		ret = regmap_write(drv->bcast_regmap, TRP_INTERRUPT_0_CLEAR,
191 				   DRP_TRP_INT_CLEAR);
192 		if (ret)
193 			return ret;
194 
195 		ret = regmap_write(drv->bcast_regmap, TRP_ECC_ERROR_CNTR_CLEAR,
196 				   DRP_TRP_CNT_CLEAR);
197 		if (ret)
198 			return ret;
199 		break;
200 	default:
201 		ret = -EINVAL;
202 		edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n",
203 			    err_type);
204 	}
205 	return ret;
206 }
207 
208 /* Dump Syndrome registers data for Tag RAM, Data RAM bit errors*/
209 static int
210 dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
211 {
212 	struct llcc_edac_reg_data reg_data = edac_reg_data[err_type];
213 	int err_cnt, err_ways, ret, i;
214 	u32 synd_reg, synd_val;
215 
216 	for (i = 0; i < reg_data.reg_cnt; i++) {
217 		synd_reg = reg_data.synd_reg + (i * 4);
218 		ret = regmap_read(drv->regmaps[bank], synd_reg,
219 				  &synd_val);
220 		if (ret)
221 			goto clear;
222 
223 		edac_printk(KERN_CRIT, EDAC_LLCC, "%s: ECC_SYN%d: 0x%8x\n",
224 			    reg_data.name, i, synd_val);
225 	}
226 
227 	ret = regmap_read(drv->regmaps[bank], reg_data.count_status_reg,
228 			  &err_cnt);
229 	if (ret)
230 		goto clear;
231 
232 	err_cnt &= reg_data.count_mask;
233 	err_cnt >>= reg_data.count_shift;
234 	edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n",
235 		    reg_data.name, err_cnt);
236 
237 	ret = regmap_read(drv->regmaps[bank], reg_data.ways_status_reg,
238 			  &err_ways);
239 	if (ret)
240 		goto clear;
241 
242 	err_ways &= reg_data.ways_mask;
243 	err_ways >>= reg_data.ways_shift;
244 
245 	edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error ways: 0x%4x\n",
246 		    reg_data.name, err_ways);
247 
248 clear:
249 	return qcom_llcc_clear_error_status(err_type, drv);
250 }
251 
252 static int
253 dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank)
254 {
255 	struct llcc_drv_data *drv = edev_ctl->dev->platform_data;
256 	int ret;
257 
258 	ret = dump_syn_reg_values(drv, bank, err_type);
259 	if (ret)
260 		return ret;
261 
262 	switch (err_type) {
263 	case LLCC_DRAM_CE:
264 		edac_device_handle_ce(edev_ctl, 0, bank,
265 				      "LLCC Data RAM correctable Error");
266 		break;
267 	case LLCC_DRAM_UE:
268 		edac_device_handle_ue(edev_ctl, 0, bank,
269 				      "LLCC Data RAM uncorrectable Error");
270 		break;
271 	case LLCC_TRAM_CE:
272 		edac_device_handle_ce(edev_ctl, 0, bank,
273 				      "LLCC Tag RAM correctable Error");
274 		break;
275 	case LLCC_TRAM_UE:
276 		edac_device_handle_ue(edev_ctl, 0, bank,
277 				      "LLCC Tag RAM uncorrectable Error");
278 		break;
279 	default:
280 		ret = -EINVAL;
281 		edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n",
282 			    err_type);
283 	}
284 
285 	return ret;
286 }
287 
288 static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl)
289 {
290 	struct edac_device_ctl_info *edac_dev_ctl = edev_ctl;
291 	struct llcc_drv_data *drv = edac_dev_ctl->dev->platform_data;
292 	irqreturn_t irq_rc = IRQ_NONE;
293 	u32 drp_error, trp_error, i;
294 	int ret;
295 
296 	/* Iterate over the banks and look for Tag RAM or Data RAM errors */
297 	for (i = 0; i < drv->num_banks; i++) {
298 		ret = regmap_read(drv->regmaps[i], DRP_INTERRUPT_STATUS,
299 				  &drp_error);
300 
301 		if (!ret && (drp_error & SB_ECC_ERROR)) {
302 			edac_printk(KERN_CRIT, EDAC_LLCC,
303 				    "Single Bit Error detected in Data RAM\n");
304 			ret = dump_syn_reg(edev_ctl, LLCC_DRAM_CE, i);
305 		} else if (!ret && (drp_error & DB_ECC_ERROR)) {
306 			edac_printk(KERN_CRIT, EDAC_LLCC,
307 				    "Double Bit Error detected in Data RAM\n");
308 			ret = dump_syn_reg(edev_ctl, LLCC_DRAM_UE, i);
309 		}
310 		if (!ret)
311 			irq_rc = IRQ_HANDLED;
312 
313 		ret = regmap_read(drv->regmaps[i], TRP_INTERRUPT_0_STATUS,
314 				  &trp_error);
315 
316 		if (!ret && (trp_error & SB_ECC_ERROR)) {
317 			edac_printk(KERN_CRIT, EDAC_LLCC,
318 				    "Single Bit Error detected in Tag RAM\n");
319 			ret = dump_syn_reg(edev_ctl, LLCC_TRAM_CE, i);
320 		} else if (!ret && (trp_error & DB_ECC_ERROR)) {
321 			edac_printk(KERN_CRIT, EDAC_LLCC,
322 				    "Double Bit Error detected in Tag RAM\n");
323 			ret = dump_syn_reg(edev_ctl, LLCC_TRAM_UE, i);
324 		}
325 		if (!ret)
326 			irq_rc = IRQ_HANDLED;
327 	}
328 
329 	return irq_rc;
330 }
331 
/*
 * Polling-mode check callback: run the interrupt handler by hand with a
 * dummy IRQ number. Its return value is of no use here and is dropped.
 */
static void llcc_ecc_check(struct edac_device_ctl_info *edev_ctl)
{
	(void)llcc_ecc_irq_handler(0, edev_ctl);
}
336 
337 static int qcom_llcc_edac_probe(struct platform_device *pdev)
338 {
339 	struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
340 	struct edac_device_ctl_info *edev_ctl;
341 	struct device *dev = &pdev->dev;
342 	int ecc_irq;
343 	int rc;
344 
345 	rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap);
346 	if (rc)
347 		return rc;
348 
349 	/* Allocate edac control info */
350 	edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank",
351 					      llcc_driv_data->num_banks, 1,
352 					      NULL, 0,
353 					      edac_device_alloc_index());
354 
355 	if (!edev_ctl)
356 		return -ENOMEM;
357 
358 	edev_ctl->dev = dev;
359 	edev_ctl->mod_name = dev_name(dev);
360 	edev_ctl->dev_name = dev_name(dev);
361 	edev_ctl->ctl_name = "llcc";
362 	edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
363 
364 	/* Check if LLCC driver has passed ECC IRQ */
365 	ecc_irq = llcc_driv_data->ecc_irq;
366 	if (ecc_irq > 0) {
367 		/* Use interrupt mode if IRQ is available */
368 		rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
369 			      IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);
370 		if (!rc) {
371 			edac_op_state = EDAC_OPSTATE_INT;
372 			goto irq_done;
373 		}
374 	}
375 
376 	/* Fall back to polling mode otherwise */
377 	edev_ctl->poll_msec = ECC_POLL_MSEC;
378 	edev_ctl->edac_check = llcc_ecc_check;
379 	edac_op_state = EDAC_OPSTATE_POLL;
380 
381 irq_done:
382 	rc = edac_device_add_device(edev_ctl);
383 	if (rc) {
384 		edac_device_free_ctl_info(edev_ctl);
385 		return rc;
386 	}
387 
388 	platform_set_drvdata(pdev, edev_ctl);
389 
390 	return rc;
391 }
392 
393 static int qcom_llcc_edac_remove(struct platform_device *pdev)
394 {
395 	struct edac_device_ctl_info *edev_ctl = dev_get_drvdata(&pdev->dev);
396 
397 	edac_device_del_device(edev_ctl->dev);
398 	edac_device_free_ctl_info(edev_ctl);
399 
400 	return 0;
401 }
402 
403 static const struct platform_device_id qcom_llcc_edac_id_table[] = {
404 	{ .name = "qcom_llcc_edac" },
405 	{}
406 };
407 MODULE_DEVICE_TABLE(platform, qcom_llcc_edac_id_table);
408 
409 static struct platform_driver qcom_llcc_edac_driver = {
410 	.probe = qcom_llcc_edac_probe,
411 	.remove = qcom_llcc_edac_remove,
412 	.driver = {
413 		.name = "qcom_llcc_edac",
414 	},
415 	.id_table = qcom_llcc_edac_id_table,
416 };
417 module_platform_driver(qcom_llcc_edac_driver);
418 
419 MODULE_DESCRIPTION("QCOM EDAC driver");
420 MODULE_LICENSE("GPL v2");
421