xref: /openbmc/linux/drivers/edac/qcom_edac.c (revision a01822e94ee53e8ebc9632fe2764048b81921254)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2018, The Linux Foundation. All rights reserved.
4  */
5 
6 #include <linux/edac.h>
7 #include <linux/interrupt.h>
8 #include <linux/kernel.h>
9 #include <linux/of.h>
10 #include <linux/platform_device.h>
11 #include <linux/regmap.h>
12 #include <linux/soc/qcom/llcc-qcom.h>
13 
14 #include "edac_mc.h"
15 #include "edac_device.h"
16 
/* Module tag passed to every edac_printk() call in this driver */
#define EDAC_LLCC			"qcom_llcc"

/* Value copied into edac_device_ctl_info::panic_on_ue at probe time */
#define LLCC_ERP_PANIC_ON_UE		1

/* Number of syndrome registers dumped per error (Tag RAM / Data RAM) */
#define TRP_SYN_REG_CNT			6
#define DRP_SYN_REG_CNT			8

/* Common status register and its LB_CNT field (bits 31:28) */
#define LLCC_COMMON_STATUS0		0x3000c
#define LLCC_LB_CNT_SHIFT		28
#define LLCC_LB_CNT_MASK		GENMASK(31, 28)

/*
 * Tag RAM (TRP) registers: first single-bit / double-bit syndrome register,
 * ECC error status, interrupt status and the two "clear" registers.
 */
#define TRP_ECC_SB_ERR_SYN0		0x2304c
#define TRP_ECC_DB_ERR_SYN0		0x20370
#define TRP_ECC_ERROR_STATUS0		0x20344
#define TRP_ECC_ERROR_STATUS1		0x20348
#define TRP_INTERRUPT_0_STATUS		0x20480
#define TRP_INTERRUPT_0_CLEAR		0x20484
#define TRP_ECC_ERROR_CNTR_CLEAR	0x20440

/*
 * Data RAM (DRP) registers: first single-bit / double-bit syndrome register,
 * ECC error status, interrupt status and the two "clear" registers.
 */
#define DRP_ECC_SB_ERR_SYN0		0x4204c
#define DRP_ECC_DB_ERR_SYN0		0x42070
#define DRP_ECC_ERROR_STATUS0		0x42044
#define DRP_ECC_ERROR_STATUS1		0x42048
#define DRP_INTERRUPT_STATUS		0x41000
#define DRP_INTERRUPT_CLEAR		0x41008
#define DRP_ECC_ERROR_CNTR_CLEAR	0x40004
47 
/*
 * Mask and shift macros for the ECC error status registers.
 *
 * The *_SHIFT values are bit positions (shift counts), so they are written
 * as plain numbers. They must not be expressed with BIT(), which builds a
 * single-bit mask and would only yield the right count by coincidence.
 */
#define ECC_DB_ERR_COUNT_MASK           GENMASK(4, 0)
#define ECC_DB_ERR_WAYS_MASK            GENMASK(31, 16)
#define ECC_DB_ERR_WAYS_SHIFT           16

#define ECC_SB_ERR_COUNT_MASK           GENMASK(23, 16)
#define ECC_SB_ERR_COUNT_SHIFT          16
#define ECC_SB_ERR_WAYS_MASK            GENMASK(15, 0)

/* Error bits tested in the DRP/TRP interrupt status registers */
#define SB_ECC_ERROR                    BIT(0)
#define DB_ECC_ERROR                    BIT(1)

/* Values written to the interrupt-clear and error-counter-clear registers */
#define DRP_TRP_INT_CLEAR               GENMASK(1, 0)
#define DRP_TRP_CNT_CLEAR               GENMASK(1, 0)
62 
/* Data RAM ECC configuration register; holds the single-bit error threshold */
#define DRP_ECC_ERROR_CFG		0x40000
#define SB_ERROR_THRESHOLD		1
#define SB_ERROR_THRESHOLD_SHIFT	24

/* Common interrupt enable registers and the Tag/Data RAM bits set in them */
#define CMN_INTERRUPT_0_ENABLE		0x3001c
#define CMN_INTERRUPT_2_ENABLE		0x3003c
#define TRP0_INTERRUPT_ENABLE		0x1
#define DRP0_INTERRUPT_ENABLE		BIT(6)

/* Tag RAM interrupt enable register: single-bit + double-bit error sources */
#define TRP_INTERRUPT_0_ENABLE		0x20488
#define SB_DB_TRP_INTERRUPT_ENABLE	0x3

/* Data RAM interrupt enable register: single-bit + double-bit error sources */
#define DRP_INTERRUPT_ENABLE		0x4100c
#define SB_DB_DRP_INTERRUPT_ENABLE	0x3
78 
/*
 * Error types handled by this driver. The values double as indices into
 * edac_reg_data[], so they must stay dense and start at zero.
 */
enum {
	LLCC_DRAM_CE = 0,	/* Data RAM, single-bit (correctable) */
	LLCC_DRAM_UE = 1,	/* Data RAM, double-bit (uncorrectable) */
	LLCC_TRAM_CE = 2,	/* Tag RAM, single-bit (correctable) */
	LLCC_TRAM_UE = 3,	/* Tag RAM, double-bit (uncorrectable) */
};
85 
86 static const struct llcc_edac_reg_data edac_reg_data[] = {
87 	[LLCC_DRAM_CE] = {
88 		.name = "DRAM Single-bit",
89 		.synd_reg = DRP_ECC_SB_ERR_SYN0,
90 		.count_status_reg = DRP_ECC_ERROR_STATUS1,
91 		.ways_status_reg = DRP_ECC_ERROR_STATUS0,
92 		.reg_cnt = DRP_SYN_REG_CNT,
93 		.count_mask = ECC_SB_ERR_COUNT_MASK,
94 		.ways_mask = ECC_SB_ERR_WAYS_MASK,
95 		.count_shift = ECC_SB_ERR_COUNT_SHIFT,
96 	},
97 	[LLCC_DRAM_UE] = {
98 		.name = "DRAM Double-bit",
99 		.synd_reg = DRP_ECC_DB_ERR_SYN0,
100 		.count_status_reg = DRP_ECC_ERROR_STATUS1,
101 		.ways_status_reg = DRP_ECC_ERROR_STATUS0,
102 		.reg_cnt = DRP_SYN_REG_CNT,
103 		.count_mask = ECC_DB_ERR_COUNT_MASK,
104 		.ways_mask = ECC_DB_ERR_WAYS_MASK,
105 		.ways_shift = ECC_DB_ERR_WAYS_SHIFT,
106 	},
107 	[LLCC_TRAM_CE] = {
108 		.name = "TRAM Single-bit",
109 		.synd_reg = TRP_ECC_SB_ERR_SYN0,
110 		.count_status_reg = TRP_ECC_ERROR_STATUS1,
111 		.ways_status_reg = TRP_ECC_ERROR_STATUS0,
112 		.reg_cnt = TRP_SYN_REG_CNT,
113 		.count_mask = ECC_SB_ERR_COUNT_MASK,
114 		.ways_mask = ECC_SB_ERR_WAYS_MASK,
115 		.count_shift = ECC_SB_ERR_COUNT_SHIFT,
116 	},
117 	[LLCC_TRAM_UE] = {
118 		.name = "TRAM Double-bit",
119 		.synd_reg = TRP_ECC_DB_ERR_SYN0,
120 		.count_status_reg = TRP_ECC_ERROR_STATUS1,
121 		.ways_status_reg = TRP_ECC_ERROR_STATUS0,
122 		.reg_cnt = TRP_SYN_REG_CNT,
123 		.count_mask = ECC_DB_ERR_COUNT_MASK,
124 		.ways_mask = ECC_DB_ERR_WAYS_MASK,
125 		.ways_shift = ECC_DB_ERR_WAYS_SHIFT,
126 	},
127 };
128 
129 static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap)
130 {
131 	u32 sb_err_threshold;
132 	int ret;
133 
134 	/*
135 	 * Configure interrupt enable registers such that Tag, Data RAM related
136 	 * interrupts are propagated to interrupt controller for servicing
137 	 */
138 	ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
139 				 TRP0_INTERRUPT_ENABLE,
140 				 TRP0_INTERRUPT_ENABLE);
141 	if (ret)
142 		return ret;
143 
144 	ret = regmap_update_bits(llcc_bcast_regmap, TRP_INTERRUPT_0_ENABLE,
145 				 SB_DB_TRP_INTERRUPT_ENABLE,
146 				 SB_DB_TRP_INTERRUPT_ENABLE);
147 	if (ret)
148 		return ret;
149 
150 	sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT);
151 	ret = regmap_write(llcc_bcast_regmap, DRP_ECC_ERROR_CFG,
152 			   sb_err_threshold);
153 	if (ret)
154 		return ret;
155 
156 	ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
157 				 DRP0_INTERRUPT_ENABLE,
158 				 DRP0_INTERRUPT_ENABLE);
159 	if (ret)
160 		return ret;
161 
162 	ret = regmap_write(llcc_bcast_regmap, DRP_INTERRUPT_ENABLE,
163 			   SB_DB_DRP_INTERRUPT_ENABLE);
164 	return ret;
165 }
166 
167 /* Clear the error interrupt and counter registers */
168 static int
169 qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv)
170 {
171 	int ret = 0;
172 
173 	switch (err_type) {
174 	case LLCC_DRAM_CE:
175 	case LLCC_DRAM_UE:
176 		ret = regmap_write(drv->bcast_regmap, DRP_INTERRUPT_CLEAR,
177 				   DRP_TRP_INT_CLEAR);
178 		if (ret)
179 			return ret;
180 
181 		ret = regmap_write(drv->bcast_regmap, DRP_ECC_ERROR_CNTR_CLEAR,
182 				   DRP_TRP_CNT_CLEAR);
183 		if (ret)
184 			return ret;
185 		break;
186 	case LLCC_TRAM_CE:
187 	case LLCC_TRAM_UE:
188 		ret = regmap_write(drv->bcast_regmap, TRP_INTERRUPT_0_CLEAR,
189 				   DRP_TRP_INT_CLEAR);
190 		if (ret)
191 			return ret;
192 
193 		ret = regmap_write(drv->bcast_regmap, TRP_ECC_ERROR_CNTR_CLEAR,
194 				   DRP_TRP_CNT_CLEAR);
195 		if (ret)
196 			return ret;
197 		break;
198 	default:
199 		ret = -EINVAL;
200 		edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n",
201 			    err_type);
202 	}
203 	return ret;
204 }
205 
206 /* Dump Syndrome registers data for Tag RAM, Data RAM bit errors*/
207 static int
208 dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
209 {
210 	struct llcc_edac_reg_data reg_data = edac_reg_data[err_type];
211 	int err_cnt, err_ways, ret, i;
212 	u32 synd_reg, synd_val;
213 
214 	for (i = 0; i < reg_data.reg_cnt; i++) {
215 		synd_reg = reg_data.synd_reg + (i * 4);
216 		ret = regmap_read(drv->regmap, drv->offsets[bank] + synd_reg,
217 				  &synd_val);
218 		if (ret)
219 			goto clear;
220 
221 		edac_printk(KERN_CRIT, EDAC_LLCC, "%s: ECC_SYN%d: 0x%8x\n",
222 			    reg_data.name, i, synd_val);
223 	}
224 
225 	ret = regmap_read(drv->regmap,
226 			  drv->offsets[bank] + reg_data.count_status_reg,
227 			  &err_cnt);
228 	if (ret)
229 		goto clear;
230 
231 	err_cnt &= reg_data.count_mask;
232 	err_cnt >>= reg_data.count_shift;
233 	edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n",
234 		    reg_data.name, err_cnt);
235 
236 	ret = regmap_read(drv->regmap,
237 			  drv->offsets[bank] + reg_data.ways_status_reg,
238 			  &err_ways);
239 	if (ret)
240 		goto clear;
241 
242 	err_ways &= reg_data.ways_mask;
243 	err_ways >>= reg_data.ways_shift;
244 
245 	edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error ways: 0x%4x\n",
246 		    reg_data.name, err_ways);
247 
248 clear:
249 	return qcom_llcc_clear_error_status(err_type, drv);
250 }
251 
252 static int
253 dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank)
254 {
255 	struct llcc_drv_data *drv = edev_ctl->pvt_info;
256 	int ret;
257 
258 	ret = dump_syn_reg_values(drv, bank, err_type);
259 	if (ret)
260 		return ret;
261 
262 	switch (err_type) {
263 	case LLCC_DRAM_CE:
264 		edac_device_handle_ce(edev_ctl, 0, bank,
265 				      "LLCC Data RAM correctable Error");
266 		break;
267 	case LLCC_DRAM_UE:
268 		edac_device_handle_ue(edev_ctl, 0, bank,
269 				      "LLCC Data RAM uncorrectable Error");
270 		break;
271 	case LLCC_TRAM_CE:
272 		edac_device_handle_ce(edev_ctl, 0, bank,
273 				      "LLCC Tag RAM correctable Error");
274 		break;
275 	case LLCC_TRAM_UE:
276 		edac_device_handle_ue(edev_ctl, 0, bank,
277 				      "LLCC Tag RAM uncorrectable Error");
278 		break;
279 	default:
280 		ret = -EINVAL;
281 		edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n",
282 			    err_type);
283 	}
284 
285 	return ret;
286 }
287 
288 static irqreturn_t
289 llcc_ecc_irq_handler(int irq, void *edev_ctl)
290 {
291 	struct edac_device_ctl_info *edac_dev_ctl = edev_ctl;
292 	struct llcc_drv_data *drv = edac_dev_ctl->pvt_info;
293 	irqreturn_t irq_rc = IRQ_NONE;
294 	u32 drp_error, trp_error, i;
295 	int ret;
296 
297 	/* Iterate over the banks and look for Tag RAM or Data RAM errors */
298 	for (i = 0; i < drv->num_banks; i++) {
299 		ret = regmap_read(drv->regmap,
300 				  drv->offsets[i] + DRP_INTERRUPT_STATUS,
301 				  &drp_error);
302 
303 		if (!ret && (drp_error & SB_ECC_ERROR)) {
304 			edac_printk(KERN_CRIT, EDAC_LLCC,
305 				    "Single Bit Error detected in Data RAM\n");
306 			ret = dump_syn_reg(edev_ctl, LLCC_DRAM_CE, i);
307 		} else if (!ret && (drp_error & DB_ECC_ERROR)) {
308 			edac_printk(KERN_CRIT, EDAC_LLCC,
309 				    "Double Bit Error detected in Data RAM\n");
310 			ret = dump_syn_reg(edev_ctl, LLCC_DRAM_UE, i);
311 		}
312 		if (!ret)
313 			irq_rc = IRQ_HANDLED;
314 
315 		ret = regmap_read(drv->regmap,
316 				  drv->offsets[i] + TRP_INTERRUPT_0_STATUS,
317 				  &trp_error);
318 
319 		if (!ret && (trp_error & SB_ECC_ERROR)) {
320 			edac_printk(KERN_CRIT, EDAC_LLCC,
321 				    "Single Bit Error detected in Tag RAM\n");
322 			ret = dump_syn_reg(edev_ctl, LLCC_TRAM_CE, i);
323 		} else if (!ret && (trp_error & DB_ECC_ERROR)) {
324 			edac_printk(KERN_CRIT, EDAC_LLCC,
325 				    "Double Bit Error detected in Tag RAM\n");
326 			ret = dump_syn_reg(edev_ctl, LLCC_TRAM_UE, i);
327 		}
328 		if (!ret)
329 			irq_rc = IRQ_HANDLED;
330 	}
331 
332 	return irq_rc;
333 }
334 
335 static int qcom_llcc_edac_probe(struct platform_device *pdev)
336 {
337 	struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
338 	struct edac_device_ctl_info *edev_ctl;
339 	struct device *dev = &pdev->dev;
340 	int ecc_irq;
341 	int rc;
342 
343 	rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap);
344 	if (rc)
345 		return rc;
346 
347 	/* Allocate edac control info */
348 	edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank",
349 					      llcc_driv_data->num_banks, 1,
350 					      NULL, 0,
351 					      edac_device_alloc_index());
352 
353 	if (!edev_ctl)
354 		return -ENOMEM;
355 
356 	edev_ctl->dev = dev;
357 	edev_ctl->mod_name = dev_name(dev);
358 	edev_ctl->dev_name = dev_name(dev);
359 	edev_ctl->ctl_name = "llcc";
360 	edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
361 	edev_ctl->pvt_info = llcc_driv_data;
362 
363 	rc = edac_device_add_device(edev_ctl);
364 	if (rc)
365 		goto out_mem;
366 
367 	platform_set_drvdata(pdev, edev_ctl);
368 
369 	/* Request for ecc irq */
370 	ecc_irq = llcc_driv_data->ecc_irq;
371 	if (ecc_irq < 0) {
372 		rc = -ENODEV;
373 		goto out_dev;
374 	}
375 	rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
376 			      IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);
377 	if (rc)
378 		goto out_dev;
379 
380 	return rc;
381 
382 out_dev:
383 	edac_device_del_device(edev_ctl->dev);
384 out_mem:
385 	edac_device_free_ctl_info(edev_ctl);
386 
387 	return rc;
388 }
389 
390 static int qcom_llcc_edac_remove(struct platform_device *pdev)
391 {
392 	struct edac_device_ctl_info *edev_ctl = dev_get_drvdata(&pdev->dev);
393 
394 	edac_device_del_device(edev_ctl->dev);
395 	edac_device_free_ctl_info(edev_ctl);
396 
397 	return 0;
398 }
399 
400 static struct platform_driver qcom_llcc_edac_driver = {
401 	.probe = qcom_llcc_edac_probe,
402 	.remove = qcom_llcc_edac_remove,
403 	.driver = {
404 		.name = "qcom_llcc_edac",
405 	},
406 };
407 module_platform_driver(qcom_llcc_edac_driver);
408 
409 MODULE_DESCRIPTION("QCOM EDAC driver");
410 MODULE_LICENSE("GPL v2");
411