xref: /openbmc/linux/drivers/edac/zynqmp_edac.c (revision 39f555fb)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Xilinx ZynqMP OCM ECC Driver
4  *
5  * Copyright (C) 2022 Advanced Micro Devices, Inc.
6  */
7 
8 #include <linux/edac.h>
9 #include <linux/interrupt.h>
10 #include <linux/module.h>
11 #include <linux/of.h>
12 #include <linux/of_platform.h>
13 #include <linux/platform_device.h>
14 
15 #include "edac_module.h"
16 
/* Size of the per-device buffer used to format an error report */
#define ZYNQMP_OCM_EDAC_MSG_SIZE	256

/* Name given to the EDAC device, its block and its controller */
#define ZYNQMP_OCM_EDAC_STRING	"zynqmp_ocm"

/* Prefix handed to edac_printk() by this driver */
#define EDAC_DEVICE		"ZynqMP-OCM"

/* Value OR-ed into the fault address read from the CE/UE FFA registers */
#define OCM_BASEVAL		0xFFFC0000

/*
 * Error/Interrupt registers.
 * ISR is read to find the pending error and written to clear it, IEN is
 * written to enable and IDS to disable the CE/UE interrupts.
 */
#define ERR_CTRL_OFST		0x0
#define OCM_ISR_OFST		0x04
#define OCM_IMR_OFST		0x08
#define OCM_IEN_OFST		0x0C
#define OCM_IDS_OFST		0x10

/* CE/UE bits used with the ISR, IEN and IDS registers */
#define OCM_CEINTR_MASK		BIT(6)
#define OCM_UEINTR_MASK		BIT(7)

/* Values written to the ISR to acknowledge a logged CE/UE */
#define ECC_CTRL_CLR_CE_ERR	0x40
#define ECC_CTRL_CLR_UE_ERR	0x80

/* ECC control register and its enable bit */
#define ECC_CTRL_OFST		0x14
#define OCM_ECC_ENABLE_MASK	BIT(0)

/* Correctable error info registers */
#define CE_FFA_OFST		0x1C
#define CE_FFD0_OFST		0x20
#define CE_FFD1_OFST		0x24
#define CE_FFD2_OFST		0x28
#define CE_FFD3_OFST		0x2C
#define CE_FFE_OFST		0x30

/* Uncorrectable error info registers */
#define UE_FFA_OFST		0x34
#define UE_FFD0_OFST		0x38
#define UE_FFD1_OFST		0x3C
#define UE_FFD2_OFST		0x40
#define UE_FFD3_OFST		0x44
#define UE_FFE_OFST		0x48

/* Fault injection data and count registers */
#define OCM_FID0_OFST		0x4C
#define OCM_FID1_OFST		0x50
#define OCM_FID2_OFST		0x54
#define OCM_FID3_OFST		0x58
#define OCM_FIC_OFST		0x74

/* Bits of the fault injection count that are written to OCM_FIC */
#define OCM_FICOUNT_MASK	GENMASK(23, 0)

/*
 * Injected bit positions: 0..31 select a bit in OCM_FID0, 32..63 select
 * a bit in OCM_FID1. An uncorrectable injection takes two positions.
 */
#define UE_MAX_BITPOS_LOWER	31
#define UE_MIN_BITPOS_UPPER	32
#define UE_MAX_BITPOS_UPPER	63
#define OCM_NUM_UE_BITPOS	2
71 
/**
 * struct ecc_error_info - ECC error log information
 * @addr:	Address of the fault: the value read from the CE/UE FFA
 *		register OR-ed with OCM_BASEVAL
 * @fault_lo:	Generated fault data (lower 32-bit), read from the FFD0 register
 * @fault_hi:	Generated fault data (upper 32-bit), read from the FFD1 register
 *
 * One instance is kept for the last correctable and one for the last
 * uncorrectable error; see struct ecc_status.
 */
struct ecc_error_info {
	u32 addr;
	u32 fault_lo;
	u32 fault_hi;
};
83 
/**
 * struct ecc_status - ECC status information to report
 * @ce_cnt:	Correctable error count
 * @ue_cnt:	Uncorrectable error count
 * @ceinfo:	Correctable error log information
 * @ueinfo:	Uncorrectable error log information
 *
 * Scratch state for a single interrupt: get_error_info() fills it in and
 * handle_error() zeroes the whole structure once the errors are reported,
 * so the counts here only cover the interrupt being serviced.
 */
struct ecc_status {
	u32 ce_cnt;
	u32 ue_cnt;
	struct ecc_error_info ceinfo;
	struct ecc_error_info ueinfo;
};
97 
/**
 * struct edac_priv - OCM private instance data
 * @baseaddr:	Base address of the OCM
 * @message:	Buffer for framing the event specific info
 * @stat:	ECC status information for the interrupt being serviced
 * @ce_cnt:	Correctable Error count, accumulated by the interrupt handler
 * @ue_cnt:	Uncorrectable Error count, accumulated by the interrupt handler
 * @debugfs_dir:	Directory entry for debugfs
 * @ce_bitpos:	Bit position for Correctable Error
 * @ue_bitpos:	Array to store UnCorrectable Error bit positions
 * @fault_injection_cnt: Fault Injection Counter value
 *
 * The debugfs and fault injection members only exist when the kernel is
 * built with CONFIG_EDAC_DEBUG.
 */
struct edac_priv {
	void __iomem *baseaddr;
	char message[ZYNQMP_OCM_EDAC_MSG_SIZE];
	struct ecc_status stat;
	u32 ce_cnt;
	u32 ue_cnt;
#ifdef CONFIG_EDAC_DEBUG
	struct dentry *debugfs_dir;
	u8 ce_bitpos;
	u8 ue_bitpos[OCM_NUM_UE_BITPOS];
	u32 fault_injection_cnt;
#endif
};
123 
124 /**
125  * get_error_info - Get the current ECC error info
126  * @base:	Pointer to the base address of the OCM
127  * @p:		Pointer to the OCM ECC status structure
128  * @mask:	Status register mask value
129  *
130  * Determines there is any ECC error or not
131  *
132  */
133 static void get_error_info(void __iomem *base, struct ecc_status *p, int mask)
134 {
135 	if (mask & OCM_CEINTR_MASK) {
136 		p->ce_cnt++;
137 		p->ceinfo.fault_lo = readl(base + CE_FFD0_OFST);
138 		p->ceinfo.fault_hi = readl(base + CE_FFD1_OFST);
139 		p->ceinfo.addr = (OCM_BASEVAL | readl(base + CE_FFA_OFST));
140 		writel(ECC_CTRL_CLR_CE_ERR, base + OCM_ISR_OFST);
141 	} else if (mask & OCM_UEINTR_MASK) {
142 		p->ue_cnt++;
143 		p->ueinfo.fault_lo = readl(base + UE_FFD0_OFST);
144 		p->ueinfo.fault_hi = readl(base + UE_FFD1_OFST);
145 		p->ueinfo.addr = (OCM_BASEVAL | readl(base + UE_FFA_OFST));
146 		writel(ECC_CTRL_CLR_UE_ERR, base + OCM_ISR_OFST);
147 	}
148 }
149 
150 /**
151  * handle_error - Handle error types CE and UE
152  * @dci:	Pointer to the EDAC device instance
153  * @p:		Pointer to the OCM ECC status structure
154  *
155  * Handles correctable and uncorrectable errors.
156  */
157 static void handle_error(struct edac_device_ctl_info *dci, struct ecc_status *p)
158 {
159 	struct edac_priv *priv = dci->pvt_info;
160 	struct ecc_error_info *pinf;
161 
162 	if (p->ce_cnt) {
163 		pinf = &p->ceinfo;
164 		snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE,
165 			 "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]",
166 			 "CE", pinf->addr, pinf->fault_hi, pinf->fault_lo);
167 		edac_device_handle_ce(dci, 0, 0, priv->message);
168 	}
169 
170 	if (p->ue_cnt) {
171 		pinf = &p->ueinfo;
172 		snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE,
173 			 "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]",
174 			 "UE", pinf->addr, pinf->fault_hi, pinf->fault_lo);
175 		edac_device_handle_ue(dci, 0, 0, priv->message);
176 	}
177 
178 	memset(p, 0, sizeof(*p));
179 }
180 
181 /**
182  * intr_handler - ISR routine
183  * @irq:        irq number
184  * @dev_id:     device id pointer
185  *
186  * Return: IRQ_NONE, if CE/UE interrupt not set or IRQ_HANDLED otherwise
187  */
188 static irqreturn_t intr_handler(int irq, void *dev_id)
189 {
190 	struct edac_device_ctl_info *dci = dev_id;
191 	struct edac_priv *priv = dci->pvt_info;
192 	int regval;
193 
194 	regval = readl(priv->baseaddr + OCM_ISR_OFST);
195 	if (!(regval & (OCM_CEINTR_MASK | OCM_UEINTR_MASK))) {
196 		WARN_ONCE(1, "Unhandled IRQ%d, ISR: 0x%x", irq, regval);
197 		return IRQ_NONE;
198 	}
199 
200 	get_error_info(priv->baseaddr, &priv->stat, regval);
201 
202 	priv->ce_cnt += priv->stat.ce_cnt;
203 	priv->ue_cnt += priv->stat.ue_cnt;
204 	handle_error(dci, &priv->stat);
205 
206 	return IRQ_HANDLED;
207 }
208 
209 /**
210  * get_eccstate - Return the ECC status
211  * @base:	Pointer to the OCM base address
212  *
213  * Get the ECC enable/disable status
214  *
215  * Return: ECC status 0/1.
216  */
217 static bool get_eccstate(void __iomem *base)
218 {
219 	return readl(base + ECC_CTRL_OFST) & OCM_ECC_ENABLE_MASK;
220 }
221 
222 #ifdef CONFIG_EDAC_DEBUG
223 /**
224  * write_fault_count - write fault injection count
225  * @priv:	Pointer to the EDAC private struct
226  *
227  * Update the fault injection count register, once the counter reaches
228  * zero, it injects errors
229  */
230 static void write_fault_count(struct edac_priv *priv)
231 {
232 	u32 ficount = priv->fault_injection_cnt;
233 
234 	if (ficount & ~OCM_FICOUNT_MASK) {
235 		ficount &= OCM_FICOUNT_MASK;
236 		edac_printk(KERN_INFO, EDAC_DEVICE,
237 			    "Fault injection count value truncated to %d\n", ficount);
238 	}
239 
240 	writel(ficount, priv->baseaddr + OCM_FIC_OFST);
241 }
242 
243 /*
244  * To get the Correctable Error injected, the following steps are needed:
245  * - Setup the optional Fault Injection Count:
246  *	echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count
247  * - Write the Correctable Error bit position value:
248  *	echo <bit_pos val> > /sys/kernel/debug/edac/ocm/inject_ce_bitpos
249  */
250 static ssize_t inject_ce_write(struct file *file, const char __user *data,
251 			       size_t count, loff_t *ppos)
252 {
253 	struct edac_device_ctl_info *edac_dev = file->private_data;
254 	struct edac_priv *priv = edac_dev->pvt_info;
255 	int ret;
256 
257 	if (!data)
258 		return -EFAULT;
259 
260 	ret = kstrtou8_from_user(data, count, 0, &priv->ce_bitpos);
261 	if (ret)
262 		return ret;
263 
264 	if (priv->ce_bitpos > UE_MAX_BITPOS_UPPER)
265 		return -EINVAL;
266 
267 	if (priv->ce_bitpos <= UE_MAX_BITPOS_LOWER) {
268 		writel(BIT(priv->ce_bitpos), priv->baseaddr + OCM_FID0_OFST);
269 		writel(0, priv->baseaddr + OCM_FID1_OFST);
270 	} else {
271 		writel(BIT(priv->ce_bitpos - UE_MIN_BITPOS_UPPER),
272 		       priv->baseaddr + OCM_FID1_OFST);
273 		writel(0, priv->baseaddr + OCM_FID0_OFST);
274 	}
275 
276 	write_fault_count(priv);
277 
278 	return count;
279 }
280 
281 static const struct file_operations inject_ce_fops = {
282 	.open = simple_open,
283 	.write = inject_ce_write,
284 	.llseek = generic_file_llseek,
285 };
286 
287 /*
288  * To get the Uncorrectable Error injected, the following steps are needed:
289  * - Setup the optional Fault Injection Count:
290  *      echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count
291  * - Write the Uncorrectable Error bit position values:
292  *      echo <bit_pos0 val>,<bit_pos1 val> > /sys/kernel/debug/edac/ocm/inject_ue_bitpos
293  */
294 static ssize_t inject_ue_write(struct file *file, const char __user *data,
295 			       size_t count, loff_t *ppos)
296 {
297 	struct edac_device_ctl_info *edac_dev = file->private_data;
298 	struct edac_priv *priv = edac_dev->pvt_info;
299 	char buf[6], *pbuf, *token[2];
300 	u64 ue_bitpos;
301 	int i, ret;
302 	u8 len;
303 
304 	if (!data)
305 		return -EFAULT;
306 
307 	len = min_t(size_t, count, sizeof(buf));
308 	if (copy_from_user(buf, data, len))
309 		return -EFAULT;
310 
311 	buf[len] = '\0';
312 	pbuf = &buf[0];
313 	for (i = 0; i < OCM_NUM_UE_BITPOS; i++)
314 		token[i] = strsep(&pbuf, ",");
315 
316 	ret = kstrtou8(token[0], 0, &priv->ue_bitpos[0]);
317 	if (ret)
318 		return ret;
319 
320 	ret = kstrtou8(token[1], 0, &priv->ue_bitpos[1]);
321 	if (ret)
322 		return ret;
323 
324 	if (priv->ue_bitpos[0] > UE_MAX_BITPOS_UPPER ||
325 	    priv->ue_bitpos[1] > UE_MAX_BITPOS_UPPER)
326 		return -EINVAL;
327 
328 	if (priv->ue_bitpos[0] == priv->ue_bitpos[1]) {
329 		edac_printk(KERN_ERR, EDAC_DEVICE, "Bit positions should not be equal\n");
330 		return -EINVAL;
331 	}
332 
333 	ue_bitpos = BIT(priv->ue_bitpos[0]) | BIT(priv->ue_bitpos[1]);
334 
335 	writel((u32)ue_bitpos, priv->baseaddr + OCM_FID0_OFST);
336 	writel((u32)(ue_bitpos >> 32), priv->baseaddr + OCM_FID1_OFST);
337 
338 	write_fault_count(priv);
339 
340 	return count;
341 }
342 
343 static const struct file_operations inject_ue_fops = {
344 	.open = simple_open,
345 	.write = inject_ue_write,
346 	.llseek = generic_file_llseek,
347 };
348 
349 static void setup_debugfs(struct edac_device_ctl_info *edac_dev)
350 {
351 	struct edac_priv *priv = edac_dev->pvt_info;
352 
353 	priv->debugfs_dir = edac_debugfs_create_dir("ocm");
354 	if (!priv->debugfs_dir)
355 		return;
356 
357 	edac_debugfs_create_x32("inject_fault_count", 0644, priv->debugfs_dir,
358 				&priv->fault_injection_cnt);
359 	edac_debugfs_create_file("inject_ue_bitpos", 0644, priv->debugfs_dir,
360 				 edac_dev, &inject_ue_fops);
361 	edac_debugfs_create_file("inject_ce_bitpos", 0644, priv->debugfs_dir,
362 				 edac_dev, &inject_ce_fops);
363 }
364 #endif
365 
366 static int edac_probe(struct platform_device *pdev)
367 {
368 	struct edac_device_ctl_info *dci;
369 	struct edac_priv *priv;
370 	void __iomem *baseaddr;
371 	struct resource *res;
372 	int irq, ret;
373 
374 	baseaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
375 	if (IS_ERR(baseaddr))
376 		return PTR_ERR(baseaddr);
377 
378 	if (!get_eccstate(baseaddr)) {
379 		edac_printk(KERN_INFO, EDAC_DEVICE, "ECC not enabled\n");
380 		return -ENXIO;
381 	}
382 
383 	dci = edac_device_alloc_ctl_info(sizeof(*priv), ZYNQMP_OCM_EDAC_STRING,
384 					 1, ZYNQMP_OCM_EDAC_STRING, 1, 0, NULL, 0,
385 					 edac_device_alloc_index());
386 	if (!dci)
387 		return -ENOMEM;
388 
389 	priv = dci->pvt_info;
390 	platform_set_drvdata(pdev, dci);
391 	dci->dev = &pdev->dev;
392 	priv->baseaddr = baseaddr;
393 	dci->mod_name = pdev->dev.driver->name;
394 	dci->ctl_name = ZYNQMP_OCM_EDAC_STRING;
395 	dci->dev_name = dev_name(&pdev->dev);
396 
397 	irq = platform_get_irq(pdev, 0);
398 	if (irq < 0) {
399 		ret = irq;
400 		goto free_dev_ctl;
401 	}
402 
403 	ret = devm_request_irq(&pdev->dev, irq, intr_handler, 0,
404 			       dev_name(&pdev->dev), dci);
405 	if (ret) {
406 		edac_printk(KERN_ERR, EDAC_DEVICE, "Failed to request Irq\n");
407 		goto free_dev_ctl;
408 	}
409 
410 	/* Enable UE, CE interrupts */
411 	writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IEN_OFST);
412 
413 #ifdef CONFIG_EDAC_DEBUG
414 	setup_debugfs(dci);
415 #endif
416 
417 	ret = edac_device_add_device(dci);
418 	if (ret)
419 		goto free_dev_ctl;
420 
421 	return 0;
422 
423 free_dev_ctl:
424 	edac_device_free_ctl_info(dci);
425 
426 	return ret;
427 }
428 
429 static int edac_remove(struct platform_device *pdev)
430 {
431 	struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
432 	struct edac_priv *priv = dci->pvt_info;
433 
434 	/* Disable UE, CE interrupts */
435 	writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IDS_OFST);
436 
437 #ifdef CONFIG_EDAC_DEBUG
438 	debugfs_remove_recursive(priv->debugfs_dir);
439 #endif
440 
441 	edac_device_del_device(&pdev->dev);
442 	edac_device_free_ctl_info(dci);
443 
444 	return 0;
445 }
446 
447 static const struct of_device_id zynqmp_ocm_edac_match[] = {
448 	{ .compatible = "xlnx,zynqmp-ocmc-1.0"},
449 	{ /* end of table */ }
450 };
451 
452 MODULE_DEVICE_TABLE(of, zynqmp_ocm_edac_match);
453 
454 static struct platform_driver zynqmp_ocm_edac_driver = {
455 	.driver = {
456 		   .name = "zynqmp-ocm-edac",
457 		   .of_match_table = zynqmp_ocm_edac_match,
458 		   },
459 	.probe = edac_probe,
460 	.remove = edac_remove,
461 };
462 
463 module_platform_driver(zynqmp_ocm_edac_driver);
464 
465 MODULE_AUTHOR("Advanced Micro Devices, Inc");
466 MODULE_DESCRIPTION("Xilinx ZynqMP OCM ECC driver");
467 MODULE_LICENSE("GPL");
468