xref: /openbmc/linux/drivers/edac/altera_edac.c (revision a8da474e)
1 /*
2  *  Copyright Altera Corporation (C) 2014-2015. All rights reserved.
3  *  Copyright 2011-2012 Calxeda, Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program.  If not, see <http://www.gnu.org/licenses/>.
16  *
17  * Adapted from the highbank_mc_edac driver.
18  */
19 
20 #include <linux/ctype.h>
21 #include <linux/edac.h>
22 #include <linux/interrupt.h>
23 #include <linux/kernel.h>
24 #include <linux/mfd/syscon.h>
25 #include <linux/of_platform.h>
26 #include <linux/platform_device.h>
27 #include <linux/regmap.h>
28 #include <linux/types.h>
29 #include <linux/uaccess.h>
30 
31 #include "altera_edac.h"
32 #include "edac_core.h"
33 #include "edac_module.h"
34 
/* Module name handed to the EDAC core through mci->mod_name. */
#define EDAC_MOD_STR		"altera_edac"
/* Driver version string handed to the EDAC core through mci->mod_ver. */
#define EDAC_VERSION		"1"
37 
38 static const struct altr_sdram_prv_data c5_data = {
39 	.ecc_ctrl_offset    = CV_CTLCFG_OFST,
40 	.ecc_ctl_en_mask    = CV_CTLCFG_ECC_AUTO_EN,
41 	.ecc_stat_offset    = CV_DRAMSTS_OFST,
42 	.ecc_stat_ce_mask   = CV_DRAMSTS_SBEERR,
43 	.ecc_stat_ue_mask   = CV_DRAMSTS_DBEERR,
44 	.ecc_saddr_offset   = CV_ERRADDR_OFST,
45 	.ecc_daddr_offset   = CV_ERRADDR_OFST,
46 	.ecc_cecnt_offset   = CV_SBECOUNT_OFST,
47 	.ecc_uecnt_offset   = CV_DBECOUNT_OFST,
48 	.ecc_irq_en_offset  = CV_DRAMINTR_OFST,
49 	.ecc_irq_en_mask    = CV_DRAMINTR_INTREN,
50 	.ecc_irq_clr_offset = CV_DRAMINTR_OFST,
51 	.ecc_irq_clr_mask   = (CV_DRAMINTR_INTRCLR | CV_DRAMINTR_INTREN),
52 	.ecc_cnt_rst_offset = CV_DRAMINTR_OFST,
53 	.ecc_cnt_rst_mask   = CV_DRAMINTR_INTRCLR,
54 	.ce_ue_trgr_offset  = CV_CTLCFG_OFST,
55 	.ce_set_mask        = CV_CTLCFG_GEN_SB_ERR,
56 	.ue_set_mask        = CV_CTLCFG_GEN_DB_ERR,
57 };
58 
59 static const struct altr_sdram_prv_data a10_data = {
60 	.ecc_ctrl_offset    = A10_ECCCTRL1_OFST,
61 	.ecc_ctl_en_mask    = A10_ECCCTRL1_ECC_EN,
62 	.ecc_stat_offset    = A10_INTSTAT_OFST,
63 	.ecc_stat_ce_mask   = A10_INTSTAT_SBEERR,
64 	.ecc_stat_ue_mask   = A10_INTSTAT_DBEERR,
65 	.ecc_saddr_offset   = A10_SERRADDR_OFST,
66 	.ecc_daddr_offset   = A10_DERRADDR_OFST,
67 	.ecc_irq_en_offset  = A10_ERRINTEN_OFST,
68 	.ecc_irq_en_mask    = A10_ECC_IRQ_EN_MASK,
69 	.ecc_irq_clr_offset = A10_INTSTAT_OFST,
70 	.ecc_irq_clr_mask   = (A10_INTSTAT_SBEERR | A10_INTSTAT_DBEERR),
71 	.ecc_cnt_rst_offset = A10_ECCCTRL1_OFST,
72 	.ecc_cnt_rst_mask   = A10_ECC_CNT_RESET_MASK,
73 	.ce_ue_trgr_offset  = A10_DIAGINTTEST_OFST,
74 	.ce_set_mask        = A10_DIAGINT_TSERRA_MASK,
75 	.ue_set_mask        = A10_DIAGINT_TDERRA_MASK,
76 };
77 
78 static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
79 {
80 	struct mem_ctl_info *mci = dev_id;
81 	struct altr_sdram_mc_data *drvdata = mci->pvt_info;
82 	const struct altr_sdram_prv_data *priv = drvdata->data;
83 	u32 status, err_count = 1, err_addr;
84 
85 	regmap_read(drvdata->mc_vbase, priv->ecc_stat_offset, &status);
86 
87 	if (status & priv->ecc_stat_ue_mask) {
88 		regmap_read(drvdata->mc_vbase, priv->ecc_daddr_offset,
89 			    &err_addr);
90 		if (priv->ecc_uecnt_offset)
91 			regmap_read(drvdata->mc_vbase, priv->ecc_uecnt_offset,
92 				    &err_count);
93 		panic("\nEDAC: [%d Uncorrectable errors @ 0x%08X]\n",
94 		      err_count, err_addr);
95 	}
96 	if (status & priv->ecc_stat_ce_mask) {
97 		regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset,
98 			    &err_addr);
99 		if (priv->ecc_uecnt_offset)
100 			regmap_read(drvdata->mc_vbase,  priv->ecc_cecnt_offset,
101 				    &err_count);
102 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count,
103 				     err_addr >> PAGE_SHIFT,
104 				     err_addr & ~PAGE_MASK, 0,
105 				     0, 0, -1, mci->ctl_name, "");
106 		/* Clear IRQ to resume */
107 		regmap_write(drvdata->mc_vbase,	priv->ecc_irq_clr_offset,
108 			     priv->ecc_irq_clr_mask);
109 
110 		return IRQ_HANDLED;
111 	}
112 	return IRQ_NONE;
113 }
114 
115 static ssize_t altr_sdr_mc_err_inject_write(struct file *file,
116 					    const char __user *data,
117 					    size_t count, loff_t *ppos)
118 {
119 	struct mem_ctl_info *mci = file->private_data;
120 	struct altr_sdram_mc_data *drvdata = mci->pvt_info;
121 	const struct altr_sdram_prv_data *priv = drvdata->data;
122 	u32 *ptemp;
123 	dma_addr_t dma_handle;
124 	u32 reg, read_reg;
125 
126 	ptemp = dma_alloc_coherent(mci->pdev, 16, &dma_handle, GFP_KERNEL);
127 	if (!ptemp) {
128 		dma_free_coherent(mci->pdev, 16, ptemp, dma_handle);
129 		edac_printk(KERN_ERR, EDAC_MC,
130 			    "Inject: Buffer Allocation error\n");
131 		return -ENOMEM;
132 	}
133 
134 	regmap_read(drvdata->mc_vbase, priv->ce_ue_trgr_offset,
135 		    &read_reg);
136 	read_reg &= ~(priv->ce_set_mask | priv->ue_set_mask);
137 
138 	/* Error are injected by writing a word while the SBE or DBE
139 	 * bit in the CTLCFG register is set. Reading the word will
140 	 * trigger the SBE or DBE error and the corresponding IRQ.
141 	 */
142 	if (count == 3) {
143 		edac_printk(KERN_ALERT, EDAC_MC,
144 			    "Inject Double bit error\n");
145 		regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset,
146 			     (read_reg | priv->ue_set_mask));
147 	} else {
148 		edac_printk(KERN_ALERT, EDAC_MC,
149 			    "Inject Single bit error\n");
150 		regmap_write(drvdata->mc_vbase,	priv->ce_ue_trgr_offset,
151 			     (read_reg | priv->ce_set_mask));
152 	}
153 
154 	ptemp[0] = 0x5A5A5A5A;
155 	ptemp[1] = 0xA5A5A5A5;
156 
157 	/* Clear the error injection bits */
158 	regmap_write(drvdata->mc_vbase,	priv->ce_ue_trgr_offset, read_reg);
159 	/* Ensure it has been written out */
160 	wmb();
161 
162 	/*
163 	 * To trigger the error, we need to read the data back
164 	 * (the data was written with errors above).
165 	 * The ACCESS_ONCE macros and printk are used to prevent the
166 	 * the compiler optimizing these reads out.
167 	 */
168 	reg = ACCESS_ONCE(ptemp[0]);
169 	read_reg = ACCESS_ONCE(ptemp[1]);
170 	/* Force Read */
171 	rmb();
172 
173 	edac_printk(KERN_ALERT, EDAC_MC, "Read Data [0x%X, 0x%X]\n",
174 		    reg, read_reg);
175 
176 	dma_free_coherent(mci->pdev, 16, ptemp, dma_handle);
177 
178 	return count;
179 }
180 
181 static const struct file_operations altr_sdr_mc_debug_inject_fops = {
182 	.open = simple_open,
183 	.write = altr_sdr_mc_err_inject_write,
184 	.llseek = generic_file_llseek,
185 };
186 
187 static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci)
188 {
189 	if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
190 		return;
191 
192 	if (!mci->debugfs)
193 		return;
194 
195 	edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
196 				 &altr_sdr_mc_debug_inject_fops);
197 }
198 
199 /* Get total memory size from Open Firmware DTB */
200 static unsigned long get_total_mem(void)
201 {
202 	struct device_node *np = NULL;
203 	const unsigned int *reg, *reg_end;
204 	int len, sw, aw;
205 	unsigned long start, size, total_mem = 0;
206 
207 	for_each_node_by_type(np, "memory") {
208 		aw = of_n_addr_cells(np);
209 		sw = of_n_size_cells(np);
210 		reg = (const unsigned int *)of_get_property(np, "reg", &len);
211 		reg_end = reg + (len / sizeof(u32));
212 
213 		total_mem = 0;
214 		do {
215 			start = of_read_number(reg, aw);
216 			reg += aw;
217 			size = of_read_number(reg, sw);
218 			reg += sw;
219 			total_mem += size;
220 		} while (reg < reg_end);
221 	}
222 	edac_dbg(0, "total_mem 0x%lx\n", total_mem);
223 	return total_mem;
224 }
225 
226 static const struct of_device_id altr_sdram_ctrl_of_match[] = {
227 	{ .compatible = "altr,sdram-edac", .data = (void *)&c5_data},
228 	{ .compatible = "altr,sdram-edac-a10", .data = (void *)&a10_data},
229 	{},
230 };
231 MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
232 
233 static int a10_init(struct regmap *mc_vbase)
234 {
235 	if (regmap_update_bits(mc_vbase, A10_INTMODE_OFST,
236 			       A10_INTMODE_SB_INT, A10_INTMODE_SB_INT)) {
237 		edac_printk(KERN_ERR, EDAC_MC,
238 			    "Error setting SB IRQ mode\n");
239 		return -ENODEV;
240 	}
241 
242 	if (regmap_write(mc_vbase, A10_SERRCNTREG_OFST, 1)) {
243 		edac_printk(KERN_ERR, EDAC_MC,
244 			    "Error setting trigger count\n");
245 		return -ENODEV;
246 	}
247 
248 	return 0;
249 }
250 
251 static int a10_unmask_irq(struct platform_device *pdev, u32 mask)
252 {
253 	void __iomem  *sm_base;
254 	int  ret = 0;
255 
256 	if (!request_mem_region(A10_SYMAN_INTMASK_CLR, sizeof(u32),
257 				dev_name(&pdev->dev))) {
258 		edac_printk(KERN_ERR, EDAC_MC,
259 			    "Unable to request mem region\n");
260 		return -EBUSY;
261 	}
262 
263 	sm_base = ioremap(A10_SYMAN_INTMASK_CLR, sizeof(u32));
264 	if (!sm_base) {
265 		edac_printk(KERN_ERR, EDAC_MC,
266 			    "Unable to ioremap device\n");
267 
268 		ret = -ENOMEM;
269 		goto release;
270 	}
271 
272 	iowrite32(mask, sm_base);
273 
274 	iounmap(sm_base);
275 
276 release:
277 	release_mem_region(A10_SYMAN_INTMASK_CLR, sizeof(u32));
278 
279 	return ret;
280 }
281 
282 static int altr_sdram_probe(struct platform_device *pdev)
283 {
284 	const struct of_device_id *id;
285 	struct edac_mc_layer layers[2];
286 	struct mem_ctl_info *mci;
287 	struct altr_sdram_mc_data *drvdata;
288 	const struct altr_sdram_prv_data *priv;
289 	struct regmap *mc_vbase;
290 	struct dimm_info *dimm;
291 	u32 read_reg;
292 	int irq, irq2, res = 0;
293 	unsigned long mem_size, irqflags = 0;
294 
295 	id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev);
296 	if (!id)
297 		return -ENODEV;
298 
299 	/* Grab the register range from the sdr controller in device tree */
300 	mc_vbase = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
301 						   "altr,sdr-syscon");
302 	if (IS_ERR(mc_vbase)) {
303 		edac_printk(KERN_ERR, EDAC_MC,
304 			    "regmap for altr,sdr-syscon lookup failed.\n");
305 		return -ENODEV;
306 	}
307 
308 	/* Check specific dependencies for the module */
309 	priv = of_match_node(altr_sdram_ctrl_of_match,
310 			     pdev->dev.of_node)->data;
311 
312 	/* Validate the SDRAM controller has ECC enabled */
313 	if (regmap_read(mc_vbase, priv->ecc_ctrl_offset, &read_reg) ||
314 	    ((read_reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)) {
315 		edac_printk(KERN_ERR, EDAC_MC,
316 			    "No ECC/ECC disabled [0x%08X]\n", read_reg);
317 		return -ENODEV;
318 	}
319 
320 	/* Grab memory size from device tree. */
321 	mem_size = get_total_mem();
322 	if (!mem_size) {
323 		edac_printk(KERN_ERR, EDAC_MC, "Unable to calculate memory size\n");
324 		return -ENODEV;
325 	}
326 
327 	/* Ensure the SDRAM Interrupt is disabled */
328 	if (regmap_update_bits(mc_vbase, priv->ecc_irq_en_offset,
329 			       priv->ecc_irq_en_mask, 0)) {
330 		edac_printk(KERN_ERR, EDAC_MC,
331 			    "Error disabling SDRAM ECC IRQ\n");
332 		return -ENODEV;
333 	}
334 
335 	/* Toggle to clear the SDRAM Error count */
336 	if (regmap_update_bits(mc_vbase, priv->ecc_cnt_rst_offset,
337 			       priv->ecc_cnt_rst_mask,
338 			       priv->ecc_cnt_rst_mask)) {
339 		edac_printk(KERN_ERR, EDAC_MC,
340 			    "Error clearing SDRAM ECC count\n");
341 		return -ENODEV;
342 	}
343 
344 	if (regmap_update_bits(mc_vbase, priv->ecc_cnt_rst_offset,
345 			       priv->ecc_cnt_rst_mask, 0)) {
346 		edac_printk(KERN_ERR, EDAC_MC,
347 			    "Error clearing SDRAM ECC count\n");
348 		return -ENODEV;
349 	}
350 
351 	irq = platform_get_irq(pdev, 0);
352 	if (irq < 0) {
353 		edac_printk(KERN_ERR, EDAC_MC,
354 			    "No irq %d in DT\n", irq);
355 		return -ENODEV;
356 	}
357 
358 	/* Arria10 has a 2nd IRQ */
359 	irq2 = platform_get_irq(pdev, 1);
360 
361 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
362 	layers[0].size = 1;
363 	layers[0].is_virt_csrow = true;
364 	layers[1].type = EDAC_MC_LAYER_CHANNEL;
365 	layers[1].size = 1;
366 	layers[1].is_virt_csrow = false;
367 	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
368 			    sizeof(struct altr_sdram_mc_data));
369 	if (!mci)
370 		return -ENOMEM;
371 
372 	mci->pdev = &pdev->dev;
373 	drvdata = mci->pvt_info;
374 	drvdata->mc_vbase = mc_vbase;
375 	drvdata->data = priv;
376 	platform_set_drvdata(pdev, mci);
377 
378 	if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
379 		edac_printk(KERN_ERR, EDAC_MC,
380 			    "Unable to get managed device resource\n");
381 		res = -ENOMEM;
382 		goto free;
383 	}
384 
385 	mci->mtype_cap = MEM_FLAG_DDR3;
386 	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
387 	mci->edac_cap = EDAC_FLAG_SECDED;
388 	mci->mod_name = EDAC_MOD_STR;
389 	mci->mod_ver = EDAC_VERSION;
390 	mci->ctl_name = dev_name(&pdev->dev);
391 	mci->scrub_mode = SCRUB_SW_SRC;
392 	mci->dev_name = dev_name(&pdev->dev);
393 
394 	dimm = *mci->dimms;
395 	dimm->nr_pages = ((mem_size - 1) >> PAGE_SHIFT) + 1;
396 	dimm->grain = 8;
397 	dimm->dtype = DEV_X8;
398 	dimm->mtype = MEM_DDR3;
399 	dimm->edac_mode = EDAC_SECDED;
400 
401 	res = edac_mc_add_mc(mci);
402 	if (res < 0)
403 		goto err;
404 
405 	/* Only the Arria10 has separate IRQs */
406 	if (irq2 > 0) {
407 		/* Arria10 specific initialization */
408 		res = a10_init(mc_vbase);
409 		if (res < 0)
410 			goto err2;
411 
412 		res = devm_request_irq(&pdev->dev, irq2,
413 				       altr_sdram_mc_err_handler,
414 				       IRQF_SHARED, dev_name(&pdev->dev), mci);
415 		if (res < 0) {
416 			edac_mc_printk(mci, KERN_ERR,
417 				       "Unable to request irq %d\n", irq2);
418 			res = -ENODEV;
419 			goto err2;
420 		}
421 
422 		res = a10_unmask_irq(pdev, A10_DDR0_IRQ_MASK);
423 		if (res < 0)
424 			goto err2;
425 
426 		irqflags = IRQF_SHARED;
427 	}
428 
429 	res = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler,
430 			       irqflags, dev_name(&pdev->dev), mci);
431 	if (res < 0) {
432 		edac_mc_printk(mci, KERN_ERR,
433 			       "Unable to request irq %d\n", irq);
434 		res = -ENODEV;
435 		goto err2;
436 	}
437 
438 	/* Infrastructure ready - enable the IRQ */
439 	if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset,
440 			       priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) {
441 		edac_mc_printk(mci, KERN_ERR,
442 			       "Error enabling SDRAM ECC IRQ\n");
443 		res = -ENODEV;
444 		goto err2;
445 	}
446 
447 	altr_sdr_mc_create_debugfs_nodes(mci);
448 
449 	devres_close_group(&pdev->dev, NULL);
450 
451 	return 0;
452 
453 err2:
454 	edac_mc_del_mc(&pdev->dev);
455 err:
456 	devres_release_group(&pdev->dev, NULL);
457 free:
458 	edac_mc_free(mci);
459 	edac_printk(KERN_ERR, EDAC_MC,
460 		    "EDAC Probe Failed; Error %d\n", res);
461 
462 	return res;
463 }
464 
465 static int altr_sdram_remove(struct platform_device *pdev)
466 {
467 	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
468 
469 	edac_mc_del_mc(&pdev->dev);
470 	edac_mc_free(mci);
471 	platform_set_drvdata(pdev, NULL);
472 
473 	return 0;
474 }
475 
/*
 * Suspend is not supported while EDAC is enabled. To suspend, disable
 * EDAC by removing it from the device tree or the defconfig.
 */
#ifdef CONFIG_PM
/*
 * altr_sdram_prepare - PM .prepare callback
 * @dev: device about to be suspended (unused)
 *
 * Always refuses by logging an error and returning -EPERM.
 */
static int altr_sdram_prepare(struct device *dev)
{
	const int rc = -EPERM;

	pr_err("Suspend not allowed when EDAC is enabled.\n");

	return rc;
}

/* Only .prepare is provided; it rejects every suspend attempt. */
static const struct dev_pm_ops altr_sdram_pm_ops = {
	.prepare = altr_sdram_prepare,
};
#endif
492 
493 static struct platform_driver altr_sdram_edac_driver = {
494 	.probe = altr_sdram_probe,
495 	.remove = altr_sdram_remove,
496 	.driver = {
497 		.name = "altr_sdram_edac",
498 #ifdef CONFIG_PM
499 		.pm = &altr_sdram_pm_ops,
500 #endif
501 		.of_match_table = altr_sdram_ctrl_of_match,
502 	},
503 };
504 
505 module_platform_driver(altr_sdram_edac_driver);
506 
507 MODULE_LICENSE("GPL v2");
508 MODULE_AUTHOR("Thor Thayer");
509 MODULE_DESCRIPTION("EDAC Driver for Altera SDRAM Controller");
510