xref: /openbmc/linux/drivers/edac/altera_edac.c (revision 92b19ff5)
1 /*
2  *  Copyright Altera Corporation (C) 2014-2015. All rights reserved.
3  *  Copyright 2011-2012 Calxeda, Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program.  If not, see <http://www.gnu.org/licenses/>.
16  *
17  * Adapted from the highbank_mc_edac driver.
18  */
19 
20 #include <linux/ctype.h>
21 #include <linux/edac.h>
22 #include <linux/interrupt.h>
23 #include <linux/kernel.h>
24 #include <linux/mfd/syscon.h>
25 #include <linux/of_platform.h>
26 #include <linux/platform_device.h>
27 #include <linux/regmap.h>
28 #include <linux/types.h>
29 #include <linux/uaccess.h>
30 
31 #include "altera_edac.h"
32 #include "edac_core.h"
33 #include "edac_module.h"
34 
/* Module name and version strings; probe stores them in mci->mod_name/mod_ver. */
#define EDAC_MOD_STR		"altera_edac"
#define EDAC_VERSION		"1"
37 
38 static const struct altr_sdram_prv_data c5_data = {
39 	.ecc_ctrl_offset    = CV_CTLCFG_OFST,
40 	.ecc_ctl_en_mask    = CV_CTLCFG_ECC_AUTO_EN,
41 	.ecc_stat_offset    = CV_DRAMSTS_OFST,
42 	.ecc_stat_ce_mask   = CV_DRAMSTS_SBEERR,
43 	.ecc_stat_ue_mask   = CV_DRAMSTS_DBEERR,
44 	.ecc_saddr_offset   = CV_ERRADDR_OFST,
45 	.ecc_daddr_offset   = CV_ERRADDR_OFST,
46 	.ecc_cecnt_offset   = CV_SBECOUNT_OFST,
47 	.ecc_uecnt_offset   = CV_DBECOUNT_OFST,
48 	.ecc_irq_en_offset  = CV_DRAMINTR_OFST,
49 	.ecc_irq_en_mask    = CV_DRAMINTR_INTREN,
50 	.ecc_irq_clr_offset = CV_DRAMINTR_OFST,
51 	.ecc_irq_clr_mask   = (CV_DRAMINTR_INTRCLR | CV_DRAMINTR_INTREN),
52 	.ecc_cnt_rst_offset = CV_DRAMINTR_OFST,
53 	.ecc_cnt_rst_mask   = CV_DRAMINTR_INTRCLR,
54 #ifdef CONFIG_EDAC_DEBUG
55 	.ce_ue_trgr_offset  = CV_CTLCFG_OFST,
56 	.ce_set_mask        = CV_CTLCFG_GEN_SB_ERR,
57 	.ue_set_mask        = CV_CTLCFG_GEN_DB_ERR,
58 #endif
59 };
60 
61 static const struct altr_sdram_prv_data a10_data = {
62 	.ecc_ctrl_offset    = A10_ECCCTRL1_OFST,
63 	.ecc_ctl_en_mask    = A10_ECCCTRL1_ECC_EN,
64 	.ecc_stat_offset    = A10_INTSTAT_OFST,
65 	.ecc_stat_ce_mask   = A10_INTSTAT_SBEERR,
66 	.ecc_stat_ue_mask   = A10_INTSTAT_DBEERR,
67 	.ecc_saddr_offset   = A10_SERRADDR_OFST,
68 	.ecc_daddr_offset   = A10_DERRADDR_OFST,
69 	.ecc_irq_en_offset  = A10_ERRINTEN_OFST,
70 	.ecc_irq_en_mask    = A10_ECC_IRQ_EN_MASK,
71 	.ecc_irq_clr_offset = A10_INTSTAT_OFST,
72 	.ecc_irq_clr_mask   = (A10_INTSTAT_SBEERR | A10_INTSTAT_DBEERR),
73 	.ecc_cnt_rst_offset = A10_ECCCTRL1_OFST,
74 	.ecc_cnt_rst_mask   = A10_ECC_CNT_RESET_MASK,
75 #ifdef CONFIG_EDAC_DEBUG
76 	.ce_ue_trgr_offset  = A10_DIAGINTTEST_OFST,
77 	.ce_set_mask        = A10_DIAGINT_TSERRA_MASK,
78 	.ue_set_mask        = A10_DIAGINT_TDERRA_MASK,
79 #endif
80 };
81 
82 static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
83 {
84 	struct mem_ctl_info *mci = dev_id;
85 	struct altr_sdram_mc_data *drvdata = mci->pvt_info;
86 	const struct altr_sdram_prv_data *priv = drvdata->data;
87 	u32 status, err_count = 1, err_addr;
88 
89 	regmap_read(drvdata->mc_vbase, priv->ecc_stat_offset, &status);
90 
91 	if (status & priv->ecc_stat_ue_mask) {
92 		regmap_read(drvdata->mc_vbase, priv->ecc_daddr_offset,
93 			    &err_addr);
94 		if (priv->ecc_uecnt_offset)
95 			regmap_read(drvdata->mc_vbase, priv->ecc_uecnt_offset,
96 				    &err_count);
97 		panic("\nEDAC: [%d Uncorrectable errors @ 0x%08X]\n",
98 		      err_count, err_addr);
99 	}
100 	if (status & priv->ecc_stat_ce_mask) {
101 		regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset,
102 			    &err_addr);
103 		if (priv->ecc_uecnt_offset)
104 			regmap_read(drvdata->mc_vbase,  priv->ecc_cecnt_offset,
105 				    &err_count);
106 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count,
107 				     err_addr >> PAGE_SHIFT,
108 				     err_addr & ~PAGE_MASK, 0,
109 				     0, 0, -1, mci->ctl_name, "");
110 		/* Clear IRQ to resume */
111 		regmap_write(drvdata->mc_vbase,	priv->ecc_irq_clr_offset,
112 			     priv->ecc_irq_clr_mask);
113 
114 		return IRQ_HANDLED;
115 	}
116 	return IRQ_NONE;
117 }
118 
119 #ifdef CONFIG_EDAC_DEBUG
120 static ssize_t altr_sdr_mc_err_inject_write(struct file *file,
121 					    const char __user *data,
122 					    size_t count, loff_t *ppos)
123 {
124 	struct mem_ctl_info *mci = file->private_data;
125 	struct altr_sdram_mc_data *drvdata = mci->pvt_info;
126 	const struct altr_sdram_prv_data *priv = drvdata->data;
127 	u32 *ptemp;
128 	dma_addr_t dma_handle;
129 	u32 reg, read_reg;
130 
131 	ptemp = dma_alloc_coherent(mci->pdev, 16, &dma_handle, GFP_KERNEL);
132 	if (!ptemp) {
133 		dma_free_coherent(mci->pdev, 16, ptemp, dma_handle);
134 		edac_printk(KERN_ERR, EDAC_MC,
135 			    "Inject: Buffer Allocation error\n");
136 		return -ENOMEM;
137 	}
138 
139 	regmap_read(drvdata->mc_vbase, priv->ce_ue_trgr_offset,
140 		    &read_reg);
141 	read_reg &= ~(priv->ce_set_mask | priv->ue_set_mask);
142 
143 	/* Error are injected by writing a word while the SBE or DBE
144 	 * bit in the CTLCFG register is set. Reading the word will
145 	 * trigger the SBE or DBE error and the corresponding IRQ.
146 	 */
147 	if (count == 3) {
148 		edac_printk(KERN_ALERT, EDAC_MC,
149 			    "Inject Double bit error\n");
150 		regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset,
151 			     (read_reg | priv->ue_set_mask));
152 	} else {
153 		edac_printk(KERN_ALERT, EDAC_MC,
154 			    "Inject Single bit error\n");
155 		regmap_write(drvdata->mc_vbase,	priv->ce_ue_trgr_offset,
156 			     (read_reg | priv->ce_set_mask));
157 	}
158 
159 	ptemp[0] = 0x5A5A5A5A;
160 	ptemp[1] = 0xA5A5A5A5;
161 
162 	/* Clear the error injection bits */
163 	regmap_write(drvdata->mc_vbase,	priv->ce_ue_trgr_offset, read_reg);
164 	/* Ensure it has been written out */
165 	wmb();
166 
167 	/*
168 	 * To trigger the error, we need to read the data back
169 	 * (the data was written with errors above).
170 	 * The ACCESS_ONCE macros and printk are used to prevent the
171 	 * the compiler optimizing these reads out.
172 	 */
173 	reg = ACCESS_ONCE(ptemp[0]);
174 	read_reg = ACCESS_ONCE(ptemp[1]);
175 	/* Force Read */
176 	rmb();
177 
178 	edac_printk(KERN_ALERT, EDAC_MC, "Read Data [0x%X, 0x%X]\n",
179 		    reg, read_reg);
180 
181 	dma_free_coherent(mci->pdev, 16, ptemp, dma_handle);
182 
183 	return count;
184 }
185 
186 static const struct file_operations altr_sdr_mc_debug_inject_fops = {
187 	.open = simple_open,
188 	.write = altr_sdr_mc_err_inject_write,
189 	.llseek = generic_file_llseek,
190 };
191 
192 static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci)
193 {
194 	if (mci->debugfs)
195 		debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
196 				    &altr_sdr_mc_debug_inject_fops);
197 }
198 #else
/* Without CONFIG_EDAC_DEBUG there is no injection node to create. */
static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci)
{
}
201 #endif
202 
203 /* Get total memory size from Open Firmware DTB */
204 static unsigned long get_total_mem(void)
205 {
206 	struct device_node *np = NULL;
207 	const unsigned int *reg, *reg_end;
208 	int len, sw, aw;
209 	unsigned long start, size, total_mem = 0;
210 
211 	for_each_node_by_type(np, "memory") {
212 		aw = of_n_addr_cells(np);
213 		sw = of_n_size_cells(np);
214 		reg = (const unsigned int *)of_get_property(np, "reg", &len);
215 		reg_end = reg + (len / sizeof(u32));
216 
217 		total_mem = 0;
218 		do {
219 			start = of_read_number(reg, aw);
220 			reg += aw;
221 			size = of_read_number(reg, sw);
222 			reg += sw;
223 			total_mem += size;
224 		} while (reg < reg_end);
225 	}
226 	edac_dbg(0, "total_mem 0x%lx\n", total_mem);
227 	return total_mem;
228 }
229 
230 static const struct of_device_id altr_sdram_ctrl_of_match[] = {
231 	{ .compatible = "altr,sdram-edac", .data = (void *)&c5_data},
232 	{ .compatible = "altr,sdram-edac-a10", .data = (void *)&a10_data},
233 	{},
234 };
235 MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
236 
237 static int a10_init(struct regmap *mc_vbase)
238 {
239 	if (regmap_update_bits(mc_vbase, A10_INTMODE_OFST,
240 			       A10_INTMODE_SB_INT, A10_INTMODE_SB_INT)) {
241 		edac_printk(KERN_ERR, EDAC_MC,
242 			    "Error setting SB IRQ mode\n");
243 		return -ENODEV;
244 	}
245 
246 	if (regmap_write(mc_vbase, A10_SERRCNTREG_OFST, 1)) {
247 		edac_printk(KERN_ERR, EDAC_MC,
248 			    "Error setting trigger count\n");
249 		return -ENODEV;
250 	}
251 
252 	return 0;
253 }
254 
255 static int a10_unmask_irq(struct platform_device *pdev, u32 mask)
256 {
257 	void __iomem  *sm_base;
258 	int  ret = 0;
259 
260 	if (!request_mem_region(A10_SYMAN_INTMASK_CLR, sizeof(u32),
261 				dev_name(&pdev->dev))) {
262 		edac_printk(KERN_ERR, EDAC_MC,
263 			    "Unable to request mem region\n");
264 		return -EBUSY;
265 	}
266 
267 	sm_base = ioremap(A10_SYMAN_INTMASK_CLR, sizeof(u32));
268 	if (!sm_base) {
269 		edac_printk(KERN_ERR, EDAC_MC,
270 			    "Unable to ioremap device\n");
271 
272 		ret = -ENOMEM;
273 		goto release;
274 	}
275 
276 	iowrite32(mask, sm_base);
277 
278 	iounmap(sm_base);
279 
280 release:
281 	release_mem_region(A10_SYMAN_INTMASK_CLR, sizeof(u32));
282 
283 	return ret;
284 }
285 
286 static int altr_sdram_probe(struct platform_device *pdev)
287 {
288 	const struct of_device_id *id;
289 	struct edac_mc_layer layers[2];
290 	struct mem_ctl_info *mci;
291 	struct altr_sdram_mc_data *drvdata;
292 	const struct altr_sdram_prv_data *priv;
293 	struct regmap *mc_vbase;
294 	struct dimm_info *dimm;
295 	u32 read_reg;
296 	int irq, irq2, res = 0;
297 	unsigned long mem_size, irqflags = 0;
298 
299 	id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev);
300 	if (!id)
301 		return -ENODEV;
302 
303 	/* Grab the register range from the sdr controller in device tree */
304 	mc_vbase = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
305 						   "altr,sdr-syscon");
306 	if (IS_ERR(mc_vbase)) {
307 		edac_printk(KERN_ERR, EDAC_MC,
308 			    "regmap for altr,sdr-syscon lookup failed.\n");
309 		return -ENODEV;
310 	}
311 
312 	/* Check specific dependencies for the module */
313 	priv = of_match_node(altr_sdram_ctrl_of_match,
314 			     pdev->dev.of_node)->data;
315 
316 	/* Validate the SDRAM controller has ECC enabled */
317 	if (regmap_read(mc_vbase, priv->ecc_ctrl_offset, &read_reg) ||
318 	    ((read_reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)) {
319 		edac_printk(KERN_ERR, EDAC_MC,
320 			    "No ECC/ECC disabled [0x%08X]\n", read_reg);
321 		return -ENODEV;
322 	}
323 
324 	/* Grab memory size from device tree. */
325 	mem_size = get_total_mem();
326 	if (!mem_size) {
327 		edac_printk(KERN_ERR, EDAC_MC, "Unable to calculate memory size\n");
328 		return -ENODEV;
329 	}
330 
331 	/* Ensure the SDRAM Interrupt is disabled */
332 	if (regmap_update_bits(mc_vbase, priv->ecc_irq_en_offset,
333 			       priv->ecc_irq_en_mask, 0)) {
334 		edac_printk(KERN_ERR, EDAC_MC,
335 			    "Error disabling SDRAM ECC IRQ\n");
336 		return -ENODEV;
337 	}
338 
339 	/* Toggle to clear the SDRAM Error count */
340 	if (regmap_update_bits(mc_vbase, priv->ecc_cnt_rst_offset,
341 			       priv->ecc_cnt_rst_mask,
342 			       priv->ecc_cnt_rst_mask)) {
343 		edac_printk(KERN_ERR, EDAC_MC,
344 			    "Error clearing SDRAM ECC count\n");
345 		return -ENODEV;
346 	}
347 
348 	if (regmap_update_bits(mc_vbase, priv->ecc_cnt_rst_offset,
349 			       priv->ecc_cnt_rst_mask, 0)) {
350 		edac_printk(KERN_ERR, EDAC_MC,
351 			    "Error clearing SDRAM ECC count\n");
352 		return -ENODEV;
353 	}
354 
355 	irq = platform_get_irq(pdev, 0);
356 	if (irq < 0) {
357 		edac_printk(KERN_ERR, EDAC_MC,
358 			    "No irq %d in DT\n", irq);
359 		return -ENODEV;
360 	}
361 
362 	/* Arria10 has a 2nd IRQ */
363 	irq2 = platform_get_irq(pdev, 1);
364 
365 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
366 	layers[0].size = 1;
367 	layers[0].is_virt_csrow = true;
368 	layers[1].type = EDAC_MC_LAYER_CHANNEL;
369 	layers[1].size = 1;
370 	layers[1].is_virt_csrow = false;
371 	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
372 			    sizeof(struct altr_sdram_mc_data));
373 	if (!mci)
374 		return -ENOMEM;
375 
376 	mci->pdev = &pdev->dev;
377 	drvdata = mci->pvt_info;
378 	drvdata->mc_vbase = mc_vbase;
379 	drvdata->data = priv;
380 	platform_set_drvdata(pdev, mci);
381 
382 	if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
383 		edac_printk(KERN_ERR, EDAC_MC,
384 			    "Unable to get managed device resource\n");
385 		res = -ENOMEM;
386 		goto free;
387 	}
388 
389 	mci->mtype_cap = MEM_FLAG_DDR3;
390 	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
391 	mci->edac_cap = EDAC_FLAG_SECDED;
392 	mci->mod_name = EDAC_MOD_STR;
393 	mci->mod_ver = EDAC_VERSION;
394 	mci->ctl_name = dev_name(&pdev->dev);
395 	mci->scrub_mode = SCRUB_SW_SRC;
396 	mci->dev_name = dev_name(&pdev->dev);
397 
398 	dimm = *mci->dimms;
399 	dimm->nr_pages = ((mem_size - 1) >> PAGE_SHIFT) + 1;
400 	dimm->grain = 8;
401 	dimm->dtype = DEV_X8;
402 	dimm->mtype = MEM_DDR3;
403 	dimm->edac_mode = EDAC_SECDED;
404 
405 	res = edac_mc_add_mc(mci);
406 	if (res < 0)
407 		goto err;
408 
409 	/* Only the Arria10 has separate IRQs */
410 	if (irq2 > 0) {
411 		/* Arria10 specific initialization */
412 		res = a10_init(mc_vbase);
413 		if (res < 0)
414 			goto err2;
415 
416 		res = devm_request_irq(&pdev->dev, irq2,
417 				       altr_sdram_mc_err_handler,
418 				       IRQF_SHARED, dev_name(&pdev->dev), mci);
419 		if (res < 0) {
420 			edac_mc_printk(mci, KERN_ERR,
421 				       "Unable to request irq %d\n", irq2);
422 			res = -ENODEV;
423 			goto err2;
424 		}
425 
426 		res = a10_unmask_irq(pdev, A10_DDR0_IRQ_MASK);
427 		if (res < 0)
428 			goto err2;
429 
430 		irqflags = IRQF_SHARED;
431 	}
432 
433 	res = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler,
434 			       irqflags, dev_name(&pdev->dev), mci);
435 	if (res < 0) {
436 		edac_mc_printk(mci, KERN_ERR,
437 			       "Unable to request irq %d\n", irq);
438 		res = -ENODEV;
439 		goto err2;
440 	}
441 
442 	/* Infrastructure ready - enable the IRQ */
443 	if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset,
444 			       priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) {
445 		edac_mc_printk(mci, KERN_ERR,
446 			       "Error enabling SDRAM ECC IRQ\n");
447 		res = -ENODEV;
448 		goto err2;
449 	}
450 
451 	altr_sdr_mc_create_debugfs_nodes(mci);
452 
453 	devres_close_group(&pdev->dev, NULL);
454 
455 	return 0;
456 
457 err2:
458 	edac_mc_del_mc(&pdev->dev);
459 err:
460 	devres_release_group(&pdev->dev, NULL);
461 free:
462 	edac_mc_free(mci);
463 	edac_printk(KERN_ERR, EDAC_MC,
464 		    "EDAC Probe Failed; Error %d\n", res);
465 
466 	return res;
467 }
468 
469 static int altr_sdram_remove(struct platform_device *pdev)
470 {
471 	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
472 
473 	edac_mc_del_mc(&pdev->dev);
474 	edac_mc_free(mci);
475 	platform_set_drvdata(pdev, NULL);
476 
477 	return 0;
478 }
479 
/*
 * Suspend is not supported while EDAC is enabled. To allow suspend,
 * disable EDAC by removing it from the device tree or the defconfig.
 */
484 #ifdef CONFIG_PM
/*
 * PM .prepare callback: logs an error and returns -EPERM unconditionally.
 *
 * @dev: device being prepared for suspend (unused)
 */
static int altr_sdram_prepare(struct device *dev)
{
	pr_err("Suspend not allowed when EDAC is enabled.\n");

	return -EPERM;
}
491 
/* PM operations: only .prepare is provided, and it always fails. */
static const struct dev_pm_ops altr_sdram_pm_ops = {
	.prepare = altr_sdram_prepare,
};
495 #endif
496 
497 static struct platform_driver altr_sdram_edac_driver = {
498 	.probe = altr_sdram_probe,
499 	.remove = altr_sdram_remove,
500 	.driver = {
501 		.name = "altr_sdram_edac",
502 #ifdef CONFIG_PM
503 		.pm = &altr_sdram_pm_ops,
504 #endif
505 		.of_match_table = altr_sdram_ctrl_of_match,
506 	},
507 };
508 
/* Register altr_sdram_edac_driver as this module's platform driver. */
module_platform_driver(altr_sdram_edac_driver);

/* Module metadata */
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Thor Thayer");
MODULE_DESCRIPTION("EDAC Driver for Altera SDRAM Controller");
514