xref: /openbmc/linux/drivers/edac/al_mc_edac.c (revision e15a5365)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4  */
5 #include <linux/bitfield.h>
6 #include <linux/bitops.h>
7 #include <linux/edac.h>
8 #include <linux/of_irq.h>
9 #include <linux/platform_device.h>
10 #include <linux/spinlock.h>
11 #include "edac_module.h"
12 
/*
 * Register offsets, in bytes, added to the controller's mmio_base.
 * The CE_* registers describe the correctable error, the UE_* registers
 * the uncorrectable one.
 */
#define AL_MC_ECC_CFG		0x70
#define AL_MC_ECC_CLEAR		0x7c
#define AL_MC_ECC_ERR_COUNT	0x80
#define AL_MC_ECC_CE_ADDR0	0x84
#define AL_MC_ECC_CE_ADDR1	0x88
#define AL_MC_ECC_UE_ADDR0	0xa4
#define AL_MC_ECC_UE_ADDR1	0xa8
#define AL_MC_ECC_CE_SYND0	0x8c
#define AL_MC_ECC_CE_SYND1	0x90
#define AL_MC_ECC_CE_SYND2	0x94
#define AL_MC_ECC_UE_SYND0	0xac
#define AL_MC_ECC_UE_SYND1	0xb0
#define AL_MC_ECC_UE_SYND2	0xb4
27 
/* Register fields (bit masks within the registers listed above) */

/* AL_MC_ECC_CFG: when set, get_scrub_mode() reports SCRUB_NONE */
#define AL_MC_ECC_CFG_SCRUB_DISABLED	BIT(4)

/* AL_MC_ECC_CLEAR: bits written by the handlers after latching an error */
#define AL_MC_ECC_CLEAR_UE_COUNT	BIT(3)
#define AL_MC_ECC_CLEAR_CE_COUNT	BIT(2)
#define AL_MC_ECC_CLEAR_UE_ERR		BIT(1)
#define AL_MC_ECC_CLEAR_CE_ERR		BIT(0)

/* AL_MC_ECC_ERR_COUNT: UE count in the upper half, CE count in the lower */
#define AL_MC_ECC_ERR_COUNT_UE		GENMASK(31, 16)
#define AL_MC_ECC_ERR_COUNT_CE		GENMASK(15, 0)

/* AL_MC_ECC_CE_ADDR0: rank and row of the correctable error */
#define AL_MC_ECC_CE_ADDR0_RANK		GENMASK(25, 24)
#define AL_MC_ECC_CE_ADDR0_ROW		GENMASK(17, 0)

/* AL_MC_ECC_CE_ADDR1: bank group, bank and column of the correctable error */
#define AL_MC_ECC_CE_ADDR1_BG		GENMASK(25, 24)
#define AL_MC_ECC_CE_ADDR1_BANK		GENMASK(18, 16)
#define AL_MC_ECC_CE_ADDR1_COLUMN	GENMASK(11, 0)

/* AL_MC_ECC_UE_ADDR0: rank and row of the uncorrectable error */
#define AL_MC_ECC_UE_ADDR0_RANK		GENMASK(25, 24)
#define AL_MC_ECC_UE_ADDR0_ROW		GENMASK(17, 0)

/* AL_MC_ECC_UE_ADDR1: bank group, bank and column of the uncorrectable error */
#define AL_MC_ECC_UE_ADDR1_BG		GENMASK(25, 24)
#define AL_MC_ECC_UE_ADDR1_BANK		GENMASK(18, 16)
#define AL_MC_ECC_UE_ADDR1_COLUMN	GENMASK(11, 0)
52 
/* Platform driver name; also used as the EDAC module name (mci->mod_name) */
#define DRV_NAME "al_mc_edac"
/* Size, in bytes, of the on-stack buffer holding a formatted error message */
#define AL_MC_EDAC_MSG_MAX 256
55 
/*
 * Per-controller private data, stored in mci->pvt_info.
 *
 * mmio_base: mapped base of the controller registers
 * lock:      held around calls to edac_mc_handle_error()
 * irq_ue:    IRQ number for uncorrectable errors; a value <= 0 means the
 *            UE path is polled instead
 * irq_ce:    IRQ number for correctable errors; a value <= 0 means the
 *            CE path is polled instead
 */
struct al_mc_edac {
	void __iomem *mmio_base;
	spinlock_t lock;
	int irq_ce;
	int irq_ue;
};
62 
63 static void prepare_msg(char *message, size_t buffer_size,
64 			enum hw_event_mc_err_type type,
65 			u8 rank, u32 row, u8 bg, u8 bank, u16 column,
66 			u32 syn0, u32 syn1, u32 syn2)
67 {
68 	snprintf(message, buffer_size,
69 		 "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x",
70 		 type == HW_EVENT_ERR_UNCORRECTED ? "UE" : "CE",
71 		 rank, row, bg, bank, column, syn0, syn1, syn2);
72 }
73 
74 static int handle_ce(struct mem_ctl_info *mci)
75 {
76 	u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row;
77 	struct al_mc_edac *al_mc = mci->pvt_info;
78 	char msg[AL_MC_EDAC_MSG_MAX];
79 	u16 ce_count, column;
80 	unsigned long flags;
81 	u8 rank, bg, bank;
82 
83 	eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
84 	ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt);
85 	if (!ce_count)
86 		return 0;
87 
88 	ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0);
89 	ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1);
90 	ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0);
91 	ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1);
92 	ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2);
93 
94 	writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR,
95 		       al_mc->mmio_base + AL_MC_ECC_CLEAR);
96 
97 	dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
98 		ecccaddr0, ecccaddr1);
99 
100 	rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0);
101 	row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0);
102 
103 	bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1);
104 	bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1);
105 	column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1);
106 
107 	prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED,
108 		    rank, row, bg, bank, column,
109 		    ecccsyn0, ecccsyn1, ecccsyn2);
110 
111 	spin_lock_irqsave(&al_mc->lock, flags);
112 	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
113 			     ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
114 	spin_unlock_irqrestore(&al_mc->lock, flags);
115 
116 	return ce_count;
117 }
118 
119 static int handle_ue(struct mem_ctl_info *mci)
120 {
121 	u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row;
122 	struct al_mc_edac *al_mc = mci->pvt_info;
123 	char msg[AL_MC_EDAC_MSG_MAX];
124 	u16 ue_count, column;
125 	unsigned long flags;
126 	u8 rank, bg, bank;
127 
128 	eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
129 	ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt);
130 	if (!ue_count)
131 		return 0;
132 
133 	eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0);
134 	eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1);
135 	eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0);
136 	eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1);
137 	eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2);
138 
139 	writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR,
140 		       al_mc->mmio_base + AL_MC_ECC_CLEAR);
141 
142 	dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
143 		eccuaddr0, eccuaddr1);
144 
145 	rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0);
146 	row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0);
147 
148 	bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1);
149 	bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1);
150 	column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1);
151 
152 	prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED,
153 		    rank, row, bg, bank, column,
154 		    eccusyn0, eccusyn1, eccusyn2);
155 
156 	spin_lock_irqsave(&al_mc->lock, flags);
157 	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
158 			     ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
159 	spin_unlock_irqrestore(&al_mc->lock, flags);
160 
161 	return ue_count;
162 }
163 
164 static void al_mc_edac_check(struct mem_ctl_info *mci)
165 {
166 	struct al_mc_edac *al_mc = mci->pvt_info;
167 
168 	if (al_mc->irq_ue <= 0)
169 		handle_ue(mci);
170 
171 	if (al_mc->irq_ce <= 0)
172 		handle_ce(mci);
173 }
174 
175 static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info)
176 {
177 	struct platform_device *pdev = info;
178 	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
179 
180 	if (handle_ue(mci))
181 		return IRQ_HANDLED;
182 	return IRQ_NONE;
183 }
184 
185 static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info)
186 {
187 	struct platform_device *pdev = info;
188 	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
189 
190 	if (handle_ce(mci))
191 		return IRQ_HANDLED;
192 	return IRQ_NONE;
193 }
194 
195 static enum scrub_type get_scrub_mode(void __iomem *mmio_base)
196 {
197 	u32 ecccfg0;
198 
199 	ecccfg0 = readl(mmio_base + AL_MC_ECC_CFG);
200 
201 	if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0))
202 		return SCRUB_NONE;
203 	else
204 		return SCRUB_HW_SRC;
205 }
206 
/* devm action: free the mem_ctl_info that probe allocated with edac_mc_alloc(). */
static void devm_al_mc_edac_free(void *data)
{
	struct mem_ctl_info *mci = data;

	edac_mc_free(mci);
}
211 
/* devm action: remove the controller from the EDAC core; @data is the struct device. */
static void devm_al_mc_edac_del(void *data)
{
	struct device *dev = data;

	edac_mc_del_mc(dev);
}
216 
217 static int al_mc_edac_probe(struct platform_device *pdev)
218 {
219 	struct edac_mc_layer layers[1];
220 	struct mem_ctl_info *mci;
221 	struct al_mc_edac *al_mc;
222 	void __iomem *mmio_base;
223 	struct dimm_info *dimm;
224 	int ret;
225 
226 	mmio_base = devm_platform_ioremap_resource(pdev, 0);
227 	if (IS_ERR(mmio_base)) {
228 		dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
229 			PTR_ERR(mmio_base));
230 		return PTR_ERR(mmio_base);
231 	}
232 
233 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
234 	layers[0].size = 1;
235 	layers[0].is_virt_csrow = false;
236 	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
237 			    sizeof(struct al_mc_edac));
238 	if (!mci)
239 		return -ENOMEM;
240 
241 	ret = devm_add_action(&pdev->dev, devm_al_mc_edac_free, mci);
242 	if (ret) {
243 		edac_mc_free(mci);
244 		return ret;
245 	}
246 
247 	platform_set_drvdata(pdev, mci);
248 	al_mc = mci->pvt_info;
249 
250 	al_mc->mmio_base = mmio_base;
251 
252 	al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue");
253 	if (al_mc->irq_ue <= 0)
254 		dev_dbg(&pdev->dev,
255 			"no IRQ defined for UE - falling back to polling\n");
256 
257 	al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce");
258 	if (al_mc->irq_ce <= 0)
259 		dev_dbg(&pdev->dev,
260 			"no IRQ defined for CE - falling back to polling\n");
261 
262 	/*
263 	 * In case both interrupts (ue/ce) are to be found, use interrupt mode.
264 	 * In case none of the interrupt are foud, use polling mode.
265 	 * In case only one interrupt is found, use interrupt mode for it but
266 	 * keep polling mode enable for the other.
267 	 */
268 	if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) {
269 		edac_op_state = EDAC_OPSTATE_POLL;
270 		mci->edac_check = al_mc_edac_check;
271 	} else {
272 		edac_op_state = EDAC_OPSTATE_INT;
273 	}
274 
275 	spin_lock_init(&al_mc->lock);
276 
277 	mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
278 	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
279 	mci->edac_cap = EDAC_FLAG_SECDED;
280 	mci->mod_name = DRV_NAME;
281 	mci->ctl_name = "al_mc";
282 	mci->pdev = &pdev->dev;
283 	mci->scrub_mode = get_scrub_mode(mmio_base);
284 
285 	dimm = *mci->dimms;
286 	dimm->grain = 1;
287 
288 	ret = edac_mc_add_mc(mci);
289 	if (ret < 0) {
290 		dev_err(&pdev->dev,
291 			"fail to add memory controller device (%d)\n",
292 			ret);
293 		return ret;
294 	}
295 
296 	ret = devm_add_action(&pdev->dev, devm_al_mc_edac_del, &pdev->dev);
297 	if (ret) {
298 		edac_mc_del_mc(&pdev->dev);
299 		return ret;
300 	}
301 
302 	if (al_mc->irq_ue > 0) {
303 		ret = devm_request_irq(&pdev->dev,
304 				       al_mc->irq_ue,
305 				       al_mc_edac_irq_handler_ue,
306 				       IRQF_SHARED,
307 				       pdev->name,
308 				       pdev);
309 		if (ret != 0) {
310 			dev_err(&pdev->dev,
311 				"failed to request UE IRQ %d (%d)\n",
312 				al_mc->irq_ue, ret);
313 			return ret;
314 		}
315 	}
316 
317 	if (al_mc->irq_ce > 0) {
318 		ret = devm_request_irq(&pdev->dev,
319 				       al_mc->irq_ce,
320 				       al_mc_edac_irq_handler_ce,
321 				       IRQF_SHARED,
322 				       pdev->name,
323 				       pdev);
324 		if (ret != 0) {
325 			dev_err(&pdev->dev,
326 				"failed to request CE IRQ %d (%d)\n",
327 				al_mc->irq_ce, ret);
328 			return ret;
329 		}
330 	}
331 
332 	return 0;
333 }
334 
/* Device-tree match table: the driver binds to "amazon,al-mc-edac" nodes. */
static const struct of_device_id al_mc_edac_of_match[] = {
	{ .compatible = "amazon,al-mc-edac", },
	{},	/* sentinel */
};

MODULE_DEVICE_TABLE(of, al_mc_edac_of_match);
341 
/*
 * Platform driver definition. No .remove callback is set; probe registers
 * its teardown steps as devm actions instead.
 */
static struct platform_driver al_mc_edac_driver = {
	.probe = al_mc_edac_probe,
	.driver = {
		.name = DRV_NAME,
		.of_match_table = al_mc_edac_of_match,
	},
};

module_platform_driver(al_mc_edac_driver);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Talel Shenhar");
MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver");
355