xref: /openbmc/linux/drivers/edac/i3200_edac.c (revision a09d2831)
1 /*
2  * Intel 3200/3210 Memory Controller kernel module
3  * Copyright (C) 2008-2009 Akamai Technologies, Inc.
4  * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
5  *
6  * This file may be distributed under the terms of the
7  * GNU General Public License.
8  */
9 
10 #include <linux/module.h>
11 #include <linux/init.h>
12 #include <linux/pci.h>
13 #include <linux/pci_ids.h>
14 #include <linux/slab.h>
15 #include <linux/edac.h>
16 #include <linux/io.h>
17 #include "edac_core.h"
18 
/* Driver version string; reported to the EDAC core as mci->mod_ver. */
#define I3200_REVISION        "1.1"

/* Module name; used for mci->mod_name and as the PCI driver name. */
#define EDAC_MOD_STR        "i3200_edac"

/* PCI device ID matched by this driver (see i3200_pci_tbl). */
#define PCI_DEVICE_ID_INTEL_3200_HB    0x29f0

/* Number of csrows requested from edac_mc_alloc(). */
#define I3200_RANKS		8
/* Ranks per channel; also the number of DRB registers read per channel. */
#define I3200_RANKS_PER_CHANNEL	4
/* Maximum number of channels; sizes the DRB and ECC error log arrays. */
#define I3200_CHANNELS		2
28 
29 /* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */
30 
31 #define I3200_MCHBAR_LOW	0x48	/* MCH Memory Mapped Register BAR */
32 #define I3200_MCHBAR_HIGH	0x4c
33 #define I3200_MCHBAR_MASK	0xfffffc000ULL	/* bits 35:14 */
34 #define I3200_MMR_WINDOW_SIZE	16384
35 
36 #define I3200_TOM		0xa0	/* Top of Memory (16b)
37 		 *
38 		 * 15:10 reserved
39 		 *  9:0  total populated physical memory
40 		 */
41 #define I3200_TOM_MASK		0x3ff	/* bits 9:0 */
42 #define I3200_TOM_SHIFT		26	/* 64MiB grain */
43 
44 #define I3200_ERRSTS		0xc8	/* Error Status Register (16b)
45 		 *
46 		 * 15    reserved
47 		 * 14    Isochronous TBWRR Run Behind FIFO Full
48 		 *       (ITCV)
49 		 * 13    Isochronous TBWRR Run Behind FIFO Put
50 		 *       (ITSTV)
51 		 * 12    reserved
52 		 * 11    MCH Thermal Sensor Event
53 		 *       for SMI/SCI/SERR (GTSE)
54 		 * 10    reserved
55 		 *  9    LOCK to non-DRAM Memory Flag (LCKF)
56 		 *  8    reserved
57 		 *  7    DRAM Throttle Flag (DTF)
58 		 *  6:2  reserved
59 		 *  1    Multi-bit DRAM ECC Error Flag (DMERR)
60 		 *  0    Single-bit DRAM ECC Error Flag (DSERR)
61 		 */
62 #define I3200_ERRSTS_UE		0x0002
63 #define I3200_ERRSTS_CE		0x0001
64 #define I3200_ERRSTS_BITS	(I3200_ERRSTS_UE | I3200_ERRSTS_CE)
65 
66 
/* Intel 3200 MMIO register space - device 0 function 0 - MMR space */
68 
69 #define I3200_C0DRB	0x200	/* Channel 0 DRAM Rank Boundary (16b x 4)
70 		 *
71 		 * 15:10 reserved
72 		 *  9:0  Channel 0 DRAM Rank Boundary Address
73 		 */
74 #define I3200_C1DRB	0x600	/* Channel 1 DRAM Rank Boundary (16b x 4) */
75 #define I3200_DRB_MASK	0x3ff	/* bits 9:0 */
76 #define I3200_DRB_SHIFT	26	/* 64MiB grain */
77 
78 #define I3200_C0ECCERRLOG	0x280	/* Channel 0 ECC Error Log (64b)
79 		 *
80 		 * 63:48 Error Column Address (ERRCOL)
81 		 * 47:32 Error Row Address (ERRROW)
82 		 * 31:29 Error Bank Address (ERRBANK)
83 		 * 28:27 Error Rank Address (ERRRANK)
84 		 * 26:24 reserved
85 		 * 23:16 Error Syndrome (ERRSYND)
86 		 * 15: 2 reserved
87 		 *    1  Multiple Bit Error Status (MERRSTS)
88 		 *    0  Correctable Error Status (CERRSTS)
89 		 */
90 #define I3200_C1ECCERRLOG		0x680	/* Chan 1 ECC Error Log (64b) */
91 #define I3200_ECCERRLOG_CE		0x1
92 #define I3200_ECCERRLOG_UE		0x2
93 #define I3200_ECCERRLOG_RANK_BITS	0x18000000
94 #define I3200_ECCERRLOG_RANK_SHIFT	27
95 #define I3200_ECCERRLOG_SYNDROME_BITS	0xff0000
96 #define I3200_ECCERRLOG_SYNDROME_SHIFT	16
97 #define I3200_CAPID0			0xe0	/* P.95 of spec for details */
98 
/*
 * Per-controller private data, stored in mci->pvt_info.
 */
struct i3200_priv {
	/* ioremap'ed MCHBAR register window (see i3200_map_mchbar()). */
	void __iomem *window;
};

/*
 * Number of channels (1 or 2) as returned by how_many_channels(); set in
 * i3200_probe1() and consulted by the error collection/reporting code.
 */
static int nr_channels;
104 
105 static int how_many_channels(struct pci_dev *pdev)
106 {
107 	unsigned char capid0_8b; /* 8th byte of CAPID0 */
108 
109 	pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
110 	if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
111 		debugf0("In single channel mode.\n");
112 		return 1;
113 	} else {
114 		debugf0("In dual channel mode.\n");
115 		return 2;
116 	}
117 }
118 
119 static unsigned long eccerrlog_syndrome(u64 log)
120 {
121 	return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
122 		I3200_ECCERRLOG_SYNDROME_SHIFT;
123 }
124 
125 static int eccerrlog_row(int channel, u64 log)
126 {
127 	u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
128 		I3200_ECCERRLOG_RANK_SHIFT);
129 	return rank | (channel * I3200_RANKS_PER_CHANNEL);
130 }
131 
/*
 * Supported chip variants.  The value is stored as driver_data in
 * i3200_pci_tbl[] and used as an index into i3200_devs[].
 */
enum i3200_chips {
	I3200 = 0,
};
135 
/* Static per-chip information, one entry per enum i3200_chips value. */
struct i3200_dev_info {
	/* Controller name copied into mci->ctl_name by i3200_probe1(). */
	const char *ctl_name;
};
139 
/*
 * Snapshot of the error registers taken by
 * i3200_get_and_clear_error_info() and consumed by
 * i3200_process_error_info().
 */
struct i3200_error_info {
	/* ERRSTS as read before the ECC error logs. */
	u16 errsts;
	/* ERRSTS as read again after the ECC error logs. */
	u16 errsts2;
	/* Raw ECC error log per channel; only nr_channels entries are filled. */
	u64 eccerrlog[I3200_CHANNELS];
};
145 
/* Per-chip information table, indexed by enum i3200_chips. */
static const struct i3200_dev_info i3200_devs[] = {
	[I3200] = {
		.ctl_name = "i3200"
	},
};
151 
/*
 * The PCI device this module is bound to.  Set (with a reference taken)
 * by i3200_init_one(), or by i3200_init() via pci_get_device() when no
 * probe has set it during driver registration.
 */
static struct pci_dev *mci_pdev;
/*
 * Stays 1 when mci_pdev was already set once pci_register_driver()
 * returned.  Cleared by i3200_init() when it has to locate and
 * initialise the device itself, in which case i3200_exit() also removes
 * the device and drops the reference itself.
 */
static int i3200_registered = 1;
154 
155 
156 static void i3200_clear_error_info(struct mem_ctl_info *mci)
157 {
158 	struct pci_dev *pdev;
159 
160 	pdev = to_pci_dev(mci->dev);
161 
162 	/*
163 	 * Clear any error bits.
164 	 * (Yes, we really clear bits by writing 1 to them.)
165 	 */
166 	pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
167 		I3200_ERRSTS_BITS);
168 }
169 
170 static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
171 		struct i3200_error_info *info)
172 {
173 	struct pci_dev *pdev;
174 	struct i3200_priv *priv = mci->pvt_info;
175 	void __iomem *window = priv->window;
176 
177 	pdev = to_pci_dev(mci->dev);
178 
179 	/*
180 	 * This is a mess because there is no atomic way to read all the
181 	 * registers at once and the registers can transition from CE being
182 	 * overwritten by UE.
183 	 */
184 	pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
185 	if (!(info->errsts & I3200_ERRSTS_BITS))
186 		return;
187 
188 	info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
189 	if (nr_channels == 2)
190 		info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
191 
192 	pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
193 
194 	/*
195 	 * If the error is the same for both reads then the first set
196 	 * of reads is valid.  If there is a change then there is a CE
197 	 * with no info and the second set of reads is valid and
198 	 * should be UE info.
199 	 */
200 	if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
201 		info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
202 		if (nr_channels == 2)
203 			info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
204 	}
205 
206 	i3200_clear_error_info(mci);
207 }
208 
209 static void i3200_process_error_info(struct mem_ctl_info *mci,
210 		struct i3200_error_info *info)
211 {
212 	int channel;
213 	u64 log;
214 
215 	if (!(info->errsts & I3200_ERRSTS_BITS))
216 		return;
217 
218 	if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
219 		edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
220 		info->errsts = info->errsts2;
221 	}
222 
223 	for (channel = 0; channel < nr_channels; channel++) {
224 		log = info->eccerrlog[channel];
225 		if (log & I3200_ECCERRLOG_UE) {
226 			edac_mc_handle_ue(mci, 0, 0,
227 				eccerrlog_row(channel, log),
228 				"i3200 UE");
229 		} else if (log & I3200_ECCERRLOG_CE) {
230 			edac_mc_handle_ce(mci, 0, 0,
231 				eccerrlog_syndrome(log),
232 				eccerrlog_row(channel, log), 0,
233 				"i3200 CE");
234 		}
235 	}
236 }
237 
238 static void i3200_check(struct mem_ctl_info *mci)
239 {
240 	struct i3200_error_info info;
241 
242 	debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
243 	i3200_get_and_clear_error_info(mci, &info);
244 	i3200_process_error_info(mci, &info);
245 }
246 
247 
248 void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
249 {
250 	union {
251 		u64 mchbar;
252 		struct {
253 			u32 mchbar_low;
254 			u32 mchbar_high;
255 		};
256 	} u;
257 	void __iomem *window;
258 
259 	pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
260 	pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
261 	u.mchbar &= I3200_MCHBAR_MASK;
262 
263 	if (u.mchbar != (resource_size_t)u.mchbar) {
264 		printk(KERN_ERR
265 			"i3200: mmio space beyond accessible range (0x%llx)\n",
266 			(unsigned long long)u.mchbar);
267 		return NULL;
268 	}
269 
270 	window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
271 	if (!window)
272 		printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
273 			(unsigned long long)u.mchbar);
274 
275 	return window;
276 }
277 
278 
279 static void i3200_get_drbs(void __iomem *window,
280 	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
281 {
282 	int i;
283 
284 	for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
285 		drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
286 		drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
287 	}
288 }
289 
290 static bool i3200_is_stacked(struct pci_dev *pdev,
291 	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
292 {
293 	u16 tom;
294 
295 	pci_read_config_word(pdev, I3200_TOM, &tom);
296 	tom &= I3200_TOM_MASK;
297 
298 	return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
299 }
300 
301 static unsigned long drb_to_nr_pages(
302 	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
303 	int channel, int rank)
304 {
305 	int n;
306 
307 	n = drbs[channel][rank];
308 	if (rank > 0)
309 		n -= drbs[channel][rank - 1];
310 	if (stacked && (channel == 1) &&
311 	drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
312 		n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];
313 
314 	n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
315 	return n;
316 }
317 
318 static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
319 {
320 	int rc;
321 	int i;
322 	struct mem_ctl_info *mci = NULL;
323 	unsigned long last_page;
324 	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
325 	bool stacked;
326 	void __iomem *window;
327 	struct i3200_priv *priv;
328 
329 	debugf0("MC: %s()\n", __func__);
330 
331 	window = i3200_map_mchbar(pdev);
332 	if (!window)
333 		return -ENODEV;
334 
335 	i3200_get_drbs(window, drbs);
336 	nr_channels = how_many_channels(pdev);
337 
338 	mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS,
339 		nr_channels, 0);
340 	if (!mci)
341 		return -ENOMEM;
342 
343 	debugf3("MC: %s(): init mci\n", __func__);
344 
345 	mci->dev = &pdev->dev;
346 	mci->mtype_cap = MEM_FLAG_DDR2;
347 
348 	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
349 	mci->edac_cap = EDAC_FLAG_SECDED;
350 
351 	mci->mod_name = EDAC_MOD_STR;
352 	mci->mod_ver = I3200_REVISION;
353 	mci->ctl_name = i3200_devs[dev_idx].ctl_name;
354 	mci->dev_name = pci_name(pdev);
355 	mci->edac_check = i3200_check;
356 	mci->ctl_page_to_phys = NULL;
357 	priv = mci->pvt_info;
358 	priv->window = window;
359 
360 	stacked = i3200_is_stacked(pdev, drbs);
361 
362 	/*
363 	 * The dram rank boundary (DRB) reg values are boundary addresses
364 	 * for each DRAM rank with a granularity of 64MB.  DRB regs are
365 	 * cumulative; the last one will contain the total memory
366 	 * contained in all ranks.
367 	 */
368 	last_page = -1UL;
369 	for (i = 0; i < mci->nr_csrows; i++) {
370 		unsigned long nr_pages;
371 		struct csrow_info *csrow = &mci->csrows[i];
372 
373 		nr_pages = drb_to_nr_pages(drbs, stacked,
374 			i / I3200_RANKS_PER_CHANNEL,
375 			i % I3200_RANKS_PER_CHANNEL);
376 
377 		if (nr_pages == 0) {
378 			csrow->mtype = MEM_EMPTY;
379 			continue;
380 		}
381 
382 		csrow->first_page = last_page + 1;
383 		last_page += nr_pages;
384 		csrow->last_page = last_page;
385 		csrow->nr_pages = nr_pages;
386 
387 		csrow->grain = nr_pages << PAGE_SHIFT;
388 		csrow->mtype = MEM_DDR2;
389 		csrow->dtype = DEV_UNKNOWN;
390 		csrow->edac_mode = EDAC_UNKNOWN;
391 	}
392 
393 	i3200_clear_error_info(mci);
394 
395 	rc = -ENODEV;
396 	if (edac_mc_add_mc(mci)) {
397 		debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
398 		goto fail;
399 	}
400 
401 	/* get this far and it's successful */
402 	debugf3("MC: %s(): success\n", __func__);
403 	return 0;
404 
405 fail:
406 	iounmap(window);
407 	if (mci)
408 		edac_mc_free(mci);
409 
410 	return rc;
411 }
412 
413 static int __devinit i3200_init_one(struct pci_dev *pdev,
414 		const struct pci_device_id *ent)
415 {
416 	int rc;
417 
418 	debugf0("MC: %s()\n", __func__);
419 
420 	if (pci_enable_device(pdev) < 0)
421 		return -EIO;
422 
423 	rc = i3200_probe1(pdev, ent->driver_data);
424 	if (!mci_pdev)
425 		mci_pdev = pci_dev_get(pdev);
426 
427 	return rc;
428 }
429 
430 static void __devexit i3200_remove_one(struct pci_dev *pdev)
431 {
432 	struct mem_ctl_info *mci;
433 	struct i3200_priv *priv;
434 
435 	debugf0("%s()\n", __func__);
436 
437 	mci = edac_mc_del_mc(&pdev->dev);
438 	if (!mci)
439 		return;
440 
441 	priv = mci->pvt_info;
442 	iounmap(priv->window);
443 
444 	edac_mc_free(mci);
445 }
446 
/*
 * PCI IDs handled by this driver.  The last initializer of each entry
 * (here I3200) is the driver_data that i3200_init_one() passes on to
 * i3200_probe1() as the index into i3200_devs[].
 * NOTE(review): PCI_VEND_DEV() is defined outside this file; presumably
 * it expands to the Intel vendor ID and PCI_DEVICE_ID_INTEL_3200_HB -
 * confirm against edac_core.h.
 */
static const struct pci_device_id i3200_pci_tbl[] __devinitdata = {
	{
		PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
		I3200},
	{
		0,
	}            /* 0 terminated list. */
};
455 
456 MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
457 
/*
 * PCI driver description registered by i3200_init() and unregistered by
 * i3200_exit(): binds the IDs in i3200_pci_tbl to the probe and remove
 * entry points above.
 */
static struct pci_driver i3200_driver = {
	.name = EDAC_MOD_STR,
	.probe = i3200_init_one,
	.remove = __devexit_p(i3200_remove_one),
	.id_table = i3200_pci_tbl,
};
464 
465 static int __init i3200_init(void)
466 {
467 	int pci_rc;
468 
469 	debugf3("MC: %s()\n", __func__);
470 
471 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
472 	opstate_init();
473 
474 	pci_rc = pci_register_driver(&i3200_driver);
475 	if (pci_rc < 0)
476 		goto fail0;
477 
478 	if (!mci_pdev) {
479 		i3200_registered = 0;
480 		mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
481 				PCI_DEVICE_ID_INTEL_3200_HB, NULL);
482 		if (!mci_pdev) {
483 			debugf0("i3200 pci_get_device fail\n");
484 			pci_rc = -ENODEV;
485 			goto fail1;
486 		}
487 
488 		pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
489 		if (pci_rc < 0) {
490 			debugf0("i3200 init fail\n");
491 			pci_rc = -ENODEV;
492 			goto fail1;
493 		}
494 	}
495 
496 	return 0;
497 
498 fail1:
499 	pci_unregister_driver(&i3200_driver);
500 
501 fail0:
502 	if (mci_pdev)
503 		pci_dev_put(mci_pdev);
504 
505 	return pci_rc;
506 }
507 
508 static void __exit i3200_exit(void)
509 {
510 	debugf3("MC: %s()\n", __func__);
511 
512 	pci_unregister_driver(&i3200_driver);
513 	if (!i3200_registered) {
514 		i3200_remove_one(mci_pdev);
515 		pci_dev_put(mci_pdev);
516 	}
517 }
518 
/* Module load and unload hooks. */
module_init(i3200_init);
module_exit(i3200_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Akamai Technologies, Inc.");
MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");

/*
 * Error reporting mode of the EDAC core, exposed as a module parameter
 * with permissions 0444 (readable, not writable, once loaded).
 */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
528