xref: /openbmc/linux/drivers/edac/amd64_edac.c (revision cf028200)
1 #include "amd64_edac.h"
2 #include <asm/amd_nb.h>
3 
4 static struct edac_pci_ctl_info *amd64_ctl_pci;
5 
6 static int report_gart_errors;
7 module_param(report_gart_errors, int, 0644);
8 
9 /*
10  * Set by command line parameter. If BIOS has enabled the ECC, this override is
11  * cleared to prevent re-enabling the hardware by this driver.
12  */
13 static int ecc_enable_override;
14 module_param(ecc_enable_override, int, 0644);
15 
16 static struct msr __percpu *msrs;
17 
18 /*
19  * count successfully initialized driver instances for setup_pci_device()
20  */
21 static atomic_t drv_instances = ATOMIC_INIT(0);
22 
23 /* Per-node driver instances */
24 static struct mem_ctl_info **mcis;
25 static struct ecc_settings **ecc_stngs;
26 
27 /*
28  * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
29  * bandwidth to a valid bit pattern. The 'set' operation finds the matching or
30  * next-lower bandwidth value, so the programmed rate never exceeds the request.
31  *
32  * FIXME: Produce a better mapping/linearisation.
33  */
34 struct scrubrate {
35 	u32 scrubval;		/* bit pattern for scrub rate */
36 	u32 bandwidth;		/* bandwidth consumed (bytes/sec) */
37 } scrubrates[] = {
38 	{ 0x01, 1600000000UL},
39 	{ 0x02, 800000000UL},
40 	{ 0x03, 400000000UL},
41 	{ 0x04, 200000000UL},
42 	{ 0x05, 100000000UL},
43 	{ 0x06, 50000000UL},
44 	{ 0x07, 25000000UL},
45 	{ 0x08, 12284069UL},
46 	{ 0x09, 6274509UL},
47 	{ 0x0A, 3121951UL},
48 	{ 0x0B, 1560975UL},
49 	{ 0x0C, 781440UL},
50 	{ 0x0D, 390720UL},
51 	{ 0x0E, 195300UL},
52 	{ 0x0F, 97650UL},
53 	{ 0x10, 48854UL},
54 	{ 0x11, 24427UL},
55 	{ 0x12, 12213UL},
56 	{ 0x13, 6101UL},
57 	{ 0x14, 3051UL},
58 	{ 0x15, 1523UL},
59 	{ 0x16, 761UL},
60 	{ 0x00, 0UL},        /* scrubbing off */
61 };
62 
63 static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
64 				      u32 *val, const char *func)
65 {
66 	int err = 0;
67 
68 	err = pci_read_config_dword(pdev, offset, val);
69 	if (err)
70 		amd64_warn("%s: error reading F%dx%03x.\n",
71 			   func, PCI_FUNC(pdev->devfn), offset);
72 
73 	return err;
74 }
75 
76 int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
77 				u32 val, const char *func)
78 {
79 	int err = 0;
80 
81 	err = pci_write_config_dword(pdev, offset, val);
82 	if (err)
83 		amd64_warn("%s: error writing to F%dx%03x.\n",
84 			   func, PCI_FUNC(pdev->devfn), offset);
85 
86 	return err;
87 }
88 
89 /*
91  * Depending on the family, F2 DCT reads need special handling:
92  *
93  * K8: has a single DCT only
94  *
95  * F10h: each DCT has its own set of regs
96  *	DCT0 -> F2x040..
97  *	DCT1 -> F2x140..
98  *
99  * F15h: we select which DCT we access using F1x10C[DctCfgSel]
100  *
101  */
102 static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
103 			       const char *func)
104 {
105 	if (addr >= 0x100)
106 		return -EINVAL;
107 
108 	return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
109 }
110 
111 static int f10_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
112 				 const char *func)
113 {
114 	return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
115 }
116 
117 /*
118  * Select DCT to which PCI cfg accesses are routed
119  */
120 static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
121 {
122 	u32 reg = 0;
123 
124 	amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
125 	reg &= 0xfffffffe;
126 	reg |= dct;
127 	amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
128 }
129 
130 static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
131 				 const char *func)
132 {
133 	u8 dct  = 0;
134 
135 	if (addr >= 0x140 && addr <= 0x1a0) {
136 		dct   = 1;
137 		addr -= 0x100;
138 	}
139 
140 	f15h_select_dct(pvt, dct);
141 
142 	return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
143 }
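/*
 * Illustrative example (not in the original source): on F15h, a read of
 * "F2x140" through f15_read_dct_pci_cfg() selects DCT1 via F1x10C[DctCfgSel]
 * and then issues the read at offset 0x040, while a read of F2x040 selects
 * DCT0 and reads the same offset. The DCT1 register space is thus a window
 * into the selected DCT, not a separate set of offsets as on F10h.
 */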
144 
145 /*
146  * Memory scrubber control interface. For K8, memory scrubbing is handled by
147  * hardware and can involve L2 cache, dcache as well as the main memory. With
148  * F10, this is extended to L3 cache scrubbing on CPU models sporting that
149  * functionality.
150  *
151  * This causes the "units" for the scrubbing speed to vary between 64 byte
152  * blocks (dram) and cache lines. This is nasty, so we use bandwidth in
153  * bytes/sec as the setting instead.
154  *
155  * Currently, we only do dram scrubbing. If the scrubbing is done in software on
156  * other archs, we might not have access to the caches directly.
157  */
158 
159 /*
160  * Scan the scrub rate mapping table for a matching or close bandwidth value.
161  * If the requested rate is too big, use the last (maximum) value found.
162  */
163 static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate)
164 {
165 	u32 scrubval;
166 	int i;
167 
168 	/*
169 	 * map the configured rate (new_bw) to a value specific to the AMD64
170 	 * memory controller and apply to register. Search for the first
171 	 * bandwidth entry that is less than or equal to the requested setting
172 	 * and program that. If no suitable entry is found, the loop lands on
173 	 * the terminating element of scrubrates[] and DRAM scrubbing is
174 	 * turned off entirely.
175 	 */
177 	for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) {
178 		/*
179 		 * skip scrub rates which aren't recommended
180 		 * (see F10 BKDG, F3x58)
181 		 */
182 		if (scrubrates[i].scrubval < min_rate)
183 			continue;
184 
185 		if (scrubrates[i].bandwidth <= new_bw)
186 			break;
187 	}
188 
189 	scrubval = scrubrates[i].scrubval;
190 
191 	pci_write_bits32(ctl, SCRCTRL, scrubval, 0x001F);
192 
193 	if (scrubval)
194 		return scrubrates[i].bandwidth;
195 
196 	return 0;
197 }
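/*
 * Worked examples (illustrative, not from the BKDG), assuming F10h where
 * min_rate is 0x5:
 *
 *   new_bw = 100000000: scrubvals 0x01..0x04 are skipped as too fast, and
 *   {0x05, 100000000} is the first entry with bandwidth <= new_bw, so 0x05
 *   is programmed and 100000000 returned.
 *
 *   new_bw = 70000000: the first entry not exceeding it is {0x06, 50000000},
 *   i.e. the programmed rate never exceeds the request.
 *
 *   new_bw below 761: the loop falls through to the terminating {0x00, 0}
 *   entry and scrubbing is switched off.
 */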
198 
199 static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
200 {
201 	struct amd64_pvt *pvt = mci->pvt_info;
202 	u32 min_scrubrate = 0x5;
203 
204 	if (boot_cpu_data.x86 == 0xf)
205 		min_scrubrate = 0x0;
206 
207 	/* F15h Erratum #505 */
208 	if (boot_cpu_data.x86 == 0x15)
209 		f15h_select_dct(pvt, 0);
210 
211 	return __amd64_set_scrub_rate(pvt->F3, bw, min_scrubrate);
212 }
213 
214 static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
215 {
216 	struct amd64_pvt *pvt = mci->pvt_info;
217 	u32 scrubval = 0;
218 	int i, retval = -EINVAL;
219 
220 	/* F15h Erratum #505 */
221 	if (boot_cpu_data.x86 == 0x15)
222 		f15h_select_dct(pvt, 0);
223 
224 	amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
225 
226 	scrubval = scrubval & 0x001F;
227 
228 	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
229 		if (scrubrates[i].scrubval == scrubval) {
230 			retval = scrubrates[i].bandwidth;
231 			break;
232 		}
233 	}
234 	return retval;
235 }
236 
237 /*
238  * returns true if the SysAddr given by sys_addr matches the
239  * DRAM base/limit associated with node_id
240  */
241 static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr,
242 				   unsigned nid)
243 {
244 	u64 addr;
245 
246 	/* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
247 	 * all ones if the most significant implemented address bit is 1.
248 	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
249 	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
250 	 * Application Programming.
251 	 */
252 	addr = sys_addr & 0x000000ffffffffffull;
253 
254 	return ((addr >= get_dram_base(pvt, nid)) &&
255 		(addr <= get_dram_limit(pvt, nid)));
256 }
257 
258 /*
259  * Attempt to map a SysAddr to a node. On success, return a pointer to the
260  * mem_ctl_info structure for the node that the SysAddr maps to.
261  *
262  * On failure, return NULL.
263  */
264 static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
265 						u64 sys_addr)
266 {
267 	struct amd64_pvt *pvt;
268 	unsigned node_id;
269 	u32 intlv_en, bits;
270 
271 	/*
272 	 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
273 	 * 3.4.4.2) registers to map the SysAddr to a node ID.
274 	 */
275 	pvt = mci->pvt_info;
276 
277 	/*
278 	 * The value of this field should be the same for all DRAM Base
279 	 * registers.  Therefore we arbitrarily choose to read it from the
280 	 * register for node 0.
281 	 */
282 	intlv_en = dram_intlv_en(pvt, 0);
283 
284 	if (intlv_en == 0) {
285 		for (node_id = 0; node_id < DRAM_RANGES; node_id++) {
286 			if (amd64_base_limit_match(pvt, sys_addr, node_id))
287 				goto found;
288 		}
289 		goto err_no_match;
290 	}
291 
292 	if (unlikely((intlv_en != 0x01) &&
293 		     (intlv_en != 0x03) &&
294 		     (intlv_en != 0x07))) {
295 		amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
296 		return NULL;
297 	}
298 
299 	bits = (((u32) sys_addr) >> 12) & intlv_en;
300 
301 	for (node_id = 0; ; ) {
302 		if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits)
303 			break;	/* intlv_sel field matches */
304 
305 		if (++node_id >= DRAM_RANGES)
306 			goto err_no_match;
307 	}
308 
309 	/* sanity test for sys_addr */
310 	if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
311 		amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address "
312 			   "range for node %d with node interleaving enabled.\n",
313 			   __func__, sys_addr, node_id);
314 		return NULL;
315 	}
316 
317 found:
318 	return edac_mc_find((int)node_id);
319 
320 err_no_match:
321 	edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
322 		 (unsigned long)sys_addr);
323 
324 	return NULL;
325 }
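/*
 * Worked example (illustrative, not from the BKDG): with intlv_en == 0x03
 * (4-node interleave), bits = SysAddr[13:12]. A SysAddr of 0x2000 yields
 * bits == 0x2, so the node whose DRAM Limit[IntlvSel] field equals 2 claims
 * the address, provided the address also passes the base/limit check.
 */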
326 
327 /*
328  * compute the CS base address of the @csrow on the DRAM controller @dct.
329  * For details see F2x[5C:40] in the processor's BKDG
330  */
331 static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
332 				 u64 *base, u64 *mask)
333 {
334 	u64 csbase, csmask, base_bits, mask_bits;
335 	u8 addr_shift;
336 
337 	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
338 		csbase		= pvt->csels[dct].csbases[csrow];
339 		csmask		= pvt->csels[dct].csmasks[csrow];
340 		base_bits	= GENMASK(21, 31) | GENMASK(9, 15);
341 		mask_bits	= GENMASK(21, 29) | GENMASK(9, 15);
342 		addr_shift	= 4;
343 	} else {
344 		csbase		= pvt->csels[dct].csbases[csrow];
345 		csmask		= pvt->csels[dct].csmasks[csrow >> 1];
346 		addr_shift	= 8;
347 
348 		if (boot_cpu_data.x86 == 0x15)
349 			base_bits = mask_bits = GENMASK(19, 30) | GENMASK(5, 13);
350 		else
351 			base_bits = mask_bits = GENMASK(19, 28) | GENMASK(5, 13);
352 	}
353 
354 	*base  = (csbase & base_bits) << addr_shift;
355 
356 	*mask  = ~0ULL;
357 	/* poke holes for the csmask */
358 	*mask &= ~(mask_bits << addr_shift);
359 	/* OR them in */
360 	*mask |= (csmask & mask_bits) << addr_shift;
361 }
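/*
 * Illustrative note (not in the original source): after the inversion done
 * by the callers (mask = ~mask), the compare
 *
 *	(addr & mask) == (base & mask)
 *
 * checks exactly the address bits that lie inside the mask field and are
 * clear in the CS mask register; set CS mask bits mark "don't care" address
 * bits (rank interleaving, row size).
 */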
362 
363 #define for_each_chip_select(i, dct, pvt) \
364 	for (i = 0; i < pvt->csels[dct].b_cnt; i++)
365 
366 #define chip_select_base(i, dct, pvt) \
367 	pvt->csels[dct].csbases[i]
368 
369 #define for_each_chip_select_mask(i, dct, pvt) \
370 	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
371 
372 /*
373  * @input_addr is an InputAddr associated with the node given by mci. Return the
374  * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
375  */
376 static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
377 {
378 	struct amd64_pvt *pvt;
379 	int csrow;
380 	u64 base, mask;
381 
382 	pvt = mci->pvt_info;
383 
384 	for_each_chip_select(csrow, 0, pvt) {
385 		if (!csrow_enabled(csrow, 0, pvt))
386 			continue;
387 
388 		get_cs_base_and_mask(pvt, csrow, 0, &base, &mask);
389 
390 		mask = ~mask;
391 
392 		if ((input_addr & mask) == (base & mask)) {
393 			edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n",
394 				 (unsigned long)input_addr, csrow,
395 				 pvt->mc_node_id);
396 
397 			return csrow;
398 		}
399 	}
400 	edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n",
401 		 (unsigned long)input_addr, pvt->mc_node_id);
402 
403 	return -1;
404 }
405 
406 /*
407  * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
408  * for the node represented by mci. Info is passed back in *hole_base,
409  * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
410  * info is invalid. Info may be invalid for either of the following reasons:
411  *
412  * - The revision of the node is not E or greater.  In this case, the DRAM Hole
413  *   Address Register does not exist.
414  *
415  * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
416  *   indicating that its contents are not valid.
417  *
418  * The values passed back in *hole_base, *hole_offset, and *hole_size are
419  * complete 32-bit values despite the fact that the bitfields in the DHAR
420  * only represent bits 31-24 of the base and offset values.
421  */
422 int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
423 			     u64 *hole_offset, u64 *hole_size)
424 {
425 	struct amd64_pvt *pvt = mci->pvt_info;
426 	u64 base;
427 
428 	/* only revE and later have the DRAM Hole Address Register */
429 	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) {
430 		edac_dbg(1, "  revision %d for node %d does not support DHAR\n",
431 			 pvt->ext_model, pvt->mc_node_id);
432 		return 1;
433 	}
434 
435 	/* valid for Fam10h and above */
436 	if (boot_cpu_data.x86 >= 0x10 && !dhar_mem_hoist_valid(pvt)) {
437 		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this system\n");
438 		return 1;
439 	}
440 
441 	if (!dhar_valid(pvt)) {
442 		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this node %d\n",
443 			 pvt->mc_node_id);
444 		return 1;
445 	}
446 
447 	/* This node has Memory Hoisting */
448 
449 	/* +------------------+--------------------+--------------------+-----
450 	 * | memory           | DRAM hole          | relocated          |
451 	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
452 	 * |                  |                    | DRAM hole          |
453 	 * |                  |                    | [0x100000000,      |
454 	 * |                  |                    |  (0x100000000+     |
455 	 * |                  |                    |   (0xffffffff-x))] |
456 	 * +------------------+--------------------+--------------------+-----
457 	 *
458 	 * Above is a diagram of physical memory showing the DRAM hole and the
459 	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
460 	 * starts at address x (the base address) and extends through address
461 	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
462 	 * addresses in the hole so that they start at 0x100000000.
463 	 */
464 
465 	base = dhar_base(pvt);
466 
467 	*hole_base = base;
468 	*hole_size = (0x1ull << 32) - base;
469 
470 	if (boot_cpu_data.x86 > 0xf)
471 		*hole_offset = f10_dhar_offset(pvt);
472 	else
473 		*hole_offset = k8_dhar_offset(pvt);
474 
475 	edac_dbg(1, "  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
476 		 pvt->mc_node_id, (unsigned long)*hole_base,
477 		 (unsigned long)*hole_offset, (unsigned long)*hole_size);
478 
479 	return 0;
480 }
481 EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
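/*
 * Worked example (illustrative): with DHAR base 0xc0000000, the returned
 * values are hole_base = 0xc0000000 and hole_size = 0x100000000 - 0xc0000000
 * = 0x40000000, i.e. the 1GB of DRAM shadowed by the MMIO hole is re-exposed
 * at SysAddrs [0x100000000, 0x140000000).
 */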
482 
483 /*
484  * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
485  * assumed that sys_addr maps to the node given by mci.
486  *
487  * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
488  * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
489  * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
490  * then it is also involved in translating a SysAddr to a DramAddr. Sections
491  * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
492  * These parts of the documentation are unclear. I interpret them as follows:
493  *
494  * When node n receives a SysAddr, it processes the SysAddr as follows:
495  *
496  * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
497  *    Limit registers for node n. If the SysAddr is not within the range
498  *    specified by the base and limit values, then node n ignores the Sysaddr
499  *    (since it does not map to node n). Otherwise continue to step 2 below.
500  *
501  * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
502  *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
503  *    the range of relocated addresses (starting at 0x100000000) from the DRAM
504  *    hole. If not, skip to step 3 below. Else get the value of the
505  *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
506  *    offset defined by this value from the SysAddr.
507  *
508  * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
509  *    Base register for node n. To obtain the DramAddr, subtract the base
510  *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
511  */
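/*
 * Worked example of the steps above (illustrative): assume node 0 with
 * DRAMBase 0 and a DRAM hole at 0xc0000000, so hole_size = hole_offset =
 * 0x40000000. A SysAddr of 0x120000000 lies within the relocated range
 * [0x100000000, 0x140000000), so step 2 applies:
 *
 *	DramAddr = 0x120000000 - 0x40000000 = 0xe0000000
 *
 * A SysAddr of 0x80000000 is below the hole, so step 3 applies and
 * DramAddr = SysAddr - DRAMBase = 0x80000000.
 */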
512 static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
513 {
514 	struct amd64_pvt *pvt = mci->pvt_info;
515 	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
516 	int ret = 0;
517 
518 	dram_base = get_dram_base(pvt, pvt->mc_node_id);
519 
520 	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
521 				      &hole_size);
522 	if (!ret) {
523 		if ((sys_addr >= (1ull << 32)) &&
524 		    (sys_addr < ((1ull << 32) + hole_size))) {
525 			/* use DHAR to translate SysAddr to DramAddr */
526 			dram_addr = sys_addr - hole_offset;
527 
528 			edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
529 				 (unsigned long)sys_addr,
530 				 (unsigned long)dram_addr);
531 
532 			return dram_addr;
533 		}
534 	}
535 
536 	/*
537 	 * Translate the SysAddr to a DramAddr as shown near the start of
538 	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
539 	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
540 	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
541 	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
542 	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
543 	 * Programmer's Manual Volume 1 Application Programming.
544 	 */
545 	dram_addr = (sys_addr & GENMASK(0, 39)) - dram_base;
546 
547 	edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
548 		 (unsigned long)sys_addr, (unsigned long)dram_addr);
549 	return dram_addr;
550 }
551 
552 /*
553  * @intlv_en is the value of the IntlvEn field from a DRAM Base register
554  * (section 3.4.4.1).  Return the number of bits from a SysAddr that are used
555  * for node interleaving.
556  */
557 static int num_node_interleave_bits(unsigned intlv_en)
558 {
559 	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
560 	int n;
561 
562 	BUG_ON(intlv_en > 7);
563 	n = intlv_shift_table[intlv_en];
564 	return n;
565 }
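/*
 * Illustrative note (not in the original source): IntlvEn is a bit mask, so
 * the only valid non-zero encodings are 1, 3 and 7, selecting 2-, 4- and
 * 8-node interleaving via 1, 2 and 3 SysAddr bits respectively. The zero
 * entries in the table above correspond to encodings that are rejected as
 * junk in find_mc_by_sys_addr().
 */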
566 
567 /* Translate the DramAddr given by @dram_addr to an InputAddr. */
568 static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
569 {
570 	struct amd64_pvt *pvt;
571 	int intlv_shift;
572 	u64 input_addr;
573 
574 	pvt = mci->pvt_info;
575 
576 	/*
577 	 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
578 	 * concerning translating a DramAddr to an InputAddr.
579 	 */
580 	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
581 	input_addr = ((dram_addr >> intlv_shift) & GENMASK(12, 35)) +
582 		      (dram_addr & 0xfff);
583 
584 	edac_dbg(2, "  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
585 		 intlv_shift, (unsigned long)dram_addr,
586 		 (unsigned long)input_addr);
587 
588 	return input_addr;
589 }
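/*
 * Worked example (illustrative): with two-node interleaving (intlv_shift
 * == 1), DramAddr bit 12 is the node selector and is squeezed out:
 * DramAddr 0x5000 (bits 14 and 12 set) becomes InputAddr 0x2000, since
 * bit 14 shifts down to bit 13 and the low 0xfff page offset is kept as-is.
 */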
590 
591 /*
592  * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
593  * assumed that @sys_addr maps to the node given by mci.
594  */
595 static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
596 {
597 	u64 input_addr;
598 
599 	input_addr =
600 	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
601 
602 	edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n",
603 		 (unsigned long)sys_addr, (unsigned long)input_addr);
604 
605 	return input_addr;
606 }
607 
608 
609 /*
610  * @input_addr is an InputAddr associated with the node represented by mci.
611  * Translate @input_addr to a DramAddr and return the result.
612  */
613 static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
614 {
615 	struct amd64_pvt *pvt;
616 	unsigned node_id, intlv_shift;
617 	u64 bits, dram_addr;
618 	u32 intlv_sel;
619 
620 	/*
621 	 * Near the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
622 	 * shows how to translate a DramAddr to an InputAddr. Here we reverse
623 	 * this procedure. When translating from a DramAddr to an InputAddr, the
624 	 * bits used for node interleaving are discarded.  Here we recover these
625 	 * bits from the IntlvSel field of the DRAM Limit register (section
626 	 * 3.4.4.2) for the node that input_addr is associated with.
627 	 */
628 	pvt = mci->pvt_info;
629 	node_id = pvt->mc_node_id;
630 
631 	BUG_ON(node_id > 7);
632 
633 	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
634 	if (intlv_shift == 0) {
635 		edac_dbg(1, "    InputAddr 0x%lx translates to DramAddr of same value\n",
636 			 (unsigned long)input_addr);
637 
638 		return input_addr;
639 	}
640 
641 	bits = ((input_addr & GENMASK(12, 35)) << intlv_shift) +
642 		(input_addr & 0xfff);
643 
644 	intlv_sel = dram_intlv_sel(pvt, node_id) & ((1 << intlv_shift) - 1);
645 	dram_addr = bits + (intlv_sel << 12);
646 
647 	edac_dbg(1, "InputAddr 0x%lx translates to DramAddr 0x%lx (%d node interleave bits)\n",
648 		 (unsigned long)input_addr,
649 		 (unsigned long)dram_addr, intlv_shift);
650 
651 	return dram_addr;
652 }
653 
654 /*
655  * @dram_addr is a DramAddr that maps to the node represented by mci. Convert
656  * @dram_addr to a SysAddr.
657  */
658 static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr)
659 {
660 	struct amd64_pvt *pvt = mci->pvt_info;
661 	u64 hole_base, hole_offset, hole_size, base, sys_addr;
662 	int ret = 0;
663 
664 	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
665 				      &hole_size);
666 	if (!ret) {
667 		if ((dram_addr >= hole_base) &&
668 		    (dram_addr < (hole_base + hole_size))) {
669 			sys_addr = dram_addr + hole_offset;
670 
671 			edac_dbg(1, "using DHAR to translate DramAddr 0x%lx to SysAddr 0x%lx\n",
672 				 (unsigned long)dram_addr,
673 				 (unsigned long)sys_addr);
674 
675 			return sys_addr;
676 		}
677 	}
678 
679 	base     = get_dram_base(pvt, pvt->mc_node_id);
680 	sys_addr = dram_addr + base;
681 
682 	/*
683 	 * The sys_addr we have computed up to this point is a 40-bit value
684 	 * because the k8 deals with 40-bit values.  However, the value we are
685 	 * supposed to return is a full 64-bit physical address.  The AMD
686 	 * x86-64 architecture specifies that the most significant implemented
687 	 * address bit through bit 63 of a physical address must be either all
688 	 * 0s or all 1s.  Therefore we sign-extend the 40-bit sys_addr to a
689 	 * 64-bit value below.  See section 3.4.2 of AMD publication 24592:
690 	 * AMD x86-64 Architecture Programmer's Manual Volume 1 Application
691 	 * Programming.
692 	 */
693 	sys_addr |= ~((sys_addr & (1ull << 39)) - 1);
694 
695 	edac_dbg(1, "    Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n",
696 		 pvt->mc_node_id, (unsigned long)dram_addr,
697 		 (unsigned long)sys_addr);
698 
699 	return sys_addr;
700 }
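/*
 * Minimal sketch (illustrative, not part of the original driver) of the
 * sign extension above: bit 39 of the 40-bit sys_addr is replicated into
 * bits 63:40, as required by the AMD64 canonical-address rules.
 */
static u64 __maybe_unused example_sign_extend40(u64 a)
{
	/* bit 39 set:   ~((1ull << 39) - 1) sets bits 63:39	*/
	/* bit 39 clear: ~(0 - 1) == 0, address unchanged	*/
	return a | ~((a & (1ull << 39)) - 1);
}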
701 
702 /*
703  * @input_addr is an InputAddr associated with the node given by mci. Translate
704  * @input_addr to a SysAddr.
705  */
706 static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
707 					 u64 input_addr)
708 {
709 	return dram_addr_to_sys_addr(mci,
710 				     input_addr_to_dram_addr(mci, input_addr));
711 }
712 
713 /* Map the Error address to a PAGE and PAGE OFFSET. */
714 static inline void error_address_to_page_and_offset(u64 error_address,
715 						    u32 *page, u32 *offset)
716 {
717 	*page = (u32) (error_address >> PAGE_SHIFT);
718 	*offset = ((u32) error_address) & ~PAGE_MASK;
719 }
720 
721 /*
722  * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
723  * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
724  * of a node that detected an ECC memory error.  mci represents the node that
725  * the error address maps to (possibly different from the node that detected
726  * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
727  * error.
728  */
729 static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
730 {
731 	int csrow;
732 
733 	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
734 
735 	if (csrow == -1)
736 		amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
737 				  "address 0x%lx\n", (unsigned long)sys_addr);
738 	return csrow;
739 }
740 
741 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
742 
743 /*
744  * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
745  * are ECC capable.
746  */
747 static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt)
748 {
749 	u8 bit;
750 	unsigned long edac_cap = EDAC_FLAG_NONE;
751 
752 	bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F)
753 		? 19
754 		: 17;
755 
756 	if (pvt->dclr0 & BIT(bit))
757 		edac_cap = EDAC_FLAG_SECDED;
758 
759 	return edac_cap;
760 }
761 
762 static void amd64_debug_display_dimm_sizes(struct amd64_pvt *, u8);
763 
764 static void amd64_dump_dramcfg_low(u32 dclr, int chan)
765 {
766 	edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);
767 
768 	edac_dbg(1, "  DIMM type: %sbuffered; all DIMMs support ECC: %s\n",
769 		 (dclr & BIT(16)) ?  "un" : "",
770 		 (dclr & BIT(19)) ? "yes" : "no");
771 
772 	edac_dbg(1, "  PAR/ERR parity: %s\n",
773 		 (dclr & BIT(8)) ?  "enabled" : "disabled");
774 
775 	if (boot_cpu_data.x86 == 0x10)
776 		edac_dbg(1, "  DCT 128bit mode width: %s\n",
777 			 (dclr & BIT(11)) ?  "128b" : "64b");
778 
779 	edac_dbg(1, "  x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
780 		 (dclr & BIT(12)) ?  "yes" : "no",
781 		 (dclr & BIT(13)) ?  "yes" : "no",
782 		 (dclr & BIT(14)) ?  "yes" : "no",
783 		 (dclr & BIT(15)) ?  "yes" : "no");
784 }
785 
786 /* Display and decode various NB registers for debug purposes. */
787 static void dump_misc_regs(struct amd64_pvt *pvt)
788 {
789 	edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
790 
791 	edac_dbg(1, "  NB two channel DRAM capable: %s\n",
792 		 (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");
793 
794 	edac_dbg(1, "  ECC capable: %s, ChipKill ECC capable: %s\n",
795 		 (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
796 		 (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");
797 
798 	amd64_dump_dramcfg_low(pvt->dclr0, 0);
799 
800 	edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);
801 
802 	edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n",
803 		 pvt->dhar, dhar_base(pvt),
804 		 (boot_cpu_data.x86 == 0xf) ? k8_dhar_offset(pvt)
805 		 : f10_dhar_offset(pvt));
806 
807 	edac_dbg(1, "  DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
808 
809 	amd64_debug_display_dimm_sizes(pvt, 0);
810 
811 	/* everything below this point is Fam10h and above */
812 	if (boot_cpu_data.x86 == 0xf)
813 		return;
814 
815 	amd64_debug_display_dimm_sizes(pvt, 1);
816 
817 	amd64_info("using %s syndromes.\n", ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
818 
819 	/* Only if NOT ganged does dclr1 have valid info */
820 	if (!dct_ganging_enabled(pvt))
821 		amd64_dump_dramcfg_low(pvt->dclr1, 1);
822 }
823 
824 /*
825  * see BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
826  */
827 static void prep_chip_selects(struct amd64_pvt *pvt)
828 {
829 	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
830 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
831 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
832 	} else {
833 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
834 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
835 	}
836 }
837 
838 /*
839  * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
840  */
841 static void read_dct_base_mask(struct amd64_pvt *pvt)
842 {
843 	int cs;
844 
845 	prep_chip_selects(pvt);
846 
847 	for_each_chip_select(cs, 0, pvt) {
848 		int reg0   = DCSB0 + (cs * 4);
849 		int reg1   = DCSB1 + (cs * 4);
850 		u32 *base0 = &pvt->csels[0].csbases[cs];
851 		u32 *base1 = &pvt->csels[1].csbases[cs];
852 
853 		if (!amd64_read_dct_pci_cfg(pvt, reg0, base0))
854 			edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
855 				 cs, *base0, reg0);
856 
857 		if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt))
858 			continue;
859 
860 		if (!amd64_read_dct_pci_cfg(pvt, reg1, base1))
861 			edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
862 				 cs, *base1, reg1);
863 	}
864 
865 	for_each_chip_select_mask(cs, 0, pvt) {
866 		int reg0   = DCSM0 + (cs * 4);
867 		int reg1   = DCSM1 + (cs * 4);
868 		u32 *mask0 = &pvt->csels[0].csmasks[cs];
869 		u32 *mask1 = &pvt->csels[1].csmasks[cs];
870 
871 		if (!amd64_read_dct_pci_cfg(pvt, reg0, mask0))
872 			edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
873 				 cs, *mask0, reg0);
874 
875 		if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt))
876 			continue;
877 
878 		if (!amd64_read_dct_pci_cfg(pvt, reg1, mask1))
879 			edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
880 				 cs, *mask1, reg1);
881 	}
882 }
883 
884 static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt, int cs)
885 {
886 	enum mem_type type;
887 
888 	/* F15h supports only DDR3 */
889 	if (boot_cpu_data.x86 >= 0x15)
890 		type = (pvt->dclr0 & BIT(16)) ?	MEM_DDR3 : MEM_RDDR3;
891 	else if (boot_cpu_data.x86 == 0x10 || pvt->ext_model >= K8_REV_F) {
892 		if (pvt->dchr0 & DDR3_MODE)
893 			type = (pvt->dclr0 & BIT(16)) ?	MEM_DDR3 : MEM_RDDR3;
894 		else
895 			type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
896 	} else {
897 		type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
898 	}
899 
900 	amd64_info("CS%d: %s\n", cs, edac_mem_types[type]);
901 
902 	return type;
903 }
904 
905 /* Get the number of DCT channels the memory controller is using. */
906 static int k8_early_channel_count(struct amd64_pvt *pvt)
907 {
908 	int flag;
909 
910 	if (pvt->ext_model >= K8_REV_F)
911 		/* RevF (NPT) and later */
912 		flag = pvt->dclr0 & WIDTH_128;
913 	else
914 		/* RevE and earlier */
915 		flag = pvt->dclr0 & REVE_WIDTH_128;
916 
917 	/* not used */
918 	pvt->dclr1 = 0;
919 
920 	return (flag) ? 2 : 1;
921 }
922 
923 /* On F10h and later ErrAddr is MC4_ADDR[47:1] */
924 static u64 get_error_address(struct mce *m)
925 {
926 	struct cpuinfo_x86 *c = &boot_cpu_data;
927 	u64 addr;
928 	u8 start_bit = 1;
929 	u8 end_bit   = 47;
930 
931 	if (c->x86 == 0xf) {
932 		start_bit = 3;
933 		end_bit   = 39;
934 	}
935 
936 	addr = m->addr & GENMASK(start_bit, end_bit);
937 
938 	/*
939 	 * Erratum 637 workaround
940 	 */
941 	if (c->x86 == 0x15) {
942 		struct amd64_pvt *pvt;
943 		u64 cc6_base, tmp_addr;
944 		u32 tmp;
945 		u8 mce_nid, intlv_en;
946 
947 		if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
948 			return addr;
949 
950 		mce_nid	= amd_get_nb_id(m->extcpu);
951 		pvt	= mcis[mce_nid]->pvt_info;
952 
953 		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
954 		intlv_en = tmp >> 21 & 0x7;
955 
956 		/* add [47:27] + 3 trailing bits */
957 		cc6_base  = (tmp & GENMASK(0, 20)) << 3;
958 
959 		/* reverse and add DramIntlvEn */
960 		cc6_base |= intlv_en ^ 0x7;
961 
962 		/* pin at [47:24] */
963 		cc6_base <<= 24;
964 
965 		if (!intlv_en)
966 			return cc6_base | (addr & GENMASK(0, 23));
967 
968 		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);
969 
970 							/* faster log2 */
971 		tmp_addr  = (addr & GENMASK(12, 23)) << __fls(intlv_en + 1);
972 
973 		/* OR DramIntlvSel into bits [14:12] */
974 		tmp_addr |= (tmp & GENMASK(21, 23)) >> 9;
975 
976 		/* add remaining [11:0] bits from original MC4_ADDR */
977 		tmp_addr |= addr & GENMASK(0, 11);
978 
979 		return cc6_base | tmp_addr;
980 	}
981 
982 	return addr;
983 }
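/*
 * Illustrative note (not in the original source): the 0x00fdf7 test above
 * recognizes MC4_ADDR values whose bits [47:24] point into the fixed CC6
 * save area; for those, the real DRAM address is reconstructed from the
 * local node base/limit registers (D18F1x120/124) and the DRAM interleave
 * settings, per the erratum 637 workaround.
 */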
984 
985 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
986 {
987 	struct cpuinfo_x86 *c = &boot_cpu_data;
988 	int off = range << 3;
989 
990 	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off,  &pvt->ranges[range].base.lo);
991 	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo);
992 
993 	if (c->x86 == 0xf)
994 		return;
995 
996 	if (!dram_rw(pvt, range))
997 		return;
998 
999 	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off,  &pvt->ranges[range].base.hi);
1000 	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi);
1001 
1002 	/* Factor in CC6 save area by reading dst node's limit reg */
1003 	if (c->x86 == 0x15) {
1004 		struct pci_dev *f1 = NULL;
1005 		u8 nid = dram_dst_node(pvt, range);
1006 		u32 llim;
1007 
1008 		f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1));
1009 		if (WARN_ON(!f1))
1010 			return;
1011 
1012 		amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
1013 
1014 		pvt->ranges[range].lim.lo &= GENMASK(0, 15);
1015 
1016 					    /* {[39:27],111b} */
1017 		pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
1018 
1019 		pvt->ranges[range].lim.hi &= GENMASK(0, 7);
1020 
1021 					    /* [47:40] */
1022 		pvt->ranges[range].lim.hi |= llim >> 13;
1023 
1024 		pci_dev_put(f1);
1025 	}
1026 }
1027 
1028 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
1029 				    u16 syndrome)
1030 {
1031 	struct mem_ctl_info *src_mci;
1032 	struct amd64_pvt *pvt = mci->pvt_info;
1033 	int channel, csrow;
1034 	u32 page, offset;
1035 
1036 	error_address_to_page_and_offset(sys_addr, &page, &offset);
1037 
1038 	/*
1039 	 * Find out which node the error address belongs to. This may be
1040 	 * different from the node that detected the error.
1041 	 */
1042 	src_mci = find_mc_by_sys_addr(mci, sys_addr);
1043 	if (!src_mci) {
1044 		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
1045 			     (unsigned long)sys_addr);
1046 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
1047 				     page, offset, syndrome,
1048 				     -1, -1, -1,
1049 				     "failed to map error addr to a node",
1050 				     "");
1051 		return;
1052 	}
1053 
1054 	/* Now map the sys_addr to a CSROW */
1055 	csrow = sys_addr_to_csrow(src_mci, sys_addr);
1056 	if (csrow < 0) {
1057 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
1058 				     page, offset, syndrome,
1059 				     -1, -1, -1,
1060 				     "failed to map error addr to a csrow",
1061 				     "");
1062 		return;
1063 	}
1064 
1065 	/* CHIPKILL enabled */
1066 	if (pvt->nbcfg & NBCFG_CHIPKILL) {
1067 		channel = get_channel_from_ecc_syndrome(mci, syndrome);
1068 		if (channel < 0) {
1069 			/*
1070 			 * Syndrome didn't map, so we don't know which of the
1071 			 * 2 DIMMs is in error. So we need to ID 'both' of them
1072 			 * as suspect.
1073 			 */
1074 			amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
1075 				      "possible error reporting race\n",
1076 				      syndrome);
1077 			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
1078 					     page, offset, syndrome,
1079 					     csrow, -1, -1,
1080 					     "unknown syndrome - possible error reporting race",
1081 					     "");
1082 			return;
1083 		}
1084 	} else {
1085 		/*
1086 		 * non-chipkill ecc mode
1087 		 *
1088 		 * The k8 documentation is unclear about how to determine the
1089 		 * channel number when using non-chipkill memory.  This method
1090 		 * was obtained from email communication with someone at AMD.
1091 		 * (Wish the email was placed in this comment - norsk)
1092 		 */
1093 		channel = ((sys_addr & BIT(3)) != 0);
1094 	}
1095 
1096 	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1,
1097 			     page, offset, syndrome,
1098 			     csrow, channel, -1,
1099 			     "", "");
1100 }
1101 
1102 static int ddr2_cs_size(unsigned i, bool dct_width)
1103 {
1104 	unsigned shift = 0;
1105 
1106 	if (i <= 2)
1107 		shift = i;
1108 	else if (!(i & 0x1))
1109 		shift = i >> 1;
1110 	else
1111 		shift = (i + 1) >> 1;
1112 
1113 	return 128 << (shift + !!dct_width);
1114 }
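/*
 * Illustrative check of the mapping above (not in the original driver):
 * cs_mode 2 -> 128 << 2 = 512MB; cs_mode 5 (odd) -> shift = (5 + 1) >> 1
 * = 3 -> 1024MB, doubled to 2048MB on a 128-bit wide DCT.
 */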
1115 
1116 static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1117 				  unsigned cs_mode)
1118 {
1119 	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1120 
1121 	if (pvt->ext_model >= K8_REV_F) {
1122 		WARN_ON(cs_mode > 11);
1123 		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1124 	} else if (pvt->ext_model >= K8_REV_D) {
1126 		unsigned diff;
1127 		WARN_ON(cs_mode > 10);
1128 
1129 		/*
1130 		 * the below calculation, besides trying to win an obfuscated C
1131 		 * contest, maps cs_mode values to DIMM chip select sizes. The
1132 		 * mappings are:
1133 		 *
1134 		 * cs_mode	CS size (MB)
1135 		 * =======	============
1136 		 * 0		32
1137 		 * 1		64
1138 		 * 2		128
1139 		 * 3		128
1140 		 * 4		256
1141 		 * 5		512
1142 		 * 6		256
1143 		 * 7		512
1144 		 * 8		1024
1145 		 * 9		1024
1146 		 * 10		2048
1147 		 *
1148 		 * Basically, it calculates a value with which to shift the
1149 		 * smallest CS size of 32MB.
1150 		 *
1151 		 * ddr[23]_cs_size have a similar purpose.
1152 		 */
1153 		diff = cs_mode/3 + (unsigned)(cs_mode > 5);
1154 
1155 		return 32 << (cs_mode - diff);
1156 	} else {
1158 		WARN_ON(cs_mode > 6);
1159 		return 32 << cs_mode;
1160 	}
1161 }
1162 
1163 /*
1164  * Get the number of DCT channels in use.
1165  *
1166  * Return:
1167  *	number of Memory Channels in operation
1168  * Pass back:
1169  *	contents of the DCL0_LOW register
1170  */
1171 static int f1x_early_channel_count(struct amd64_pvt *pvt)
1172 {
1173 	int i, j, channels = 0;
1174 
1175 	/* On F10h, if we are in 128 bit mode, then we are using 2 channels */
1176 	if (boot_cpu_data.x86 == 0x10 && (pvt->dclr0 & WIDTH_128))
1177 		return 2;
1178 
1179 	/*
1180 	 * Need to check if in unganged mode: in that case there are 2 channels,
1181 	 * but they are not in 128 bit mode and thus the 'dclr0' status bit
1182 	 * tested above will be OFF.
1183 	 *
1184 	 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
1185 	 * its CSEnable bit on. If so, then it is the SINGLE DIMM case.
1186 	 */
1187 	edac_dbg(0, "Data width is not 128 bits - need more decoding\n");
1188 
1189 	/*
1190 	 * Check DRAM Bank Address Mapping values for each DIMM to see if there
1191 	 * is more than just one DIMM present in unganged mode. Need to check
1192 	 * both controllers since DIMMs can be placed in either one.
1193 	 */
1194 	for (i = 0; i < 2; i++) {
1195 		u32 dbam = (i ? pvt->dbam1 : pvt->dbam0);
1196 
1197 		for (j = 0; j < 4; j++) {
1198 			if (DBAM_DIMM(j, dbam) > 0) {
1199 				channels++;
1200 				break;
1201 			}
1202 		}
1203 	}
1204 
1205 	if (channels > 2)
1206 		channels = 2;
1207 
1208 	amd64_info("MCT channel count: %d\n", channels);
1209 
1210 	return channels;
1211 }
1212 
1213 static int ddr3_cs_size(unsigned i, bool dct_width)
1214 {
1215 	unsigned shift = 0;
1216 	int cs_size = 0;
1217 
1218 	if (i == 0 || i == 3 || i == 4)
1219 		cs_size = -1;
1220 	else if (i <= 2)
1221 		shift = i;
1222 	else if (i == 12)
1223 		shift = 7;
1224 	else if (!(i & 0x1))
1225 		shift = i >> 1;
1226 	else
1227 		shift = (i + 1) >> 1;
1228 
1229 	if (cs_size != -1)
1230 		cs_size = (128 * (1 << !!dct_width)) << shift;
1231 
1232 	return cs_size;
1233 }
1234 
1235 static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1236 				   unsigned cs_mode)
1237 {
1238 	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1239 
1240 	WARN_ON(cs_mode > 11);
1241 
1242 	if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE)
1243 		return ddr3_cs_size(cs_mode, dclr & WIDTH_128);
1244 	else
1245 		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1246 }
1247 
1248 /*
1249  * F15h supports only 64bit DCT interfaces
1250  */
1251 static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1252 				   unsigned cs_mode)
1253 {
1254 	WARN_ON(cs_mode > 12);
1255 
1256 	return ddr3_cs_size(cs_mode, false);
1257 }
1258 
1259 static void read_dram_ctl_register(struct amd64_pvt *pvt)
1260 {
1261 
1262 	if (boot_cpu_data.x86 == 0xf)
1263 		return;
1264 
1265 	if (!amd64_read_dct_pci_cfg(pvt, DCT_SEL_LO, &pvt->dct_sel_lo)) {
1266 		edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n",
1267 			 pvt->dct_sel_lo, dct_sel_baseaddr(pvt));
1268 
1269 		edac_dbg(0, "  DCTs operate in %s mode\n",
1270 			 (dct_ganging_enabled(pvt) ? "ganged" : "unganged"));
1271 
1272 		if (!dct_ganging_enabled(pvt))
1273 			edac_dbg(0, "  Address range split per DCT: %s\n",
1274 				 (dct_high_range_enabled(pvt) ? "yes" : "no"));
1275 
1276 		edac_dbg(0, "  data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
1277 			 (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
1278 			 (dct_memory_cleared(pvt) ? "yes" : "no"));
1279 
1280 		edac_dbg(0, "  channel interleave: %s, "
1281 			 "interleave bits selector: 0x%x\n",
1282 			 (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
1283 			 dct_sel_interleave_addr(pvt));
1284 	}
1285 
1286 	amd64_read_dct_pci_cfg(pvt, DCT_SEL_HI, &pvt->dct_sel_hi);
1287 }
1288 
1289 /*
1290  * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory
1291  * Interleaving Modes.
1292  */
1293 static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1294 				bool hi_range_sel, u8 intlv_en)
1295 {
1296 	u8 dct_sel_high = (pvt->dct_sel_lo >> 1) & 1;
1297 
1298 	if (dct_ganging_enabled(pvt))
1299 		return 0;
1300 
1301 	if (hi_range_sel)
1302 		return dct_sel_high;
1303 
1304 	/*
1305 	 * see F2x110[DctSelIntLvAddr] - channel interleave mode
1306 	 */
1307 	if (dct_interleave_enabled(pvt)) {
1308 		u8 intlv_addr = dct_sel_interleave_addr(pvt);
1309 
1310 		/* return DCT select function: 0=DCT0, 1=DCT1 */
1311 		if (!intlv_addr)
1312 			return sys_addr >> 6 & 1;
1313 
1314 		if (intlv_addr & 0x2) {
1315 			u8 shift = intlv_addr & 0x1 ? 9 : 6;
1316 			u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
1317 
1318 			return ((sys_addr >> shift) & 1) ^ temp;
1319 		}
1320 
1321 		return (sys_addr >> (12 + hweight8(intlv_en))) & 1;
1322 	}
1323 
1324 	if (dct_high_range_enabled(pvt))
1325 		return ~dct_sel_high & 1;
1326 
1327 	return 0;
1328 }
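/*
 * Illustrative summary (not in the original source) of the channel select
 * rules above: ganged DCTs always yield channel 0; in the high DCT range
 * the DctSelHi bit picks the channel; with channel interleaving enabled and
 * DctSelIntLvAddr == 0 the channel is simply sys_addr bit 6, i.e. adjacent
 * 64-byte cache lines alternate between DCT0 and DCT1.
 */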
1329 
1330 /* Convert the sys_addr to the normalized DCT address */
1331 static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range,
1332 				 u64 sys_addr, bool hi_rng,
1333 				 u32 dct_sel_base_addr)
1334 {
1335 	u64 chan_off;
1336 	u64 dram_base		= get_dram_base(pvt, range);
1337 	u64 hole_off		= f10_dhar_offset(pvt);
1338 	u64 dct_sel_base_off	= (pvt->dct_sel_hi & 0xFFFFFC00) << 16;
1339 
1340 	if (hi_rng) {
1341 		/*
1342 		 * if
1343 		 * base address of high range is below 4Gb
1344 		 * (bits [47:27] at [31:11])
1345 		 * DRAM address space on this DCT is hoisted above 4Gb	&&
1346 		 * sys_addr > 4Gb
1347 		 *
1348 		 *	remove hole offset from sys_addr
1349 		 * else
1350 		 *	remove high range offset from sys_addr
1351 		 */
1352 		if ((!(dct_sel_base_addr >> 16) ||
1353 		     dct_sel_base_addr < dhar_base(pvt)) &&
1354 		    dhar_valid(pvt) &&
1355 		    (sys_addr >= BIT_64(32)))
1356 			chan_off = hole_off;
1357 		else
1358 			chan_off = dct_sel_base_off;
1359 	} else {
1360 		/*
1361 		 * if
1362 		 * we have a valid hole		&&
1363 		 * sys_addr > 4Gb
1364 		 *
1365 		 *	remove hole
1366 		 * else
1367 		 *	remove dram base to normalize to DCT address
1368 		 */
1369 		if (dhar_valid(pvt) && (sys_addr >= BIT_64(32)))
1370 			chan_off = hole_off;
1371 		else
1372 			chan_off = dram_base;
1373 	}
1374 
1375 	return (sys_addr & GENMASK(6, 47)) - (chan_off & GENMASK(23, 47));
1376 }
1377 
1378 /*
1379  * checks if the csrow passed in is marked as SPARED; if so, returns the new
1380  * spare row
1381  */
1382 static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
1383 {
1384 	int tmp_cs;
1385 
1386 	if (online_spare_swap_done(pvt, dct) &&
1387 	    csrow == online_spare_bad_dramcs(pvt, dct)) {
1388 
1389 		for_each_chip_select(tmp_cs, dct, pvt) {
1390 			if (chip_select_base(tmp_cs, dct, pvt) & 0x2) {
1391 				csrow = tmp_cs;
1392 				break;
1393 			}
1394 		}
1395 	}
1396 	return csrow;
1397 }
1398 
1399 /*
1400  * Iterate over the DRAM DCT "base" and "mask" registers looking for a
1401  * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
1402  *
1403  * Return:
1404  *	-EINVAL:  NOT FOUND
1405  *	0..csrow = Chip-Select Row
1406  */
1407 static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct)
1408 {
1409 	struct mem_ctl_info *mci;
1410 	struct amd64_pvt *pvt;
1411 	u64 cs_base, cs_mask;
1412 	int cs_found = -EINVAL;
1413 	int csrow;
1414 
1415 	mci = mcis[nid];
1416 	if (!mci)
1417 		return cs_found;
1418 
1419 	pvt = mci->pvt_info;
1420 
1421 	edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr, dct);
1422 
1423 	for_each_chip_select(csrow, dct, pvt) {
1424 		if (!csrow_enabled(csrow, dct, pvt))
1425 			continue;
1426 
1427 		get_cs_base_and_mask(pvt, csrow, dct, &cs_base, &cs_mask);
1428 
1429 		edac_dbg(1, "    CSROW=%d CSBase=0x%llx CSMask=0x%llx\n",
1430 			 csrow, cs_base, cs_mask);
1431 
1432 		cs_mask = ~cs_mask;
1433 
1434 		edac_dbg(1, "    (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n",
1435 			 (in_addr & cs_mask), (cs_base & cs_mask));
1436 
1437 		if ((in_addr & cs_mask) == (cs_base & cs_mask)) {
1438 			cs_found = f10_process_possible_spare(pvt, dct, csrow);
1439 
1440 			edac_dbg(1, " MATCH csrow=%d\n", cs_found);
1441 			break;
1442 		}
1443 	}
1444 	return cs_found;
1445 }
1446 
1447 /*
1448  * See F2x10C. Non-interleaved graphics framebuffer memory below the 16G
1449  * boundary is swapped with a region located at the bottom of memory so that
1450  * the GPU can use the interleaved region and thus two channels.
1451  */
1452 static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
1453 {
1454 	u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr;
1455 
1456 	if (boot_cpu_data.x86 == 0x10) {
1457 		/* only revC3 and revE have that feature */
1458 		if (boot_cpu_data.x86_model < 4 ||
1459 		    (boot_cpu_data.x86_model < 0xa &&
1460 		     boot_cpu_data.x86_mask < 3))
1461 			return sys_addr;
1462 	}
1463 
1464 	amd64_read_dct_pci_cfg(pvt, SWAP_INTLV_REG, &swap_reg);
1465 
1466 	if (!(swap_reg & 0x1))
1467 		return sys_addr;
1468 
1469 	swap_base	= (swap_reg >> 3) & 0x7f;
1470 	swap_limit	= (swap_reg >> 11) & 0x7f;
1471 	rgn_size	= (swap_reg >> 20) & 0x7f;
1472 	tmp_addr	= sys_addr >> 27;
1473 
1474 	if (!(sys_addr >> 34) &&
1475 	    (((tmp_addr >= swap_base) &&
1476 	     (tmp_addr <= swap_limit)) ||
1477 	     (tmp_addr < rgn_size)))
1478 		return sys_addr ^ (u64)swap_base << 27;
1479 
1480 	return sys_addr;
1481 }
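/*
 * Illustrative note (not in the original source): swap_base, swap_limit and
 * rgn_size are in 128MB units (hence the >> 27 granularity). Because the
 * exchange is done by XORing sys_addr with swap_base << 27, applying the
 * function twice returns the original address, so the same code maps both
 * directions of the swap.
 */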
1482 
1483 /* For a given @dram_range, check if @sys_addr falls within it. */
1484 static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
1485 				  u64 sys_addr, int *nid, int *chan_sel)
1486 {
1487 	int cs_found = -EINVAL;
1488 	u64 chan_addr;
1489 	u32 dct_sel_base;
1490 	u8 channel;
1491 	bool high_range = false;
1492 
1493 	u8 node_id    = dram_dst_node(pvt, range);
1494 	u8 intlv_en   = dram_intlv_en(pvt, range);
1495 	u32 intlv_sel = dram_intlv_sel(pvt, range);
1496 
1497 	edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1498 		 range, sys_addr, get_dram_limit(pvt, range));
1499 
1500 	if (dhar_valid(pvt) &&
1501 	    dhar_base(pvt) <= sys_addr &&
1502 	    sys_addr < BIT_64(32)) {
1503 		amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1504 			    sys_addr);
1505 		return -EINVAL;
1506 	}
1507 
1508 	if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en)))
1509 		return -EINVAL;
1510 
1511 	sys_addr = f1x_swap_interleaved_region(pvt, sys_addr);
1512 
1513 	dct_sel_base = dct_sel_baseaddr(pvt);
1514 
1515 	/*
1516 	 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
1517 	 * select between DCT0 and DCT1.
1518 	 */
1519 	if (dct_high_range_enabled(pvt) &&
1520 	   !dct_ganging_enabled(pvt) &&
1521 	   ((sys_addr >> 27) >= (dct_sel_base >> 11)))
1522 		high_range = true;
1523 
1524 	channel = f1x_determine_channel(pvt, sys_addr, high_range, intlv_en);
1525 
1526 	chan_addr = f1x_get_norm_dct_addr(pvt, range, sys_addr,
1527 					  high_range, dct_sel_base);
1528 
1529 	/* Remove node interleaving, see F1x120 */
1530 	if (intlv_en)
1531 		chan_addr = ((chan_addr >> (12 + hweight8(intlv_en))) << 12) |
1532 			    (chan_addr & 0xfff);
1533 
1534 	/* remove channel interleave */
1535 	if (dct_interleave_enabled(pvt) &&
1536 	   !dct_high_range_enabled(pvt) &&
1537 	   !dct_ganging_enabled(pvt)) {
1538 
1539 		if (dct_sel_interleave_addr(pvt) != 1) {
1540 			if (dct_sel_interleave_addr(pvt) == 0x3)
1541 				/* hash 9 */
1542 				chan_addr = ((chan_addr >> 10) << 9) |
1543 					     (chan_addr & 0x1ff);
1544 			else
1545 				/* A[6] or hash 6 */
1546 				chan_addr = ((chan_addr >> 7) << 6) |
1547 					     (chan_addr & 0x3f);
1548 		} else
1549 			/* A[12] */
1550 			chan_addr = ((chan_addr >> 13) << 12) |
1551 				     (chan_addr & 0xfff);
1552 	}
1553 
1554 	edac_dbg(1, "   Normalized DCT addr: 0x%llx\n", chan_addr);
1555 
1556 	cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
1557 
1558 	if (cs_found >= 0) {
1559 		*nid = node_id;
1560 		*chan_sel = channel;
1561 	}
1562 	return cs_found;
1563 }
1564 
1565 static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
1566 				       int *node, int *chan_sel)
1567 {
1568 	int cs_found = -EINVAL;
1569 	unsigned range;
1570 
1571 	for (range = 0; range < DRAM_RANGES; range++) {
1572 
1573 		if (!dram_rw(pvt, range))
1574 			continue;
1575 
1576 		if ((get_dram_base(pvt, range)  <= sys_addr) &&
1577 		    (get_dram_limit(pvt, range) >= sys_addr)) {
1578 
1579 			cs_found = f1x_match_to_this_node(pvt, range,
1580 							  sys_addr, node,
1581 							  chan_sel);
1582 			if (cs_found >= 0)
1583 				break;
1584 		}
1585 	}
1586 	return cs_found;
1587 }
1588 
1589 /*
1590  * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
1591  * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
1592  *
1593  * The @sys_addr is usually an error address received from the hardware
1594  * (MCX_ADDR).
1595  */
1596 static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
1597 				     u16 syndrome)
1598 {
1599 	struct amd64_pvt *pvt = mci->pvt_info;
1600 	u32 page, offset;
1601 	int nid, csrow, chan = 0;
1602 
1603 	error_address_to_page_and_offset(sys_addr, &page, &offset);
1604 
1605 	csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
1606 
1607 	if (csrow < 0) {
1608 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
1609 				     page, offset, syndrome,
1610 				     -1, -1, -1,
1611 				     "failed to map error addr to a csrow",
1612 				     "");
1613 		return;
1614 	}
1615 
1616 	/*
1617 	 * We need the syndromes for channel detection only when we're
1618 	 * ganged. Otherwise @chan should already contain the channel at
1619 	 * this point.
1620 	 */
1621 	if (dct_ganging_enabled(pvt))
1622 		chan = get_channel_from_ecc_syndrome(mci, syndrome);
1623 
1624 	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
1625 			     page, offset, syndrome,
1626 			     csrow, chan, -1,
1627 			     "", "");
1628 }
1629 
1630 /*
1631  * debug routine to display the memory sizes of all logical DIMMs and their
1632  * CSROWs
1633  */
1634 static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
1635 {
1636 	int dimm, size0, size1, factor = 0;
1637 	u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
1638 	u32 dbam  = ctrl ? pvt->dbam1 : pvt->dbam0;
1639 
1640 	if (boot_cpu_data.x86 == 0xf) {
1641 		if (pvt->dclr0 & WIDTH_128)
1642 			factor = 1;
1643 
1644 		/* K8 revisions < revF not supported yet */
1645 		if (pvt->ext_model < K8_REV_F)
1646 			return;
1647 		else
1648 			WARN_ON(ctrl != 0);
1649 	}
1650 
1651 	dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 : pvt->dbam0;
1652 	dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->csels[1].csbases
1653 						   : pvt->csels[0].csbases;
1654 
1655 	edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
1656 		 ctrl, dbam);
1657 
1658 	edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);
1659 
1660 	/* Dump memory sizes for DIMM and its CSROWs */
1661 	for (dimm = 0; dimm < 4; dimm++) {
1662 
1663 		size0 = 0;
1664 		if (dcsb[dimm*2] & DCSB_CS_ENABLE)
1665 			size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
1666 						     DBAM_DIMM(dimm, dbam));
1667 
1668 		size1 = 0;
1669 		if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
1670 			size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
1671 						     DBAM_DIMM(dimm, dbam));
1672 
1673 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
1674 				dimm * 2,     size0 << factor,
1675 				dimm * 2 + 1, size1 << factor);
1676 	}
1677 }
1678 
1679 static struct amd64_family_type amd64_family_types[] = {
1680 	[K8_CPUS] = {
1681 		.ctl_name = "K8",
1682 		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
1683 		.f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
1684 		.ops = {
1685 			.early_channel_count	= k8_early_channel_count,
1686 			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
1687 			.dbam_to_cs		= k8_dbam_to_chip_select,
1688 			.read_dct_pci_cfg	= k8_read_dct_pci_cfg,
1689 		}
1690 	},
1691 	[F10_CPUS] = {
1692 		.ctl_name = "F10h",
1693 		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
1694 		.f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
1695 		.ops = {
1696 			.early_channel_count	= f1x_early_channel_count,
1697 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1698 			.dbam_to_cs		= f10_dbam_to_chip_select,
1699 			.read_dct_pci_cfg	= f10_read_dct_pci_cfg,
1700 		}
1701 	},
1702 	[F15_CPUS] = {
1703 		.ctl_name = "F15h",
1704 		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
1705 		.f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3,
1706 		.ops = {
1707 			.early_channel_count	= f1x_early_channel_count,
1708 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1709 			.dbam_to_cs		= f15_dbam_to_chip_select,
1710 			.read_dct_pci_cfg	= f15_read_dct_pci_cfg,
1711 		}
1712 	},
1713 };
1714 
1715 static struct pci_dev *pci_get_related_function(unsigned int vendor,
1716 						unsigned int device,
1717 						struct pci_dev *related)
1718 {
1719 	struct pci_dev *dev = NULL;
1720 
1721 	dev = pci_get_device(vendor, device, dev);
1722 	while (dev) {
1723 		if ((dev->bus->number == related->bus->number) &&
1724 		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
1725 			break;
1726 		dev = pci_get_device(vendor, device, dev);
1727 	}
1728 
1729 	return dev;
1730 }
1731 
1732 /*
1733  * These are tables of eigenvectors (one per line) which can be used for the
1734  * construction of the syndrome tables. The modified syndrome search algorithm
1735  * uses those to find the symbol in error and thus the DIMM.
1736  *
1737  * Algorithm courtesy of Ross LaFetra from AMD.
1738  */
1739 static u16 x4_vectors[] = {
1740 	0x2f57, 0x1afe, 0x66cc, 0xdd88,
1741 	0x11eb, 0x3396, 0x7f4c, 0xeac8,
1742 	0x0001, 0x0002, 0x0004, 0x0008,
1743 	0x1013, 0x3032, 0x4044, 0x8088,
1744 	0x106b, 0x30d6, 0x70fc, 0xe0a8,
1745 	0x4857, 0xc4fe, 0x13cc, 0x3288,
1746 	0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
1747 	0x1f39, 0x251e, 0xbd6c, 0x6bd8,
1748 	0x15c1, 0x2a42, 0x89ac, 0x4758,
1749 	0x2b03, 0x1602, 0x4f0c, 0xca08,
1750 	0x1f07, 0x3a0e, 0x6b04, 0xbd08,
1751 	0x8ba7, 0x465e, 0x244c, 0x1cc8,
1752 	0x2b87, 0x164e, 0x642c, 0xdc18,
1753 	0x40b9, 0x80de, 0x1094, 0x20e8,
1754 	0x27db, 0x1eb6, 0x9dac, 0x7b58,
1755 	0x11c1, 0x2242, 0x84ac, 0x4c58,
1756 	0x1be5, 0x2d7a, 0x5e34, 0xa718,
1757 	0x4b39, 0x8d1e, 0x14b4, 0x28d8,
1758 	0x4c97, 0xc87e, 0x11fc, 0x33a8,
1759 	0x8e97, 0x497e, 0x2ffc, 0x1aa8,
1760 	0x16b3, 0x3d62, 0x4f34, 0x8518,
1761 	0x1e2f, 0x391a, 0x5cac, 0xf858,
1762 	0x1d9f, 0x3b7a, 0x572c, 0xfe18,
1763 	0x15f5, 0x2a5a, 0x5264, 0xa3b8,
1764 	0x1dbb, 0x3b66, 0x715c, 0xe3f8,
1765 	0x4397, 0xc27e, 0x17fc, 0x3ea8,
1766 	0x1617, 0x3d3e, 0x6464, 0xb8b8,
1767 	0x23ff, 0x12aa, 0xab6c, 0x56d8,
1768 	0x2dfb, 0x1ba6, 0x913c, 0x7328,
1769 	0x185d, 0x2ca6, 0x7914, 0x9e28,
1770 	0x171b, 0x3e36, 0x7d7c, 0xebe8,
1771 	0x4199, 0x82ee, 0x19f4, 0x2e58,
1772 	0x4807, 0xc40e, 0x130c, 0x3208,
1773 	0x1905, 0x2e0a, 0x5804, 0xac08,
1774 	0x213f, 0x132a, 0xadfc, 0x5ba8,
1775 	0x19a9, 0x2efe, 0xb5cc, 0x6f88,
1776 };
1777 
1778 static u16 x8_vectors[] = {
1779 	0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
1780 	0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
1781 	0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
1782 	0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80,
1783 	0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780,
1784 	0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080,
1785 	0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080,
1786 	0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080,
1787 	0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80,
1788 	0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580,
1789 	0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880,
1790 	0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280,
1791 	0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180,
1792 	0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580,
1793 	0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280,
1794 	0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180,
1795 	0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080,
1796 	0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
1797 	0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
1798 };
1799 
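/*
 * Try to express @syndrome as a combination of one symbol's eigenvectors.
 * Roughly: symbol k owns vectors[k * v_dim .. (k + 1) * v_dim - 1]; walking
 * the syndrome bits from LSB to MSB, a vector whose next bit is set gets
 * XORed out of the syndrome when that bit is still set there. Reaching zero
 * means the syndrome lies in the span of symbol k's vectors; a set bit that
 * no remaining vector can cancel means symbol k is not the one in error.
 */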
1800 static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs,
1801 			   unsigned v_dim)
1802 {
1803 	unsigned int i, err_sym;
1804 
1805 	for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
1806 		u16 s = syndrome;
1807 		unsigned v_idx =  err_sym * v_dim;
1808 		unsigned v_end = (err_sym + 1) * v_dim;
1809 
1810 		/* walk over all 16 bits of the syndrome */
1811 		for (i = 1; i < (1U << 16); i <<= 1) {
1812 
1813 			/* if bit is set in that eigenvector... */
1814 			if (v_idx < v_end && vectors[v_idx] & i) {
1815 				u16 ev_comp = vectors[v_idx++];
1816 
1817 				/* ... and bit set in the modified syndrome, */
1818 				if (s & i) {
1819 					/* remove it. */
1820 					s ^= ev_comp;
1821 
1822 					if (!s)
1823 						return err_sym;
1824 				}
1825 
1826 			} else if (s & i)
1827 				/* can't get to zero, move to next symbol */
1828 				break;
1829 		}
1830 	}
1831 
1832 	edac_dbg(0, "syndrome(%x) not found\n", syndrome);
1833 	return -1;
1834 }
1835 
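/*
 * Map an error symbol to the DRAM channel it belongs to. Apart from the
 * special cases handled explicitly below, the upper bits of the symbol
 * number select the channel: err_sym >> 4 for x4 symbols, err_sym >> 3
 * for x8 symbols.
 */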
1836 static int map_err_sym_to_channel(int err_sym, int sym_size)
1837 {
1838 	if (sym_size == 4)
1839 		switch (err_sym) {
1840 		case 0x20:
1841 		case 0x21:
1842 			return 0;
1844 		case 0x22:
1845 		case 0x23:
1846 			return 1;
1848 		default:
1849 			return err_sym >> 4;
1851 		}
1852 	/* x8 symbols */
1853 	else
1854 		switch (err_sym) {
1855 		/* imaginary bits not in a DIMM */
1856 		case 0x10:
1857 			WARN(1, "Invalid error symbol: 0x%x\n", err_sym);
1859 			return -1;
1861 
1862 		case 0x11:
1863 			return 0;
1865 		case 0x12:
1866 			return 1;
1868 		default:
1869 			return err_sym >> 3;
1871 		}
1872 	return -1;
1873 }
1874 
1875 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
1876 {
1877 	struct amd64_pvt *pvt = mci->pvt_info;
1878 	int err_sym = -1;
1879 
1880 	if (pvt->ecc_sym_sz == 8)
1881 		err_sym = decode_syndrome(syndrome, x8_vectors,
1882 					  ARRAY_SIZE(x8_vectors),
1883 					  pvt->ecc_sym_sz);
1884 	else if (pvt->ecc_sym_sz == 4)
1885 		err_sym = decode_syndrome(syndrome, x4_vectors,
1886 					  ARRAY_SIZE(x4_vectors),
1887 					  pvt->ecc_sym_sz);
1888 	else {
1889 		amd64_warn("Illegal ECC symbol size: %u\n", pvt->ecc_sym_sz);
1890 		return err_sym;
1891 	}
1892 
1893 	return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
1894 }
1895 
1896 /*
1897  * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
1898  * ADDRESS and process.
1899  */
1900 static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
1901 {
1902 	struct amd64_pvt *pvt = mci->pvt_info;
1903 	u64 sys_addr;
1904 	u16 syndrome;
1905 
1906 	/* Ensure that the Error Address is VALID */
1907 	if (!(m->status & MCI_STATUS_ADDRV)) {
1908 		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
1909 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
1910 				     0, 0, 0,
1911 				     -1, -1, -1,
1912 				     "HW has no ERROR_ADDRESS available",
1913 				     "");
1914 		return;
1915 	}
1916 
1917 	sys_addr = get_error_address(m);
1918 	syndrome = extract_syndrome(m->status);
1919 
1920 	amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
1921 
1922 	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome);
1923 }
1924 
1925 /* Handle any Un-correctable Errors (UEs) */
1926 static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
1927 {
1928 	struct mem_ctl_info *log_mci, *src_mci = NULL;
1929 	int csrow;
1930 	u64 sys_addr;
1931 	u32 page, offset;
1932 
1933 	log_mci = mci;
1934 
1935 	if (!(m->status & MCI_STATUS_ADDRV)) {
1936 		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
1937 		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
1938 				     0, 0, 0,
1939 				     -1, -1, -1,
1940 				     "HW has no ERROR_ADDRESS available",
1941 				     "");
1942 		return;
1943 	}
1944 
1945 	sys_addr = get_error_address(m);
1946 	error_address_to_page_and_offset(sys_addr, &page, &offset);
1947 
1948 	/*
1949 	 * Find out which node the error address belongs to. This may be
1950 	 * different from the node that detected the error.
1951 	 */
1952 	src_mci = find_mc_by_sys_addr(mci, sys_addr);
1953 	if (!src_mci) {
1954 		amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
1955 				  (unsigned long)sys_addr);
1956 		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
1957 				     page, offset, 0,
1958 				     -1, -1, -1,
1959 				     "ERROR ADDRESS NOT mapped to a MC",
1960 				     "");
1961 		return;
1962 	}
1963 
1964 	log_mci = src_mci;
1965 
1966 	csrow = sys_addr_to_csrow(log_mci, sys_addr);
1967 	if (csrow < 0) {
1968 		amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
1969 				  (unsigned long)sys_addr);
1970 		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
1971 				     page, offset, 0,
1972 				     -1, -1, -1,
1973 				     "ERROR ADDRESS NOT mapped to CS",
1974 				     "");
1975 	} else {
1976 		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
1977 				     page, offset, 0,
1978 				     csrow, -1, -1,
1979 				     "", "");
1980 	}
1981 }
1982 
1983 static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
1984 					    struct mce *m)
1985 {
1986 	u16 ec = EC(m->status);
1987 	u8 xec = XEC(m->status, 0x1f);
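	/* status bits 46:45 encode the ECC error type: 2 = CE, 1 = UE */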
1988 	u8 ecc_type = (m->status >> 45) & 0x3;
1989 
1990 	/* Bail out early if this was an 'observed' error */
1991 	if (PP(ec) == NBSL_PP_OBS)
1992 		return;
1993 
1994 	/* Do only ECC errors */
1995 	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
1996 		return;
1997 
1998 	if (ecc_type == 2)
1999 		amd64_handle_ce(mci, m);
2000 	else if (ecc_type == 1)
2001 		amd64_handle_ue(mci, m);
2002 }
2003 
2004 void amd64_decode_bus_error(int node_id, struct mce *m)
2005 {
2006 	__amd64_decode_bus_error(mcis[node_id], m);
2007 }
2008 
2009 /*
2010  * Use pvt->F2 which contains the F2 CPU PCI device to get the related
2011  * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
2012  */
2013 static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
2014 {
2015 	/* Reserve the ADDRESS MAP Device */
2016 	pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
2017 	if (!pvt->F1) {
2018 		amd64_err("F1 (address map) device not found: "
2019 			  "vendor 0x%x, device 0x%x (broken BIOS?)\n",
2020 			  PCI_VENDOR_ID_AMD, f1_id);
2021 		return -ENODEV;
2022 	}
2023 
2024 	/* Reserve the MISC Device */
2025 	pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
2026 	if (!pvt->F3) {
2027 		pci_dev_put(pvt->F1);
2028 		pvt->F1 = NULL;
2029 
2030 		amd64_err("F3 (misc) device not found: "
2031 			  "vendor 0x%x, device 0x%x (broken BIOS?)\n",
2032 			  PCI_VENDOR_ID_AMD, f3_id);
2033 
2034 		return -ENODEV;
2035 	}
2036 	edac_dbg(1, "F1: %s\n", pci_name(pvt->F1));
2037 	edac_dbg(1, "F2: %s\n", pci_name(pvt->F2));
2038 	edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
2039 
2040 	return 0;
2041 }
2042 
2043 static void free_mc_sibling_devs(struct amd64_pvt *pvt)
2044 {
2045 	pci_dev_put(pvt->F1);
2046 	pci_dev_put(pvt->F3);
2047 }
2048 
2049 /*
2050  * Retrieve the hardware registers of the memory controller (this includes the
2051  * 'Address Map' and 'Misc' device regs)
2052  */
2053 static void read_mc_regs(struct amd64_pvt *pvt)
2054 {
2055 	struct cpuinfo_x86 *c = &boot_cpu_data;
2056 	u64 msr_val;
2057 	u32 tmp;
2058 	unsigned range;
2059 
2060 	/*
2061 	 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
2062 	 * those are Read-As-Zero
2063 	 */
2064 	rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
2065 	edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);
2066 
2067 	/* check first whether TOP_MEM2 is enabled */
2068 	rdmsrl(MSR_K8_SYSCFG, msr_val);
2069 	if (msr_val & (1U << 21)) {
2070 		rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
2071 		edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
2072 	} else {
2073 		edac_dbg(0, "  TOP_MEM2 disabled\n");
	}
2074 
2075 	amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);
2076 
2077 	read_dram_ctl_register(pvt);
2078 
2079 	for (range = 0; range < DRAM_RANGES; range++) {
2080 		u8 rw;
2081 
2082 		/* read settings for this DRAM range */
2083 		read_dram_base_limit_regs(pvt, range);
2084 
2085 		rw = dram_rw(pvt, range);
2086 		if (!rw)
2087 			continue;
2088 
2089 		edac_dbg(1, "  DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n",
2090 			 range,
2091 			 get_dram_base(pvt, range),
2092 			 get_dram_limit(pvt, range));
2093 
2094 		edac_dbg(1, "   IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n",
2095 			 dram_intlv_en(pvt, range) ? "Enabled" : "Disabled",
2096 			 (rw & 0x1) ? "R" : "-",
2097 			 (rw & 0x2) ? "W" : "-",
2098 			 dram_intlv_sel(pvt, range),
2099 			 dram_dst_node(pvt, range));
2100 	}
2101 
2102 	read_dct_base_mask(pvt);
2103 
2104 	amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar);
2105 	amd64_read_dct_pci_cfg(pvt, DBAM0, &pvt->dbam0);
2106 
2107 	amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);
2108 
2109 	amd64_read_dct_pci_cfg(pvt, DCLR0, &pvt->dclr0);
2110 	amd64_read_dct_pci_cfg(pvt, DCHR0, &pvt->dchr0);
2111 
2112 	if (!dct_ganging_enabled(pvt)) {
2113 		amd64_read_dct_pci_cfg(pvt, DCLR1, &pvt->dclr1);
2114 		amd64_read_dct_pci_cfg(pvt, DCHR1, &pvt->dchr1);
2115 	}
2116 
2117 	pvt->ecc_sym_sz = 4;
2118 
2119 	if (c->x86 >= 0x10) {
2120 		amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
2121 		amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);
2122 
2123 		/* F10h, revD and later can do x8 ECC too */
2124 		if ((c->x86 > 0x10 || c->x86_model > 7) && tmp & BIT(25))
2125 			pvt->ecc_sym_sz = 8;
2126 	}
2127 	dump_misc_regs(pvt);
2128 }
2129 
2130 /*
2131  * NOTE: CPU Revision Dependent code
2132  *
2133  * Input:
2134  *	@csrow_nr ChipSelect Row Number (0..NUM_CHIPSELECTS-1)
2135  *	k8 private pointer to -->
2136  *			DRAM Bank Address mapping register
2137  *			node_id
2138  *			DCL register where dual_channel_active is
2139  *
2140  * The DBAM register consists of four 4-bit fields, defined as follows:
2141  *
2142  * Bits:	CSROWs
2143  * 0-3		CSROWs 0 and 1
2144  * 4-7		CSROWs 2 and 3
2145  * 8-11		CSROWs 4 and 5
2146  * 12-15	CSROWs 6 and 7
2147  *
2148  * Values range from 0 to 15.
2149  * The meaning of each value depends on the CPU revision and on the
2150  * dual-channel state; see the relevant BKDG for more info.
2151  *
2152  * The memory controller provides for a total of only 8 CSROWs in its
2153  * current architecture. Each "pair" of CSROWs normally represents just one
2154  * DIMM in single-channel mode, or two DIMMs in dual-channel mode.
2155  *
2156  * The following code logic collapses the various tables for CSROW based on CPU
2157  * revision.
2158  *
2159  * Returns:
2160  *	The number of PAGE_SIZE pages that the specified CSROW
2161  *	encompasses
2162  *
2163  */
2164 static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
2165 {
2166 	u32 cs_mode, nr_pages;
2167 	u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
2168 
2169 	/*
2170 	 * The math here doesn't look right on the surface because x/2*4 seems
2171 	 * like it could be simplified to x*2, but the expression relies on
2172 	 * integer math, where 1/2 == 0. The intermediate value becomes the
2173 	 * number of bits to shift the DBAM register to extract the proper CSROW
2174 	 * field.
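	 *
	 * For example, csrow_nr == 5 yields (5/2)*4 == 8 with integer math,
	 * so bits 8-11 of DBAM (the field covering CSROWs 4 and 5 in the
	 * table above) end up in cs_mode.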
2175 	 */
2176 	cs_mode =  (dbam >> ((csrow_nr / 2) * 4)) & 0xF;
2177 
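	/*
	 * dbam_to_cs() returns the chip select size in MB; shifting left by
	 * (20 - PAGE_SHIFT) converts that to the number of PAGE_SIZE pages.
	 */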
2178 	nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
2179 
2180 	edac_dbg(0, "  (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode);
2181 	edac_dbg(0, "    nr_pages/channel= %u  channel-count = %d\n",
2182 		 nr_pages, pvt->channel_count);
2183 
2184 	return nr_pages;
2185 }
2186 
2187 /*
2188  * Initialize the array of csrow attribute instances, based on the values
2189  * from pci config hardware registers.
2190  */
2191 static int init_csrows(struct mem_ctl_info *mci)
2192 {
2193 	struct csrow_info *csrow;
2194 	struct dimm_info *dimm;
2195 	struct amd64_pvt *pvt = mci->pvt_info;
2196 	u64 base, mask;
2197 	u32 val;
2198 	int i, j, empty = 1;
2199 	enum mem_type mtype;
2200 	enum edac_type edac_mode;
2201 	int nr_pages = 0;
2202 
2203 	amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
2204 
2205 	pvt->nbcfg = val;
2206 
2207 	edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2208 		 pvt->mc_node_id, val,
2209 		 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
2210 
2211 	for_each_chip_select(i, 0, pvt) {
2212 		csrow = mci->csrows[i];
2213 
2214 		if (!csrow_enabled(i, 0, pvt) && !csrow_enabled(i, 1, pvt)) {
2215 			edac_dbg(1, "----CSROW %d EMPTY for MC node %d\n",
2216 				 i, pvt->mc_node_id);
2217 			continue;
2218 		}
2219 
2220 		empty = 0;
		nr_pages = 0;	/* don't inherit the previous csrow's count */
2221 		if (csrow_enabled(i, 0, pvt))
2222 			nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
2223 		if (csrow_enabled(i, 1, pvt))
2224 			nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
2225 
2226 		get_cs_base_and_mask(pvt, i, 0, &base, &mask);
2227 		/* 8 bytes of resolution */
2228 
2229 		mtype = amd64_determine_memory_type(pvt, i);
2230 
2231 		edac_dbg(1, "  for MC node %d csrow %d:\n", pvt->mc_node_id, i);
2232 		edac_dbg(1, "    nr_pages: %u\n",
2233 			 nr_pages * pvt->channel_count);
2234 
2235 		/*
2236 		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
2237 		 */
2238 		if (pvt->nbcfg & NBCFG_ECC_ENABLE)
2239 			edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ?
2240 				    EDAC_S4ECD4ED : EDAC_SECDED;
2241 		else
2242 			edac_mode = EDAC_NONE;
2243 
2244 		for (j = 0; j < pvt->channel_count; j++) {
2245 			dimm = csrow->channels[j]->dimm;
2246 			dimm->mtype = mtype;
2247 			dimm->edac_mode = edac_mode;
2248 			dimm->nr_pages = nr_pages;
2249 		}
2250 	}
2251 
2252 	return empty;
2253 }
2254 
2255 /* get all cores on this DCT */
2256 static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid)
2257 {
2258 	int cpu;
2259 
2260 	for_each_online_cpu(cpu)
2261 		if (amd_get_nb_id(cpu) == nid)
2262 			cpumask_set_cpu(cpu, mask);
2263 }
2264 
2265 /* check MCG_CTL on all the cpus on this node */
2266 static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid)
2267 {
2268 	cpumask_var_t mask;
2269 	int cpu, nbe;
2270 	bool ret = false;
2271 
2272 	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
2273 		amd64_warn("%s: Error allocating mask\n", __func__);
2274 		return false;
2275 	}
2276 
2277 	get_cpus_on_this_dct_cpumask(mask, nid);
2278 
2279 	rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);
2280 
2281 	for_each_cpu(cpu, mask) {
2282 		struct msr *reg = per_cpu_ptr(msrs, cpu);
2283 		nbe = reg->l & MSR_MCGCTL_NBE;
2284 
2285 		edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2286 			 cpu, reg->q,
2287 			 (nbe ? "enabled" : "disabled"));
2288 
2289 		if (!nbe)
2290 			goto out;
2291 	}
2292 	ret = true;
2293 
2294 out:
2295 	free_cpumask_var(mask);
2296 	return ret;
2297 }
2298 
2299 static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
2300 {
2301 	cpumask_var_t cmask;
2302 	int cpu;
2303 
2304 	if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
2305 		amd64_warn("%s: error allocating mask\n", __func__);
2306 		return -ENOMEM;
2307 	}
2308 
2309 	get_cpus_on_this_dct_cpumask(cmask, nid);
2310 
2311 	rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2312 
2313 	for_each_cpu(cpu, cmask) {
2314 
2315 		struct msr *reg = per_cpu_ptr(msrs, cpu);
2316 
2317 		if (on) {
2318 			if (reg->l & MSR_MCGCTL_NBE)
2319 				s->flags.nb_mce_enable = 1;
2320 
2321 			reg->l |= MSR_MCGCTL_NBE;
2322 		} else {
2323 			/*
2324 			 * Turn off NB MCE reporting only when it was off before
2325 			 */
2326 			if (!s->flags.nb_mce_enable)
2327 				reg->l &= ~MSR_MCGCTL_NBE;
2328 		}
2329 	}
2330 	wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2331 
2332 	free_cpumask_var(cmask);
2333 
2334 	return 0;
2335 }
2336 
2337 static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
2338 				       struct pci_dev *F3)
2339 {
2340 	bool ret = true;
2341 	u32 value, mask = 0x3;		/* UECC/CECC enable */
2342 
2343 	if (toggle_ecc_err_reporting(s, nid, ON)) {
2344 		amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
2345 		return false;
2346 	}
2347 
2348 	amd64_read_pci_cfg(F3, NBCTL, &value);
2349 
2350 	s->old_nbctl   = value & mask;
2351 	s->nbctl_valid = true;
2352 
2353 	value |= mask;
2354 	amd64_write_pci_cfg(F3, NBCTL, value);
2355 
2356 	amd64_read_pci_cfg(F3, NBCFG, &value);
2357 
2358 	edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
2359 		 nid, value, !!(value & NBCFG_ECC_ENABLE));
2360 
2361 	if (!(value & NBCFG_ECC_ENABLE)) {
2362 		amd64_warn("DRAM ECC disabled on this node, enabling...\n");
2363 
2364 		s->flags.nb_ecc_prev = 0;
2365 
2366 		/* Attempt to turn on DRAM ECC Enable */
2367 		value |= NBCFG_ECC_ENABLE;
2368 		amd64_write_pci_cfg(F3, NBCFG, value);
2369 
2370 		amd64_read_pci_cfg(F3, NBCFG, &value);
2371 
2372 		if (!(value & NBCFG_ECC_ENABLE)) {
2373 			amd64_warn("Hardware rejected DRAM ECC enable, "
2374 				   "check memory DIMM configuration.\n");
2375 			ret = false;
2376 		} else {
2377 			amd64_info("Hardware accepted DRAM ECC Enable\n");
2378 		}
2379 	} else {
2380 		s->flags.nb_ecc_prev = 1;
2381 	}
2382 
2383 	edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
2384 		 nid, value, !!(value & NBCFG_ECC_ENABLE));
2385 
2386 	return ret;
2387 }
2388 
2389 static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
2390 					struct pci_dev *F3)
2391 {
2392 	u32 value, mask = 0x3;		/* UECC/CECC enable */
2393 
2395 	if (!s->nbctl_valid)
2396 		return;
2397 
2398 	amd64_read_pci_cfg(F3, NBCTL, &value);
2399 	value &= ~mask;
2400 	value |= s->old_nbctl;
2401 
2402 	amd64_write_pci_cfg(F3, NBCTL, value);
2403 
2404 	/* restore the previous BIOS DRAM ECC "off" setting which we force-enabled */
2405 	if (!s->flags.nb_ecc_prev) {
2406 		amd64_read_pci_cfg(F3, NBCFG, &value);
2407 		value &= ~NBCFG_ECC_ENABLE;
2408 		amd64_write_pci_cfg(F3, NBCFG, value);
2409 	}
2410 
2411 	/* restore the NB Enable MCGCTL bit */
2412 	if (toggle_ecc_err_reporting(s, nid, OFF))
2413 		amd64_warn("Error restoring NB MCGCTL settings!\n");
2414 }
2415 
2416 /*
2417  * EDAC requires that the BIOS have ECC enabled before
2418  * taking over the processing of ECC errors. A command line
2419  * option allows force-enabling hardware ECC later in
2420  * enable_ecc_error_reporting().
2421  */
2422 static const char *ecc_msg =
2423 	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
2424 	" Either enable ECC checking or force module loading by setting "
2425 	"'ecc_enable_override'.\n"
2426 	" (Note that use of the override may cause unknown side effects.)\n";
2427 
2428 static bool ecc_enabled(struct pci_dev *F3, u8 nid)
2429 {
2430 	u32 value;
2431 	u8 ecc_en = 0;
2432 	bool nb_mce_en = false;
2433 
2434 	amd64_read_pci_cfg(F3, NBCFG, &value);
2435 
2436 	ecc_en = !!(value & NBCFG_ECC_ENABLE);
2437 	amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));
2438 
2439 	nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid);
2440 	if (!nb_mce_en)
2441 		amd64_notice("NB MCE bank disabled, set MSR "
2442 			     "0x%08x[4] on node %d to enable.\n",
2443 			     MSR_IA32_MCG_CTL, nid);
2444 
2445 	if (!ecc_en || !nb_mce_en) {
2446 		amd64_notice("%s", ecc_msg);
2447 		return false;
2448 	}
2449 	return true;
2450 }
2451 
2452 static int set_mc_sysfs_attrs(struct mem_ctl_info *mci)
2453 {
2454 	int rc;
2455 
2456 	rc = amd64_create_sysfs_dbg_files(mci);
2457 	if (rc < 0)
2458 		return rc;
2459 
2460 	if (boot_cpu_data.x86 >= 0x10) {
2461 		rc = amd64_create_sysfs_inject_files(mci);
2462 		if (rc < 0)
2463 			return rc;
2464 	}
2465 
2466 	return 0;
2467 }
2468 
2469 static void del_mc_sysfs_attrs(struct mem_ctl_info *mci)
2470 {
2471 	amd64_remove_sysfs_dbg_files(mci);
2472 
2473 	if (boot_cpu_data.x86 >= 0x10)
2474 		amd64_remove_sysfs_inject_files(mci);
2475 }
2476 
2477 static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
2478 				 struct amd64_family_type *fam)
2479 {
2480 	struct amd64_pvt *pvt = mci->pvt_info;
2481 
2482 	mci->mtype_cap		= MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
2483 	mci->edac_ctl_cap	= EDAC_FLAG_NONE;
2484 
2485 	if (pvt->nbcap & NBCAP_SECDED)
2486 		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
2487 
2488 	if (pvt->nbcap & NBCAP_CHIPKILL)
2489 		mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
2490 
2491 	mci->edac_cap		= amd64_determine_edac_cap(pvt);
2492 	mci->mod_name		= EDAC_MOD_STR;
2493 	mci->mod_ver		= EDAC_AMD64_VERSION;
2494 	mci->ctl_name		= fam->ctl_name;
2495 	mci->dev_name		= pci_name(pvt->F2);
2496 	mci->ctl_page_to_phys	= NULL;
2497 
2498 	/* memory scrubber interface */
2499 	mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
2500 	mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
2501 }
2502 
2503 /*
2504  * returns a pointer to the family descriptor on success, NULL otherwise.
2505  */
2506 static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
2507 {
2508 	u8 fam = boot_cpu_data.x86;
2509 	struct amd64_family_type *fam_type = NULL;
2510 
2511 	switch (fam) {
2512 	case 0xf:
2513 		fam_type		= &amd64_family_types[K8_CPUS];
2514 		pvt->ops		= &amd64_family_types[K8_CPUS].ops;
2515 		break;
2516 
2517 	case 0x10:
2518 		fam_type		= &amd64_family_types[F10_CPUS];
2519 		pvt->ops		= &amd64_family_types[F10_CPUS].ops;
2520 		break;
2521 
2522 	case 0x15:
2523 		fam_type		= &amd64_family_types[F15_CPUS];
2524 		pvt->ops		= &amd64_family_types[F15_CPUS].ops;
2525 		break;
2526 
2527 	default:
2528 		amd64_err("Unsupported family!\n");
2529 		return NULL;
2530 	}
2531 
2532 	pvt->ext_model = boot_cpu_data.x86_model >> 4;
2533 
2534 	amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
2535 		     (fam == 0xf ?
2536 				(pvt->ext_model >= K8_REV_F  ? "revF or later "
2537 							     : "revE or earlier ")
2538 				 : ""), pvt->mc_node_id);
2539 	return fam_type;
2540 }
2541 
2542 static int amd64_init_one_instance(struct pci_dev *F2)
2543 {
2544 	struct amd64_pvt *pvt = NULL;
2545 	struct amd64_family_type *fam_type = NULL;
2546 	struct mem_ctl_info *mci = NULL;
2547 	struct edac_mc_layer layers[2];
2548 	int err = 0, ret;
2549 	u8 nid = get_node_id(F2);
2550 
2551 	ret = -ENOMEM;
2552 	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
2553 	if (!pvt)
2554 		goto err_ret;
2555 
2556 	pvt->mc_node_id	= nid;
2557 	pvt->F2 = F2;
2558 
2559 	ret = -EINVAL;
2560 	fam_type = amd64_per_family_init(pvt);
2561 	if (!fam_type)
2562 		goto err_free;
2563 
2564 	ret = -ENODEV;
2565 	err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
2566 	if (err)
2567 		goto err_free;
2568 
2569 	read_mc_regs(pvt);
2570 
2571 	/*
2572 	 * We need to determine how many memory channels there are. Then use
2573 	 * that information for calculating the size of the dynamic instance
2574 	 * tables in the 'mci' structure.
2575 	 */
2576 	ret = -EINVAL;
2577 	pvt->channel_count = pvt->ops->early_channel_count(pvt);
2578 	if (pvt->channel_count < 0)
2579 		goto err_siblings;
2580 
2581 	ret = -ENOMEM;
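	/*
	 * Describe the controller to the EDAC core as two layers: chip
	 * selects on top (exposed as virtual csrows), DRAM channels below.
	 */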
2582 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
2583 	layers[0].size = pvt->csels[0].b_cnt;
2584 	layers[0].is_virt_csrow = true;
2585 	layers[1].type = EDAC_MC_LAYER_CHANNEL;
2586 	layers[1].size = pvt->channel_count;
2587 	layers[1].is_virt_csrow = false;
2588 	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
2589 	if (!mci)
2590 		goto err_siblings;
2591 
2592 	mci->pvt_info = pvt;
2593 	mci->pdev = &pvt->F2->dev;
2594 
2595 	setup_mci_misc_attrs(mci, fam_type);
2596 
2597 	if (init_csrows(mci))
2598 		mci->edac_cap = EDAC_FLAG_NONE;
2599 
2600 	ret = -ENODEV;
2601 	if (edac_mc_add_mc(mci)) {
2602 		edac_dbg(1, "failed edac_mc_add_mc()\n");
2603 		goto err_add_mc;
2604 	}
2605 	if (set_mc_sysfs_attrs(mci)) {
2606 		edac_dbg(1, "failed set_mc_sysfs_attrs()\n");
2607 		goto err_add_sysfs;
2608 	}
2609 
2610 	/* register stuff with EDAC MCE */
2611 	if (report_gart_errors)
2612 		amd_report_gart_errors(true);
2613 
2614 	amd_register_ecc_decoder(amd64_decode_bus_error);
2615 
2616 	mcis[nid] = mci;
2617 
2618 	atomic_inc(&drv_instances);
2619 
2620 	return 0;
2621 
2622 err_add_sysfs:
2623 	edac_mc_del_mc(mci->pdev);
2624 err_add_mc:
2625 	edac_mc_free(mci);
2626 
2627 err_siblings:
2628 	free_mc_sibling_devs(pvt);
2629 
2630 err_free:
2631 	kfree(pvt);
2632 
2633 err_ret:
2634 	return ret;
2635 }
2636 
2637 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
2638 					     const struct pci_device_id *mc_type)
2639 {
2640 	u8 nid = get_node_id(pdev);
2641 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2642 	struct ecc_settings *s;
2643 	int ret = 0;
2644 
2645 	ret = pci_enable_device(pdev);
2646 	if (ret < 0) {
2647 		edac_dbg(0, "ret=%d\n", ret);
2648 		return -EIO;
2649 	}
2650 
2651 	ret = -ENOMEM;
2652 	s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
2653 	if (!s)
2654 		goto err_out;
2655 
2656 	ecc_stngs[nid] = s;
2657 
2658 	if (!ecc_enabled(F3, nid)) {
2659 		ret = -ENODEV;
2660 
2661 		if (!ecc_enable_override)
2662 			goto err_enable;
2663 
2664 		amd64_warn("Forcing ECC on!\n");
2665 
2666 		if (!enable_ecc_error_reporting(s, nid, F3))
2667 			goto err_enable;
2668 	}
2669 
2670 	ret = amd64_init_one_instance(pdev);
2671 	if (ret < 0) {
2672 		amd64_err("Error probing instance: %d\n", nid);
2673 		restore_ecc_error_reporting(s, nid, F3);
2674 	}
2675 
2676 	return ret;
2677 
2678 err_enable:
2679 	kfree(s);
2680 	ecc_stngs[nid] = NULL;
2681 
2682 err_out:
2683 	return ret;
2684 }
2685 
2686 static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
2687 {
2688 	struct mem_ctl_info *mci;
2689 	struct amd64_pvt *pvt;
2690 	u8 nid = get_node_id(pdev);
2691 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2692 	struct ecc_settings *s = ecc_stngs[nid];
2693 
2694 	mci = find_mci_by_dev(&pdev->dev);
	if (!mci)
		return;

2695 	del_mc_sysfs_attrs(mci);
2696 	/* Remove from EDAC CORE tracking list */
2697 	mci = edac_mc_del_mc(&pdev->dev);
2698 	if (!mci)
2699 		return;
2700 
2701 	pvt = mci->pvt_info;
2702 
2703 	restore_ecc_error_reporting(s, nid, F3);
2704 
2705 	free_mc_sibling_devs(pvt);
2706 
2707 	/* unregister from EDAC MCE */
2708 	amd_report_gart_errors(false);
2709 	amd_unregister_ecc_decoder(amd64_decode_bus_error);
2710 
2711 	kfree(ecc_stngs[nid]);
2712 	ecc_stngs[nid] = NULL;
2713 
2714 	/* Free the EDAC CORE resources */
2715 	mci->pvt_info = NULL;
2716 	mcis[nid] = NULL;
2717 
2718 	kfree(pvt);
2719 	edac_mc_free(mci);
2720 }
2721 
2722 /*
2723  * This table is part of the interface for loading drivers for PCI devices. The
2724  * PCI core identifies which devices are present during boot, and then
2725  * queries this table to see whether this driver handles a given device.
2726  */
2727 static DEFINE_PCI_DEVICE_TABLE(amd64_pci_table) = {
2728 	{
2729 		.vendor		= PCI_VENDOR_ID_AMD,
2730 		.device		= PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
2731 		.subvendor	= PCI_ANY_ID,
2732 		.subdevice	= PCI_ANY_ID,
2733 		.class		= 0,
2734 		.class_mask	= 0,
2735 	},
2736 	{
2737 		.vendor		= PCI_VENDOR_ID_AMD,
2738 		.device		= PCI_DEVICE_ID_AMD_10H_NB_DRAM,
2739 		.subvendor	= PCI_ANY_ID,
2740 		.subdevice	= PCI_ANY_ID,
2741 		.class		= 0,
2742 		.class_mask	= 0,
2743 	},
2744 	{
2745 		.vendor		= PCI_VENDOR_ID_AMD,
2746 		.device		= PCI_DEVICE_ID_AMD_15H_NB_F2,
2747 		.subvendor	= PCI_ANY_ID,
2748 		.subdevice	= PCI_ANY_ID,
2749 		.class		= 0,
2750 		.class_mask	= 0,
2751 	},
2752 
2753 	{0, }
2754 };
2755 MODULE_DEVICE_TABLE(pci, amd64_pci_table);
2756 
2757 static struct pci_driver amd64_pci_driver = {
2758 	.name		= EDAC_MOD_STR,
2759 	.probe		= amd64_probe_one_instance,
2760 	.remove		= __devexit_p(amd64_remove_one_instance),
2761 	.id_table	= amd64_pci_table,
2762 };
2763 
2764 static void setup_pci_device(void)
2765 {
2766 	struct mem_ctl_info *mci;
2767 	struct amd64_pvt *pvt;
2768 
2769 	if (amd64_ctl_pci)
2770 		return;
2771 
2772 	mci = mcis[0];
2773 	if (mci) {
2774 
2775 		pvt = mci->pvt_info;
2776 		amd64_ctl_pci =
2777 			edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
2778 
2779 		if (!amd64_ctl_pci) {
2780 			pr_warning("%s(): Unable to create PCI control\n",
2781 				   __func__);
2782 
2783 			pr_warning("%s(): PCI error report via EDAC not set\n",
2784 				   __func__);
2785 		}
2786 	}
2787 }
2788 
2789 static int __init amd64_edac_init(void)
2790 {
2791 	int err = -ENODEV;
2792 
2793 	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
2794 
2795 	opstate_init();
2796 
2797 	if (amd_cache_northbridges() < 0)
2798 		goto err_ret;
2799 
2800 	err = -ENOMEM;
2801 	mcis	  = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
2802 	ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
2803 	if (!(mcis && ecc_stngs))
2804 		goto err_free;
2805 
2806 	msrs = msrs_alloc();
2807 	if (!msrs)
2808 		goto err_free;
2809 
2810 	err = pci_register_driver(&amd64_pci_driver);
2811 	if (err)
2812 		goto err_pci;
2813 
2814 	err = -ENODEV;
2815 	if (!atomic_read(&drv_instances))
2816 		goto err_no_instances;
2817 
2818 	setup_pci_device();
2819 	return 0;
2820 
2821 err_no_instances:
2822 	pci_unregister_driver(&amd64_pci_driver);
2823 
2824 err_pci:
2825 	msrs_free(msrs);
2826 	msrs = NULL;
2827 
2828 err_free:
2829 	kfree(mcis);
2830 	mcis = NULL;
2831 
2832 	kfree(ecc_stngs);
2833 	ecc_stngs = NULL;
2834 
2835 err_ret:
2836 	return err;
2837 }
2838 
2839 static void __exit amd64_edac_exit(void)
2840 {
2841 	if (amd64_ctl_pci)
2842 		edac_pci_release_generic_ctl(amd64_ctl_pci);
2843 
2844 	pci_unregister_driver(&amd64_pci_driver);
2845 
2846 	kfree(ecc_stngs);
2847 	ecc_stngs = NULL;
2848 
2849 	kfree(mcis);
2850 	mcis = NULL;
2851 
2852 	msrs_free(msrs);
2853 	msrs = NULL;
2854 }
2855 
2856 module_init(amd64_edac_init);
2857 module_exit(amd64_edac_exit);
2858 
2859 MODULE_LICENSE("GPL");
2860 MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
2861 		"Dave Peterson, Thayne Harbaugh");
2862 MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
2863 		EDAC_AMD64_VERSION);
2864 
2865 module_param(edac_op_state, int, 0444);
2866 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll, 1=NMI");
2867