xref: /openbmc/linux/drivers/edac/amd64_edac.c (revision b6bec26c)
1 #include "amd64_edac.h"
2 #include <asm/amd_nb.h>
3 
4 static struct edac_pci_ctl_info *amd64_ctl_pci;
5 
6 static int report_gart_errors;
7 module_param(report_gart_errors, int, 0644);
8 
9 /*
10  * Set by command line parameter. If BIOS has enabled ECC, this override is
11  * cleared to prevent this driver from re-enabling the hardware.
12  */
13 static int ecc_enable_override;
14 module_param(ecc_enable_override, int, 0644);
15 
16 static struct msr __percpu *msrs;
17 
18 /*
19  * count successfully initialized driver instances for setup_pci_device()
20  */
21 static atomic_t drv_instances = ATOMIC_INIT(0);
22 
23 /* Per-node driver instances */
24 static struct mem_ctl_info **mcis;
25 static struct ecc_settings **ecc_stngs;
26 
27 /*
28  * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
29  * bandwidth to a valid bit pattern. The 'set' operation finds the
30  * 'matching-or-higher value'.
31  *
32  * FIXME: Produce a better mapping/linearisation.
33  */
34 struct scrubrate {
35        u32 scrubval;           /* bit pattern for scrub rate */
36        u32 bandwidth;          /* bandwidth consumed (bytes/sec) */
37 } scrubrates[] = {
38 	{ 0x01, 1600000000UL},
39 	{ 0x02, 800000000UL},
40 	{ 0x03, 400000000UL},
41 	{ 0x04, 200000000UL},
42 	{ 0x05, 100000000UL},
43 	{ 0x06, 50000000UL},
44 	{ 0x07, 25000000UL},
45 	{ 0x08, 12284069UL},
46 	{ 0x09, 6274509UL},
47 	{ 0x0A, 3121951UL},
48 	{ 0x0B, 1560975UL},
49 	{ 0x0C, 781440UL},
50 	{ 0x0D, 390720UL},
51 	{ 0x0E, 195300UL},
52 	{ 0x0F, 97650UL},
53 	{ 0x10, 48854UL},
54 	{ 0x11, 24427UL},
55 	{ 0x12, 12213UL},
56 	{ 0x13, 6101UL},
57 	{ 0x14, 3051UL},
58 	{ 0x15, 1523UL},
59 	{ 0x16, 761UL},
60 	{ 0x00, 0UL},        /* scrubbing off */
61 };
62 
63 int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
64 			       u32 *val, const char *func)
65 {
66 	int err = 0;
67 
68 	err = pci_read_config_dword(pdev, offset, val);
69 	if (err)
70 		amd64_warn("%s: error reading F%dx%03x.\n",
71 			   func, PCI_FUNC(pdev->devfn), offset);
72 
73 	return err;
74 }
75 
76 int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
77 				u32 val, const char *func)
78 {
79 	int err = 0;
80 
81 	err = pci_write_config_dword(pdev, offset, val);
82 	if (err)
83 		amd64_warn("%s: error writing to F%dx%03x.\n",
84 			   func, PCI_FUNC(pdev->devfn), offset);
85 
86 	return err;
87 }
88 
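/*
 * Usage sketch (illustration only): callers normally reach these helpers
 * through the amd64_read_pci_cfg()/amd64_write_pci_cfg() wrappers in
 * amd64_edac.h, which supply __func__, e.g. reading the scrub control
 * register on F3 as amd64_get_scrub_rate() does further below:
 *
 *	u32 scrubval;
 *
 *	amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
 */
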
89 /*
90  *
91  * Depending on the family, F2 DCT reads need special handling:
92  *
93  * K8: has a single DCT only
94  *
95  * F10h: each DCT has its own set of regs
96  *	DCT0 -> F2x040..
97  *	DCT1 -> F2x140..
98  *
99  * F15h: we select which DCT we access using F1x10C[DctCfgSel]
100  *
101  */
102 static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
103 			       const char *func)
104 {
105 	if (addr >= 0x100)
106 		return -EINVAL;
107 
108 	return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
109 }
110 
111 static int f10_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
112 				 const char *func)
113 {
114 	return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
115 }
116 
117 /*
118  * Select DCT to which PCI cfg accesses are routed
119  */
120 static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
121 {
122 	u32 reg = 0;
123 
124 	amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
125 	reg &= 0xfffffffe;
126 	reg |= dct;
127 	amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
128 }
129 
130 static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
131 				 const char *func)
132 {
133 	u8 dct  = 0;
134 
135 	if (addr >= 0x140 && addr <= 0x1a0) {
136 		dct   = 1;
137 		addr -= 0x100;
138 	}
139 
140 	f15h_select_dct(pvt, dct);
141 
142 	return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
143 }
144 
145 /*
146  * Memory scrubber control interface. For K8, memory scrubbing is handled by
147  * hardware and can involve L2 cache, dcache as well as the main memory. With
148  * F10, this is extended to L3 cache scrubbing on CPU models sporting that
149  * functionality.
150  *
151  * This causes the "units" for the scrubbing speed to vary from 64-byte blocks
152  * (DRAM) to cache lines. This is nasty, so we use bandwidth in
153  * bytes/sec for the setting.
154  *
155  * Currently, we only do dram scrubbing. If the scrubbing is done in software on
156  * other archs, we might not have access to the caches directly.
157  */
158 
159 /*
160  * Scan the scrub rate mapping table for a close or matching bandwidth value to
161  * issue. If the requested rate is too big, use the maximum value available.
162  */
163 static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate)
164 {
165 	u32 scrubval;
166 	int i;
167 
168 	/*
169 	 * Map the configured rate (new_bw) to a value specific to the AMD64
170 	 * memory controller and apply it to the register. The table is sorted
171 	 * by descending bandwidth, so pick the first entry whose bandwidth
172 	 * does not exceed the requested rate and program that.
173 	 *
174 	 * If no suitable bandwidth is found, turn off DRAM scrubbing entirely
175 	 * by falling back to the last element in scrubrates[].
176 	 */
177 	for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) {
178 		/*
179 		 * skip scrub rates which aren't recommended
180 		 * (see F10 BKDG, F3x58)
181 		 */
182 		if (scrubrates[i].scrubval < min_rate)
183 			continue;
184 
185 		if (scrubrates[i].bandwidth <= new_bw)
186 			break;
187 	}
188 
189 	scrubval = scrubrates[i].scrubval;
190 
191 	pci_write_bits32(ctl, SCRCTRL, scrubval, 0x001F);
192 
193 	if (scrubval)
194 		return scrubrates[i].bandwidth;
195 
196 	return 0;
197 }
198 
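/*
 * Worked example (illustration only): with the table above, a request of
 * 60000000 bytes/sec walks past the faster entries and stops at scrubval
 * 0x06 (50000000 bytes/sec) -- the largest bandwidth that does not exceed
 * the request -- whether min_rate is 0x0 (K8) or 0x5. A request above
 * 1600000000 bytes/sec selects the fastest entry permitted by min_rate.
 */
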
199 static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
200 {
201 	struct amd64_pvt *pvt = mci->pvt_info;
202 	u32 min_scrubrate = 0x5;
203 
204 	if (boot_cpu_data.x86 == 0xf)
205 		min_scrubrate = 0x0;
206 
207 	/* F15h Erratum #505 */
208 	if (boot_cpu_data.x86 == 0x15)
209 		f15h_select_dct(pvt, 0);
210 
211 	return __amd64_set_scrub_rate(pvt->F3, bw, min_scrubrate);
212 }
213 
214 static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
215 {
216 	struct amd64_pvt *pvt = mci->pvt_info;
217 	u32 scrubval = 0;
218 	int i, retval = -EINVAL;
219 
220 	/* F15h Erratum #505 */
221 	if (boot_cpu_data.x86 == 0x15)
222 		f15h_select_dct(pvt, 0);
223 
224 	amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
225 
226 	scrubval = scrubval & 0x001F;
227 
228 	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
229 		if (scrubrates[i].scrubval == scrubval) {
230 			retval = scrubrates[i].bandwidth;
231 			break;
232 		}
233 	}
234 	return retval;
235 }
236 
237 /*
238  * returns true if the SysAddr given by sys_addr matches the
239  * DRAM base/limit associated with node_id
240  */
241 static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr,
242 				   unsigned nid)
243 {
244 	u64 addr;
245 
246 	/* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
247 	 * all ones if the most significant implemented address bit is 1.
248 	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
249 	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
250 	 * Application Programming.
251 	 */
252 	addr = sys_addr & 0x000000ffffffffffull;
253 
254 	return ((addr >= get_dram_base(pvt, nid)) &&
255 		(addr <= get_dram_limit(pvt, nid)));
256 }
257 
258 /*
259  * Attempt to map a SysAddr to a node. On success, return a pointer to the
260  * mem_ctl_info structure for the node that the SysAddr maps to.
261  *
262  * On failure, return NULL.
263  */
264 static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
265 						u64 sys_addr)
266 {
267 	struct amd64_pvt *pvt;
268 	unsigned node_id;
269 	u32 intlv_en, bits;
270 
271 	/*
272 	 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
273 	 * 3.4.4.2) registers to map the SysAddr to a node ID.
274 	 */
275 	pvt = mci->pvt_info;
276 
277 	/*
278 	 * The value of this field should be the same for all DRAM Base
279 	 * registers.  Therefore we arbitrarily choose to read it from the
280 	 * register for node 0.
281 	 */
282 	intlv_en = dram_intlv_en(pvt, 0);
283 
284 	if (intlv_en == 0) {
285 		for (node_id = 0; node_id < DRAM_RANGES; node_id++) {
286 			if (amd64_base_limit_match(pvt, sys_addr, node_id))
287 				goto found;
288 		}
289 		goto err_no_match;
290 	}
291 
292 	if (unlikely((intlv_en != 0x01) &&
293 		     (intlv_en != 0x03) &&
294 		     (intlv_en != 0x07))) {
295 		amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
296 		return NULL;
297 	}
298 
299 	bits = (((u32) sys_addr) >> 12) & intlv_en;
300 
301 	for (node_id = 0; ; ) {
302 		if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits)
303 			break;	/* intlv_sel field matches */
304 
305 		if (++node_id >= DRAM_RANGES)
306 			goto err_no_match;
307 	}
308 
309 	/* sanity test for sys_addr */
310 	if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
311 		amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address "
312 			   "range for node %d with node interleaving enabled.\n",
313 			   __func__, sys_addr, node_id);
314 		return NULL;
315 	}
316 
317 found:
318 	return edac_mc_find((int)node_id);
319 
320 err_no_match:
321 	edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
322 		 (unsigned long)sys_addr);
323 
324 	return NULL;
325 }
326 
327 /*
328  * compute the CS base address of the @csrow on the DRAM controller @dct.
329  * For details see F2x[5C:40] in the processor's BKDG
330  */
331 static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
332 				 u64 *base, u64 *mask)
333 {
334 	u64 csbase, csmask, base_bits, mask_bits;
335 	u8 addr_shift;
336 
337 	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
338 		csbase		= pvt->csels[dct].csbases[csrow];
339 		csmask		= pvt->csels[dct].csmasks[csrow];
340 		base_bits	= GENMASK(21, 31) | GENMASK(9, 15);
341 		mask_bits	= GENMASK(21, 29) | GENMASK(9, 15);
342 		addr_shift	= 4;
343 	} else {
344 		csbase		= pvt->csels[dct].csbases[csrow];
345 		csmask		= pvt->csels[dct].csmasks[csrow >> 1];
346 		addr_shift	= 8;
347 
348 		if (boot_cpu_data.x86 == 0x15)
349 			base_bits = mask_bits = GENMASK(19, 30) | GENMASK(5, 13);
350 		else
351 			base_bits = mask_bits = GENMASK(19, 28) | GENMASK(5, 13);
352 	}
353 
354 	*base  = (csbase & base_bits) << addr_shift;
355 
356 	*mask  = ~0ULL;
357 	/* poke holes for the csmask */
358 	*mask &= ~(mask_bits << addr_shift);
359 	/* OR them in */
360 	*mask |= (csmask & mask_bits) << addr_shift;
361 }
362 
363 #define for_each_chip_select(i, dct, pvt) \
364 	for (i = 0; i < pvt->csels[dct].b_cnt; i++)
365 
366 #define chip_select_base(i, dct, pvt) \
367 	pvt->csels[dct].csbases[i]
368 
369 #define for_each_chip_select_mask(i, dct, pvt) \
370 	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
371 
372 /*
373  * @input_addr is an InputAddr associated with the node given by mci. Return the
374  * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
375  */
376 static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
377 {
378 	struct amd64_pvt *pvt;
379 	int csrow;
380 	u64 base, mask;
381 
382 	pvt = mci->pvt_info;
383 
384 	for_each_chip_select(csrow, 0, pvt) {
385 		if (!csrow_enabled(csrow, 0, pvt))
386 			continue;
387 
388 		get_cs_base_and_mask(pvt, csrow, 0, &base, &mask);
389 
390 		mask = ~mask;
391 
392 		if ((input_addr & mask) == (base & mask)) {
393 			edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n",
394 				 (unsigned long)input_addr, csrow,
395 				 pvt->mc_node_id);
396 
397 			return csrow;
398 		}
399 	}
400 	edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n",
401 		 (unsigned long)input_addr, pvt->mc_node_id);
402 
403 	return -1;
404 }
405 
406 /*
407  * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
408  * for the node represented by mci. Info is passed back in *hole_base,
409  * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
410  * info is invalid. Info may be invalid for either of the following reasons:
411  *
412  * - The revision of the node is not E or greater.  In this case, the DRAM Hole
413  *   Address Register does not exist.
414  *
415  * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
416  *   indicating that its contents are not valid.
417  *
418  * The values passed back in *hole_base, *hole_offset, and *hole_size are
419  * complete 32-bit values despite the fact that the bitfields in the DHAR
420  * only represent bits 31-24 of the base and offset values.
421  */
422 int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
423 			     u64 *hole_offset, u64 *hole_size)
424 {
425 	struct amd64_pvt *pvt = mci->pvt_info;
426 
427 	/* only revE and later have the DRAM Hole Address Register */
428 	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) {
429 		edac_dbg(1, "  revision %d for node %d does not support DHAR\n",
430 			 pvt->ext_model, pvt->mc_node_id);
431 		return 1;
432 	}
433 
434 	/* valid for Fam10h and above */
435 	if (boot_cpu_data.x86 >= 0x10 && !dhar_mem_hoist_valid(pvt)) {
436 		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this system\n");
437 		return 1;
438 	}
439 
440 	if (!dhar_valid(pvt)) {
441 		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this node %d\n",
442 			 pvt->mc_node_id);
443 		return 1;
444 	}
445 
446 	/* This node has Memory Hoisting */
447 
448 	/* +------------------+--------------------+--------------------+-----
449 	 * | memory           | DRAM hole          | relocated          |
450 	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
451 	 * |                  |                    | DRAM hole          |
452 	 * |                  |                    | [0x100000000,      |
453 	 * |                  |                    |  (0x100000000+     |
454 	 * |                  |                    |   (0xffffffff-x))] |
455 	 * +------------------+--------------------+--------------------+-----
456 	 *
457 	 * Above is a diagram of physical memory showing the DRAM hole and the
458 	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
459 	 * starts at address x (the base address) and extends through address
460 	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
461 	 * addresses in the hole so that they start at 0x100000000.
462 	 */
463 
464 	*hole_base = dhar_base(pvt);
465 	*hole_size = (1ULL << 32) - *hole_base;
466 
467 	if (boot_cpu_data.x86 > 0xf)
468 		*hole_offset = f10_dhar_offset(pvt);
469 	else
470 		*hole_offset = k8_dhar_offset(pvt);
471 
472 	edac_dbg(1, "  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
473 		 pvt->mc_node_id, (unsigned long)*hole_base,
474 		 (unsigned long)*hole_offset, (unsigned long)*hole_size);
475 
476 	return 0;
477 }
478 EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
479 
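/*
 * Usage sketch (hypothetical helper, not part of the driver): querying the
 * hole geometry. For example, with the DHAR base programmed to 0xc0000000
 * the hole spans [3 GB, 4 GB), i.e. *hole_size comes back as 0x40000000.
 */
static inline void example_query_dram_hole(struct mem_ctl_info *mci)
{
	u64 base, offset, size;

	if (!amd64_get_dram_hole_info(mci, &base, &offset, &size))
		edac_dbg(1, "hole: base 0x%llx offset 0x%llx size 0x%llx\n",
			 base, offset, size);
}
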
480 /*
481  * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
482  * assumed that sys_addr maps to the node given by mci.
483  *
484  * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
485  * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
486  * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
487  * then it is also involved in translating a SysAddr to a DramAddr. Sections
488  * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
489  * These parts of the documentation are unclear. I interpret them as follows:
490  *
491  * When node n receives a SysAddr, it processes the SysAddr as follows:
492  *
493  * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
494  *    Limit registers for node n. If the SysAddr is not within the range
495  *    specified by the base and limit values, then node n ignores the Sysaddr
496  *    (since it does not map to node n). Otherwise continue to step 2 below.
497  *
498  * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
499  *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
500  *    the range of relocated addresses (starting at 0x100000000) from the DRAM
501  *    hole. If not, skip to step 3 below. Else get the value of the
502  *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
503  *    offset defined by this value from the SysAddr.
504  *
505  * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
506  *    Base register for node n. To obtain the DramAddr, subtract the base
507  *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
508  */
509 static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
510 {
511 	struct amd64_pvt *pvt = mci->pvt_info;
512 	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
513 	int ret;
514 
515 	dram_base = get_dram_base(pvt, pvt->mc_node_id);
516 
517 	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
518 				      &hole_size);
519 	if (!ret) {
520 		if ((sys_addr >= (1ULL << 32)) &&
521 		    (sys_addr < ((1ULL << 32) + hole_size))) {
522 			/* use DHAR to translate SysAddr to DramAddr */
523 			dram_addr = sys_addr - hole_offset;
524 
525 			edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
526 				 (unsigned long)sys_addr,
527 				 (unsigned long)dram_addr);
528 
529 			return dram_addr;
530 		}
531 	}
532 
533 	/*
534 	 * Translate the SysAddr to a DramAddr as shown near the start of
535 	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
536 	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
537 	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
538 	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
539 	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
540 	 * Programmer's Manual Volume 1 Application Programming.
541 	 */
542 	dram_addr = (sys_addr & GENMASK(0, 39)) - dram_base;
543 
544 	edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
545 		 (unsigned long)sys_addr, (unsigned long)dram_addr);
546 	return dram_addr;
547 }
548 
549 /*
550  * @intlv_en is the value of the IntlvEn field from a DRAM Base register
551  * (section 3.4.4.1).  Return the number of bits from a SysAddr that are used
552  * for node interleaving.
553  */
554 static int num_node_interleave_bits(unsigned intlv_en)
555 {
556 	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
557 	int n;
558 
559 	BUG_ON(intlv_en > 7);
560 	n = intlv_shift_table[intlv_en];
561 	return n;
562 }
563 
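/*
 * Quick reference (illustration): IntlvEn encodes how many low-order
 * SysAddr bits above bit 11 select the node:
 *
 *	intlv_en 0x0 -> 0 bits (no node interleaving)
 *	intlv_en 0x1 -> 1 bit  (2 nodes)
 *	intlv_en 0x3 -> 2 bits (4 nodes)
 *	intlv_en 0x7 -> 3 bits (8 nodes)
 */
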
564 /* Translate the DramAddr given by @dram_addr to an InputAddr. */
565 static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
566 {
567 	struct amd64_pvt *pvt;
568 	int intlv_shift;
569 	u64 input_addr;
570 
571 	pvt = mci->pvt_info;
572 
573 	/*
574 	 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
575 	 * concerning translating a DramAddr to an InputAddr.
576 	 */
577 	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
578 	input_addr = ((dram_addr >> intlv_shift) & GENMASK(12, 35)) +
579 		      (dram_addr & 0xfff);
580 
581 	edac_dbg(2, "  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
582 		 intlv_shift, (unsigned long)dram_addr,
583 		 (unsigned long)input_addr);
584 
585 	return input_addr;
586 }
587 
588 /*
589  * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
590  * assumed that @sys_addr maps to the node given by mci.
591  */
592 static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
593 {
594 	u64 input_addr;
595 
596 	input_addr =
597 	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
598 
599 	edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n",
600 		 (unsigned long)sys_addr, (unsigned long)input_addr);
601 
602 	return input_addr;
603 }
604 
605 
606 /*
607  * @input_addr is an InputAddr associated with the node represented by mci.
608  * Translate @input_addr to a DramAddr and return the result.
609  */
610 static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
611 {
612 	struct amd64_pvt *pvt;
613 	unsigned node_id, intlv_shift;
614 	u64 bits, dram_addr;
615 	u32 intlv_sel;
616 
617 	/*
618 	 * Near the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
619 	 * shows how to translate a DramAddr to an InputAddr. Here we reverse
620 	 * this procedure. When translating from a DramAddr to an InputAddr, the
621 	 * bits used for node interleaving are discarded.  Here we recover these
622 	 * bits from the IntlvSel field of the DRAM Limit register (section
623 	 * 3.4.4.2) for the node that input_addr is associated with.
624 	 */
625 	pvt = mci->pvt_info;
626 	node_id = pvt->mc_node_id;
627 
628 	BUG_ON(node_id > 7);
629 
630 	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
631 	if (intlv_shift == 0) {
632 		edac_dbg(1, "    InputAddr 0x%lx translates to DramAddr of same value\n",
633 			 (unsigned long)input_addr);
634 
635 		return input_addr;
636 	}
637 
638 	bits = ((input_addr & GENMASK(12, 35)) << intlv_shift) +
639 		(input_addr & 0xfff);
640 
641 	intlv_sel = dram_intlv_sel(pvt, node_id) & ((1 << intlv_shift) - 1);
642 	dram_addr = bits + (intlv_sel << 12);
643 
644 	edac_dbg(1, "InputAddr 0x%lx translates to DramAddr 0x%lx (%d node interleave bits)\n",
645 		 (unsigned long)input_addr,
646 		 (unsigned long)dram_addr, intlv_shift);
647 
648 	return dram_addr;
649 }
650 
651 /*
652  * @dram_addr is a DramAddr that maps to the node represented by mci. Convert
653  * @dram_addr to a SysAddr.
654  */
655 static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr)
656 {
657 	struct amd64_pvt *pvt = mci->pvt_info;
658 	u64 hole_base, hole_offset, hole_size, base, sys_addr;
659 	int ret = 0;
660 
661 	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
662 				      &hole_size);
663 	if (!ret) {
664 		if ((dram_addr >= hole_base) &&
665 		    (dram_addr < (hole_base + hole_size))) {
666 			sys_addr = dram_addr + hole_offset;
667 
668 			edac_dbg(1, "using DHAR to translate DramAddr 0x%lx to SysAddr 0x%lx\n",
669 				 (unsigned long)dram_addr,
670 				 (unsigned long)sys_addr);
671 
672 			return sys_addr;
673 		}
674 	}
675 
676 	base     = get_dram_base(pvt, pvt->mc_node_id);
677 	sys_addr = dram_addr + base;
678 
679 	/*
680 	 * The sys_addr we have computed up to this point is a 40-bit value
681 	 * because the k8 deals with 40-bit values.  However, the value we are
682 	 * supposed to return is a full 64-bit physical address.  The AMD
683 	 * x86-64 architecture specifies that the most significant implemented
684 	 * address bit through bit 63 of a physical address must be either all
685 	 * 0s or all 1s.  Therefore we sign-extend the 40-bit sys_addr to a
686 	 * 64-bit value below.  See section 3.4.2 of AMD publication 24592:
687 	 * AMD x86-64 Architecture Programmer's Manual Volume 1 Application
688 	 * Programming.
689 	 */
690 	sys_addr |= ~((sys_addr & (1ull << 39)) - 1);
691 
692 	edac_dbg(1, "    Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n",
693 		 pvt->mc_node_id, (unsigned long)dram_addr,
694 		 (unsigned long)sys_addr);
695 
696 	return sys_addr;
697 }
698 
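/*
 * Sketch of the sign-extension idiom above in isolation (assumption: this
 * helper is illustrative only and not used by the driver). If bit 39 is
 * set, bits [63:40] are filled with ones; otherwise the value is returned
 * unchanged.
 */
static inline u64 example_sign_extend_bit39(u64 a)
{
	/* 0x8012345678 -> 0xffffff8012345678; 0x0012345678 stays as-is */
	return a | ~((a & (1ull << 39)) - 1);
}
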
699 /*
700  * @input_addr is an InputAddr associated with the node given by mci. Translate
701  * @input_addr to a SysAddr.
702  */
703 static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
704 					 u64 input_addr)
705 {
706 	return dram_addr_to_sys_addr(mci,
707 				     input_addr_to_dram_addr(mci, input_addr));
708 }
709 
710 /* Map the Error address to a PAGE and PAGE OFFSET. */
711 static inline void error_address_to_page_and_offset(u64 error_address,
712 						    struct err_info *err)
713 {
714 	err->page = (u32) (error_address >> PAGE_SHIFT);
715 	err->offset = ((u32) error_address) & ~PAGE_MASK;
716 }
717 
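/*
 * Example (illustration only): with 4 KiB pages (PAGE_SHIFT == 12), an
 * error address of 0x12345678 splits into page 0x12345 and offset 0x678.
 */
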
718 /*
719  * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
720  * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
721  * of a node that detected an ECC memory error.  mci represents the node that
722  * the error address maps to (possibly different from the node that detected
723  * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
724  * error.
725  */
726 static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
727 {
728 	int csrow;
729 
730 	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
731 
732 	if (csrow == -1)
733 		amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
734 				  "address 0x%lx\n", (unsigned long)sys_addr);
735 	return csrow;
736 }
737 
738 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
739 
740 /*
741  * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
742  * are ECC capable.
743  */
744 static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt)
745 {
746 	u8 bit;
747 	unsigned long edac_cap = EDAC_FLAG_NONE;
748 
749 	bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F)
750 		? 19
751 		: 17;
752 
753 	if (pvt->dclr0 & BIT(bit))
754 		edac_cap = EDAC_FLAG_SECDED;
755 
756 	return edac_cap;
757 }
758 
759 static void amd64_debug_display_dimm_sizes(struct amd64_pvt *, u8);
760 
761 static void amd64_dump_dramcfg_low(u32 dclr, int chan)
762 {
763 	edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);
764 
765 	edac_dbg(1, "  DIMM type: %sbuffered; all DIMMs support ECC: %s\n",
766 		 (dclr & BIT(16)) ?  "un" : "",
767 		 (dclr & BIT(19)) ? "yes" : "no");
768 
769 	edac_dbg(1, "  PAR/ERR parity: %s\n",
770 		 (dclr & BIT(8)) ?  "enabled" : "disabled");
771 
772 	if (boot_cpu_data.x86 == 0x10)
773 		edac_dbg(1, "  DCT 128bit mode width: %s\n",
774 			 (dclr & BIT(11)) ?  "128b" : "64b");
775 
776 	edac_dbg(1, "  x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
777 		 (dclr & BIT(12)) ?  "yes" : "no",
778 		 (dclr & BIT(13)) ?  "yes" : "no",
779 		 (dclr & BIT(14)) ?  "yes" : "no",
780 		 (dclr & BIT(15)) ?  "yes" : "no");
781 }
782 
783 /* Display and decode various NB registers for debug purposes. */
784 static void dump_misc_regs(struct amd64_pvt *pvt)
785 {
786 	edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
787 
788 	edac_dbg(1, "  NB two channel DRAM capable: %s\n",
789 		 (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");
790 
791 	edac_dbg(1, "  ECC capable: %s, ChipKill ECC capable: %s\n",
792 		 (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
793 		 (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");
794 
795 	amd64_dump_dramcfg_low(pvt->dclr0, 0);
796 
797 	edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);
798 
799 	edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n",
800 		 pvt->dhar, dhar_base(pvt),
801 		 (boot_cpu_data.x86 == 0xf) ? k8_dhar_offset(pvt)
802 		 : f10_dhar_offset(pvt));
803 
804 	edac_dbg(1, "  DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
805 
806 	amd64_debug_display_dimm_sizes(pvt, 0);
807 
808 	/* everything below this point is Fam10h and above */
809 	if (boot_cpu_data.x86 == 0xf)
810 		return;
811 
812 	amd64_debug_display_dimm_sizes(pvt, 1);
813 
814 	amd64_info("using %s syndromes.\n", ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
815 
816 	/* Only if NOT ganged does dclr1 have valid info */
817 	if (!dct_ganging_enabled(pvt))
818 		amd64_dump_dramcfg_low(pvt->dclr1, 1);
819 }
820 
821 /*
822  * see BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
823  */
824 static void prep_chip_selects(struct amd64_pvt *pvt)
825 {
826 	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
827 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
828 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
829 	} else {
830 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
831 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
832 	}
833 }
834 
835 /*
836  * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
837  */
838 static void read_dct_base_mask(struct amd64_pvt *pvt)
839 {
840 	int cs;
841 
842 	prep_chip_selects(pvt);
843 
844 	for_each_chip_select(cs, 0, pvt) {
845 		int reg0   = DCSB0 + (cs * 4);
846 		int reg1   = DCSB1 + (cs * 4);
847 		u32 *base0 = &pvt->csels[0].csbases[cs];
848 		u32 *base1 = &pvt->csels[1].csbases[cs];
849 
850 		if (!amd64_read_dct_pci_cfg(pvt, reg0, base0))
851 			edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
852 				 cs, *base0, reg0);
853 
854 		if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt))
855 			continue;
856 
857 		if (!amd64_read_dct_pci_cfg(pvt, reg1, base1))
858 			edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
859 				 cs, *base1, reg1);
860 	}
861 
862 	for_each_chip_select_mask(cs, 0, pvt) {
863 		int reg0   = DCSM0 + (cs * 4);
864 		int reg1   = DCSM1 + (cs * 4);
865 		u32 *mask0 = &pvt->csels[0].csmasks[cs];
866 		u32 *mask1 = &pvt->csels[1].csmasks[cs];
867 
868 		if (!amd64_read_dct_pci_cfg(pvt, reg0, mask0))
869 			edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
870 				 cs, *mask0, reg0);
871 
872 		if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt))
873 			continue;
874 
875 		if (!amd64_read_dct_pci_cfg(pvt, reg1, mask1))
876 			edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
877 				 cs, *mask1, reg1);
878 	}
879 }
880 
881 static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt, int cs)
882 {
883 	enum mem_type type;
884 
885 	/* F15h supports only DDR3 */
886 	if (boot_cpu_data.x86 >= 0x15)
887 		type = (pvt->dclr0 & BIT(16)) ?	MEM_DDR3 : MEM_RDDR3;
888 	else if (boot_cpu_data.x86 == 0x10 || pvt->ext_model >= K8_REV_F) {
889 		if (pvt->dchr0 & DDR3_MODE)
890 			type = (pvt->dclr0 & BIT(16)) ?	MEM_DDR3 : MEM_RDDR3;
891 		else
892 			type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
893 	} else {
894 		type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
895 	}
896 
897 	amd64_info("CS%d: %s\n", cs, edac_mem_types[type]);
898 
899 	return type;
900 }
901 
902 /* Get the number of DCT channels the memory controller is using. */
903 static int k8_early_channel_count(struct amd64_pvt *pvt)
904 {
905 	int flag;
906 
907 	if (pvt->ext_model >= K8_REV_F)
908 		/* RevF (NPT) and later */
909 		flag = pvt->dclr0 & WIDTH_128;
910 	else
911 		/* RevE and earlier */
912 		flag = pvt->dclr0 & REVE_WIDTH_128;
913 
914 	/* not used */
915 	pvt->dclr1 = 0;
916 
917 	return (flag) ? 2 : 1;
918 }
919 
920 /* On F10h and later ErrAddr is MC4_ADDR[47:1] */
921 static u64 get_error_address(struct mce *m)
922 {
923 	struct cpuinfo_x86 *c = &boot_cpu_data;
924 	u64 addr;
925 	u8 start_bit = 1;
926 	u8 end_bit   = 47;
927 
928 	if (c->x86 == 0xf) {
929 		start_bit = 3;
930 		end_bit   = 39;
931 	}
932 
933 	addr = m->addr & GENMASK(start_bit, end_bit);
934 
935 	/*
936 	 * Erratum 637 workaround
937 	 */
938 	if (c->x86 == 0x15) {
939 		struct amd64_pvt *pvt;
940 		u64 cc6_base, tmp_addr;
941 		u32 tmp;
942 		u8 mce_nid, intlv_en;
943 
944 		if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
945 			return addr;
946 
947 		mce_nid	= amd_get_nb_id(m->extcpu);
948 		pvt	= mcis[mce_nid]->pvt_info;
949 
950 		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
951 		intlv_en = tmp >> 21 & 0x7;
952 
953 		/* add [47:27] + 3 trailing bits */
954 		cc6_base  = (tmp & GENMASK(0, 20)) << 3;
955 
956 		/* reverse and add DramIntlvEn */
957 		cc6_base |= intlv_en ^ 0x7;
958 
959 		/* pin at [47:24] */
960 		cc6_base <<= 24;
961 
962 		if (!intlv_en)
963 			return cc6_base | (addr & GENMASK(0, 23));
964 
965 		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);
966 
967 							/* faster log2 */
968 		tmp_addr  = (addr & GENMASK(12, 23)) << __fls(intlv_en + 1);
969 
970 		/* OR DramIntlvSel into bits [14:12] */
971 		tmp_addr |= (tmp & GENMASK(21, 23)) >> 9;
972 
973 		/* add remaining [11:0] bits from original MC4_ADDR */
974 		tmp_addr |= addr & GENMASK(0, 11);
975 
976 		return cc6_base | tmp_addr;
977 	}
978 
979 	return addr;
980 }
981 
982 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
983 {
984 	struct cpuinfo_x86 *c = &boot_cpu_data;
985 	int off = range << 3;
986 
987 	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off,  &pvt->ranges[range].base.lo);
988 	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo);
989 
990 	if (c->x86 == 0xf)
991 		return;
992 
993 	if (!dram_rw(pvt, range))
994 		return;
995 
996 	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off,  &pvt->ranges[range].base.hi);
997 	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi);
998 
999 	/* Factor in CC6 save area by reading dst node's limit reg */
1000 	if (c->x86 == 0x15) {
1001 		struct pci_dev *f1 = NULL;
1002 		u8 nid = dram_dst_node(pvt, range);
1003 		u32 llim;
1004 
1005 		f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1));
1006 		if (WARN_ON(!f1))
1007 			return;
1008 
1009 		amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
1010 
1011 		pvt->ranges[range].lim.lo &= GENMASK(0, 15);
1012 
1013 					    /* {[39:27],111b} */
1014 		pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
1015 
1016 		pvt->ranges[range].lim.hi &= GENMASK(0, 7);
1017 
1018 					    /* [47:40] */
1019 		pvt->ranges[range].lim.hi |= llim >> 13;
1020 
1021 		pci_dev_put(f1);
1022 	}
1023 }
1024 
1025 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
1026 				    struct err_info *err)
1027 {
1028 	struct amd64_pvt *pvt = mci->pvt_info;
1029 
1030 	error_address_to_page_and_offset(sys_addr, err);
1031 
1032 	/*
1033 	 * Find out which node the error address belongs to. This may be
1034 	 * different from the node that detected the error.
1035 	 */
1036 	err->src_mci = find_mc_by_sys_addr(mci, sys_addr);
1037 	if (!err->src_mci) {
1038 		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
1039 			     (unsigned long)sys_addr);
1040 		err->err_code = ERR_NODE;
1041 		return;
1042 	}
1043 
1044 	/* Now map the sys_addr to a CSROW */
1045 	err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr);
1046 	if (err->csrow < 0) {
1047 		err->err_code = ERR_CSROW;
1048 		return;
1049 	}
1050 
1051 	/* CHIPKILL enabled */
1052 	if (pvt->nbcfg & NBCFG_CHIPKILL) {
1053 		err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
1054 		if (err->channel < 0) {
1055 			/*
1056 			 * Syndrome didn't map, so we don't know which of the
1057 			 * 2 DIMMs is in error. So we need to ID 'both' of them
1058 			 * as suspect.
1059 			 */
1060 			amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - "
1061 				      "possible error reporting race\n",
1062 				      err->syndrome);
1063 			err->err_code = ERR_CHANNEL;
1064 			return;
1065 		}
1066 	} else {
1067 		/*
1068 		 * non-chipkill ecc mode
1069 		 *
1070 		 * The k8 documentation is unclear about how to determine the
1071 		 * channel number when using non-chipkill memory.  This method
1072 		 * was obtained from email communication with someone at AMD.
1073 		 * (Wish the email was placed in this comment - norsk)
1074 		 */
1075 		err->channel = ((sys_addr & BIT(3)) != 0);
1076 	}
1077 }
1078 
1079 static int ddr2_cs_size(unsigned i, bool dct_width)
1080 {
1081 	unsigned shift = 0;
1082 
1083 	if (i <= 2)
1084 		shift = i;
1085 	else if (!(i & 0x1))
1086 		shift = i >> 1;
1087 	else
1088 		shift = (i + 1) >> 1;
1089 
1090 	return 128 << (shift + !!dct_width);
1091 }
1092 
1093 static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1094 				  unsigned cs_mode)
1095 {
1096 	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1097 
1098 	if (pvt->ext_model >= K8_REV_F) {
1099 		WARN_ON(cs_mode > 11);
1100 		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1101 	}
1102 	else if (pvt->ext_model >= K8_REV_D) {
1103 		unsigned diff;
1104 		WARN_ON(cs_mode > 10);
1105 
1106 		/*
1107 		 * the below calculation, besides trying to win an obfuscated C
1108 		 * contest, maps cs_mode values to DIMM chip select sizes. The
1109 		 * mappings are:
1110 		 *
1111 		 * cs_mode	CS size (mb)
1112 		 * =======	============
1113 		 * 0		32
1114 		 * 1		64
1115 		 * 2		128
1116 		 * 3		128
1117 		 * 4		256
1118 		 * 5		512
1119 		 * 6		256
1120 		 * 7		512
1121 		 * 8		1024
1122 		 * 9		1024
1123 		 * 10		2048
1124 		 *
1125 		 * Basically, it calculates a value with which to shift the
1126 		 * smallest CS size of 32MB.
1127 		 *
1128 		 * ddr[23]_cs_size have a similar purpose.
1129 		 */
1130 		diff = cs_mode/3 + (unsigned)(cs_mode > 5);
1131 
1132 		return 32 << (cs_mode - diff);
1133 	}
1134 	else {
1135 		WARN_ON(cs_mode > 6);
1136 		return 32 << cs_mode;
1137 	}
1138 }
1139 
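/*
 * Sketch (illustrative, not called by the driver): the revD/E formula above
 * reproduced in isolation so the table in the comment can be checked by
 * inspection, e.g. cs_mode 6 -> diff 3 -> 32 << 3 = 256 MB.
 */
static inline unsigned example_k8_revd_cs_size_mb(unsigned cs_mode)
{
	unsigned diff = cs_mode / 3 + (unsigned)(cs_mode > 5);

	return 32 << (cs_mode - diff);
}
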
1140 /*
1141  * Get the number of DCT channels in use.
1142  *
1143  * Return:
1144  *	number of Memory Channels in operation
1145  * Pass back:
1146  *	contents of the DCL0_LOW register
1147  */
1148 static int f1x_early_channel_count(struct amd64_pvt *pvt)
1149 {
1150 	int i, j, channels = 0;
1151 
1152 	/* On F10h, if we are in 128 bit mode, then we are using 2 channels */
1153 	if (boot_cpu_data.x86 == 0x10 && (pvt->dclr0 & WIDTH_128))
1154 		return 2;
1155 
1156 	/*
1157 	 * Need to check if we are in unganged mode: in that case there are 2
1158 	 * but they are not in 128 bit mode and thus the above 'dclr0' status
1159 	 * bit will be OFF.
1160 	 *
1161 	 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
1162 	 * its CSEnable bit on. If so, it is the SINGLE DIMM case.
1163 	 */
1164 	edac_dbg(0, "Data width is not 128 bits - need more decoding\n");
1165 
1166 	/*
1167 	 * Check DRAM Bank Address Mapping values for each DIMM to see if there
1168 	 * is more than just one DIMM present in unganged mode. Need to check
1169 	 * both controllers since DIMMs can be placed in either one.
1170 	 */
1171 	for (i = 0; i < 2; i++) {
1172 		u32 dbam = (i ? pvt->dbam1 : pvt->dbam0);
1173 
1174 		for (j = 0; j < 4; j++) {
1175 			if (DBAM_DIMM(j, dbam) > 0) {
1176 				channels++;
1177 				break;
1178 			}
1179 		}
1180 	}
1181 
1182 	if (channels > 2)
1183 		channels = 2;
1184 
1185 	amd64_info("MCT channel count: %d\n", channels);
1186 
1187 	return channels;
1188 }
1189 
1190 static int ddr3_cs_size(unsigned i, bool dct_width)
1191 {
1192 	unsigned shift = 0;
1193 	int cs_size = 0;
1194 
1195 	if (i == 0 || i == 3 || i == 4)
1196 		cs_size = -1;
1197 	else if (i <= 2)
1198 		shift = i;
1199 	else if (i == 12)
1200 		shift = 7;
1201 	else if (!(i & 0x1))
1202 		shift = i >> 1;
1203 	else
1204 		shift = (i + 1) >> 1;
1205 
1206 	if (cs_size != -1)
1207 		cs_size = (128 * (1 << !!dct_width)) << shift;
1208 
1209 	return cs_size;
1210 }
1211 
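/*
 * Example (evaluating the function above, illustration only): cs_mode 5 on
 * a 64-bit DCT gives shift 3, i.e. 128 << 3 = 1024 MB; the same cs_mode on
 * a 128-bit DCT doubles that to 2048 MB. cs_modes 0, 3 and 4 are treated as
 * invalid and return -1.
 */
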
1212 static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1213 				   unsigned cs_mode)
1214 {
1215 	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1216 
1217 	WARN_ON(cs_mode > 11);
1218 
1219 	if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE)
1220 		return ddr3_cs_size(cs_mode, dclr & WIDTH_128);
1221 	else
1222 		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1223 }
1224 
1225 /*
1226  * F15h supports only 64bit DCT interfaces
1227  */
1228 static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1229 				   unsigned cs_mode)
1230 {
1231 	WARN_ON(cs_mode > 12);
1232 
1233 	return ddr3_cs_size(cs_mode, false);
1234 }
1235 
1236 static void read_dram_ctl_register(struct amd64_pvt *pvt)
1237 {
1238 
1239 	if (boot_cpu_data.x86 == 0xf)
1240 		return;
1241 
1242 	if (!amd64_read_dct_pci_cfg(pvt, DCT_SEL_LO, &pvt->dct_sel_lo)) {
1243 		edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n",
1244 			 pvt->dct_sel_lo, dct_sel_baseaddr(pvt));
1245 
1246 		edac_dbg(0, "  DCTs operate in %s mode\n",
1247 			 (dct_ganging_enabled(pvt) ? "ganged" : "unganged"));
1248 
1249 		if (!dct_ganging_enabled(pvt))
1250 			edac_dbg(0, "  Address range split per DCT: %s\n",
1251 				 (dct_high_range_enabled(pvt) ? "yes" : "no"));
1252 
1253 		edac_dbg(0, "  data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
1254 			 (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
1255 			 (dct_memory_cleared(pvt) ? "yes" : "no"));
1256 
1257 		edac_dbg(0, "  channel interleave: %s, "
1258 			 "interleave bits selector: 0x%x\n",
1259 			 (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
1260 			 dct_sel_interleave_addr(pvt));
1261 	}
1262 
1263 	amd64_read_dct_pci_cfg(pvt, DCT_SEL_HI, &pvt->dct_sel_hi);
1264 }
1265 
1266 /*
1267  * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory
1268  * Interleaving Modes.
1269  */
1270 static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1271 				bool hi_range_sel, u8 intlv_en)
1272 {
1273 	u8 dct_sel_high = (pvt->dct_sel_lo >> 1) & 1;
1274 
1275 	if (dct_ganging_enabled(pvt))
1276 		return 0;
1277 
1278 	if (hi_range_sel)
1279 		return dct_sel_high;
1280 
1281 	/*
1282 	 * see F2x110[DctSelIntLvAddr] - channel interleave mode
1283 	 */
1284 	if (dct_interleave_enabled(pvt)) {
1285 		u8 intlv_addr = dct_sel_interleave_addr(pvt);
1286 
1287 		/* return DCT select function: 0=DCT0, 1=DCT1 */
1288 		if (!intlv_addr)
1289 			return sys_addr >> 6 & 1;
1290 
1291 		if (intlv_addr & 0x2) {
1292 			u8 shift = intlv_addr & 0x1 ? 9 : 6;
1293 			u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
1294 
1295 			return ((sys_addr >> shift) & 1) ^ temp;
1296 		}
1297 
1298 		return (sys_addr >> (12 + hweight8(intlv_en))) & 1;
1299 	}
1300 
1301 	if (dct_high_range_enabled(pvt))
1302 		return ~dct_sel_high & 1;
1303 
1304 	return 0;
1305 }
1306 
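/*
 * Example (illustration only): with channel interleaving enabled and
 * F2x110[DctSelIntLvAddr] == 0, the DCT is simply SysAddr bit 6, so
 * consecutive 64-byte cache lines alternate between DCT0 and DCT1.
 */
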
1307 /* Convert the sys_addr to the normalized DCT address */
1308 static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range,
1309 				 u64 sys_addr, bool hi_rng,
1310 				 u32 dct_sel_base_addr)
1311 {
1312 	u64 chan_off;
1313 	u64 dram_base		= get_dram_base(pvt, range);
1314 	u64 hole_off		= f10_dhar_offset(pvt);
1315 	u64 dct_sel_base_off	= (pvt->dct_sel_hi & 0xFFFFFC00) << 16;
1316 
1317 	if (hi_rng) {
1318 		/*
1319 		 * if
1320 		 * base address of high range is below 4Gb
1321 		 * (bits [47:27] at [31:11])
1322 		 * DRAM address space on this DCT is hoisted above 4Gb	&&
1323 		 * sys_addr > 4Gb
1324 		 *
1325 		 *	remove hole offset from sys_addr
1326 		 * else
1327 		 *	remove high range offset from sys_addr
1328 		 */
1329 		if ((!(dct_sel_base_addr >> 16) ||
1330 		     dct_sel_base_addr < dhar_base(pvt)) &&
1331 		    dhar_valid(pvt) &&
1332 		    (sys_addr >= BIT_64(32)))
1333 			chan_off = hole_off;
1334 		else
1335 			chan_off = dct_sel_base_off;
1336 	} else {
1337 		/*
1338 		 * if
1339 		 * we have a valid hole		&&
1340 		 * sys_addr > 4Gb
1341 		 *
1342 		 *	remove hole
1343 		 * else
1344 		 *	remove dram base to normalize to DCT address
1345 		 */
1346 		if (dhar_valid(pvt) && (sys_addr >= BIT_64(32)))
1347 			chan_off = hole_off;
1348 		else
1349 			chan_off = dram_base;
1350 	}
1351 
1352 	return (sys_addr & GENMASK(6,47)) - (chan_off & GENMASK(23,47));
1353 }
1354 
1355 /*
1356  * Check if the csrow passed in is marked as SPARED; if so, return the new
1357  * spare row.
1358  */
1359 static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
1360 {
1361 	int tmp_cs;
1362 
1363 	if (online_spare_swap_done(pvt, dct) &&
1364 	    csrow == online_spare_bad_dramcs(pvt, dct)) {
1365 
1366 		for_each_chip_select(tmp_cs, dct, pvt) {
1367 			if (chip_select_base(tmp_cs, dct, pvt) & 0x2) {
1368 				csrow = tmp_cs;
1369 				break;
1370 			}
1371 		}
1372 	}
1373 	return csrow;
1374 }
1375 
1376 /*
1377  * Iterate over the DRAM DCT "base" and "mask" registers looking for a
1378  * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
1379  *
1380  * Return:
1381  *	-EINVAL:  NOT FOUND
1382  *	0..csrow = Chip-Select Row
1383  */
1384 static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct)
1385 {
1386 	struct mem_ctl_info *mci;
1387 	struct amd64_pvt *pvt;
1388 	u64 cs_base, cs_mask;
1389 	int cs_found = -EINVAL;
1390 	int csrow;
1391 
1392 	mci = mcis[nid];
1393 	if (!mci)
1394 		return cs_found;
1395 
1396 	pvt = mci->pvt_info;
1397 
1398 	edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr, dct);
1399 
1400 	for_each_chip_select(csrow, dct, pvt) {
1401 		if (!csrow_enabled(csrow, dct, pvt))
1402 			continue;
1403 
1404 		get_cs_base_and_mask(pvt, csrow, dct, &cs_base, &cs_mask);
1405 
1406 		edac_dbg(1, "    CSROW=%d CSBase=0x%llx CSMask=0x%llx\n",
1407 			 csrow, cs_base, cs_mask);
1408 
1409 		cs_mask = ~cs_mask;
1410 
1411 		edac_dbg(1, "    (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n",
1412 			 (in_addr & cs_mask), (cs_base & cs_mask));
1413 
1414 		if ((in_addr & cs_mask) == (cs_base & cs_mask)) {
1415 			cs_found = f10_process_possible_spare(pvt, dct, csrow);
1416 
1417 			edac_dbg(1, " MATCH csrow=%d\n", cs_found);
1418 			break;
1419 		}
1420 	}
1421 	return cs_found;
1422 }
1423 
1424 /*
1425  * See F2x10C. Non-interleaved graphics framebuffer memory below 16 GB is
1426  * swapped with a region located at the bottom of memory so that the GPU can use
1427  * the interleaved region and thus two channels.
1428  */
1429 static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
1430 {
1431 	u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr;
1432 
1433 	if (boot_cpu_data.x86 == 0x10) {
1434 		/* only revC3 and revE have that feature */
1435 		if (boot_cpu_data.x86_model < 4 ||
1436 		    (boot_cpu_data.x86_model < 0xa &&
1437 		     boot_cpu_data.x86_mask < 3))
1438 			return sys_addr;
1439 	}
1440 
1441 	amd64_read_dct_pci_cfg(pvt, SWAP_INTLV_REG, &swap_reg);
1442 
1443 	if (!(swap_reg & 0x1))
1444 		return sys_addr;
1445 
1446 	swap_base	= (swap_reg >> 3) & 0x7f;
1447 	swap_limit	= (swap_reg >> 11) & 0x7f;
1448 	rgn_size	= (swap_reg >> 20) & 0x7f;
1449 	tmp_addr	= sys_addr >> 27;
1450 
1451 	if (!(sys_addr >> 34) &&
1452 	    (((tmp_addr >= swap_base) &&
1453 	     (tmp_addr <= swap_limit)) ||
1454 	     (tmp_addr < rgn_size)))
1455 		return sys_addr ^ (u64)swap_base << 27;
1456 
1457 	return sys_addr;
1458 }
1459 
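/*
 * Note on the field granularity above (illustration): swap_base, swap_limit
 * and rgn_size are 7-bit values in units of 128 MB (sys_addr bits [33:27]),
 * which is why the swap only applies to addresses below 16 GB
 * (sys_addr >> 34 == 0).
 */
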
1460 /* For a given @dram_range, check if @sys_addr falls within it. */
1461 static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
1462 				  u64 sys_addr, int *chan_sel)
1463 {
1464 	int cs_found = -EINVAL;
1465 	u64 chan_addr;
1466 	u32 dct_sel_base;
1467 	u8 channel;
1468 	bool high_range = false;
1469 
1470 	u8 node_id    = dram_dst_node(pvt, range);
1471 	u8 intlv_en   = dram_intlv_en(pvt, range);
1472 	u32 intlv_sel = dram_intlv_sel(pvt, range);
1473 
1474 	edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1475 		 range, sys_addr, get_dram_limit(pvt, range));
1476 
1477 	if (dhar_valid(pvt) &&
1478 	    dhar_base(pvt) <= sys_addr &&
1479 	    sys_addr < BIT_64(32)) {
1480 		amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1481 			    sys_addr);
1482 		return -EINVAL;
1483 	}
1484 
1485 	if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en)))
1486 		return -EINVAL;
1487 
1488 	sys_addr = f1x_swap_interleaved_region(pvt, sys_addr);
1489 
1490 	dct_sel_base = dct_sel_baseaddr(pvt);
1491 
1492 	/*
1493 	 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
1494 	 * select between DCT0 and DCT1.
1495 	 */
1496 	if (dct_high_range_enabled(pvt) &&
1497 	   !dct_ganging_enabled(pvt) &&
1498 	   ((sys_addr >> 27) >= (dct_sel_base >> 11)))
1499 		high_range = true;
1500 
1501 	channel = f1x_determine_channel(pvt, sys_addr, high_range, intlv_en);
1502 
1503 	chan_addr = f1x_get_norm_dct_addr(pvt, range, sys_addr,
1504 					  high_range, dct_sel_base);
1505 
1506 	/* Remove node interleaving, see F1x120 */
1507 	if (intlv_en)
1508 		chan_addr = ((chan_addr >> (12 + hweight8(intlv_en))) << 12) |
1509 			    (chan_addr & 0xfff);
1510 
1511 	/* remove channel interleave */
1512 	if (dct_interleave_enabled(pvt) &&
1513 	   !dct_high_range_enabled(pvt) &&
1514 	   !dct_ganging_enabled(pvt)) {
1515 
1516 		if (dct_sel_interleave_addr(pvt) != 1) {
1517 			if (dct_sel_interleave_addr(pvt) == 0x3)
1518 				/* hash 9 */
1519 				chan_addr = ((chan_addr >> 10) << 9) |
1520 					     (chan_addr & 0x1ff);
1521 			else
1522 				/* A[6] or hash 6 */
1523 				chan_addr = ((chan_addr >> 7) << 6) |
1524 					     (chan_addr & 0x3f);
1525 		} else
1526 			/* A[12] */
1527 			chan_addr = ((chan_addr >> 13) << 12) |
1528 				     (chan_addr & 0xfff);
1529 	}
1530 
1531 	edac_dbg(1, "   Normalized DCT addr: 0x%llx\n", chan_addr);
1532 
1533 	cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
1534 
1535 	if (cs_found >= 0)
1536 		*chan_sel = channel;
1537 
1538 	return cs_found;
1539 }
1540 
1541 static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
1542 				       int *chan_sel)
1543 {
1544 	int cs_found = -EINVAL;
1545 	unsigned range;
1546 
1547 	for (range = 0; range < DRAM_RANGES; range++) {
1548 
1549 		if (!dram_rw(pvt, range))
1550 			continue;
1551 
1552 		if ((get_dram_base(pvt, range)  <= sys_addr) &&
1553 		    (get_dram_limit(pvt, range) >= sys_addr)) {
1554 
1555 			cs_found = f1x_match_to_this_node(pvt, range,
1556 							  sys_addr, chan_sel);
1557 			if (cs_found >= 0)
1558 				break;
1559 		}
1560 	}
1561 	return cs_found;
1562 }
1563 
1564 /*
1565  * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
1566  * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
1567  *
1568  * The @sys_addr is usually an error address received from the hardware
1569  * (MCX_ADDR).
1570  */
1571 static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
1572 				     struct err_info *err)
1573 {
1574 	struct amd64_pvt *pvt = mci->pvt_info;
1575 
1576 	error_address_to_page_and_offset(sys_addr, err);
1577 
1578 	err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel);
1579 	if (err->csrow < 0) {
1580 		err->err_code = ERR_CSROW;
1581 		return;
1582 	}
1583 
1584 	/*
1585 	 * We need the syndromes for channel detection only when we're
1586 	 * ganged. Otherwise @chan should already contain the channel at
1587 	 * this point.
1588 	 */
1589 	if (dct_ganging_enabled(pvt))
1590 		err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
1591 }
1592 
1593 /*
1594  * debug routine to display the memory sizes of all logical DIMMs and their
1595  * CSROWs
1596  */
1597 static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
1598 {
1599 	int dimm, size0, size1;
1600 	u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
1601 	u32 dbam  = ctrl ? pvt->dbam1 : pvt->dbam0;
1602 
1603 	if (boot_cpu_data.x86 == 0xf) {
1604 		/* K8 families < revF not supported yet */
1605 		if (pvt->ext_model < K8_REV_F)
1606 			return;
1607 		else
1608 			WARN_ON(ctrl != 0);
1609 	}
1610 
1611 	dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 : pvt->dbam0;
1612 	dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->csels[1].csbases
1613 						   : pvt->csels[0].csbases;
1614 
1615 	edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
1616 		 ctrl, dbam);
1617 
1618 	edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);
1619 
1620 	/* Dump memory sizes for DIMM and its CSROWs */
1621 	for (dimm = 0; dimm < 4; dimm++) {
1622 
1623 		size0 = 0;
1624 		if (dcsb[dimm*2] & DCSB_CS_ENABLE)
1625 			size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
1626 						     DBAM_DIMM(dimm, dbam));
1627 
1628 		size1 = 0;
1629 		if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
1630 			size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
1631 						     DBAM_DIMM(dimm, dbam));
1632 
1633 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
1634 				dimm * 2,     size0,
1635 				dimm * 2 + 1, size1);
1636 	}
1637 }
1638 
1639 static struct amd64_family_type amd64_family_types[] = {
1640 	[K8_CPUS] = {
1641 		.ctl_name = "K8",
1642 		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
1643 		.f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
1644 		.ops = {
1645 			.early_channel_count	= k8_early_channel_count,
1646 			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
1647 			.dbam_to_cs		= k8_dbam_to_chip_select,
1648 			.read_dct_pci_cfg	= k8_read_dct_pci_cfg,
1649 		}
1650 	},
1651 	[F10_CPUS] = {
1652 		.ctl_name = "F10h",
1653 		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
1654 		.f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
1655 		.ops = {
1656 			.early_channel_count	= f1x_early_channel_count,
1657 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1658 			.dbam_to_cs		= f10_dbam_to_chip_select,
1659 			.read_dct_pci_cfg	= f10_read_dct_pci_cfg,
1660 		}
1661 	},
1662 	[F15_CPUS] = {
1663 		.ctl_name = "F15h",
1664 		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
1665 		.f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3,
1666 		.ops = {
1667 			.early_channel_count	= f1x_early_channel_count,
1668 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1669 			.dbam_to_cs		= f15_dbam_to_chip_select,
1670 			.read_dct_pci_cfg	= f15_read_dct_pci_cfg,
1671 		}
1672 	},
1673 };
1674 
1675 static struct pci_dev *pci_get_related_function(unsigned int vendor,
1676 						unsigned int device,
1677 						struct pci_dev *related)
1678 {
1679 	struct pci_dev *dev = NULL;
1680 
1681 	dev = pci_get_device(vendor, device, dev);
1682 	while (dev) {
1683 		if ((dev->bus->number == related->bus->number) &&
1684 		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
1685 			break;
1686 		dev = pci_get_device(vendor, device, dev);
1687 	}
1688 
1689 	return dev;
1690 }
1691 
1692 /*
1693  * These are tables of eigenvectors (one per line) which can be used for the
1694  * construction of the syndrome tables. The modified syndrome search algorithm
1695  * uses those to find the symbol in error and thus the DIMM.
1696  *
1697  * Algorithm courtesy of Ross LaFetra from AMD.
1698  */
1699 static u16 x4_vectors[] = {
1700 	0x2f57, 0x1afe, 0x66cc, 0xdd88,
1701 	0x11eb, 0x3396, 0x7f4c, 0xeac8,
1702 	0x0001, 0x0002, 0x0004, 0x0008,
1703 	0x1013, 0x3032, 0x4044, 0x8088,
1704 	0x106b, 0x30d6, 0x70fc, 0xe0a8,
1705 	0x4857, 0xc4fe, 0x13cc, 0x3288,
1706 	0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
1707 	0x1f39, 0x251e, 0xbd6c, 0x6bd8,
1708 	0x15c1, 0x2a42, 0x89ac, 0x4758,
1709 	0x2b03, 0x1602, 0x4f0c, 0xca08,
1710 	0x1f07, 0x3a0e, 0x6b04, 0xbd08,
1711 	0x8ba7, 0x465e, 0x244c, 0x1cc8,
1712 	0x2b87, 0x164e, 0x642c, 0xdc18,
1713 	0x40b9, 0x80de, 0x1094, 0x20e8,
1714 	0x27db, 0x1eb6, 0x9dac, 0x7b58,
1715 	0x11c1, 0x2242, 0x84ac, 0x4c58,
1716 	0x1be5, 0x2d7a, 0x5e34, 0xa718,
1717 	0x4b39, 0x8d1e, 0x14b4, 0x28d8,
1718 	0x4c97, 0xc87e, 0x11fc, 0x33a8,
1719 	0x8e97, 0x497e, 0x2ffc, 0x1aa8,
1720 	0x16b3, 0x3d62, 0x4f34, 0x8518,
1721 	0x1e2f, 0x391a, 0x5cac, 0xf858,
1722 	0x1d9f, 0x3b7a, 0x572c, 0xfe18,
1723 	0x15f5, 0x2a5a, 0x5264, 0xa3b8,
1724 	0x1dbb, 0x3b66, 0x715c, 0xe3f8,
1725 	0x4397, 0xc27e, 0x17fc, 0x3ea8,
1726 	0x1617, 0x3d3e, 0x6464, 0xb8b8,
1727 	0x23ff, 0x12aa, 0xab6c, 0x56d8,
1728 	0x2dfb, 0x1ba6, 0x913c, 0x7328,
1729 	0x185d, 0x2ca6, 0x7914, 0x9e28,
1730 	0x171b, 0x3e36, 0x7d7c, 0xebe8,
1731 	0x4199, 0x82ee, 0x19f4, 0x2e58,
1732 	0x4807, 0xc40e, 0x130c, 0x3208,
1733 	0x1905, 0x2e0a, 0x5804, 0xac08,
1734 	0x213f, 0x132a, 0xadfc, 0x5ba8,
1735 	0x19a9, 0x2efe, 0xb5cc, 0x6f88,
1736 };
1737 
1738 static u16 x8_vectors[] = {
1739 	0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
1740 	0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
1741 	0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
1742 	0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80,
1743 	0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780,
1744 	0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080,
1745 	0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080,
1746 	0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080,
1747 	0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80,
1748 	0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580,
1749 	0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880,
1750 	0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280,
1751 	0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180,
1752 	0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580,
1753 	0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280,
1754 	0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180,
1755 	0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080,
1756 	0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
1757 	0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
1758 };
1759 
1760 static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs,
1761 			   unsigned v_dim)
1762 {
1763 	unsigned int i, err_sym;
1764 
1765 	for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
1766 		u16 s = syndrome;
1767 		unsigned v_idx =  err_sym * v_dim;
1768 		unsigned v_end = (err_sym + 1) * v_dim;
1769 
1770 		/* walk over all 16 bits of the syndrome */
1771 		for (i = 1; i < (1U << 16); i <<= 1) {
1772 
1773 			/* if bit is set in that eigenvector... */
1774 			if (v_idx < v_end && vectors[v_idx] & i) {
1775 				u16 ev_comp = vectors[v_idx++];
1776 
1777 				/* ... and bit set in the modified syndrome, */
1778 				if (s & i) {
1779 					/* remove it. */
1780 					s ^= ev_comp;
1781 
1782 					if (!s)
1783 						return err_sym;
1784 				}
1785 
1786 			} else if (s & i)
1787 				/* can't get to zero, move to next symbol */
1788 				break;
1789 		}
1790 	}
1791 
1792 	edac_dbg(0, "syndrome(%x) not found\n", syndrome);
1793 	return -1;
1794 }
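
/*
 * Illustrative walk-through (hypothetical syndrome value, not taken from
 * real hardware): for a syndrome of 0x0003 with x4 symbols, the first two
 * eigenvector rows cannot cancel the syndrome, but the third row
 * { 0x0001, 0x0002, 0x0004, 0x0008 } can - XOR-ing out 0x0001 and then
 * 0x0002 drives the modified syndrome to zero, so
 *
 *	decode_syndrome(0x0003, x4_vectors, ARRAY_SIZE(x4_vectors), 4)
 *
 * would return err_sym 2, which map_err_sym_to_channel() below collapses
 * to channel 0 (2 >> 4 == 0).
 */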
1795 
1796 static int map_err_sym_to_channel(int err_sym, int sym_size)
1797 {
1798 	if (sym_size == 4)
1799 		switch (err_sym) {
1800 		case 0x20:
1801 		case 0x21:
1802 			return 0;
1803 			break;
1804 		case 0x22:
1805 		case 0x23:
1806 			return 1;
1807 			break;
1808 		default:
1809 			return err_sym >> 4;
1810 			break;
1811 		}
1812 	/* x8 symbols */
1813 	else
1814 		switch (err_sym) {
1815 		/* imaginary bits not in a DIMM */
1816 		case 0x10:
1817 			WARN(1, KERN_ERR "Invalid error symbol: 0x%x\n",
1818 					  err_sym);
1819 			return -1;
1820 			break;
1821 
1822 		case 0x11:
1823 			return 0;
1824 			break;
1825 		case 0x12:
1826 			return 1;
1827 			break;
1828 		default:
1829 			return err_sym >> 3;
1830 			break;
1831 		}
1832 	return -1;
1833 }
1834 
1835 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
1836 {
1837 	struct amd64_pvt *pvt = mci->pvt_info;
1838 	int err_sym = -1;
1839 
1840 	if (pvt->ecc_sym_sz == 8)
1841 		err_sym = decode_syndrome(syndrome, x8_vectors,
1842 					  ARRAY_SIZE(x8_vectors),
1843 					  pvt->ecc_sym_sz);
1844 	else if (pvt->ecc_sym_sz == 4)
1845 		err_sym = decode_syndrome(syndrome, x4_vectors,
1846 					  ARRAY_SIZE(x4_vectors),
1847 					  pvt->ecc_sym_sz);
1848 	else {
1849 		amd64_warn("Illegal ECC symbol size: %u\n", pvt->ecc_sym_sz);
1850 		return err_sym;
1851 	}
1852 
1853 	return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
1854 }
1855 
1856 static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
1857 			    u8 ecc_type)
1858 {
1859 	enum hw_event_mc_err_type err_type;
1860 	const char *string;
1861 
1862 	if (ecc_type == 2)
1863 		err_type = HW_EVENT_ERR_CORRECTED;
1864 	else if (ecc_type == 1)
1865 		err_type = HW_EVENT_ERR_UNCORRECTED;
1866 	else {
1867 		WARN(1, "Something is rotten in the state of Denmark.\n");
1868 		return;
1869 	}
1870 
1871 	switch (err->err_code) {
1872 	case DECODE_OK:
1873 		string = "";
1874 		break;
1875 	case ERR_NODE:
1876 		string = "Failed to map error addr to a node";
1877 		break;
1878 	case ERR_CSROW:
1879 		string = "Failed to map error addr to a csrow";
1880 		break;
1881 	case ERR_CHANNEL:
1882 		string = "unknown syndrome - possible error reporting race";
1883 		break;
1884 	default:
1885 		string = "Unknown error code";
1886 		break;
1887 	}
1888 
1889 	edac_mc_handle_error(err_type, mci, 1,
1890 			     err->page, err->offset, err->syndrome,
1891 			     err->csrow, err->channel, -1,
1892 			     string, "");
1893 }
1894 
1895 static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
1896 					    struct mce *m)
1897 {
1898 	struct amd64_pvt *pvt = mci->pvt_info;
1899 	u8 ecc_type = (m->status >> 45) & 0x3;
1900 	u8 xec = XEC(m->status, 0x1f);
1901 	u16 ec = EC(m->status);
1902 	u64 sys_addr;
1903 	struct err_info err;
1904 
1905 	/* Bail out early if this was an 'observed' error */
1906 	if (PP(ec) == NBSL_PP_OBS)
1907 		return;
1908 
1909 	/* Do only ECC errors */
1910 	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
1911 		return;
1912 
1913 	memset(&err, 0, sizeof(err));
1914 
1915 	sys_addr = get_error_address(m);
1916 
1917 	if (ecc_type == 2)
1918 		err.syndrome = extract_syndrome(m->status);
1919 
1920 	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
1921 
1922 	__log_bus_error(mci, &err, ecc_type);
1923 }
1924 
1925 void amd64_decode_bus_error(int node_id, struct mce *m)
1926 {
1927 	__amd64_decode_bus_error(mcis[node_id], m);
1928 }
1929 
1930 /*
1931  * Use pvt->F2 which contains the F2 CPU PCI device to get the related
1932  * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
1933  */
1934 static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
1935 {
1936 	/* Reserve the ADDRESS MAP Device */
1937 	pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
1938 	if (!pvt->F1) {
1939 		amd64_err("Address map (F1) device not found: "
1940 			  "vendor %x device 0x%x (broken BIOS?)\n",
1941 			  PCI_VENDOR_ID_AMD, f1_id);
1942 		return -ENODEV;
1943 	}
1944 
1945 	/* Reserve the MISC Device */
1946 	pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
1947 	if (!pvt->F3) {
1948 		pci_dev_put(pvt->F1);
1949 		pvt->F1 = NULL;
1950 
1951 		amd64_err("F3 (misc) device not found: "
1952 			  "vendor %x device 0x%x (broken BIOS?)\n",
1953 			  PCI_VENDOR_ID_AMD, f3_id);
1954 
1955 		return -ENODEV;
1956 	}
1957 	edac_dbg(1, "F1: %s\n", pci_name(pvt->F1));
1958 	edac_dbg(1, "F2: %s\n", pci_name(pvt->F2));
1959 	edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
1960 
1961 	return 0;
1962 }
1963 
1964 static void free_mc_sibling_devs(struct amd64_pvt *pvt)
1965 {
1966 	pci_dev_put(pvt->F1);
1967 	pci_dev_put(pvt->F3);
1968 }
1969 
1970 /*
1971  * Retrieve the hardware registers of the memory controller (this includes the
1972  * 'Address Map' and 'Misc' device regs)
1973  */
1974 static void read_mc_regs(struct amd64_pvt *pvt)
1975 {
1976 	struct cpuinfo_x86 *c = &boot_cpu_data;
1977 	u64 msr_val;
1978 	u32 tmp;
1979 	unsigned range;
1980 
1981 	/*
1982 	 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
1983 	 * those are Read-As-Zero
1984 	 */
1985 	rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
1986 	edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);
1987 
1988 	/* check first whether TOP_MEM2 is enabled */
1989 	rdmsrl(MSR_K8_SYSCFG, msr_val);
1990 	if (msr_val & (1U << 21)) {
1991 		rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
1992 		edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
1993 	} else
1994 		edac_dbg(0, "  TOP_MEM2 disabled\n");
1995 
1996 	amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);
1997 
1998 	read_dram_ctl_register(pvt);
1999 
2000 	for (range = 0; range < DRAM_RANGES; range++) {
2001 		u8 rw;
2002 
2003 		/* read settings for this DRAM range */
2004 		read_dram_base_limit_regs(pvt, range);
2005 
2006 		rw = dram_rw(pvt, range);
2007 		if (!rw)
2008 			continue;
2009 
2010 		edac_dbg(1, "  DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n",
2011 			 range,
2012 			 get_dram_base(pvt, range),
2013 			 get_dram_limit(pvt, range));
2014 
2015 		edac_dbg(1, "   IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n",
2016 			 dram_intlv_en(pvt, range) ? "Enabled" : "Disabled",
2017 			 (rw & 0x1) ? "R" : "-",
2018 			 (rw & 0x2) ? "W" : "-",
2019 			 dram_intlv_sel(pvt, range),
2020 			 dram_dst_node(pvt, range));
2021 	}
2022 
2023 	read_dct_base_mask(pvt);
2024 
2025 	amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar);
2026 	amd64_read_dct_pci_cfg(pvt, DBAM0, &pvt->dbam0);
2027 
2028 	amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);
2029 
2030 	amd64_read_dct_pci_cfg(pvt, DCLR0, &pvt->dclr0);
2031 	amd64_read_dct_pci_cfg(pvt, DCHR0, &pvt->dchr0);
2032 
2033 	if (!dct_ganging_enabled(pvt)) {
2034 		amd64_read_dct_pci_cfg(pvt, DCLR1, &pvt->dclr1);
2035 		amd64_read_dct_pci_cfg(pvt, DCHR1, &pvt->dchr1);
2036 	}
2037 
2038 	pvt->ecc_sym_sz = 4;
2039 
2040 	if (c->x86 >= 0x10) {
2041 		amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
2042 		amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);
2043 
2044 		/* F10h, revD and later can do x8 ECC too */
2045 		if ((c->x86 > 0x10 || c->x86_model > 7) && tmp & BIT(25))
2046 			pvt->ecc_sym_sz = 8;
2047 	}
2048 	dump_misc_regs(pvt);
2049 }
2050 
2051 /*
2052  * NOTE: CPU Revision Dependent code
2053  *
2054  * Input:
2055  *	@csrow_nr ChipSelect Row Number (0..NUM_CHIPSELECTS-1)
2056  *	k8 private pointer to -->
2057  *			DRAM Bank Address mapping register
2058  *			node_id
2059  *			DCL register where dual_channel_active is
2060  *
2061  * The DBAM register consists of four 4-bit fields, one per pair of CSROWs:
2062  *
2063  * Bits:	CSROWs
2064  * 0-3		CSROWs 0 and 1
2065  * 4-7		CSROWs 2 and 3
2066  * 8-11		CSROWs 4 and 5
2067  * 12-15	CSROWs 6 and 7
2068  *
2069  * Each 4-bit field holds a value from 0 to 15.
2070  * The meaning of the value depends on CPU revision and dual-channel state;
2071  * see the relevant BKDG for more info.
2072  *
2073  * The memory controller provides for a total of only 8 CSROWs in its current
2074  * architecture. Each "pair" of CSROWs normally represents just one DIMM in
2075  * single-channel mode or two DIMMs in dual-channel mode.
2076  *
2077  * The following code logic collapses the various tables for CSROW based on CPU
2078  * revision.
2079  *
2080  * Returns:
2081  *	The number of PAGE_SIZE pages that the specified CSROW
2082  *	encompasses
2083  *
2084  */
2085 static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
2086 {
2087 	u32 cs_mode, nr_pages;
2088 	u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
2089 
2090 
2091 	/*
2092 	 * The math here looks wrong at first glance because x/2*4 could be
2093 	 * simplified to x*2, but the expression relies on integer division
2094 	 * (1/2 == 0): csrow_nr/2 selects the DIMM index, and that index in turn
2095 	 * determines how many bits the DBAM register is shifted to extract the
2096 	 * proper 4-bit CSROW field.
2097 	 */
2098 	cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
2099 
2100 	nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
2101 
2102 	edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
2103 		    csrow_nr, dct,  cs_mode);
2104 	edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
2105 
2106 	return nr_pages;
2107 }
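
/*
 * Worked example (hypothetical numbers, for illustration only): for
 * csrow_nr 5 on DCT 0, csrow_nr / 2 == 2, so, per the field layout
 * documented above, DBAM_DIMM() picks bits 8-11 of DBAM0 as cs_mode.
 * If ->dbam_to_cs() were to report a 2048 MiB chip select for that
 * cs_mode, the shift by (20 - PAGE_SHIFT) converts MiB to pages, i.e.
 * with 4 KiB pages:
 *
 *	nr_pages = 2048 << (20 - PAGE_SHIFT);	// 2048 << 8 == 524288
 */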
2108 
2109 /*
2110  * Initialize the array of csrow attribute instances, based on the values
2111  * from pci config hardware registers.
2112  */
2113 static int init_csrows(struct mem_ctl_info *mci)
2114 {
2115 	struct amd64_pvt *pvt = mci->pvt_info;
2116 	struct csrow_info *csrow;
2117 	struct dimm_info *dimm;
2118 	enum edac_type edac_mode;
2119 	enum mem_type mtype;
2120 	int i, j, empty = 1;
2121 	int nr_pages = 0;
2122 	u32 val;
2123 
2124 	amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
2125 
2126 	pvt->nbcfg = val;
2127 
2128 	edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2129 		 pvt->mc_node_id, val,
2130 		 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
2131 
2132 	/*
2133 	 * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
2134 	 */
2135 	for_each_chip_select(i, 0, pvt) {
2136 		bool row_dct0 = !!csrow_enabled(i, 0, pvt);
2137 		bool row_dct1 = false;
2138 
2139 		if (boot_cpu_data.x86 != 0xf)
2140 			row_dct1 = !!csrow_enabled(i, 1, pvt);
2141 
2142 		if (!row_dct0 && !row_dct1)
2143 			continue;
2144 
2145 		csrow = mci->csrows[i];
2146 		empty = 0;
2147 
2148 		edac_dbg(1, "MC node: %d, csrow: %d\n",
2149 			    pvt->mc_node_id, i);
2150 
2151 		if (row_dct0)
2152 			nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
2153 
2154 		/* K8 has only one DCT */
2155 		if (boot_cpu_data.x86 != 0xf && row_dct1)
2156 			nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
2157 
2158 		mtype = amd64_determine_memory_type(pvt, i);
2159 
2160 		edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
2161 
2162 		/*
2163 		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
2164 		 */
2165 		if (pvt->nbcfg & NBCFG_ECC_ENABLE)
2166 			edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ?
2167 				    EDAC_S4ECD4ED : EDAC_SECDED;
2168 		else
2169 			edac_mode = EDAC_NONE;
2170 
2171 		for (j = 0; j < pvt->channel_count; j++) {
2172 			dimm = csrow->channels[j]->dimm;
2173 			dimm->mtype = mtype;
2174 			dimm->edac_mode = edac_mode;
2175 			dimm->nr_pages = nr_pages;
2176 		}
2177 		csrow->nr_pages = nr_pages;
2178 	}
2179 
2180 	return empty;
2181 }
2182 
2183 /* get all cores on this DCT */
2184 static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid)
2185 {
2186 	int cpu;
2187 
2188 	for_each_online_cpu(cpu)
2189 		if (amd_get_nb_id(cpu) == nid)
2190 			cpumask_set_cpu(cpu, mask);
2191 }
2192 
2193 /* check MCG_CTL on all the cpus on this node */
2194 static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid)
2195 {
2196 	cpumask_var_t mask;
2197 	int cpu, nbe;
2198 	bool ret = false;
2199 
2200 	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
2201 		amd64_warn("%s: Error allocating mask\n", __func__);
2202 		return false;
2203 	}
2204 
2205 	get_cpus_on_this_dct_cpumask(mask, nid);
2206 
2207 	rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);
2208 
2209 	for_each_cpu(cpu, mask) {
2210 		struct msr *reg = per_cpu_ptr(msrs, cpu);
2211 		nbe = reg->l & MSR_MCGCTL_NBE;
2212 
2213 		edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2214 			 cpu, reg->q,
2215 			 (nbe ? "enabled" : "disabled"));
2216 
2217 		if (!nbe)
2218 			goto out;
2219 	}
2220 	ret = true;
2221 
2222 out:
2223 	free_cpumask_var(mask);
2224 	return ret;
2225 }
2226 
2227 static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
2228 {
2229 	cpumask_var_t cmask;
2230 	int cpu;
2231 
2232 	if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
2233 		amd64_warn("%s: error allocating mask\n", __func__);
2234 		return -ENOMEM;
2235 	}
2236 
2237 	get_cpus_on_this_dct_cpumask(cmask, nid);
2238 
2239 	rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2240 
2241 	for_each_cpu(cpu, cmask) {
2242 
2243 		struct msr *reg = per_cpu_ptr(msrs, cpu);
2244 
2245 		if (on) {
2246 			if (reg->l & MSR_MCGCTL_NBE)
2247 				s->flags.nb_mce_enable = 1;
2248 
2249 			reg->l |= MSR_MCGCTL_NBE;
2250 		} else {
2251 			/*
2252 			 * Turn off NB MCE reporting only when it was off before
2253 			 */
2254 			if (!s->flags.nb_mce_enable)
2255 				reg->l &= ~MSR_MCGCTL_NBE;
2256 		}
2257 	}
2258 	wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2259 
2260 	free_cpumask_var(cmask);
2261 
2262 	return 0;
2263 }
2264 
2265 static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
2266 				       struct pci_dev *F3)
2267 {
2268 	bool ret = true;
2269 	u32 value, mask = 0x3;		/* UECC/CECC enable */
2270 
2271 	if (toggle_ecc_err_reporting(s, nid, ON)) {
2272 		amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
2273 		return false;
2274 	}
2275 
2276 	amd64_read_pci_cfg(F3, NBCTL, &value);
2277 
2278 	s->old_nbctl   = value & mask;
2279 	s->nbctl_valid = true;
2280 
2281 	value |= mask;
2282 	amd64_write_pci_cfg(F3, NBCTL, value);
2283 
2284 	amd64_read_pci_cfg(F3, NBCFG, &value);
2285 
2286 	edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
2287 		 nid, value, !!(value & NBCFG_ECC_ENABLE));
2288 
2289 	if (!(value & NBCFG_ECC_ENABLE)) {
2290 		amd64_warn("DRAM ECC disabled on this node, enabling...\n");
2291 
2292 		s->flags.nb_ecc_prev = 0;
2293 
2294 		/* Attempt to turn on DRAM ECC Enable */
2295 		value |= NBCFG_ECC_ENABLE;
2296 		amd64_write_pci_cfg(F3, NBCFG, value);
2297 
2298 		amd64_read_pci_cfg(F3, NBCFG, &value);
2299 
2300 		if (!(value & NBCFG_ECC_ENABLE)) {
2301 			amd64_warn("Hardware rejected DRAM ECC enable, "
2302 				   "check memory DIMM configuration.\n");
2303 			ret = false;
2304 		} else {
2305 			amd64_info("Hardware accepted DRAM ECC Enable\n");
2306 		}
2307 	} else {
2308 		s->flags.nb_ecc_prev = 1;
2309 	}
2310 
2311 	edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
2312 		 nid, value, !!(value & NBCFG_ECC_ENABLE));
2313 
2314 	return ret;
2315 }
2316 
2317 static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
2318 					struct pci_dev *F3)
2319 {
2320 	u32 value, mask = 0x3;		/* UECC/CECC enable */
2321 
2322 
2323 	if (!s->nbctl_valid)
2324 		return;
2325 
2326 	amd64_read_pci_cfg(F3, NBCTL, &value);
2327 	value &= ~mask;
2328 	value |= s->old_nbctl;
2329 
2330 	amd64_write_pci_cfg(F3, NBCTL, value);
2331 
2332 	/* restore previous BIOS DRAM ECC "off" setting we force-enabled */
2333 	if (!s->flags.nb_ecc_prev) {
2334 		amd64_read_pci_cfg(F3, NBCFG, &value);
2335 		value &= ~NBCFG_ECC_ENABLE;
2336 		amd64_write_pci_cfg(F3, NBCFG, value);
2337 	}
2338 
2339 	/* restore the NB Enable MCGCTL bit */
2340 	if (toggle_ecc_err_reporting(s, nid, OFF))
2341 		amd64_warn("Error restoring NB MCGCTL settings!\n");
2342 }
2343 
2344 /*
2345  * EDAC requires that the BIOS have ECC enabled before
2346  * taking over the processing of ECC errors. A command line
2347  * option allows force-enabling hardware ECC later in
2348  * enable_ecc_error_reporting().
2349  */
2350 static const char *ecc_msg =
2351 	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
2352 	" Either enable ECC checking or force module loading by setting "
2353 	"'ecc_enable_override'.\n"
2354 	" (Note that use of the override may cause unknown side effects.)\n";
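
/*
 * For example, assuming the driver is built as the amd64_edac module, the
 * override can be set when loading it:
 *
 *	modprobe amd64_edac ecc_enable_override=1
 *
 * (or amd64_edac.ecc_enable_override=1 on the kernel command line for a
 * built-in driver).
 */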
2355 
2356 static bool ecc_enabled(struct pci_dev *F3, u8 nid)
2357 {
2358 	u32 value;
2359 	u8 ecc_en = 0;
2360 	bool nb_mce_en = false;
2361 
2362 	amd64_read_pci_cfg(F3, NBCFG, &value);
2363 
2364 	ecc_en = !!(value & NBCFG_ECC_ENABLE);
2365 	amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));
2366 
2367 	nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid);
2368 	if (!nb_mce_en)
2369 		amd64_notice("NB MCE bank disabled, set MSR "
2370 			     "0x%08x[4] on node %d to enable.\n",
2371 			     MSR_IA32_MCG_CTL, nid);
2372 
2373 	if (!ecc_en || !nb_mce_en) {
2374 		amd64_notice("%s", ecc_msg);
2375 		return false;
2376 	}
2377 	return true;
2378 }
2379 
2380 static int set_mc_sysfs_attrs(struct mem_ctl_info *mci)
2381 {
2382 	int rc;
2383 
2384 	rc = amd64_create_sysfs_dbg_files(mci);
2385 	if (rc < 0)
2386 		return rc;
2387 
2388 	if (boot_cpu_data.x86 >= 0x10) {
2389 		rc = amd64_create_sysfs_inject_files(mci);
2390 		if (rc < 0)
2391 			return rc;
2392 	}
2393 
2394 	return 0;
2395 }
2396 
2397 static void del_mc_sysfs_attrs(struct mem_ctl_info *mci)
2398 {
2399 	amd64_remove_sysfs_dbg_files(mci);
2400 
2401 	if (boot_cpu_data.x86 >= 0x10)
2402 		amd64_remove_sysfs_inject_files(mci);
2403 }
2404 
2405 static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
2406 				 struct amd64_family_type *fam)
2407 {
2408 	struct amd64_pvt *pvt = mci->pvt_info;
2409 
2410 	mci->mtype_cap		= MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
2411 	mci->edac_ctl_cap	= EDAC_FLAG_NONE;
2412 
2413 	if (pvt->nbcap & NBCAP_SECDED)
2414 		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
2415 
2416 	if (pvt->nbcap & NBCAP_CHIPKILL)
2417 		mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
2418 
2419 	mci->edac_cap		= amd64_determine_edac_cap(pvt);
2420 	mci->mod_name		= EDAC_MOD_STR;
2421 	mci->mod_ver		= EDAC_AMD64_VERSION;
2422 	mci->ctl_name		= fam->ctl_name;
2423 	mci->dev_name		= pci_name(pvt->F2);
2424 	mci->ctl_page_to_phys	= NULL;
2425 
2426 	/* memory scrubber interface */
2427 	mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
2428 	mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
2429 }
2430 
2431 /*
2432  * returns a pointer to the family descriptor on success, NULL otherwise.
2433  */
2434 static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
2435 {
2436 	u8 fam = boot_cpu_data.x86;
2437 	struct amd64_family_type *fam_type = NULL;
2438 
2439 	switch (fam) {
2440 	case 0xf:
2441 		fam_type		= &amd64_family_types[K8_CPUS];
2442 		pvt->ops		= &amd64_family_types[K8_CPUS].ops;
2443 		break;
2444 
2445 	case 0x10:
2446 		fam_type		= &amd64_family_types[F10_CPUS];
2447 		pvt->ops		= &amd64_family_types[F10_CPUS].ops;
2448 		break;
2449 
2450 	case 0x15:
2451 		fam_type		= &amd64_family_types[F15_CPUS];
2452 		pvt->ops		= &amd64_family_types[F15_CPUS].ops;
2453 		break;
2454 
2455 	default:
2456 		amd64_err("Unsupported family!\n");
2457 		return NULL;
2458 	}
2459 
2460 	pvt->ext_model = boot_cpu_data.x86_model >> 4;
2461 
2462 	amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
2463 		     (fam == 0xf ?
2464 				(pvt->ext_model >= K8_REV_F  ? "revF or later "
2465 							     : "revE or earlier ")
2466 				 : ""), pvt->mc_node_id);
2467 	return fam_type;
2468 }
2469 
2470 static int amd64_init_one_instance(struct pci_dev *F2)
2471 {
2472 	struct amd64_pvt *pvt = NULL;
2473 	struct amd64_family_type *fam_type = NULL;
2474 	struct mem_ctl_info *mci = NULL;
2475 	struct edac_mc_layer layers[2];
2476 	int err = 0, ret;
2477 	u8 nid = get_node_id(F2);
2478 
2479 	ret = -ENOMEM;
2480 	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
2481 	if (!pvt)
2482 		goto err_ret;
2483 
2484 	pvt->mc_node_id	= nid;
2485 	pvt->F2 = F2;
2486 
2487 	ret = -EINVAL;
2488 	fam_type = amd64_per_family_init(pvt);
2489 	if (!fam_type)
2490 		goto err_free;
2491 
2492 	ret = -ENODEV;
2493 	err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
2494 	if (err)
2495 		goto err_free;
2496 
2497 	read_mc_regs(pvt);
2498 
2499 	/*
2500 	 * We need to determine how many memory channels there are. Then use
2501 	 * that information for calculating the size of the dynamic instance
2502 	 * tables in the 'mci' structure.
2503 	 */
2504 	ret = -EINVAL;
2505 	pvt->channel_count = pvt->ops->early_channel_count(pvt);
2506 	if (pvt->channel_count < 0)
2507 		goto err_siblings;
2508 
2509 	ret = -ENOMEM;
2510 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
2511 	layers[0].size = pvt->csels[0].b_cnt;
2512 	layers[0].is_virt_csrow = true;
2513 	layers[1].type = EDAC_MC_LAYER_CHANNEL;
2514 	layers[1].size = pvt->channel_count;
2515 	layers[1].is_virt_csrow = false;
2516 	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
2517 	if (!mci)
2518 		goto err_siblings;
2519 
2520 	mci->pvt_info = pvt;
2521 	mci->pdev = &pvt->F2->dev;
2522 	mci->csbased = 1;
2523 
2524 	setup_mci_misc_attrs(mci, fam_type);
2525 
2526 	if (init_csrows(mci))
2527 		mci->edac_cap = EDAC_FLAG_NONE;
2528 
2529 	ret = -ENODEV;
2530 	if (edac_mc_add_mc(mci)) {
2531 		edac_dbg(1, "failed edac_mc_add_mc()\n");
2532 		goto err_add_mc;
2533 	}
2534 	if (set_mc_sysfs_attrs(mci)) {
2535 		edac_dbg(1, "failed set_mc_sysfs_attrs()\n");
2536 		goto err_add_sysfs;
2537 	}
2538 
2539 	/* register stuff with EDAC MCE */
2540 	if (report_gart_errors)
2541 		amd_report_gart_errors(true);
2542 
2543 	amd_register_ecc_decoder(amd64_decode_bus_error);
2544 
2545 	mcis[nid] = mci;
2546 
2547 	atomic_inc(&drv_instances);
2548 
2549 	return 0;
2550 
2551 err_add_sysfs:
2552 	edac_mc_del_mc(mci->pdev);
2553 err_add_mc:
2554 	edac_mc_free(mci);
2555 
2556 err_siblings:
2557 	free_mc_sibling_devs(pvt);
2558 
2559 err_free:
2560 	kfree(pvt);
2561 
2562 err_ret:
2563 	return ret;
2564 }
2565 
2566 static int amd64_probe_one_instance(struct pci_dev *pdev,
2567 				    const struct pci_device_id *mc_type)
2568 {
2569 	u8 nid = get_node_id(pdev);
2570 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2571 	struct ecc_settings *s;
2572 	int ret = 0;
2573 
2574 	ret = pci_enable_device(pdev);
2575 	if (ret < 0) {
2576 		edac_dbg(0, "ret=%d\n", ret);
2577 		return -EIO;
2578 	}
2579 
2580 	ret = -ENOMEM;
2581 	s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
2582 	if (!s)
2583 		goto err_out;
2584 
2585 	ecc_stngs[nid] = s;
2586 
2587 	if (!ecc_enabled(F3, nid)) {
2588 		ret = -ENODEV;
2589 
2590 		if (!ecc_enable_override)
2591 			goto err_enable;
2592 
2593 		amd64_warn("Forcing ECC on!\n");
2594 
2595 		if (!enable_ecc_error_reporting(s, nid, F3))
2596 			goto err_enable;
2597 	}
2598 
2599 	ret = amd64_init_one_instance(pdev);
2600 	if (ret < 0) {
2601 		amd64_err("Error probing instance: %d\n", nid);
2602 		restore_ecc_error_reporting(s, nid, F3);
2603 	}
2604 
2605 	return ret;
2606 
2607 err_enable:
2608 	kfree(s);
2609 	ecc_stngs[nid] = NULL;
2610 
2611 err_out:
2612 	return ret;
2613 }
2614 
2615 static void amd64_remove_one_instance(struct pci_dev *pdev)
2616 {
2617 	struct mem_ctl_info *mci;
2618 	struct amd64_pvt *pvt;
2619 	u8 nid = get_node_id(pdev);
2620 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2621 	struct ecc_settings *s = ecc_stngs[nid];
2622 
2623 	mci = find_mci_by_dev(&pdev->dev);
2624 	del_mc_sysfs_attrs(mci);
2625 	/* Remove from EDAC CORE tracking list */
2626 	mci = edac_mc_del_mc(&pdev->dev);
2627 	if (!mci)
2628 		return;
2629 
2630 	pvt = mci->pvt_info;
2631 
2632 	restore_ecc_error_reporting(s, nid, F3);
2633 
2634 	free_mc_sibling_devs(pvt);
2635 
2636 	/* unregister from EDAC MCE */
2637 	amd_report_gart_errors(false);
2638 	amd_unregister_ecc_decoder(amd64_decode_bus_error);
2639 
2640 	kfree(ecc_stngs[nid]);
2641 	ecc_stngs[nid] = NULL;
2642 
2643 	/* Free the EDAC CORE resources */
2644 	mci->pvt_info = NULL;
2645 	mcis[nid] = NULL;
2646 
2647 	kfree(pvt);
2648 	edac_mc_free(mci);
2649 }
2650 
2651 /*
2652  * This table is part of the interface for loading drivers for PCI devices. The
2653  * PCI core identifies what devices are present on a system during boot, and
2654  * then queries this table to see whether this driver handles a given device.
2655  */
2656 static DEFINE_PCI_DEVICE_TABLE(amd64_pci_table) = {
2657 	{
2658 		.vendor		= PCI_VENDOR_ID_AMD,
2659 		.device		= PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
2660 		.subvendor	= PCI_ANY_ID,
2661 		.subdevice	= PCI_ANY_ID,
2662 		.class		= 0,
2663 		.class_mask	= 0,
2664 	},
2665 	{
2666 		.vendor		= PCI_VENDOR_ID_AMD,
2667 		.device		= PCI_DEVICE_ID_AMD_10H_NB_DRAM,
2668 		.subvendor	= PCI_ANY_ID,
2669 		.subdevice	= PCI_ANY_ID,
2670 		.class		= 0,
2671 		.class_mask	= 0,
2672 	},
2673 	{
2674 		.vendor		= PCI_VENDOR_ID_AMD,
2675 		.device		= PCI_DEVICE_ID_AMD_15H_NB_F2,
2676 		.subvendor	= PCI_ANY_ID,
2677 		.subdevice	= PCI_ANY_ID,
2678 		.class		= 0,
2679 		.class_mask	= 0,
2680 	},
2681 
2682 	{0, }
2683 };
2684 MODULE_DEVICE_TABLE(pci, amd64_pci_table);
2685 
2686 static struct pci_driver amd64_pci_driver = {
2687 	.name		= EDAC_MOD_STR,
2688 	.probe		= amd64_probe_one_instance,
2689 	.remove		= amd64_remove_one_instance,
2690 	.id_table	= amd64_pci_table,
2691 };
2692 
2693 static void setup_pci_device(void)
2694 {
2695 	struct mem_ctl_info *mci;
2696 	struct amd64_pvt *pvt;
2697 
2698 	if (amd64_ctl_pci)
2699 		return;
2700 
2701 	mci = mcis[0];
2702 	if (mci) {
2703 
2704 		pvt = mci->pvt_info;
2705 		amd64_ctl_pci =
2706 			edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
2707 
2708 		if (!amd64_ctl_pci) {
2709 			pr_warning("%s(): Unable to create PCI control\n",
2710 				   __func__);
2711 
2712 			pr_warning("%s(): PCI error report via EDAC not set\n",
2713 				   __func__);
2714 		}
2715 	}
2716 }
2717 
2718 static int __init amd64_edac_init(void)
2719 {
2720 	int err = -ENODEV;
2721 
2722 	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
2723 
2724 	opstate_init();
2725 
2726 	if (amd_cache_northbridges() < 0)
2727 		goto err_ret;
2728 
2729 	err = -ENOMEM;
2730 	mcis	  = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
2731 	ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
2732 	if (!(mcis && ecc_stngs))
2733 		goto err_free;
2734 
2735 	msrs = msrs_alloc();
2736 	if (!msrs)
2737 		goto err_free;
2738 
2739 	err = pci_register_driver(&amd64_pci_driver);
2740 	if (err)
2741 		goto err_pci;
2742 
2743 	err = -ENODEV;
2744 	if (!atomic_read(&drv_instances))
2745 		goto err_no_instances;
2746 
2747 	setup_pci_device();
2748 	return 0;
2749 
2750 err_no_instances:
2751 	pci_unregister_driver(&amd64_pci_driver);
2752 
2753 err_pci:
2754 	msrs_free(msrs);
2755 	msrs = NULL;
2756 
2757 err_free:
2758 	kfree(mcis);
2759 	mcis = NULL;
2760 
2761 	kfree(ecc_stngs);
2762 	ecc_stngs = NULL;
2763 
2764 err_ret:
2765 	return err;
2766 }
2767 
2768 static void __exit amd64_edac_exit(void)
2769 {
2770 	if (amd64_ctl_pci)
2771 		edac_pci_release_generic_ctl(amd64_ctl_pci);
2772 
2773 	pci_unregister_driver(&amd64_pci_driver);
2774 
2775 	kfree(ecc_stngs);
2776 	ecc_stngs = NULL;
2777 
2778 	kfree(mcis);
2779 	mcis = NULL;
2780 
2781 	msrs_free(msrs);
2782 	msrs = NULL;
2783 }
2784 
2785 module_init(amd64_edac_init);
2786 module_exit(amd64_edac_exit);
2787 
2788 MODULE_LICENSE("GPL");
2789 MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
2790 		"Dave Peterson, Thayne Harbaugh");
2791 MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
2792 		EDAC_AMD64_VERSION);
2793 
2794 module_param(edac_op_state, int, 0444);
2795 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2796