// SPDX-License-Identifier: GPL-2.0-only
#include "amd64_edac.h"
#include <asm/amd_nb.h>

static struct edac_pci_ctl_info *pci_ctl;

static int report_gart_errors;
module_param(report_gart_errors, int, 0644);

/*
 * Set by command line parameter. If BIOS has enabled the ECC, this override is
 * cleared to prevent re-enabling the hardware by this driver.
 */
static int ecc_enable_override;
module_param(ecc_enable_override, int, 0644);

static struct msr __percpu *msrs;

static struct amd64_family_type *fam_type;

/* Per-node stuff */
static struct ecc_settings **ecc_stngs;

/*
 * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
 * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
 * or higher value'.
 *
 * FIXME: Produce a better mapping/linearisation.
 */
static const struct scrubrate {
        u32 scrubval;           /* bit pattern for scrub rate */
        u32 bandwidth;          /* bandwidth consumed (bytes/sec) */
} scrubrates[] = {
        { 0x01, 1600000000UL},
        { 0x02, 800000000UL},
        { 0x03, 400000000UL},
        { 0x04, 200000000UL},
        { 0x05, 100000000UL},
        { 0x06, 50000000UL},
        { 0x07, 25000000UL},
        { 0x08, 12284069UL},
        { 0x09, 6274509UL},
        { 0x0A, 3121951UL},
        { 0x0B, 1560975UL},
        { 0x0C, 781440UL},
        { 0x0D, 390720UL},
        { 0x0E, 195300UL},
        { 0x0F, 97650UL},
        { 0x10, 48854UL},
        { 0x11, 24427UL},
        { 0x12, 12213UL},
        { 0x13, 6101UL},
        { 0x14, 3051UL},
        { 0x15, 1523UL},
        { 0x16, 761UL},
        { 0x00, 0UL},           /* scrubbing off */
};

int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
                               u32 *val, const char *func)
{
        int err = 0;

        err = pci_read_config_dword(pdev, offset, val);
        if (err)
                amd64_warn("%s: error reading F%dx%03x.\n",
                           func, PCI_FUNC(pdev->devfn), offset);

        return err;
}

int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
                                u32 val, const char *func)
{
        int err = 0;

        err = pci_write_config_dword(pdev, offset, val);
        if (err)
                amd64_warn("%s: error writing to F%dx%03x.\n",
                           func, PCI_FUNC(pdev->devfn), offset);

        return err;
}

/*
 * Select DCT to which PCI cfg accesses are routed
 */
static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
{
        u32 reg = 0;

        amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
        reg &= (pvt->model == 0x30) ? ~3 : ~1;
        reg |= dct;
        amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
}

/*
 *
 * Depending on the family, F2 DCT reads need special handling:
 *
 * K8: has a single DCT only and no address offsets >= 0x100
 *
 * F10h: each DCT has its own set of regs
 *      DCT0 -> F2x040..
 *      DCT1 -> F2x140..
 *
 * F16h: has only 1 DCT
 *
 * F15h: we select which DCT we access using F1x10C[DctCfgSel]
 */
static inline int amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct,
                                         int offset, u32 *val)
{
        switch (pvt->fam) {
        case 0xf:
                if (dct || offset >= 0x100)
                        return -EINVAL;
                break;

        case 0x10:
                if (dct) {
                        /*
                         * Note: If ganging is enabled, barring the regs
                         * F2x[1,0]98 and F2x[1,0]9C; reads to F2x1xx
                         * return 0. (cf. Section 2.8.1 F10h BKDG)
                         */
                        if (dct_ganging_enabled(pvt))
                                return 0;

                        offset += 0x100;
                }
                break;

        case 0x15:
                /*
                 * F15h: F2x1xx addresses do not map explicitly to DCT1.
139 * We should select which DCT we access using F1x10C[DctCfgSel] 140 */ 141 dct = (dct && pvt->model == 0x30) ? 3 : dct; 142 f15h_select_dct(pvt, dct); 143 break; 144 145 case 0x16: 146 if (dct) 147 return -EINVAL; 148 break; 149 150 default: 151 break; 152 } 153 return amd64_read_pci_cfg(pvt->F2, offset, val); 154 } 155 156 /* 157 * Memory scrubber control interface. For K8, memory scrubbing is handled by 158 * hardware and can involve L2 cache, dcache as well as the main memory. With 159 * F10, this is extended to L3 cache scrubbing on CPU models sporting that 160 * functionality. 161 * 162 * This causes the "units" for the scrubbing speed to vary from 64 byte blocks 163 * (dram) over to cache lines. This is nasty, so we will use bandwidth in 164 * bytes/sec for the setting. 165 * 166 * Currently, we only do dram scrubbing. If the scrubbing is done in software on 167 * other archs, we might not have access to the caches directly. 168 */ 169 170 static inline void __f17h_set_scrubval(struct amd64_pvt *pvt, u32 scrubval) 171 { 172 /* 173 * Fam17h supports scrub values between 0x5 and 0x14. Also, the values 174 * are shifted down by 0x5, so scrubval 0x5 is written to the register 175 * as 0x0, scrubval 0x6 as 0x1, etc. 176 */ 177 if (scrubval >= 0x5 && scrubval <= 0x14) { 178 scrubval -= 0x5; 179 pci_write_bits32(pvt->F6, F17H_SCR_LIMIT_ADDR, scrubval, 0xF); 180 pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 1, 0x1); 181 } else { 182 pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 0, 0x1); 183 } 184 } 185 /* 186 * Scan the scrub rate mapping table for a close or matching bandwidth value to 187 * issue. If requested is too big, then use last maximum value found. 188 */ 189 static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate) 190 { 191 u32 scrubval; 192 int i; 193 194 /* 195 * map the configured rate (new_bw) to a value specific to the AMD64 196 * memory controller and apply to register. Search for the first 197 * bandwidth entry that is greater or equal than the setting requested 198 * and program that. If at last entry, turn off DRAM scrubbing. 199 * 200 * If no suitable bandwidth is found, turn off DRAM scrubbing entirely 201 * by falling back to the last element in scrubrates[]. 
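	 * The scrubrates[] table is sorted by decreasing bandwidth, so the
	 * walk below stops at the first entry whose bandwidth fits within
	 * the request. E.g. with min_rate == 0x5, a request of 600000000
	 * bytes/sec skips the 0x01-0x04 entries and stops at
	 * { 0x05, 100000000UL }, so scrubval 0x5 is programmed and
	 * 100000000 is reported back; a request below the 0x16 entry
	 * (761 bytes/sec) runs off the end of the table onto the
	 * terminating { 0x00, 0UL } entry, i.e. scrubbing is switched off.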
202 */ 203 for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) { 204 /* 205 * skip scrub rates which aren't recommended 206 * (see F10 BKDG, F3x58) 207 */ 208 if (scrubrates[i].scrubval < min_rate) 209 continue; 210 211 if (scrubrates[i].bandwidth <= new_bw) 212 break; 213 } 214 215 scrubval = scrubrates[i].scrubval; 216 217 if (pvt->umc) { 218 __f17h_set_scrubval(pvt, scrubval); 219 } else if (pvt->fam == 0x15 && pvt->model == 0x60) { 220 f15h_select_dct(pvt, 0); 221 pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F); 222 f15h_select_dct(pvt, 1); 223 pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F); 224 } else { 225 pci_write_bits32(pvt->F3, SCRCTRL, scrubval, 0x001F); 226 } 227 228 if (scrubval) 229 return scrubrates[i].bandwidth; 230 231 return 0; 232 } 233 234 static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw) 235 { 236 struct amd64_pvt *pvt = mci->pvt_info; 237 u32 min_scrubrate = 0x5; 238 239 if (pvt->fam == 0xf) 240 min_scrubrate = 0x0; 241 242 if (pvt->fam == 0x15) { 243 /* Erratum #505 */ 244 if (pvt->model < 0x10) 245 f15h_select_dct(pvt, 0); 246 247 if (pvt->model == 0x60) 248 min_scrubrate = 0x6; 249 } 250 return __set_scrub_rate(pvt, bw, min_scrubrate); 251 } 252 253 static int get_scrub_rate(struct mem_ctl_info *mci) 254 { 255 struct amd64_pvt *pvt = mci->pvt_info; 256 int i, retval = -EINVAL; 257 u32 scrubval = 0; 258 259 if (pvt->umc) { 260 amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval); 261 if (scrubval & BIT(0)) { 262 amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval); 263 scrubval &= 0xF; 264 scrubval += 0x5; 265 } else { 266 scrubval = 0; 267 } 268 } else if (pvt->fam == 0x15) { 269 /* Erratum #505 */ 270 if (pvt->model < 0x10) 271 f15h_select_dct(pvt, 0); 272 273 if (pvt->model == 0x60) 274 amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval); 275 } else { 276 amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); 277 } 278 279 scrubval = scrubval & 0x001F; 280 281 for (i = 0; i < ARRAY_SIZE(scrubrates); i++) { 282 if (scrubrates[i].scrubval == scrubval) { 283 retval = scrubrates[i].bandwidth; 284 break; 285 } 286 } 287 return retval; 288 } 289 290 /* 291 * returns true if the SysAddr given by sys_addr matches the 292 * DRAM base/limit associated with node_id 293 */ 294 static bool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid) 295 { 296 u64 addr; 297 298 /* The K8 treats this as a 40-bit value. However, bits 63-40 will be 299 * all ones if the most significant implemented address bit is 1. 300 * Here we discard bits 63-40. See section 3.4.2 of AMD publication 301 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1 302 * Application Programming. 303 */ 304 addr = sys_addr & 0x000000ffffffffffull; 305 306 return ((addr >= get_dram_base(pvt, nid)) && 307 (addr <= get_dram_limit(pvt, nid))); 308 } 309 310 /* 311 * Attempt to map a SysAddr to a node. On success, return a pointer to the 312 * mem_ctl_info structure for the node that the SysAddr maps to. 313 * 314 * On failure, return NULL. 315 */ 316 static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, 317 u64 sys_addr) 318 { 319 struct amd64_pvt *pvt; 320 u8 node_id; 321 u32 intlv_en, bits; 322 323 /* 324 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section 325 * 3.4.4.2) registers to map the SysAddr to a node ID. 326 */ 327 pvt = mci->pvt_info; 328 329 /* 330 * The value of this field should be the same for all DRAM Base 331 * registers. 
	 * Therefore we arbitrarily choose to read it from the
	 * register for node 0.
	 */
	intlv_en = dram_intlv_en(pvt, 0);

	if (intlv_en == 0) {
		for (node_id = 0; node_id < DRAM_RANGES; node_id++) {
			if (base_limit_match(pvt, sys_addr, node_id))
				goto found;
		}
		goto err_no_match;
	}

	if (unlikely((intlv_en != 0x01) &&
		     (intlv_en != 0x03) &&
		     (intlv_en != 0x07))) {
		amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
		return NULL;
	}

	bits = (((u32) sys_addr) >> 12) & intlv_en;

	for (node_id = 0; ; ) {
		if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits)
			break;	/* intlv_sel field matches */

		if (++node_id >= DRAM_RANGES)
			goto err_no_match;
	}

	/* sanity test for sys_addr */
	if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) {
		amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address range for node %d with node interleaving enabled.\n",
			   __func__, sys_addr, node_id);
		return NULL;
	}

found:
	return edac_mc_find((int)node_id);

err_no_match:
	edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
		 (unsigned long)sys_addr);

	return NULL;
}

/*
 * compute the CS base address of the @csrow on the DRAM controller @dct.
 * For details see F2x[5C:40] in the processor's BKDG
 */
static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
				 u64 *base, u64 *mask)
{
	u64 csbase, csmask, base_bits, mask_bits;
	u8 addr_shift;

	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
		csbase = pvt->csels[dct].csbases[csrow];
		csmask = pvt->csels[dct].csmasks[csrow];
		base_bits = GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9);
		mask_bits = GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9);
		addr_shift = 4;

	/*
	 * F16h and F15h, models 30h and later need two addr_shift values:
	 * 8 for high and 6 for low (cf. F16h BKDG).
	 */
	} else if (pvt->fam == 0x16 ||
		   (pvt->fam == 0x15 && pvt->model >= 0x30)) {
		csbase = pvt->csels[dct].csbases[csrow];
		csmask = pvt->csels[dct].csmasks[csrow >> 1];

		*base = (csbase & GENMASK_ULL(15, 5)) << 6;
		*base |= (csbase & GENMASK_ULL(30, 19)) << 8;

		*mask = ~0ULL;
		/* poke holes for the csmask */
		*mask &= ~((GENMASK_ULL(15, 5) << 6) |
			   (GENMASK_ULL(30, 19) << 8));

		*mask |= (csmask & GENMASK_ULL(15, 5)) << 6;
		*mask |= (csmask & GENMASK_ULL(30, 19)) << 8;

		return;
	} else {
		csbase = pvt->csels[dct].csbases[csrow];
		csmask = pvt->csels[dct].csmasks[csrow >> 1];
		addr_shift = 8;

		if (pvt->fam == 0x15)
			base_bits = mask_bits =
				GENMASK_ULL(30,19) | GENMASK_ULL(13,5);
		else
			base_bits = mask_bits =
				GENMASK_ULL(28,19) | GENMASK_ULL(13,5);
	}

	*base = (csbase & base_bits) << addr_shift;

	*mask = ~0ULL;
	/* poke holes for the csmask */
	*mask &= ~(mask_bits << addr_shift);
	/* OR them in */
	*mask |= (csmask & mask_bits) << addr_shift;
}

#define for_each_chip_select(i, dct, pvt) \
	for (i = 0; i < pvt->csels[dct].b_cnt; i++)

#define chip_select_base(i, dct, pvt) \
	pvt->csels[dct].csbases[i]

#define for_each_chip_select_mask(i, dct, pvt) \
	for (i = 0; i < pvt->csels[dct].m_cnt; i++)

#define for_each_umc(i) \
	for (i = 0; i < fam_type->max_mcs; i++)

/*
 * @input_addr is an InputAddr associated with the node given by mci.
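 * Each enabled chip select is tested by masking both the InputAddr and the
 * chip select base with the inverted mask from get_cs_base_and_mask().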
Return the 453 * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr). 454 */ 455 static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr) 456 { 457 struct amd64_pvt *pvt; 458 int csrow; 459 u64 base, mask; 460 461 pvt = mci->pvt_info; 462 463 for_each_chip_select(csrow, 0, pvt) { 464 if (!csrow_enabled(csrow, 0, pvt)) 465 continue; 466 467 get_cs_base_and_mask(pvt, csrow, 0, &base, &mask); 468 469 mask = ~mask; 470 471 if ((input_addr & mask) == (base & mask)) { 472 edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n", 473 (unsigned long)input_addr, csrow, 474 pvt->mc_node_id); 475 476 return csrow; 477 } 478 } 479 edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n", 480 (unsigned long)input_addr, pvt->mc_node_id); 481 482 return -1; 483 } 484 485 /* 486 * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094) 487 * for the node represented by mci. Info is passed back in *hole_base, 488 * *hole_offset, and *hole_size. Function returns 0 if info is valid or 1 if 489 * info is invalid. Info may be invalid for either of the following reasons: 490 * 491 * - The revision of the node is not E or greater. In this case, the DRAM Hole 492 * Address Register does not exist. 493 * 494 * - The DramHoleValid bit is cleared in the DRAM Hole Address Register, 495 * indicating that its contents are not valid. 496 * 497 * The values passed back in *hole_base, *hole_offset, and *hole_size are 498 * complete 32-bit values despite the fact that the bitfields in the DHAR 499 * only represent bits 31-24 of the base and offset values. 500 */ 501 int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, 502 u64 *hole_offset, u64 *hole_size) 503 { 504 struct amd64_pvt *pvt = mci->pvt_info; 505 506 /* only revE and later have the DRAM Hole Address Register */ 507 if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) { 508 edac_dbg(1, " revision %d for node %d does not support DHAR\n", 509 pvt->ext_model, pvt->mc_node_id); 510 return 1; 511 } 512 513 /* valid for Fam10h and above */ 514 if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) { 515 edac_dbg(1, " Dram Memory Hoisting is DISABLED on this system\n"); 516 return 1; 517 } 518 519 if (!dhar_valid(pvt)) { 520 edac_dbg(1, " Dram Memory Hoisting is DISABLED on this node %d\n", 521 pvt->mc_node_id); 522 return 1; 523 } 524 525 /* This node has Memory Hoisting */ 526 527 /* +------------------+--------------------+--------------------+----- 528 * | memory | DRAM hole | relocated | 529 * | [0, (x - 1)] | [x, 0xffffffff] | addresses from | 530 * | | | DRAM hole | 531 * | | | [0x100000000, | 532 * | | | (0x100000000+ | 533 * | | | (0xffffffff-x))] | 534 * +------------------+--------------------+--------------------+----- 535 * 536 * Above is a diagram of physical memory showing the DRAM hole and the 537 * relocated addresses from the DRAM hole. As shown, the DRAM hole 538 * starts at address x (the base address) and extends through address 539 * 0xffffffff. The DRAM Hole Address Register (DHAR) relocates the 540 * addresses in the hole so that they start at 0x100000000. 541 */ 542 543 *hole_base = dhar_base(pvt); 544 *hole_size = (1ULL << 32) - *hole_base; 545 546 *hole_offset = (pvt->fam > 0xf) ? 
f10_dhar_offset(pvt) 547 : k8_dhar_offset(pvt); 548 549 edac_dbg(1, " DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n", 550 pvt->mc_node_id, (unsigned long)*hole_base, 551 (unsigned long)*hole_offset, (unsigned long)*hole_size); 552 553 return 0; 554 } 555 EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info); 556 557 /* 558 * Return the DramAddr that the SysAddr given by @sys_addr maps to. It is 559 * assumed that sys_addr maps to the node given by mci. 560 * 561 * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section 562 * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a 563 * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled, 564 * then it is also involved in translating a SysAddr to a DramAddr. Sections 565 * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting. 566 * These parts of the documentation are unclear. I interpret them as follows: 567 * 568 * When node n receives a SysAddr, it processes the SysAddr as follows: 569 * 570 * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM 571 * Limit registers for node n. If the SysAddr is not within the range 572 * specified by the base and limit values, then node n ignores the Sysaddr 573 * (since it does not map to node n). Otherwise continue to step 2 below. 574 * 575 * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is 576 * disabled so skip to step 3 below. Otherwise see if the SysAddr is within 577 * the range of relocated addresses (starting at 0x100000000) from the DRAM 578 * hole. If not, skip to step 3 below. Else get the value of the 579 * DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the 580 * offset defined by this value from the SysAddr. 581 * 582 * 3. Obtain the base address for node n from the DRAMBase field of the DRAM 583 * Base register for node n. To obtain the DramAddr, subtract the base 584 * address from the SysAddr, as shown near the start of section 3.4.4 (p.70). 585 */ 586 static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr) 587 { 588 struct amd64_pvt *pvt = mci->pvt_info; 589 u64 dram_base, hole_base, hole_offset, hole_size, dram_addr; 590 int ret; 591 592 dram_base = get_dram_base(pvt, pvt->mc_node_id); 593 594 ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, 595 &hole_size); 596 if (!ret) { 597 if ((sys_addr >= (1ULL << 32)) && 598 (sys_addr < ((1ULL << 32) + hole_size))) { 599 /* use DHAR to translate SysAddr to DramAddr */ 600 dram_addr = sys_addr - hole_offset; 601 602 edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n", 603 (unsigned long)sys_addr, 604 (unsigned long)dram_addr); 605 606 return dram_addr; 607 } 608 } 609 610 /* 611 * Translate the SysAddr to a DramAddr as shown near the start of 612 * section 3.4.4 (p. 70). Although sys_addr is a 64-bit value, the k8 613 * only deals with 40-bit values. Therefore we discard bits 63-40 of 614 * sys_addr below. If bit 39 of sys_addr is 1 then the bits we 615 * discard are all 1s. Otherwise the bits we discard are all 0s. See 616 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture 617 * Programmer's Manual Volume 1 Application Programming. 
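	 * E.g. with a DRAM base of 0x100000000 for this node and no hoisting
	 * in effect, a sys_addr of 0x180001000 is unchanged by the 40-bit
	 * truncation and yields a DramAddr of 0x80001000.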
618 */ 619 dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base; 620 621 edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n", 622 (unsigned long)sys_addr, (unsigned long)dram_addr); 623 return dram_addr; 624 } 625 626 /* 627 * @intlv_en is the value of the IntlvEn field from a DRAM Base register 628 * (section 3.4.4.1). Return the number of bits from a SysAddr that are used 629 * for node interleaving. 630 */ 631 static int num_node_interleave_bits(unsigned intlv_en) 632 { 633 static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 }; 634 int n; 635 636 BUG_ON(intlv_en > 7); 637 n = intlv_shift_table[intlv_en]; 638 return n; 639 } 640 641 /* Translate the DramAddr given by @dram_addr to an InputAddr. */ 642 static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr) 643 { 644 struct amd64_pvt *pvt; 645 int intlv_shift; 646 u64 input_addr; 647 648 pvt = mci->pvt_info; 649 650 /* 651 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E) 652 * concerning translating a DramAddr to an InputAddr. 653 */ 654 intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0)); 655 input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) + 656 (dram_addr & 0xfff); 657 658 edac_dbg(2, " Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n", 659 intlv_shift, (unsigned long)dram_addr, 660 (unsigned long)input_addr); 661 662 return input_addr; 663 } 664 665 /* 666 * Translate the SysAddr represented by @sys_addr to an InputAddr. It is 667 * assumed that @sys_addr maps to the node given by mci. 668 */ 669 static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr) 670 { 671 u64 input_addr; 672 673 input_addr = 674 dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr)); 675 676 edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n", 677 (unsigned long)sys_addr, (unsigned long)input_addr); 678 679 return input_addr; 680 } 681 682 /* Map the Error address to a PAGE and PAGE OFFSET. */ 683 static inline void error_address_to_page_and_offset(u64 error_address, 684 struct err_info *err) 685 { 686 err->page = (u32) (error_address >> PAGE_SHIFT); 687 err->offset = ((u32) error_address) & ~PAGE_MASK; 688 } 689 690 /* 691 * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address 692 * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers 693 * of a node that detected an ECC memory error. mci represents the node that 694 * the error address maps to (possibly different from the node that detected 695 * the error). Return the number of the csrow that sys_addr maps to, or -1 on 696 * error. 697 */ 698 static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) 699 { 700 int csrow; 701 702 csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr)); 703 704 if (csrow == -1) 705 amd64_mc_err(mci, "Failed to translate InputAddr to csrow for " 706 "address 0x%lx\n", (unsigned long)sys_addr); 707 return csrow; 708 } 709 710 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); 711 712 /* 713 * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs 714 * are ECC capable. 
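 * With UMCs this means DimmEccEn must be set on every initialized (SdpInit)
 * channel; on legacy controllers a single DCLR bit (19 from revF onwards,
 * 17 before that) reports it for the whole DCT.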
715 */ 716 static unsigned long determine_edac_cap(struct amd64_pvt *pvt) 717 { 718 unsigned long edac_cap = EDAC_FLAG_NONE; 719 u8 bit; 720 721 if (pvt->umc) { 722 u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0; 723 724 for_each_umc(i) { 725 if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT)) 726 continue; 727 728 umc_en_mask |= BIT(i); 729 730 /* UMC Configuration bit 12 (DimmEccEn) */ 731 if (pvt->umc[i].umc_cfg & BIT(12)) 732 dimm_ecc_en_mask |= BIT(i); 733 } 734 735 if (umc_en_mask == dimm_ecc_en_mask) 736 edac_cap = EDAC_FLAG_SECDED; 737 } else { 738 bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F) 739 ? 19 740 : 17; 741 742 if (pvt->dclr0 & BIT(bit)) 743 edac_cap = EDAC_FLAG_SECDED; 744 } 745 746 return edac_cap; 747 } 748 749 static void debug_display_dimm_sizes(struct amd64_pvt *, u8); 750 751 static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) 752 { 753 edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr); 754 755 if (pvt->dram_type == MEM_LRDDR3) { 756 u32 dcsm = pvt->csels[chan].csmasks[0]; 757 /* 758 * It's assumed all LRDIMMs in a DCT are going to be of 759 * same 'type' until proven otherwise. So, use a cs 760 * value of '0' here to get dcsm value. 761 */ 762 edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3)); 763 } 764 765 edac_dbg(1, "All DIMMs support ECC:%s\n", 766 (dclr & BIT(19)) ? "yes" : "no"); 767 768 769 edac_dbg(1, " PAR/ERR parity: %s\n", 770 (dclr & BIT(8)) ? "enabled" : "disabled"); 771 772 if (pvt->fam == 0x10) 773 edac_dbg(1, " DCT 128bit mode width: %s\n", 774 (dclr & BIT(11)) ? "128b" : "64b"); 775 776 edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n", 777 (dclr & BIT(12)) ? "yes" : "no", 778 (dclr & BIT(13)) ? "yes" : "no", 779 (dclr & BIT(14)) ? "yes" : "no", 780 (dclr & BIT(15)) ? "yes" : "no"); 781 } 782 783 #define CS_EVEN_PRIMARY BIT(0) 784 #define CS_ODD_PRIMARY BIT(1) 785 #define CS_EVEN_SECONDARY BIT(2) 786 #define CS_ODD_SECONDARY BIT(3) 787 788 #define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) 789 #define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) 790 791 static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) 792 { 793 int cs_mode = 0; 794 795 if (csrow_enabled(2 * dimm, ctrl, pvt)) 796 cs_mode |= CS_EVEN_PRIMARY; 797 798 if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) 799 cs_mode |= CS_ODD_PRIMARY; 800 801 /* Asymmetric dual-rank DIMM support. 
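	 * Such a DIMM has ranks of different sizes; the odd chip select is
	 * described by the secondary base/mask pair, so flag it here and let
	 * f17_addr_mask_to_cs_size() pick csmasks_sec[] for it.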
*/ 802 if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) 803 cs_mode |= CS_ODD_SECONDARY; 804 805 return cs_mode; 806 } 807 808 static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) 809 { 810 int dimm, size0, size1, cs0, cs1, cs_mode; 811 812 edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); 813 814 for (dimm = 0; dimm < 2; dimm++) { 815 cs0 = dimm * 2; 816 cs1 = dimm * 2 + 1; 817 818 cs_mode = f17_get_cs_mode(dimm, ctrl, pvt); 819 820 size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0); 821 size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1); 822 823 amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", 824 cs0, size0, 825 cs1, size1); 826 } 827 } 828 829 static void __dump_misc_regs_df(struct amd64_pvt *pvt) 830 { 831 struct amd64_umc *umc; 832 u32 i, tmp, umc_base; 833 834 for_each_umc(i) { 835 umc_base = get_umc_base(i); 836 umc = &pvt->umc[i]; 837 838 edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg); 839 edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg); 840 edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl); 841 edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl); 842 843 amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ECC_BAD_SYMBOL, &tmp); 844 edac_dbg(1, "UMC%d ECC bad symbol: 0x%x\n", i, tmp); 845 846 amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_UMC_CAP, &tmp); 847 edac_dbg(1, "UMC%d UMC cap: 0x%x\n", i, tmp); 848 edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi); 849 850 edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n", 851 i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no", 852 (umc->umc_cap_hi & BIT(31)) ? "yes" : "no"); 853 edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n", 854 i, (umc->umc_cfg & BIT(12)) ? "yes" : "no"); 855 edac_dbg(1, "UMC%d x4 DIMMs present: %s\n", 856 i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no"); 857 edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", 858 i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no"); 859 860 if (pvt->dram_type == MEM_LRDDR4) { 861 amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp); 862 edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n", 863 i, 1 << ((tmp >> 4) & 0x3)); 864 } 865 866 debug_display_dimm_sizes_df(pvt, i); 867 } 868 869 edac_dbg(1, "F0x104 (DRAM Hole Address): 0x%08x, base: 0x%08x\n", 870 pvt->dhar, dhar_base(pvt)); 871 } 872 873 /* Display and decode various NB registers for debug purposes. */ 874 static void __dump_misc_regs(struct amd64_pvt *pvt) 875 { 876 edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); 877 878 edac_dbg(1, " NB two channel DRAM capable: %s\n", 879 (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no"); 880 881 edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n", 882 (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no", 883 (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no"); 884 885 debug_dump_dramcfg_low(pvt, pvt->dclr0, 0); 886 887 edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare); 888 889 edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n", 890 pvt->dhar, dhar_base(pvt), 891 (pvt->fam == 0xf) ? k8_dhar_offset(pvt) 892 : f10_dhar_offset(pvt)); 893 894 debug_display_dimm_sizes(pvt, 0); 895 896 /* everything below this point is Fam10h and above */ 897 if (pvt->fam == 0xf) 898 return; 899 900 debug_display_dimm_sizes(pvt, 1); 901 902 /* Only if NOT ganged does dclr1 have valid info */ 903 if (!dct_ganging_enabled(pvt)) 904 debug_dump_dramcfg_low(pvt, pvt->dclr1, 1); 905 } 906 907 /* Display and decode various NB registers for debug purposes. 
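 * Systems with UMCs (family 17h and later) take the __dump_misc_regs_df()
 * path, older ones the legacy NB path.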
*/ 908 static void dump_misc_regs(struct amd64_pvt *pvt) 909 { 910 if (pvt->umc) 911 __dump_misc_regs_df(pvt); 912 else 913 __dump_misc_regs(pvt); 914 915 edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); 916 917 amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); 918 } 919 920 /* 921 * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60] 922 */ 923 static void prep_chip_selects(struct amd64_pvt *pvt) 924 { 925 if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { 926 pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; 927 pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8; 928 } else if (pvt->fam == 0x15 && pvt->model == 0x30) { 929 pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; 930 pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; 931 } else if (pvt->fam >= 0x17) { 932 int umc; 933 934 for_each_umc(umc) { 935 pvt->csels[umc].b_cnt = 4; 936 pvt->csels[umc].m_cnt = 2; 937 } 938 939 } else { 940 pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; 941 pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; 942 } 943 } 944 945 static void read_umc_base_mask(struct amd64_pvt *pvt) 946 { 947 u32 umc_base_reg, umc_base_reg_sec; 948 u32 umc_mask_reg, umc_mask_reg_sec; 949 u32 base_reg, base_reg_sec; 950 u32 mask_reg, mask_reg_sec; 951 u32 *base, *base_sec; 952 u32 *mask, *mask_sec; 953 int cs, umc; 954 955 for_each_umc(umc) { 956 umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR; 957 umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC; 958 959 for_each_chip_select(cs, umc, pvt) { 960 base = &pvt->csels[umc].csbases[cs]; 961 base_sec = &pvt->csels[umc].csbases_sec[cs]; 962 963 base_reg = umc_base_reg + (cs * 4); 964 base_reg_sec = umc_base_reg_sec + (cs * 4); 965 966 if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) 967 edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", 968 umc, cs, *base, base_reg); 969 970 if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec)) 971 edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n", 972 umc, cs, *base_sec, base_reg_sec); 973 } 974 975 umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; 976 umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC; 977 978 for_each_chip_select_mask(cs, umc, pvt) { 979 mask = &pvt->csels[umc].csmasks[cs]; 980 mask_sec = &pvt->csels[umc].csmasks_sec[cs]; 981 982 mask_reg = umc_mask_reg + (cs * 4); 983 mask_reg_sec = umc_mask_reg_sec + (cs * 4); 984 985 if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) 986 edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", 987 umc, cs, *mask, mask_reg); 988 989 if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec)) 990 edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n", 991 umc, cs, *mask_sec, mask_reg_sec); 992 } 993 } 994 } 995 996 /* 997 * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers 998 */ 999 static void read_dct_base_mask(struct amd64_pvt *pvt) 1000 { 1001 int cs; 1002 1003 prep_chip_selects(pvt); 1004 1005 if (pvt->umc) 1006 return read_umc_base_mask(pvt); 1007 1008 for_each_chip_select(cs, 0, pvt) { 1009 int reg0 = DCSB0 + (cs * 4); 1010 int reg1 = DCSB1 + (cs * 4); 1011 u32 *base0 = &pvt->csels[0].csbases[cs]; 1012 u32 *base1 = &pvt->csels[1].csbases[cs]; 1013 1014 if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) 1015 edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", 1016 cs, *base0, reg0); 1017 1018 if (pvt->fam == 0xf) 1019 continue; 1020 1021 if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) 1022 edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", 1023 cs, *base1, (pvt->fam == 0x10) ? 
reg1 1024 : reg0); 1025 } 1026 1027 for_each_chip_select_mask(cs, 0, pvt) { 1028 int reg0 = DCSM0 + (cs * 4); 1029 int reg1 = DCSM1 + (cs * 4); 1030 u32 *mask0 = &pvt->csels[0].csmasks[cs]; 1031 u32 *mask1 = &pvt->csels[1].csmasks[cs]; 1032 1033 if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) 1034 edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", 1035 cs, *mask0, reg0); 1036 1037 if (pvt->fam == 0xf) 1038 continue; 1039 1040 if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) 1041 edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", 1042 cs, *mask1, (pvt->fam == 0x10) ? reg1 1043 : reg0); 1044 } 1045 } 1046 1047 static void determine_memory_type(struct amd64_pvt *pvt) 1048 { 1049 u32 dram_ctrl, dcsm; 1050 1051 if (pvt->umc) { 1052 if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) 1053 pvt->dram_type = MEM_LRDDR4; 1054 else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) 1055 pvt->dram_type = MEM_RDDR4; 1056 else 1057 pvt->dram_type = MEM_DDR4; 1058 return; 1059 } 1060 1061 switch (pvt->fam) { 1062 case 0xf: 1063 if (pvt->ext_model >= K8_REV_F) 1064 goto ddr3; 1065 1066 pvt->dram_type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; 1067 return; 1068 1069 case 0x10: 1070 if (pvt->dchr0 & DDR3_MODE) 1071 goto ddr3; 1072 1073 pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; 1074 return; 1075 1076 case 0x15: 1077 if (pvt->model < 0x60) 1078 goto ddr3; 1079 1080 /* 1081 * Model 0x60h needs special handling: 1082 * 1083 * We use a Chip Select value of '0' to obtain dcsm. 1084 * Theoretically, it is possible to populate LRDIMMs of different 1085 * 'Rank' value on a DCT. But this is not the common case. So, 1086 * it's reasonable to assume all DIMMs are going to be of same 1087 * 'type' until proven otherwise. 1088 */ 1089 amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl); 1090 dcsm = pvt->csels[0].csmasks[0]; 1091 1092 if (((dram_ctrl >> 8) & 0x7) == 0x2) 1093 pvt->dram_type = MEM_DDR4; 1094 else if (pvt->dclr0 & BIT(16)) 1095 pvt->dram_type = MEM_DDR3; 1096 else if (dcsm & 0x3) 1097 pvt->dram_type = MEM_LRDDR3; 1098 else 1099 pvt->dram_type = MEM_RDDR3; 1100 1101 return; 1102 1103 case 0x16: 1104 goto ddr3; 1105 1106 default: 1107 WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam); 1108 pvt->dram_type = MEM_EMPTY; 1109 } 1110 return; 1111 1112 ddr3: 1113 pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; 1114 } 1115 1116 /* Get the number of DCT channels the memory controller is using. */ 1117 static int k8_early_channel_count(struct amd64_pvt *pvt) 1118 { 1119 int flag; 1120 1121 if (pvt->ext_model >= K8_REV_F) 1122 /* RevF (NPT) and later */ 1123 flag = pvt->dclr0 & WIDTH_128; 1124 else 1125 /* RevE and earlier */ 1126 flag = pvt->dclr0 & REVE_WIDTH_128; 1127 1128 /* not used */ 1129 pvt->dclr1 = 0; 1130 1131 return (flag) ? 
2 : 1; 1132 } 1133 1134 /* On F10h and later ErrAddr is MC4_ADDR[47:1] */ 1135 static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m) 1136 { 1137 u16 mce_nid = amd_get_nb_id(m->extcpu); 1138 struct mem_ctl_info *mci; 1139 u8 start_bit = 1; 1140 u8 end_bit = 47; 1141 u64 addr; 1142 1143 mci = edac_mc_find(mce_nid); 1144 if (!mci) 1145 return 0; 1146 1147 pvt = mci->pvt_info; 1148 1149 if (pvt->fam == 0xf) { 1150 start_bit = 3; 1151 end_bit = 39; 1152 } 1153 1154 addr = m->addr & GENMASK_ULL(end_bit, start_bit); 1155 1156 /* 1157 * Erratum 637 workaround 1158 */ 1159 if (pvt->fam == 0x15) { 1160 u64 cc6_base, tmp_addr; 1161 u32 tmp; 1162 u8 intlv_en; 1163 1164 if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7) 1165 return addr; 1166 1167 1168 amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp); 1169 intlv_en = tmp >> 21 & 0x7; 1170 1171 /* add [47:27] + 3 trailing bits */ 1172 cc6_base = (tmp & GENMASK_ULL(20, 0)) << 3; 1173 1174 /* reverse and add DramIntlvEn */ 1175 cc6_base |= intlv_en ^ 0x7; 1176 1177 /* pin at [47:24] */ 1178 cc6_base <<= 24; 1179 1180 if (!intlv_en) 1181 return cc6_base | (addr & GENMASK_ULL(23, 0)); 1182 1183 amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp); 1184 1185 /* faster log2 */ 1186 tmp_addr = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1); 1187 1188 /* OR DramIntlvSel into bits [14:12] */ 1189 tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9; 1190 1191 /* add remaining [11:0] bits from original MC4_ADDR */ 1192 tmp_addr |= addr & GENMASK_ULL(11, 0); 1193 1194 return cc6_base | tmp_addr; 1195 } 1196 1197 return addr; 1198 } 1199 1200 static struct pci_dev *pci_get_related_function(unsigned int vendor, 1201 unsigned int device, 1202 struct pci_dev *related) 1203 { 1204 struct pci_dev *dev = NULL; 1205 1206 while ((dev = pci_get_device(vendor, device, dev))) { 1207 if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) && 1208 (dev->bus->number == related->bus->number) && 1209 (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) 1210 break; 1211 } 1212 1213 return dev; 1214 } 1215 1216 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) 1217 { 1218 struct amd_northbridge *nb; 1219 struct pci_dev *f1 = NULL; 1220 unsigned int pci_func; 1221 int off = range << 3; 1222 u32 llim; 1223 1224 amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off, &pvt->ranges[range].base.lo); 1225 amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo); 1226 1227 if (pvt->fam == 0xf) 1228 return; 1229 1230 if (!dram_rw(pvt, range)) 1231 return; 1232 1233 amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off, &pvt->ranges[range].base.hi); 1234 amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi); 1235 1236 /* F15h: factor in CC6 save area by reading dst node's limit reg */ 1237 if (pvt->fam != 0x15) 1238 return; 1239 1240 nb = node_to_amd_nb(dram_dst_node(pvt, range)); 1241 if (WARN_ON(!nb)) 1242 return; 1243 1244 if (pvt->model == 0x60) 1245 pci_func = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1; 1246 else if (pvt->model == 0x30) 1247 pci_func = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1; 1248 else 1249 pci_func = PCI_DEVICE_ID_AMD_15H_NB_F1; 1250 1251 f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc); 1252 if (WARN_ON(!f1)) 1253 return; 1254 1255 amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); 1256 1257 pvt->ranges[range].lim.lo &= GENMASK_ULL(15, 0); 1258 1259 /* {[39:27],111b} */ 1260 pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16; 1261 1262 pvt->ranges[range].lim.hi &= 
GENMASK_ULL(7, 0); 1263 1264 /* [47:40] */ 1265 pvt->ranges[range].lim.hi |= llim >> 13; 1266 1267 pci_dev_put(f1); 1268 } 1269 1270 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, 1271 struct err_info *err) 1272 { 1273 struct amd64_pvt *pvt = mci->pvt_info; 1274 1275 error_address_to_page_and_offset(sys_addr, err); 1276 1277 /* 1278 * Find out which node the error address belongs to. This may be 1279 * different from the node that detected the error. 1280 */ 1281 err->src_mci = find_mc_by_sys_addr(mci, sys_addr); 1282 if (!err->src_mci) { 1283 amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n", 1284 (unsigned long)sys_addr); 1285 err->err_code = ERR_NODE; 1286 return; 1287 } 1288 1289 /* Now map the sys_addr to a CSROW */ 1290 err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr); 1291 if (err->csrow < 0) { 1292 err->err_code = ERR_CSROW; 1293 return; 1294 } 1295 1296 /* CHIPKILL enabled */ 1297 if (pvt->nbcfg & NBCFG_CHIPKILL) { 1298 err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); 1299 if (err->channel < 0) { 1300 /* 1301 * Syndrome didn't map, so we don't know which of the 1302 * 2 DIMMs is in error. So we need to ID 'both' of them 1303 * as suspect. 1304 */ 1305 amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - " 1306 "possible error reporting race\n", 1307 err->syndrome); 1308 err->err_code = ERR_CHANNEL; 1309 return; 1310 } 1311 } else { 1312 /* 1313 * non-chipkill ecc mode 1314 * 1315 * The k8 documentation is unclear about how to determine the 1316 * channel number when using non-chipkill memory. This method 1317 * was obtained from email communication with someone at AMD. 1318 * (Wish the email was placed in this comment - norsk) 1319 */ 1320 err->channel = ((sys_addr & BIT(3)) != 0); 1321 } 1322 } 1323 1324 static int ddr2_cs_size(unsigned i, bool dct_width) 1325 { 1326 unsigned shift = 0; 1327 1328 if (i <= 2) 1329 shift = i; 1330 else if (!(i & 0x1)) 1331 shift = i >> 1; 1332 else 1333 shift = (i + 1) >> 1; 1334 1335 return 128 << (shift + !!dct_width); 1336 } 1337 1338 static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, 1339 unsigned cs_mode, int cs_mask_nr) 1340 { 1341 u32 dclr = dct ? pvt->dclr1 : pvt->dclr0; 1342 1343 if (pvt->ext_model >= K8_REV_F) { 1344 WARN_ON(cs_mode > 11); 1345 return ddr2_cs_size(cs_mode, dclr & WIDTH_128); 1346 } 1347 else if (pvt->ext_model >= K8_REV_D) { 1348 unsigned diff; 1349 WARN_ON(cs_mode > 10); 1350 1351 /* 1352 * the below calculation, besides trying to win an obfuscated C 1353 * contest, maps cs_mode values to DIMM chip select sizes. The 1354 * mappings are: 1355 * 1356 * cs_mode CS size (mb) 1357 * ======= ============ 1358 * 0 32 1359 * 1 64 1360 * 2 128 1361 * 3 128 1362 * 4 256 1363 * 5 512 1364 * 6 256 1365 * 7 512 1366 * 8 1024 1367 * 9 1024 1368 * 10 2048 1369 * 1370 * Basically, it calculates a value with which to shift the 1371 * smallest CS size of 32MB. 1372 * 1373 * ddr[23]_cs_size have a similar purpose. 1374 */ 1375 diff = cs_mode/3 + (unsigned)(cs_mode > 5); 1376 1377 return 32 << (cs_mode - diff); 1378 } 1379 else { 1380 WARN_ON(cs_mode > 6); 1381 return 32 << cs_mode; 1382 } 1383 } 1384 1385 /* 1386 * Get the number of DCT channels in use. 
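 * (On F10h, 128-bit mode means both channels are in use; otherwise the DBAM
 * registers of both DCTs are inspected for populated DIMMs.)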
1387 * 1388 * Return: 1389 * number of Memory Channels in operation 1390 * Pass back: 1391 * contents of the DCL0_LOW register 1392 */ 1393 static int f1x_early_channel_count(struct amd64_pvt *pvt) 1394 { 1395 int i, j, channels = 0; 1396 1397 /* On F10h, if we are in 128 bit mode, then we are using 2 channels */ 1398 if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128)) 1399 return 2; 1400 1401 /* 1402 * Need to check if in unganged mode: In such, there are 2 channels, 1403 * but they are not in 128 bit mode and thus the above 'dclr0' status 1404 * bit will be OFF. 1405 * 1406 * Need to check DCT0[0] and DCT1[0] to see if only one of them has 1407 * their CSEnable bit on. If so, then SINGLE DIMM case. 1408 */ 1409 edac_dbg(0, "Data width is not 128 bits - need more decoding\n"); 1410 1411 /* 1412 * Check DRAM Bank Address Mapping values for each DIMM to see if there 1413 * is more than just one DIMM present in unganged mode. Need to check 1414 * both controllers since DIMMs can be placed in either one. 1415 */ 1416 for (i = 0; i < 2; i++) { 1417 u32 dbam = (i ? pvt->dbam1 : pvt->dbam0); 1418 1419 for (j = 0; j < 4; j++) { 1420 if (DBAM_DIMM(j, dbam) > 0) { 1421 channels++; 1422 break; 1423 } 1424 } 1425 } 1426 1427 if (channels > 2) 1428 channels = 2; 1429 1430 amd64_info("MCT channel count: %d\n", channels); 1431 1432 return channels; 1433 } 1434 1435 static int f17_early_channel_count(struct amd64_pvt *pvt) 1436 { 1437 int i, channels = 0; 1438 1439 /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */ 1440 for_each_umc(i) 1441 channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); 1442 1443 amd64_info("MCT channel count: %d\n", channels); 1444 1445 return channels; 1446 } 1447 1448 static int ddr3_cs_size(unsigned i, bool dct_width) 1449 { 1450 unsigned shift = 0; 1451 int cs_size = 0; 1452 1453 if (i == 0 || i == 3 || i == 4) 1454 cs_size = -1; 1455 else if (i <= 2) 1456 shift = i; 1457 else if (i == 12) 1458 shift = 7; 1459 else if (!(i & 0x1)) 1460 shift = i >> 1; 1461 else 1462 shift = (i + 1) >> 1; 1463 1464 if (cs_size != -1) 1465 cs_size = (128 * (1 << !!dct_width)) << shift; 1466 1467 return cs_size; 1468 } 1469 1470 static int ddr3_lrdimm_cs_size(unsigned i, unsigned rank_multiply) 1471 { 1472 unsigned shift = 0; 1473 int cs_size = 0; 1474 1475 if (i < 4 || i == 6) 1476 cs_size = -1; 1477 else if (i == 12) 1478 shift = 7; 1479 else if (!(i & 0x1)) 1480 shift = i >> 1; 1481 else 1482 shift = (i + 1) >> 1; 1483 1484 if (cs_size != -1) 1485 cs_size = rank_multiply * (128 << shift); 1486 1487 return cs_size; 1488 } 1489 1490 static int ddr4_cs_size(unsigned i) 1491 { 1492 int cs_size = 0; 1493 1494 if (i == 0) 1495 cs_size = -1; 1496 else if (i == 1) 1497 cs_size = 1024; 1498 else 1499 /* Min cs_size = 1G */ 1500 cs_size = 1024 * (1 << (i >> 1)); 1501 1502 return cs_size; 1503 } 1504 1505 static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, 1506 unsigned cs_mode, int cs_mask_nr) 1507 { 1508 u32 dclr = dct ? 
pvt->dclr1 : pvt->dclr0; 1509 1510 WARN_ON(cs_mode > 11); 1511 1512 if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE) 1513 return ddr3_cs_size(cs_mode, dclr & WIDTH_128); 1514 else 1515 return ddr2_cs_size(cs_mode, dclr & WIDTH_128); 1516 } 1517 1518 /* 1519 * F15h supports only 64bit DCT interfaces 1520 */ 1521 static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, 1522 unsigned cs_mode, int cs_mask_nr) 1523 { 1524 WARN_ON(cs_mode > 12); 1525 1526 return ddr3_cs_size(cs_mode, false); 1527 } 1528 1529 /* F15h M60h supports DDR4 mapping as well.. */ 1530 static int f15_m60h_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, 1531 unsigned cs_mode, int cs_mask_nr) 1532 { 1533 int cs_size; 1534 u32 dcsm = pvt->csels[dct].csmasks[cs_mask_nr]; 1535 1536 WARN_ON(cs_mode > 12); 1537 1538 if (pvt->dram_type == MEM_DDR4) { 1539 if (cs_mode > 9) 1540 return -1; 1541 1542 cs_size = ddr4_cs_size(cs_mode); 1543 } else if (pvt->dram_type == MEM_LRDDR3) { 1544 unsigned rank_multiply = dcsm & 0xf; 1545 1546 if (rank_multiply == 3) 1547 rank_multiply = 4; 1548 cs_size = ddr3_lrdimm_cs_size(cs_mode, rank_multiply); 1549 } else { 1550 /* Minimum cs size is 512mb for F15hM60h*/ 1551 if (cs_mode == 0x1) 1552 return -1; 1553 1554 cs_size = ddr3_cs_size(cs_mode, false); 1555 } 1556 1557 return cs_size; 1558 } 1559 1560 /* 1561 * F16h and F15h model 30h have only limited cs_modes. 1562 */ 1563 static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, 1564 unsigned cs_mode, int cs_mask_nr) 1565 { 1566 WARN_ON(cs_mode > 12); 1567 1568 if (cs_mode == 6 || cs_mode == 8 || 1569 cs_mode == 9 || cs_mode == 12) 1570 return -1; 1571 else 1572 return ddr3_cs_size(cs_mode, false); 1573 } 1574 1575 static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, 1576 unsigned int cs_mode, int csrow_nr) 1577 { 1578 u32 addr_mask_orig, addr_mask_deinterleaved; 1579 u32 msb, weight, num_zero_bits; 1580 int dimm, size = 0; 1581 1582 /* No Chip Selects are enabled. */ 1583 if (!cs_mode) 1584 return size; 1585 1586 /* Requested size of an even CS but none are enabled. */ 1587 if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1)) 1588 return size; 1589 1590 /* Requested size of an odd CS but none are enabled. */ 1591 if (!(cs_mode & CS_ODD) && (csrow_nr & 1)) 1592 return size; 1593 1594 /* 1595 * There is one mask per DIMM, and two Chip Selects per DIMM. 1596 * CS0 and CS1 -> DIMM0 1597 * CS2 and CS3 -> DIMM1 1598 */ 1599 dimm = csrow_nr >> 1; 1600 1601 /* Asymmetric dual-rank DIMM support. */ 1602 if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) 1603 addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm]; 1604 else 1605 addr_mask_orig = pvt->csels[umc].csmasks[dimm]; 1606 1607 /* 1608 * The number of zero bits in the mask is equal to the number of bits 1609 * in a full mask minus the number of bits in the current mask. 1610 * 1611 * The MSB is the number of bits in the full mask because BIT[0] is 1612 * always 0. 1613 */ 1614 msb = fls(addr_mask_orig) - 1; 1615 weight = hweight_long(addr_mask_orig); 1616 num_zero_bits = msb - weight; 1617 1618 /* Take the number of zero bits off from the top of the mask. */ 1619 addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); 1620 1621 edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); 1622 edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); 1623 edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); 1624 1625 /* Register [31:1] = Address [39:9]. Size is in kBs here. 
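	 * E.g. a deinterleaved mask of 0x3fffffe (bits [25:1] set, i.e.
	 * address bits [33:9]) gives (0x3fffffe >> 2) + 1 = 0x1000000 kB,
	 * returned further down as 16384 MB.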
*/ 1626 size = (addr_mask_deinterleaved >> 2) + 1; 1627 1628 /* Return size in MBs. */ 1629 return size >> 10; 1630 } 1631 1632 static void read_dram_ctl_register(struct amd64_pvt *pvt) 1633 { 1634 1635 if (pvt->fam == 0xf) 1636 return; 1637 1638 if (!amd64_read_pci_cfg(pvt->F2, DCT_SEL_LO, &pvt->dct_sel_lo)) { 1639 edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n", 1640 pvt->dct_sel_lo, dct_sel_baseaddr(pvt)); 1641 1642 edac_dbg(0, " DCTs operate in %s mode\n", 1643 (dct_ganging_enabled(pvt) ? "ganged" : "unganged")); 1644 1645 if (!dct_ganging_enabled(pvt)) 1646 edac_dbg(0, " Address range split per DCT: %s\n", 1647 (dct_high_range_enabled(pvt) ? "yes" : "no")); 1648 1649 edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n", 1650 (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"), 1651 (dct_memory_cleared(pvt) ? "yes" : "no")); 1652 1653 edac_dbg(0, " channel interleave: %s, " 1654 "interleave bits selector: 0x%x\n", 1655 (dct_interleave_enabled(pvt) ? "enabled" : "disabled"), 1656 dct_sel_interleave_addr(pvt)); 1657 } 1658 1659 amd64_read_pci_cfg(pvt->F2, DCT_SEL_HI, &pvt->dct_sel_hi); 1660 } 1661 1662 /* 1663 * Determine channel (DCT) based on the interleaving mode (see F15h M30h BKDG, 1664 * 2.10.12 Memory Interleaving Modes). 1665 */ 1666 static u8 f15_m30h_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, 1667 u8 intlv_en, int num_dcts_intlv, 1668 u32 dct_sel) 1669 { 1670 u8 channel = 0; 1671 u8 select; 1672 1673 if (!(intlv_en)) 1674 return (u8)(dct_sel); 1675 1676 if (num_dcts_intlv == 2) { 1677 select = (sys_addr >> 8) & 0x3; 1678 channel = select ? 0x3 : 0; 1679 } else if (num_dcts_intlv == 4) { 1680 u8 intlv_addr = dct_sel_interleave_addr(pvt); 1681 switch (intlv_addr) { 1682 case 0x4: 1683 channel = (sys_addr >> 8) & 0x3; 1684 break; 1685 case 0x5: 1686 channel = (sys_addr >> 9) & 0x3; 1687 break; 1688 } 1689 } 1690 return channel; 1691 } 1692 1693 /* 1694 * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory 1695 * Interleaving Modes. 1696 */ 1697 static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, 1698 bool hi_range_sel, u8 intlv_en) 1699 { 1700 u8 dct_sel_high = (pvt->dct_sel_lo >> 1) & 1; 1701 1702 if (dct_ganging_enabled(pvt)) 1703 return 0; 1704 1705 if (hi_range_sel) 1706 return dct_sel_high; 1707 1708 /* 1709 * see F2x110[DctSelIntLvAddr] - channel interleave mode 1710 */ 1711 if (dct_interleave_enabled(pvt)) { 1712 u8 intlv_addr = dct_sel_interleave_addr(pvt); 1713 1714 /* return DCT select function: 0=DCT0, 1=DCT1 */ 1715 if (!intlv_addr) 1716 return sys_addr >> 6 & 1; 1717 1718 if (intlv_addr & 0x2) { 1719 u8 shift = intlv_addr & 0x1 ? 9 : 6; 1720 u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) & 1; 1721 1722 return ((sys_addr >> shift) & 1) ^ temp; 1723 } 1724 1725 if (intlv_addr & 0x4) { 1726 u8 shift = intlv_addr & 0x1 ? 
9 : 8; 1727 1728 return (sys_addr >> shift) & 1; 1729 } 1730 1731 return (sys_addr >> (12 + hweight8(intlv_en))) & 1; 1732 } 1733 1734 if (dct_high_range_enabled(pvt)) 1735 return ~dct_sel_high & 1; 1736 1737 return 0; 1738 } 1739 1740 /* Convert the sys_addr to the normalized DCT address */ 1741 static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, 1742 u64 sys_addr, bool hi_rng, 1743 u32 dct_sel_base_addr) 1744 { 1745 u64 chan_off; 1746 u64 dram_base = get_dram_base(pvt, range); 1747 u64 hole_off = f10_dhar_offset(pvt); 1748 u64 dct_sel_base_off = (u64)(pvt->dct_sel_hi & 0xFFFFFC00) << 16; 1749 1750 if (hi_rng) { 1751 /* 1752 * if 1753 * base address of high range is below 4Gb 1754 * (bits [47:27] at [31:11]) 1755 * DRAM address space on this DCT is hoisted above 4Gb && 1756 * sys_addr > 4Gb 1757 * 1758 * remove hole offset from sys_addr 1759 * else 1760 * remove high range offset from sys_addr 1761 */ 1762 if ((!(dct_sel_base_addr >> 16) || 1763 dct_sel_base_addr < dhar_base(pvt)) && 1764 dhar_valid(pvt) && 1765 (sys_addr >= BIT_64(32))) 1766 chan_off = hole_off; 1767 else 1768 chan_off = dct_sel_base_off; 1769 } else { 1770 /* 1771 * if 1772 * we have a valid hole && 1773 * sys_addr > 4Gb 1774 * 1775 * remove hole 1776 * else 1777 * remove dram base to normalize to DCT address 1778 */ 1779 if (dhar_valid(pvt) && (sys_addr >= BIT_64(32))) 1780 chan_off = hole_off; 1781 else 1782 chan_off = dram_base; 1783 } 1784 1785 return (sys_addr & GENMASK_ULL(47,6)) - (chan_off & GENMASK_ULL(47,23)); 1786 } 1787 1788 /* 1789 * checks if the csrow passed in is marked as SPARED, if so returns the new 1790 * spare row 1791 */ 1792 static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) 1793 { 1794 int tmp_cs; 1795 1796 if (online_spare_swap_done(pvt, dct) && 1797 csrow == online_spare_bad_dramcs(pvt, dct)) { 1798 1799 for_each_chip_select(tmp_cs, dct, pvt) { 1800 if (chip_select_base(tmp_cs, dct, pvt) & 0x2) { 1801 csrow = tmp_cs; 1802 break; 1803 } 1804 } 1805 } 1806 return csrow; 1807 } 1808 1809 /* 1810 * Iterate over the DRAM DCT "base" and "mask" registers looking for a 1811 * SystemAddr match on the specified 'ChannelSelect' and 'NodeID' 1812 * 1813 * Return: 1814 * -EINVAL: NOT FOUND 1815 * 0..csrow = Chip-Select Row 1816 */ 1817 static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) 1818 { 1819 struct mem_ctl_info *mci; 1820 struct amd64_pvt *pvt; 1821 u64 cs_base, cs_mask; 1822 int cs_found = -EINVAL; 1823 int csrow; 1824 1825 mci = edac_mc_find(nid); 1826 if (!mci) 1827 return cs_found; 1828 1829 pvt = mci->pvt_info; 1830 1831 edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr, dct); 1832 1833 for_each_chip_select(csrow, dct, pvt) { 1834 if (!csrow_enabled(csrow, dct, pvt)) 1835 continue; 1836 1837 get_cs_base_and_mask(pvt, csrow, dct, &cs_base, &cs_mask); 1838 1839 edac_dbg(1, " CSROW=%d CSBase=0x%llx CSMask=0x%llx\n", 1840 csrow, cs_base, cs_mask); 1841 1842 cs_mask = ~cs_mask; 1843 1844 edac_dbg(1, " (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n", 1845 (in_addr & cs_mask), (cs_base & cs_mask)); 1846 1847 if ((in_addr & cs_mask) == (cs_base & cs_mask)) { 1848 if (pvt->fam == 0x15 && pvt->model >= 0x30) { 1849 cs_found = csrow; 1850 break; 1851 } 1852 cs_found = f10_process_possible_spare(pvt, dct, csrow); 1853 1854 edac_dbg(1, " MATCH csrow=%d\n", cs_found); 1855 break; 1856 } 1857 } 1858 return cs_found; 1859 } 1860 1861 /* 1862 * See F2x10C. 
Non-interleaved graphics framebuffer memory under the 16G is 1863 * swapped with a region located at the bottom of memory so that the GPU can use 1864 * the interleaved region and thus two channels. 1865 */ 1866 static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr) 1867 { 1868 u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr; 1869 1870 if (pvt->fam == 0x10) { 1871 /* only revC3 and revE have that feature */ 1872 if (pvt->model < 4 || (pvt->model < 0xa && pvt->stepping < 3)) 1873 return sys_addr; 1874 } 1875 1876 amd64_read_pci_cfg(pvt->F2, SWAP_INTLV_REG, &swap_reg); 1877 1878 if (!(swap_reg & 0x1)) 1879 return sys_addr; 1880 1881 swap_base = (swap_reg >> 3) & 0x7f; 1882 swap_limit = (swap_reg >> 11) & 0x7f; 1883 rgn_size = (swap_reg >> 20) & 0x7f; 1884 tmp_addr = sys_addr >> 27; 1885 1886 if (!(sys_addr >> 34) && 1887 (((tmp_addr >= swap_base) && 1888 (tmp_addr <= swap_limit)) || 1889 (tmp_addr < rgn_size))) 1890 return sys_addr ^ (u64)swap_base << 27; 1891 1892 return sys_addr; 1893 } 1894 1895 /* For a given @dram_range, check if @sys_addr falls within it. */ 1896 static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, 1897 u64 sys_addr, int *chan_sel) 1898 { 1899 int cs_found = -EINVAL; 1900 u64 chan_addr; 1901 u32 dct_sel_base; 1902 u8 channel; 1903 bool high_range = false; 1904 1905 u8 node_id = dram_dst_node(pvt, range); 1906 u8 intlv_en = dram_intlv_en(pvt, range); 1907 u32 intlv_sel = dram_intlv_sel(pvt, range); 1908 1909 edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n", 1910 range, sys_addr, get_dram_limit(pvt, range)); 1911 1912 if (dhar_valid(pvt) && 1913 dhar_base(pvt) <= sys_addr && 1914 sys_addr < BIT_64(32)) { 1915 amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n", 1916 sys_addr); 1917 return -EINVAL; 1918 } 1919 1920 if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en))) 1921 return -EINVAL; 1922 1923 sys_addr = f1x_swap_interleaved_region(pvt, sys_addr); 1924 1925 dct_sel_base = dct_sel_baseaddr(pvt); 1926 1927 /* 1928 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to 1929 * select between DCT0 and DCT1. 
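	 * DctSelBaseAddr lives in F2x110[31:11] and names address bits
	 * [47:27], which is why sys_addr is shifted down by 27 and
	 * dct_sel_base by 11 before the comparison below.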
1930 */ 1931 if (dct_high_range_enabled(pvt) && 1932 !dct_ganging_enabled(pvt) && 1933 ((sys_addr >> 27) >= (dct_sel_base >> 11))) 1934 high_range = true; 1935 1936 channel = f1x_determine_channel(pvt, sys_addr, high_range, intlv_en); 1937 1938 chan_addr = f1x_get_norm_dct_addr(pvt, range, sys_addr, 1939 high_range, dct_sel_base); 1940 1941 /* Remove node interleaving, see F1x120 */ 1942 if (intlv_en) 1943 chan_addr = ((chan_addr >> (12 + hweight8(intlv_en))) << 12) | 1944 (chan_addr & 0xfff); 1945 1946 /* remove channel interleave */ 1947 if (dct_interleave_enabled(pvt) && 1948 !dct_high_range_enabled(pvt) && 1949 !dct_ganging_enabled(pvt)) { 1950 1951 if (dct_sel_interleave_addr(pvt) != 1) { 1952 if (dct_sel_interleave_addr(pvt) == 0x3) 1953 /* hash 9 */ 1954 chan_addr = ((chan_addr >> 10) << 9) | 1955 (chan_addr & 0x1ff); 1956 else 1957 /* A[6] or hash 6 */ 1958 chan_addr = ((chan_addr >> 7) << 6) | 1959 (chan_addr & 0x3f); 1960 } else 1961 /* A[12] */ 1962 chan_addr = ((chan_addr >> 13) << 12) | 1963 (chan_addr & 0xfff); 1964 } 1965 1966 edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr); 1967 1968 cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel); 1969 1970 if (cs_found >= 0) 1971 *chan_sel = channel; 1972 1973 return cs_found; 1974 } 1975 1976 static int f15_m30h_match_to_this_node(struct amd64_pvt *pvt, unsigned range, 1977 u64 sys_addr, int *chan_sel) 1978 { 1979 int cs_found = -EINVAL; 1980 int num_dcts_intlv = 0; 1981 u64 chan_addr, chan_offset; 1982 u64 dct_base, dct_limit; 1983 u32 dct_cont_base_reg, dct_cont_limit_reg, tmp; 1984 u8 channel, alias_channel, leg_mmio_hole, dct_sel, dct_offset_en; 1985 1986 u64 dhar_offset = f10_dhar_offset(pvt); 1987 u8 intlv_addr = dct_sel_interleave_addr(pvt); 1988 u8 node_id = dram_dst_node(pvt, range); 1989 u8 intlv_en = dram_intlv_en(pvt, range); 1990 1991 amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &dct_cont_base_reg); 1992 amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &dct_cont_limit_reg); 1993 1994 dct_offset_en = (u8) ((dct_cont_base_reg >> 3) & BIT(0)); 1995 dct_sel = (u8) ((dct_cont_base_reg >> 4) & 0x7); 1996 1997 edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n", 1998 range, sys_addr, get_dram_limit(pvt, range)); 1999 2000 if (!(get_dram_base(pvt, range) <= sys_addr) && 2001 !(get_dram_limit(pvt, range) >= sys_addr)) 2002 return -EINVAL; 2003 2004 if (dhar_valid(pvt) && 2005 dhar_base(pvt) <= sys_addr && 2006 sys_addr < BIT_64(32)) { 2007 amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n", 2008 sys_addr); 2009 return -EINVAL; 2010 } 2011 2012 /* Verify sys_addr is within DCT Range. */ 2013 dct_base = (u64) dct_sel_baseaddr(pvt); 2014 dct_limit = (dct_cont_limit_reg >> 11) & 0x1FFF; 2015 2016 if (!(dct_cont_base_reg & BIT(0)) && 2017 !(dct_base <= (sys_addr >> 27) && 2018 dct_limit >= (sys_addr >> 27))) 2019 return -EINVAL; 2020 2021 /* Verify number of dct's that participate in channel interleaving. 
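	 * hweight8(intlv_en) is the number of interleaved DCTs; only 0, 2 or
	 * 4 are accepted here.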
*/ 2022 num_dcts_intlv = (int) hweight8(intlv_en); 2023 2024 if (!(num_dcts_intlv % 2 == 0) || (num_dcts_intlv > 4)) 2025 return -EINVAL; 2026 2027 if (pvt->model >= 0x60) 2028 channel = f1x_determine_channel(pvt, sys_addr, false, intlv_en); 2029 else 2030 channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en, 2031 num_dcts_intlv, dct_sel); 2032 2033 /* Verify we stay within the MAX number of channels allowed */ 2034 if (channel > 3) 2035 return -EINVAL; 2036 2037 leg_mmio_hole = (u8) (dct_cont_base_reg >> 1 & BIT(0)); 2038 2039 /* Get normalized DCT addr */ 2040 if (leg_mmio_hole && (sys_addr >= BIT_64(32))) 2041 chan_offset = dhar_offset; 2042 else 2043 chan_offset = dct_base << 27; 2044 2045 chan_addr = sys_addr - chan_offset; 2046 2047 /* remove channel interleave */ 2048 if (num_dcts_intlv == 2) { 2049 if (intlv_addr == 0x4) 2050 chan_addr = ((chan_addr >> 9) << 8) | 2051 (chan_addr & 0xff); 2052 else if (intlv_addr == 0x5) 2053 chan_addr = ((chan_addr >> 10) << 9) | 2054 (chan_addr & 0x1ff); 2055 else 2056 return -EINVAL; 2057 2058 } else if (num_dcts_intlv == 4) { 2059 if (intlv_addr == 0x4) 2060 chan_addr = ((chan_addr >> 10) << 8) | 2061 (chan_addr & 0xff); 2062 else if (intlv_addr == 0x5) 2063 chan_addr = ((chan_addr >> 11) << 9) | 2064 (chan_addr & 0x1ff); 2065 else 2066 return -EINVAL; 2067 } 2068 2069 if (dct_offset_en) { 2070 amd64_read_pci_cfg(pvt->F1, 2071 DRAM_CONT_HIGH_OFF + (int) channel * 4, 2072 &tmp); 2073 chan_addr += (u64) ((tmp >> 11) & 0xfff) << 27; 2074 } 2075 2076 f15h_select_dct(pvt, channel); 2077 2078 edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr); 2079 2080 /* 2081 * Find Chip select: 2082 * if channel = 3, then alias it to 1. This is because, in F15 M30h, 2083 * there is support for 4 DCT's, but only 2 are currently functional. 2084 * They are DCT0 and DCT3. But we have read all registers of DCT3 into 2085 * pvt->csels[1]. So we need to use '1' here to get correct info. 2086 * Refer F15 M30h BKDG Section 2.10 and 2.10.3 for clarifications. 2087 */ 2088 alias_channel = (channel == 3) ? 1 : channel; 2089 2090 cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, alias_channel); 2091 2092 if (cs_found >= 0) 2093 *chan_sel = alias_channel; 2094 2095 return cs_found; 2096 } 2097 2098 static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, 2099 u64 sys_addr, 2100 int *chan_sel) 2101 { 2102 int cs_found = -EINVAL; 2103 unsigned range; 2104 2105 for (range = 0; range < DRAM_RANGES; range++) { 2106 if (!dram_rw(pvt, range)) 2107 continue; 2108 2109 if (pvt->fam == 0x15 && pvt->model >= 0x30) 2110 cs_found = f15_m30h_match_to_this_node(pvt, range, 2111 sys_addr, 2112 chan_sel); 2113 2114 else if ((get_dram_base(pvt, range) <= sys_addr) && 2115 (get_dram_limit(pvt, range) >= sys_addr)) { 2116 cs_found = f1x_match_to_this_node(pvt, range, 2117 sys_addr, chan_sel); 2118 if (cs_found >= 0) 2119 break; 2120 } 2121 } 2122 return cs_found; 2123 } 2124 2125 /* 2126 * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps 2127 * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW). 2128 * 2129 * The @sys_addr is usually an error address received from the hardware 2130 * (MCX_ADDR). 
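* If the address cannot be decoded, err->err_code is set (e.g. ERR_CSROW) so
* that __log_ecc_error() can report why the mapping failed.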
2131 */ 2132 static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, 2133 struct err_info *err) 2134 { 2135 struct amd64_pvt *pvt = mci->pvt_info; 2136 2137 error_address_to_page_and_offset(sys_addr, err); 2138 2139 err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel); 2140 if (err->csrow < 0) { 2141 err->err_code = ERR_CSROW; 2142 return; 2143 } 2144 2145 /* 2146 * We need the syndromes for channel detection only when we're 2147 * ganged. Otherwise @chan should already contain the channel at 2148 * this point. 2149 */ 2150 if (dct_ganging_enabled(pvt)) 2151 err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); 2152 } 2153 2154 /* 2155 * debug routine to display the memory sizes of all logical DIMMs and its 2156 * CSROWs 2157 */ 2158 static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) 2159 { 2160 int dimm, size0, size1; 2161 u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; 2162 u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; 2163 2164 if (pvt->fam == 0xf) { 2165 /* K8 families < revF not supported yet */ 2166 if (pvt->ext_model < K8_REV_F) 2167 return; 2168 else 2169 WARN_ON(ctrl != 0); 2170 } 2171 2172 if (pvt->fam == 0x10) { 2173 dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 2174 : pvt->dbam0; 2175 dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? 2176 pvt->csels[1].csbases : 2177 pvt->csels[0].csbases; 2178 } else if (ctrl) { 2179 dbam = pvt->dbam0; 2180 dcsb = pvt->csels[1].csbases; 2181 } 2182 edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", 2183 ctrl, dbam); 2184 2185 edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl); 2186 2187 /* Dump memory sizes for DIMM and its CSROWs */ 2188 for (dimm = 0; dimm < 4; dimm++) { 2189 2190 size0 = 0; 2191 if (dcsb[dimm*2] & DCSB_CS_ENABLE) 2192 /* 2193 * For F15m60h, we need multiplier for LRDIMM cs_size 2194 * calculation. We pass dimm value to the dbam_to_cs 2195 * mapper so we can find the multiplier from the 2196 * corresponding DCSM. 
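* The same DBAM_DIMM() value and dimm number are passed for the odd chip
* select of the pair below, so both chip selects of a DIMM are sized
* consistently.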
2197 */ 2198 size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 2199 DBAM_DIMM(dimm, dbam), 2200 dimm); 2201 2202 size1 = 0; 2203 if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE) 2204 size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 2205 DBAM_DIMM(dimm, dbam), 2206 dimm); 2207 2208 amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", 2209 dimm * 2, size0, 2210 dimm * 2 + 1, size1); 2211 } 2212 } 2213 2214 static struct amd64_family_type family_types[] = { 2215 [K8_CPUS] = { 2216 .ctl_name = "K8", 2217 .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, 2218 .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, 2219 .max_mcs = 2, 2220 .ops = { 2221 .early_channel_count = k8_early_channel_count, 2222 .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, 2223 .dbam_to_cs = k8_dbam_to_chip_select, 2224 } 2225 }, 2226 [F10_CPUS] = { 2227 .ctl_name = "F10h", 2228 .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP, 2229 .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM, 2230 .max_mcs = 2, 2231 .ops = { 2232 .early_channel_count = f1x_early_channel_count, 2233 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2234 .dbam_to_cs = f10_dbam_to_chip_select, 2235 } 2236 }, 2237 [F15_CPUS] = { 2238 .ctl_name = "F15h", 2239 .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1, 2240 .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2, 2241 .max_mcs = 2, 2242 .ops = { 2243 .early_channel_count = f1x_early_channel_count, 2244 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2245 .dbam_to_cs = f15_dbam_to_chip_select, 2246 } 2247 }, 2248 [F15_M30H_CPUS] = { 2249 .ctl_name = "F15h_M30h", 2250 .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1, 2251 .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, 2252 .max_mcs = 2, 2253 .ops = { 2254 .early_channel_count = f1x_early_channel_count, 2255 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2256 .dbam_to_cs = f16_dbam_to_chip_select, 2257 } 2258 }, 2259 [F15_M60H_CPUS] = { 2260 .ctl_name = "F15h_M60h", 2261 .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1, 2262 .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2, 2263 .max_mcs = 2, 2264 .ops = { 2265 .early_channel_count = f1x_early_channel_count, 2266 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2267 .dbam_to_cs = f15_m60h_dbam_to_chip_select, 2268 } 2269 }, 2270 [F16_CPUS] = { 2271 .ctl_name = "F16h", 2272 .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, 2273 .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2, 2274 .max_mcs = 2, 2275 .ops = { 2276 .early_channel_count = f1x_early_channel_count, 2277 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2278 .dbam_to_cs = f16_dbam_to_chip_select, 2279 } 2280 }, 2281 [F16_M30H_CPUS] = { 2282 .ctl_name = "F16h_M30h", 2283 .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1, 2284 .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, 2285 .max_mcs = 2, 2286 .ops = { 2287 .early_channel_count = f1x_early_channel_count, 2288 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, 2289 .dbam_to_cs = f16_dbam_to_chip_select, 2290 } 2291 }, 2292 [F17_CPUS] = { 2293 .ctl_name = "F17h", 2294 .f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0, 2295 .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6, 2296 .max_mcs = 2, 2297 .ops = { 2298 .early_channel_count = f17_early_channel_count, 2299 .dbam_to_cs = f17_addr_mask_to_cs_size, 2300 } 2301 }, 2302 [F17_M10H_CPUS] = { 2303 .ctl_name = "F17h_M10h", 2304 .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, 2305 .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, 2306 .max_mcs = 2, 2307 .ops = { 2308 .early_channel_count = f17_early_channel_count, 2309 .dbam_to_cs = f17_addr_mask_to_cs_size, 2310 } 2311 }, 2312 [F17_M30H_CPUS] = { 2313 .ctl_name = "F17h_M30h", 2314 .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0, 2315 .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, 2316 .max_mcs = 8, 
2317 .ops = { 2318 .early_channel_count = f17_early_channel_count, 2319 .dbam_to_cs = f17_addr_mask_to_cs_size, 2320 } 2321 }, 2322 [F17_M70H_CPUS] = { 2323 .ctl_name = "F17h_M70h", 2324 .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0, 2325 .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6, 2326 .max_mcs = 2, 2327 .ops = { 2328 .early_channel_count = f17_early_channel_count, 2329 .dbam_to_cs = f17_addr_mask_to_cs_size, 2330 } 2331 }, 2332 [F19_CPUS] = { 2333 .ctl_name = "F19h", 2334 .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0, 2335 .f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6, 2336 .max_mcs = 8, 2337 .ops = { 2338 .early_channel_count = f17_early_channel_count, 2339 .dbam_to_cs = f17_addr_mask_to_cs_size, 2340 } 2341 }, 2342 }; 2343 2344 /* 2345 * These are tables of eigenvectors (one per line) which can be used for the 2346 * construction of the syndrome tables. The modified syndrome search algorithm 2347 * uses those to find the symbol in error and thus the DIMM. 2348 * 2349 * Algorithm courtesy of Ross LaFetra from AMD. 2350 */ 2351 static const u16 x4_vectors[] = { 2352 0x2f57, 0x1afe, 0x66cc, 0xdd88, 2353 0x11eb, 0x3396, 0x7f4c, 0xeac8, 2354 0x0001, 0x0002, 0x0004, 0x0008, 2355 0x1013, 0x3032, 0x4044, 0x8088, 2356 0x106b, 0x30d6, 0x70fc, 0xe0a8, 2357 0x4857, 0xc4fe, 0x13cc, 0x3288, 2358 0x1ac5, 0x2f4a, 0x5394, 0xa1e8, 2359 0x1f39, 0x251e, 0xbd6c, 0x6bd8, 2360 0x15c1, 0x2a42, 0x89ac, 0x4758, 2361 0x2b03, 0x1602, 0x4f0c, 0xca08, 2362 0x1f07, 0x3a0e, 0x6b04, 0xbd08, 2363 0x8ba7, 0x465e, 0x244c, 0x1cc8, 2364 0x2b87, 0x164e, 0x642c, 0xdc18, 2365 0x40b9, 0x80de, 0x1094, 0x20e8, 2366 0x27db, 0x1eb6, 0x9dac, 0x7b58, 2367 0x11c1, 0x2242, 0x84ac, 0x4c58, 2368 0x1be5, 0x2d7a, 0x5e34, 0xa718, 2369 0x4b39, 0x8d1e, 0x14b4, 0x28d8, 2370 0x4c97, 0xc87e, 0x11fc, 0x33a8, 2371 0x8e97, 0x497e, 0x2ffc, 0x1aa8, 2372 0x16b3, 0x3d62, 0x4f34, 0x8518, 2373 0x1e2f, 0x391a, 0x5cac, 0xf858, 2374 0x1d9f, 0x3b7a, 0x572c, 0xfe18, 2375 0x15f5, 0x2a5a, 0x5264, 0xa3b8, 2376 0x1dbb, 0x3b66, 0x715c, 0xe3f8, 2377 0x4397, 0xc27e, 0x17fc, 0x3ea8, 2378 0x1617, 0x3d3e, 0x6464, 0xb8b8, 2379 0x23ff, 0x12aa, 0xab6c, 0x56d8, 2380 0x2dfb, 0x1ba6, 0x913c, 0x7328, 2381 0x185d, 0x2ca6, 0x7914, 0x9e28, 2382 0x171b, 0x3e36, 0x7d7c, 0xebe8, 2383 0x4199, 0x82ee, 0x19f4, 0x2e58, 2384 0x4807, 0xc40e, 0x130c, 0x3208, 2385 0x1905, 0x2e0a, 0x5804, 0xac08, 2386 0x213f, 0x132a, 0xadfc, 0x5ba8, 2387 0x19a9, 0x2efe, 0xb5cc, 0x6f88, 2388 }; 2389 2390 static const u16 x8_vectors[] = { 2391 0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480, 2392 0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80, 2393 0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80, 2394 0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80, 2395 0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780, 2396 0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080, 2397 0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080, 2398 0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080, 2399 0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80, 2400 0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580, 2401 0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880, 2402 0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280, 2403 0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180, 2404 0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580, 2405 0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280, 2406 0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180, 2407 
0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080, 2408 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 2409 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000, 2410 }; 2411 2412 static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs, 2413 unsigned v_dim) 2414 { 2415 unsigned int i, err_sym; 2416 2417 for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) { 2418 u16 s = syndrome; 2419 unsigned v_idx = err_sym * v_dim; 2420 unsigned v_end = (err_sym + 1) * v_dim; 2421 2422 /* walk over all 16 bits of the syndrome */ 2423 for (i = 1; i < (1U << 16); i <<= 1) { 2424 2425 /* if bit is set in that eigenvector... */ 2426 if (v_idx < v_end && vectors[v_idx] & i) { 2427 u16 ev_comp = vectors[v_idx++]; 2428 2429 /* ... and bit set in the modified syndrome, */ 2430 if (s & i) { 2431 /* remove it. */ 2432 s ^= ev_comp; 2433 2434 if (!s) 2435 return err_sym; 2436 } 2437 2438 } else if (s & i) 2439 /* can't get to zero, move to next symbol */ 2440 break; 2441 } 2442 } 2443 2444 edac_dbg(0, "syndrome(%x) not found\n", syndrome); 2445 return -1; 2446 } 2447 2448 static int map_err_sym_to_channel(int err_sym, int sym_size) 2449 { 2450 if (sym_size == 4) 2451 switch (err_sym) { 2452 case 0x20: 2453 case 0x21: 2454 return 0; 2455 break; 2456 case 0x22: 2457 case 0x23: 2458 return 1; 2459 break; 2460 default: 2461 return err_sym >> 4; 2462 break; 2463 } 2464 /* x8 symbols */ 2465 else 2466 switch (err_sym) { 2467 /* imaginary bits not in a DIMM */ 2468 case 0x10: 2469 WARN(1, KERN_ERR "Invalid error symbol: 0x%x\n", 2470 err_sym); 2471 return -1; 2472 break; 2473 2474 case 0x11: 2475 return 0; 2476 break; 2477 case 0x12: 2478 return 1; 2479 break; 2480 default: 2481 return err_sym >> 3; 2482 break; 2483 } 2484 return -1; 2485 } 2486 2487 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) 2488 { 2489 struct amd64_pvt *pvt = mci->pvt_info; 2490 int err_sym = -1; 2491 2492 if (pvt->ecc_sym_sz == 8) 2493 err_sym = decode_syndrome(syndrome, x8_vectors, 2494 ARRAY_SIZE(x8_vectors), 2495 pvt->ecc_sym_sz); 2496 else if (pvt->ecc_sym_sz == 4) 2497 err_sym = decode_syndrome(syndrome, x4_vectors, 2498 ARRAY_SIZE(x4_vectors), 2499 pvt->ecc_sym_sz); 2500 else { 2501 amd64_warn("Illegal syndrome type: %u\n", pvt->ecc_sym_sz); 2502 return err_sym; 2503 } 2504 2505 return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz); 2506 } 2507 2508 static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err, 2509 u8 ecc_type) 2510 { 2511 enum hw_event_mc_err_type err_type; 2512 const char *string; 2513 2514 if (ecc_type == 2) 2515 err_type = HW_EVENT_ERR_CORRECTED; 2516 else if (ecc_type == 1) 2517 err_type = HW_EVENT_ERR_UNCORRECTED; 2518 else if (ecc_type == 3) 2519 err_type = HW_EVENT_ERR_DEFERRED; 2520 else { 2521 WARN(1, "Something is rotten in the state of Denmark.\n"); 2522 return; 2523 } 2524 2525 switch (err->err_code) { 2526 case DECODE_OK: 2527 string = ""; 2528 break; 2529 case ERR_NODE: 2530 string = "Failed to map error addr to a node"; 2531 break; 2532 case ERR_CSROW: 2533 string = "Failed to map error addr to a csrow"; 2534 break; 2535 case ERR_CHANNEL: 2536 string = "Unknown syndrome - possible error reporting race"; 2537 break; 2538 case ERR_SYND: 2539 string = "MCA_SYND not valid - unknown syndrome and csrow"; 2540 break; 2541 case ERR_NORM_ADDR: 2542 string = "Cannot decode normalized address"; 2543 break; 2544 default: 2545 string = "WTF error"; 2546 break; 2547 } 2548 2549 edac_mc_handle_error(err_type, 
mci, 1, 2550 err->page, err->offset, err->syndrome, 2551 err->csrow, err->channel, -1, 2552 string, ""); 2553 } 2554 2555 static inline void decode_bus_error(int node_id, struct mce *m) 2556 { 2557 struct mem_ctl_info *mci; 2558 struct amd64_pvt *pvt; 2559 u8 ecc_type = (m->status >> 45) & 0x3; 2560 u8 xec = XEC(m->status, 0x1f); 2561 u16 ec = EC(m->status); 2562 u64 sys_addr; 2563 struct err_info err; 2564 2565 mci = edac_mc_find(node_id); 2566 if (!mci) 2567 return; 2568 2569 pvt = mci->pvt_info; 2570 2571 /* Bail out early if this was an 'observed' error */ 2572 if (PP(ec) == NBSL_PP_OBS) 2573 return; 2574 2575 /* Do only ECC errors */ 2576 if (xec && xec != F10_NBSL_EXT_ERR_ECC) 2577 return; 2578 2579 memset(&err, 0, sizeof(err)); 2580 2581 sys_addr = get_error_address(pvt, m); 2582 2583 if (ecc_type == 2) 2584 err.syndrome = extract_syndrome(m->status); 2585 2586 pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err); 2587 2588 __log_ecc_error(mci, &err, ecc_type); 2589 } 2590 2591 /* 2592 * To find the UMC channel represented by this bank we need to match on its 2593 * instance_id. The instance_id of a bank is held in the lower 32 bits of its 2594 * IPID. 2595 * 2596 * Currently, we can derive the channel number by looking at the 6th nibble in 2597 * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel 2598 * number. 2599 */ 2600 static int find_umc_channel(struct mce *m) 2601 { 2602 return (m->ipid & GENMASK(31, 0)) >> 20; 2603 } 2604 2605 static void decode_umc_error(int node_id, struct mce *m) 2606 { 2607 u8 ecc_type = (m->status >> 45) & 0x3; 2608 struct mem_ctl_info *mci; 2609 struct amd64_pvt *pvt; 2610 struct err_info err; 2611 u64 sys_addr; 2612 2613 mci = edac_mc_find(node_id); 2614 if (!mci) 2615 return; 2616 2617 pvt = mci->pvt_info; 2618 2619 memset(&err, 0, sizeof(err)); 2620 2621 if (m->status & MCI_STATUS_DEFERRED) 2622 ecc_type = 3; 2623 2624 err.channel = find_umc_channel(m); 2625 2626 if (!(m->status & MCI_STATUS_SYNDV)) { 2627 err.err_code = ERR_SYND; 2628 goto log_error; 2629 } 2630 2631 if (ecc_type == 2) { 2632 u8 length = (m->synd >> 18) & 0x3f; 2633 2634 if (length) 2635 err.syndrome = (m->synd >> 32) & GENMASK(length - 1, 0); 2636 else 2637 err.err_code = ERR_CHANNEL; 2638 } 2639 2640 err.csrow = m->synd & 0x7; 2641 2642 if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { 2643 err.err_code = ERR_NORM_ADDR; 2644 goto log_error; 2645 } 2646 2647 error_address_to_page_and_offset(sys_addr, &err); 2648 2649 log_error: 2650 __log_ecc_error(mci, &err, ecc_type); 2651 } 2652 2653 /* 2654 * Use pvt->F3 which contains the F3 CPU PCI device to get the related 2655 * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error. 2656 * Reserve F0 and F6 on systems with a UMC. 
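* If reserving the second device fails, the first one is released again, so a
* negative return never leaves a PCI device reference held.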
2657 */ 2658 static int 2659 reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2) 2660 { 2661 if (pvt->umc) { 2662 pvt->F0 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3); 2663 if (!pvt->F0) { 2664 amd64_err("F0 not found, device 0x%x (broken BIOS?)\n", pci_id1); 2665 return -ENODEV; 2666 } 2667 2668 pvt->F6 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3); 2669 if (!pvt->F6) { 2670 pci_dev_put(pvt->F0); 2671 pvt->F0 = NULL; 2672 2673 amd64_err("F6 not found: device 0x%x (broken BIOS?)\n", pci_id2); 2674 return -ENODEV; 2675 } 2676 2677 edac_dbg(1, "F0: %s\n", pci_name(pvt->F0)); 2678 edac_dbg(1, "F3: %s\n", pci_name(pvt->F3)); 2679 edac_dbg(1, "F6: %s\n", pci_name(pvt->F6)); 2680 2681 return 0; 2682 } 2683 2684 /* Reserve the ADDRESS MAP Device */ 2685 pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3); 2686 if (!pvt->F1) { 2687 amd64_err("F1 not found: device 0x%x (broken BIOS?)\n", pci_id1); 2688 return -ENODEV; 2689 } 2690 2691 /* Reserve the DCT Device */ 2692 pvt->F2 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3); 2693 if (!pvt->F2) { 2694 pci_dev_put(pvt->F1); 2695 pvt->F1 = NULL; 2696 2697 amd64_err("F2 not found: device 0x%x (broken BIOS?)\n", pci_id2); 2698 return -ENODEV; 2699 } 2700 2701 edac_dbg(1, "F1: %s\n", pci_name(pvt->F1)); 2702 edac_dbg(1, "F2: %s\n", pci_name(pvt->F2)); 2703 edac_dbg(1, "F3: %s\n", pci_name(pvt->F3)); 2704 2705 return 0; 2706 } 2707 2708 static void free_mc_sibling_devs(struct amd64_pvt *pvt) 2709 { 2710 if (pvt->umc) { 2711 pci_dev_put(pvt->F0); 2712 pci_dev_put(pvt->F6); 2713 } else { 2714 pci_dev_put(pvt->F1); 2715 pci_dev_put(pvt->F2); 2716 } 2717 } 2718 2719 static void determine_ecc_sym_sz(struct amd64_pvt *pvt) 2720 { 2721 pvt->ecc_sym_sz = 4; 2722 2723 if (pvt->umc) { 2724 u8 i; 2725 2726 for_each_umc(i) { 2727 /* Check enabled channels only: */ 2728 if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { 2729 if (pvt->umc[i].ecc_ctrl & BIT(9)) { 2730 pvt->ecc_sym_sz = 16; 2731 return; 2732 } else if (pvt->umc[i].ecc_ctrl & BIT(7)) { 2733 pvt->ecc_sym_sz = 8; 2734 return; 2735 } 2736 } 2737 } 2738 } else if (pvt->fam >= 0x10) { 2739 u32 tmp; 2740 2741 amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); 2742 /* F16h has only DCT0, so no need to read dbam1. */ 2743 if (pvt->fam != 0x16) 2744 amd64_read_dct_pci_cfg(pvt, 1, DBAM0, &pvt->dbam1); 2745 2746 /* F10h, revD and later can do x8 ECC too. */ 2747 if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25)) 2748 pvt->ecc_sym_sz = 8; 2749 } 2750 } 2751 2752 /* 2753 * Retrieve the hardware registers of the memory controller. 
2754 */ 2755 static void __read_mc_regs_df(struct amd64_pvt *pvt) 2756 { 2757 u8 nid = pvt->mc_node_id; 2758 struct amd64_umc *umc; 2759 u32 i, umc_base; 2760 2761 /* Read registers from each UMC */ 2762 for_each_umc(i) { 2763 2764 umc_base = get_umc_base(i); 2765 umc = &pvt->umc[i]; 2766 2767 amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg); 2768 amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg); 2769 amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl); 2770 amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl); 2771 amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi); 2772 } 2773 } 2774 2775 /* 2776 * Retrieve the hardware registers of the memory controller (this includes the 2777 * 'Address Map' and 'Misc' device regs) 2778 */ 2779 static void read_mc_regs(struct amd64_pvt *pvt) 2780 { 2781 unsigned int range; 2782 u64 msr_val; 2783 2784 /* 2785 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since 2786 * those are Read-As-Zero. 2787 */ 2788 rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem); 2789 edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem); 2790 2791 /* Check first whether TOP_MEM2 is enabled: */ 2792 rdmsrl(MSR_K8_SYSCFG, msr_val); 2793 if (msr_val & BIT(21)) { 2794 rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2); 2795 edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2); 2796 } else { 2797 edac_dbg(0, " TOP_MEM2 disabled\n"); 2798 } 2799 2800 if (pvt->umc) { 2801 __read_mc_regs_df(pvt); 2802 amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar); 2803 2804 goto skip; 2805 } 2806 2807 amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap); 2808 2809 read_dram_ctl_register(pvt); 2810 2811 for (range = 0; range < DRAM_RANGES; range++) { 2812 u8 rw; 2813 2814 /* read settings for this DRAM range */ 2815 read_dram_base_limit_regs(pvt, range); 2816 2817 rw = dram_rw(pvt, range); 2818 if (!rw) 2819 continue; 2820 2821 edac_dbg(1, " DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n", 2822 range, 2823 get_dram_base(pvt, range), 2824 get_dram_limit(pvt, range)); 2825 2826 edac_dbg(1, " IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n", 2827 dram_intlv_en(pvt, range) ? "Enabled" : "Disabled", 2828 (rw & 0x1) ? "R" : "-", 2829 (rw & 0x2) ? 
"W" : "-", 2830 dram_intlv_sel(pvt, range), 2831 dram_dst_node(pvt, range)); 2832 } 2833 2834 amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar); 2835 amd64_read_dct_pci_cfg(pvt, 0, DBAM0, &pvt->dbam0); 2836 2837 amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare); 2838 2839 amd64_read_dct_pci_cfg(pvt, 0, DCLR0, &pvt->dclr0); 2840 amd64_read_dct_pci_cfg(pvt, 0, DCHR0, &pvt->dchr0); 2841 2842 if (!dct_ganging_enabled(pvt)) { 2843 amd64_read_dct_pci_cfg(pvt, 1, DCLR0, &pvt->dclr1); 2844 amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1); 2845 } 2846 2847 skip: 2848 read_dct_base_mask(pvt); 2849 2850 determine_memory_type(pvt); 2851 edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); 2852 2853 determine_ecc_sym_sz(pvt); 2854 } 2855 2856 /* 2857 * NOTE: CPU Revision Dependent code 2858 * 2859 * Input: 2860 * @csrow_nr ChipSelect Row Number (0..NUM_CHIPSELECTS-1) 2861 * k8 private pointer to --> 2862 * DRAM Bank Address mapping register 2863 * node_id 2864 * DCL register where dual_channel_active is 2865 * 2866 * The DBAM register consists of 4 sets of 4 bits each definitions: 2867 * 2868 * Bits: CSROWs 2869 * 0-3 CSROWs 0 and 1 2870 * 4-7 CSROWs 2 and 3 2871 * 8-11 CSROWs 4 and 5 2872 * 12-15 CSROWs 6 and 7 2873 * 2874 * Values range from: 0 to 15 2875 * The meaning of the values depends on CPU revision and dual-channel state, 2876 * see relevant BKDG more info. 2877 * 2878 * The memory controller provides for total of only 8 CSROWs in its current 2879 * architecture. Each "pair" of CSROWs normally represents just one DIMM in 2880 * single channel or two (2) DIMMs in dual channel mode. 2881 * 2882 * The following code logic collapses the various tables for CSROW based on CPU 2883 * revision. 2884 * 2885 * Returns: 2886 * The number of PAGE_SIZE pages on the specified CSROW number it 2887 * encompasses 2888 * 2889 */ 2890 static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) 2891 { 2892 u32 dbam = dct ? 
pvt->dbam1 : pvt->dbam0; 2893 int csrow_nr = csrow_nr_orig; 2894 u32 cs_mode, nr_pages; 2895 2896 if (!pvt->umc) { 2897 csrow_nr >>= 1; 2898 cs_mode = DBAM_DIMM(csrow_nr, dbam); 2899 } else { 2900 cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt); 2901 } 2902 2903 nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); 2904 nr_pages <<= 20 - PAGE_SHIFT; 2905 2906 edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", 2907 csrow_nr_orig, dct, cs_mode); 2908 edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); 2909 2910 return nr_pages; 2911 } 2912 2913 static int init_csrows_df(struct mem_ctl_info *mci) 2914 { 2915 struct amd64_pvt *pvt = mci->pvt_info; 2916 enum edac_type edac_mode = EDAC_NONE; 2917 enum dev_type dev_type = DEV_UNKNOWN; 2918 struct dimm_info *dimm; 2919 int empty = 1; 2920 u8 umc, cs; 2921 2922 if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) { 2923 edac_mode = EDAC_S16ECD16ED; 2924 dev_type = DEV_X16; 2925 } else if (mci->edac_ctl_cap & EDAC_FLAG_S8ECD8ED) { 2926 edac_mode = EDAC_S8ECD8ED; 2927 dev_type = DEV_X8; 2928 } else if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) { 2929 edac_mode = EDAC_S4ECD4ED; 2930 dev_type = DEV_X4; 2931 } else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) { 2932 edac_mode = EDAC_SECDED; 2933 } 2934 2935 for_each_umc(umc) { 2936 for_each_chip_select(cs, umc, pvt) { 2937 if (!csrow_enabled(cs, umc, pvt)) 2938 continue; 2939 2940 empty = 0; 2941 dimm = mci->csrows[cs]->channels[umc]->dimm; 2942 2943 edac_dbg(1, "MC node: %d, csrow: %d\n", 2944 pvt->mc_node_id, cs); 2945 2946 dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); 2947 dimm->mtype = pvt->dram_type; 2948 dimm->edac_mode = edac_mode; 2949 dimm->dtype = dev_type; 2950 dimm->grain = 64; 2951 } 2952 } 2953 2954 return empty; 2955 } 2956 2957 /* 2958 * Initialize the array of csrow attribute instances, based on the values 2959 * from pci config hardware registers. 2960 */ 2961 static int init_csrows(struct mem_ctl_info *mci) 2962 { 2963 struct amd64_pvt *pvt = mci->pvt_info; 2964 enum edac_type edac_mode = EDAC_NONE; 2965 struct csrow_info *csrow; 2966 struct dimm_info *dimm; 2967 int i, j, empty = 1; 2968 int nr_pages = 0; 2969 u32 val; 2970 2971 if (pvt->umc) 2972 return init_csrows_df(mci); 2973 2974 amd64_read_pci_cfg(pvt->F3, NBCFG, &val); 2975 2976 pvt->nbcfg = val; 2977 2978 edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", 2979 pvt->mc_node_id, val, 2980 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); 2981 2982 /* 2983 * We iterate over DCT0 here but we look at DCT1 in parallel, if needed. 2984 */ 2985 for_each_chip_select(i, 0, pvt) { 2986 bool row_dct0 = !!csrow_enabled(i, 0, pvt); 2987 bool row_dct1 = false; 2988 2989 if (pvt->fam != 0xf) 2990 row_dct1 = !!csrow_enabled(i, 1, pvt); 2991 2992 if (!row_dct0 && !row_dct1) 2993 continue; 2994 2995 csrow = mci->csrows[i]; 2996 empty = 0; 2997 2998 edac_dbg(1, "MC node: %d, csrow: %d\n", 2999 pvt->mc_node_id, i); 3000 3001 if (row_dct0) { 3002 nr_pages = get_csrow_nr_pages(pvt, 0, i); 3003 csrow->channels[0]->dimm->nr_pages = nr_pages; 3004 } 3005 3006 /* K8 has only one DCT */ 3007 if (pvt->fam != 0xf && row_dct1) { 3008 int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i); 3009 3010 csrow->channels[1]->dimm->nr_pages = row_dct1_pages; 3011 nr_pages += row_dct1_pages; 3012 } 3013 3014 edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); 3015 3016 /* Determine DIMM ECC mode: */ 3017 if (pvt->nbcfg & NBCFG_ECC_ENABLE) { 3018 edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) 3019 ? 
EDAC_S4ECD4ED 3020 : EDAC_SECDED; 3021 } 3022 3023 for (j = 0; j < pvt->channel_count; j++) { 3024 dimm = csrow->channels[j]->dimm; 3025 dimm->mtype = pvt->dram_type; 3026 dimm->edac_mode = edac_mode; 3027 dimm->grain = 64; 3028 } 3029 } 3030 3031 return empty; 3032 } 3033 3034 /* get all cores on this DCT */ 3035 static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) 3036 { 3037 int cpu; 3038 3039 for_each_online_cpu(cpu) 3040 if (amd_get_nb_id(cpu) == nid) 3041 cpumask_set_cpu(cpu, mask); 3042 } 3043 3044 /* check MCG_CTL on all the cpus on this node */ 3045 static bool nb_mce_bank_enabled_on_node(u16 nid) 3046 { 3047 cpumask_var_t mask; 3048 int cpu, nbe; 3049 bool ret = false; 3050 3051 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 3052 amd64_warn("%s: Error allocating mask\n", __func__); 3053 return false; 3054 } 3055 3056 get_cpus_on_this_dct_cpumask(mask, nid); 3057 3058 rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs); 3059 3060 for_each_cpu(cpu, mask) { 3061 struct msr *reg = per_cpu_ptr(msrs, cpu); 3062 nbe = reg->l & MSR_MCGCTL_NBE; 3063 3064 edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", 3065 cpu, reg->q, 3066 (nbe ? "enabled" : "disabled")); 3067 3068 if (!nbe) 3069 goto out; 3070 } 3071 ret = true; 3072 3073 out: 3074 free_cpumask_var(mask); 3075 return ret; 3076 } 3077 3078 static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) 3079 { 3080 cpumask_var_t cmask; 3081 int cpu; 3082 3083 if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) { 3084 amd64_warn("%s: error allocating mask\n", __func__); 3085 return -ENOMEM; 3086 } 3087 3088 get_cpus_on_this_dct_cpumask(cmask, nid); 3089 3090 rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); 3091 3092 for_each_cpu(cpu, cmask) { 3093 3094 struct msr *reg = per_cpu_ptr(msrs, cpu); 3095 3096 if (on) { 3097 if (reg->l & MSR_MCGCTL_NBE) 3098 s->flags.nb_mce_enable = 1; 3099 3100 reg->l |= MSR_MCGCTL_NBE; 3101 } else { 3102 /* 3103 * Turn off NB MCE reporting only when it was off before 3104 */ 3105 if (!s->flags.nb_mce_enable) 3106 reg->l &= ~MSR_MCGCTL_NBE; 3107 } 3108 } 3109 wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); 3110 3111 free_cpumask_var(cmask); 3112 3113 return 0; 3114 } 3115 3116 static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, 3117 struct pci_dev *F3) 3118 { 3119 bool ret = true; 3120 u32 value, mask = 0x3; /* UECC/CECC enable */ 3121 3122 if (toggle_ecc_err_reporting(s, nid, ON)) { 3123 amd64_warn("Error enabling ECC reporting over MCGCTL!\n"); 3124 return false; 3125 } 3126 3127 amd64_read_pci_cfg(F3, NBCTL, &value); 3128 3129 s->old_nbctl = value & mask; 3130 s->nbctl_valid = true; 3131 3132 value |= mask; 3133 amd64_write_pci_cfg(F3, NBCTL, value); 3134 3135 amd64_read_pci_cfg(F3, NBCFG, &value); 3136 3137 edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n", 3138 nid, value, !!(value & NBCFG_ECC_ENABLE)); 3139 3140 if (!(value & NBCFG_ECC_ENABLE)) { 3141 amd64_warn("DRAM ECC disabled on this node, enabling...\n"); 3142 3143 s->flags.nb_ecc_prev = 0; 3144 3145 /* Attempt to turn on DRAM ECC Enable */ 3146 value |= NBCFG_ECC_ENABLE; 3147 amd64_write_pci_cfg(F3, NBCFG, value); 3148 3149 amd64_read_pci_cfg(F3, NBCFG, &value); 3150 3151 if (!(value & NBCFG_ECC_ENABLE)) { 3152 amd64_warn("Hardware rejected DRAM ECC enable," 3153 "check memory DIMM configuration.\n"); 3154 ret = false; 3155 } else { 3156 amd64_info("Hardware accepted DRAM ECC Enable\n"); 3157 } 3158 } else { 3159 s->flags.nb_ecc_prev = 1; 3160 } 3161 3162 edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: 
%d]\n", 3163 nid, value, !!(value & NBCFG_ECC_ENABLE)); 3164 3165 return ret; 3166 } 3167 3168 static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, 3169 struct pci_dev *F3) 3170 { 3171 u32 value, mask = 0x3; /* UECC/CECC enable */ 3172 3173 if (!s->nbctl_valid) 3174 return; 3175 3176 amd64_read_pci_cfg(F3, NBCTL, &value); 3177 value &= ~mask; 3178 value |= s->old_nbctl; 3179 3180 amd64_write_pci_cfg(F3, NBCTL, value); 3181 3182 /* restore previous BIOS DRAM ECC "off" setting we force-enabled */ 3183 if (!s->flags.nb_ecc_prev) { 3184 amd64_read_pci_cfg(F3, NBCFG, &value); 3185 value &= ~NBCFG_ECC_ENABLE; 3186 amd64_write_pci_cfg(F3, NBCFG, value); 3187 } 3188 3189 /* restore the NB Enable MCGCTL bit */ 3190 if (toggle_ecc_err_reporting(s, nid, OFF)) 3191 amd64_warn("Error restoring NB MCGCTL settings!\n"); 3192 } 3193 3194 static bool ecc_enabled(struct amd64_pvt *pvt) 3195 { 3196 u16 nid = pvt->mc_node_id; 3197 bool nb_mce_en = false; 3198 u8 ecc_en = 0, i; 3199 u32 value; 3200 3201 if (boot_cpu_data.x86 >= 0x17) { 3202 u8 umc_en_mask = 0, ecc_en_mask = 0; 3203 struct amd64_umc *umc; 3204 3205 for_each_umc(i) { 3206 umc = &pvt->umc[i]; 3207 3208 /* Only check enabled UMCs. */ 3209 if (!(umc->sdp_ctrl & UMC_SDP_INIT)) 3210 continue; 3211 3212 umc_en_mask |= BIT(i); 3213 3214 if (umc->umc_cap_hi & UMC_ECC_ENABLED) 3215 ecc_en_mask |= BIT(i); 3216 } 3217 3218 /* Check whether at least one UMC is enabled: */ 3219 if (umc_en_mask) 3220 ecc_en = umc_en_mask == ecc_en_mask; 3221 else 3222 edac_dbg(0, "Node %d: No enabled UMCs.\n", nid); 3223 3224 /* Assume UMC MCA banks are enabled. */ 3225 nb_mce_en = true; 3226 } else { 3227 amd64_read_pci_cfg(pvt->F3, NBCFG, &value); 3228 3229 ecc_en = !!(value & NBCFG_ECC_ENABLE); 3230 3231 nb_mce_en = nb_mce_bank_enabled_on_node(nid); 3232 if (!nb_mce_en) 3233 edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n", 3234 MSR_IA32_MCG_CTL, nid); 3235 } 3236 3237 amd64_info("Node %d: DRAM ECC %s.\n", 3238 nid, (ecc_en ? 
"enabled" : "disabled")); 3239 3240 if (!ecc_en || !nb_mce_en) 3241 return false; 3242 else 3243 return true; 3244 } 3245 3246 static inline void 3247 f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) 3248 { 3249 u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1; 3250 3251 for_each_umc(i) { 3252 if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { 3253 ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED); 3254 cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP); 3255 3256 dev_x4 &= !!(pvt->umc[i].dimm_cfg & BIT(6)); 3257 dev_x16 &= !!(pvt->umc[i].dimm_cfg & BIT(7)); 3258 } 3259 } 3260 3261 /* Set chipkill only if ECC is enabled: */ 3262 if (ecc_en) { 3263 mci->edac_ctl_cap |= EDAC_FLAG_SECDED; 3264 3265 if (!cpk_en) 3266 return; 3267 3268 if (dev_x4) 3269 mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; 3270 else if (dev_x16) 3271 mci->edac_ctl_cap |= EDAC_FLAG_S16ECD16ED; 3272 else 3273 mci->edac_ctl_cap |= EDAC_FLAG_S8ECD8ED; 3274 } 3275 } 3276 3277 static void setup_mci_misc_attrs(struct mem_ctl_info *mci) 3278 { 3279 struct amd64_pvt *pvt = mci->pvt_info; 3280 3281 mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2; 3282 mci->edac_ctl_cap = EDAC_FLAG_NONE; 3283 3284 if (pvt->umc) { 3285 f17h_determine_edac_ctl_cap(mci, pvt); 3286 } else { 3287 if (pvt->nbcap & NBCAP_SECDED) 3288 mci->edac_ctl_cap |= EDAC_FLAG_SECDED; 3289 3290 if (pvt->nbcap & NBCAP_CHIPKILL) 3291 mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; 3292 } 3293 3294 mci->edac_cap = determine_edac_cap(pvt); 3295 mci->mod_name = EDAC_MOD_STR; 3296 mci->ctl_name = fam_type->ctl_name; 3297 mci->dev_name = pci_name(pvt->F3); 3298 mci->ctl_page_to_phys = NULL; 3299 3300 /* memory scrubber interface */ 3301 mci->set_sdram_scrub_rate = set_scrub_rate; 3302 mci->get_sdram_scrub_rate = get_scrub_rate; 3303 } 3304 3305 /* 3306 * returns a pointer to the family descriptor on success, NULL otherwise. 
3307 */ 3308 static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) 3309 { 3310 pvt->ext_model = boot_cpu_data.x86_model >> 4; 3311 pvt->stepping = boot_cpu_data.x86_stepping; 3312 pvt->model = boot_cpu_data.x86_model; 3313 pvt->fam = boot_cpu_data.x86; 3314 3315 switch (pvt->fam) { 3316 case 0xf: 3317 fam_type = &family_types[K8_CPUS]; 3318 pvt->ops = &family_types[K8_CPUS].ops; 3319 break; 3320 3321 case 0x10: 3322 fam_type = &family_types[F10_CPUS]; 3323 pvt->ops = &family_types[F10_CPUS].ops; 3324 break; 3325 3326 case 0x15: 3327 if (pvt->model == 0x30) { 3328 fam_type = &family_types[F15_M30H_CPUS]; 3329 pvt->ops = &family_types[F15_M30H_CPUS].ops; 3330 break; 3331 } else if (pvt->model == 0x60) { 3332 fam_type = &family_types[F15_M60H_CPUS]; 3333 pvt->ops = &family_types[F15_M60H_CPUS].ops; 3334 break; 3335 } 3336 3337 fam_type = &family_types[F15_CPUS]; 3338 pvt->ops = &family_types[F15_CPUS].ops; 3339 break; 3340 3341 case 0x16: 3342 if (pvt->model == 0x30) { 3343 fam_type = &family_types[F16_M30H_CPUS]; 3344 pvt->ops = &family_types[F16_M30H_CPUS].ops; 3345 break; 3346 } 3347 fam_type = &family_types[F16_CPUS]; 3348 pvt->ops = &family_types[F16_CPUS].ops; 3349 break; 3350 3351 case 0x17: 3352 if (pvt->model >= 0x10 && pvt->model <= 0x2f) { 3353 fam_type = &family_types[F17_M10H_CPUS]; 3354 pvt->ops = &family_types[F17_M10H_CPUS].ops; 3355 break; 3356 } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) { 3357 fam_type = &family_types[F17_M30H_CPUS]; 3358 pvt->ops = &family_types[F17_M30H_CPUS].ops; 3359 break; 3360 } else if (pvt->model >= 0x70 && pvt->model <= 0x7f) { 3361 fam_type = &family_types[F17_M70H_CPUS]; 3362 pvt->ops = &family_types[F17_M70H_CPUS].ops; 3363 break; 3364 } 3365 /* fall through */ 3366 case 0x18: 3367 fam_type = &family_types[F17_CPUS]; 3368 pvt->ops = &family_types[F17_CPUS].ops; 3369 3370 if (pvt->fam == 0x18) 3371 family_types[F17_CPUS].ctl_name = "F18h"; 3372 break; 3373 3374 case 0x19: 3375 fam_type = &family_types[F19_CPUS]; 3376 pvt->ops = &family_types[F19_CPUS].ops; 3377 family_types[F19_CPUS].ctl_name = "F19h"; 3378 break; 3379 3380 default: 3381 amd64_err("Unsupported family!\n"); 3382 return NULL; 3383 } 3384 3385 amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name, 3386 (pvt->fam == 0xf ? 3387 (pvt->ext_model >= K8_REV_F ? 
"revF or later " 3388 : "revE or earlier ") 3389 : ""), pvt->mc_node_id); 3390 return fam_type; 3391 } 3392 3393 static const struct attribute_group *amd64_edac_attr_groups[] = { 3394 #ifdef CONFIG_EDAC_DEBUG 3395 &amd64_edac_dbg_group, 3396 #endif 3397 #ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION 3398 &amd64_edac_inj_group, 3399 #endif 3400 NULL 3401 }; 3402 3403 static int hw_info_get(struct amd64_pvt *pvt) 3404 { 3405 u16 pci_id1, pci_id2; 3406 int ret = -EINVAL; 3407 3408 if (pvt->fam >= 0x17) { 3409 pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); 3410 if (!pvt->umc) 3411 return -ENOMEM; 3412 3413 pci_id1 = fam_type->f0_id; 3414 pci_id2 = fam_type->f6_id; 3415 } else { 3416 pci_id1 = fam_type->f1_id; 3417 pci_id2 = fam_type->f2_id; 3418 } 3419 3420 ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2); 3421 if (ret) 3422 return ret; 3423 3424 read_mc_regs(pvt); 3425 3426 return 0; 3427 } 3428 3429 static void hw_info_put(struct amd64_pvt *pvt) 3430 { 3431 if (pvt->F0 || pvt->F1) 3432 free_mc_sibling_devs(pvt); 3433 3434 kfree(pvt->umc); 3435 } 3436 3437 static int init_one_instance(struct amd64_pvt *pvt) 3438 { 3439 struct mem_ctl_info *mci = NULL; 3440 struct edac_mc_layer layers[2]; 3441 int ret = -EINVAL; 3442 3443 /* 3444 * We need to determine how many memory channels there are. Then use 3445 * that information for calculating the size of the dynamic instance 3446 * tables in the 'mci' structure. 3447 */ 3448 pvt->channel_count = pvt->ops->early_channel_count(pvt); 3449 if (pvt->channel_count < 0) 3450 return ret; 3451 3452 ret = -ENOMEM; 3453 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; 3454 layers[0].size = pvt->csels[0].b_cnt; 3455 layers[0].is_virt_csrow = true; 3456 layers[1].type = EDAC_MC_LAYER_CHANNEL; 3457 3458 /* 3459 * Always allocate two channels since we can have setups with DIMMs on 3460 * only one channel. Also, this simplifies handling later for the price 3461 * of a couple of KBs tops. 
3462 */ 3463 layers[1].size = fam_type->max_mcs; 3464 layers[1].is_virt_csrow = false; 3465 3466 mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0); 3467 if (!mci) 3468 return ret; 3469 3470 mci->pvt_info = pvt; 3471 mci->pdev = &pvt->F3->dev; 3472 3473 setup_mci_misc_attrs(mci); 3474 3475 if (init_csrows(mci)) 3476 mci->edac_cap = EDAC_FLAG_NONE; 3477 3478 ret = -ENODEV; 3479 if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) { 3480 edac_dbg(1, "failed edac_mc_add_mc()\n"); 3481 edac_mc_free(mci); 3482 return ret; 3483 } 3484 3485 return 0; 3486 } 3487 3488 static bool instance_has_memory(struct amd64_pvt *pvt) 3489 { 3490 bool cs_enabled = false; 3491 int cs = 0, dct = 0; 3492 3493 for (dct = 0; dct < fam_type->max_mcs; dct++) { 3494 for_each_chip_select(cs, dct, pvt) 3495 cs_enabled |= csrow_enabled(cs, dct, pvt); 3496 } 3497 3498 return cs_enabled; 3499 } 3500 3501 static int probe_one_instance(unsigned int nid) 3502 { 3503 struct pci_dev *F3 = node_to_amd_nb(nid)->misc; 3504 struct amd64_pvt *pvt = NULL; 3505 struct ecc_settings *s; 3506 int ret; 3507 3508 ret = -ENOMEM; 3509 s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); 3510 if (!s) 3511 goto err_out; 3512 3513 ecc_stngs[nid] = s; 3514 3515 pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); 3516 if (!pvt) 3517 goto err_settings; 3518 3519 pvt->mc_node_id = nid; 3520 pvt->F3 = F3; 3521 3522 fam_type = per_family_init(pvt); 3523 if (!fam_type) 3524 goto err_enable; 3525 3526 ret = hw_info_get(pvt); 3527 if (ret < 0) 3528 goto err_enable; 3529 3530 ret = 0; 3531 if (!instance_has_memory(pvt)) { 3532 amd64_info("Node %d: No DIMMs detected.\n", nid); 3533 goto err_enable; 3534 } 3535 3536 if (!ecc_enabled(pvt)) { 3537 ret = -ENODEV; 3538 3539 if (!ecc_enable_override) 3540 goto err_enable; 3541 3542 if (boot_cpu_data.x86 >= 0x17) { 3543 amd64_warn("Forcing ECC on is not recommended on newer systems. 
Please enable ECC in BIOS."); 3544 goto err_enable; 3545 } else 3546 amd64_warn("Forcing ECC on!\n"); 3547 3548 if (!enable_ecc_error_reporting(s, nid, F3)) 3549 goto err_enable; 3550 } 3551 3552 ret = init_one_instance(pvt); 3553 if (ret < 0) { 3554 amd64_err("Error probing instance: %d\n", nid); 3555 3556 if (boot_cpu_data.x86 < 0x17) 3557 restore_ecc_error_reporting(s, nid, F3); 3558 3559 goto err_enable; 3560 } 3561 3562 dump_misc_regs(pvt); 3563 3564 return ret; 3565 3566 err_enable: 3567 hw_info_put(pvt); 3568 kfree(pvt); 3569 3570 err_settings: 3571 kfree(s); 3572 ecc_stngs[nid] = NULL; 3573 3574 err_out: 3575 return ret; 3576 } 3577 3578 static void remove_one_instance(unsigned int nid) 3579 { 3580 struct pci_dev *F3 = node_to_amd_nb(nid)->misc; 3581 struct ecc_settings *s = ecc_stngs[nid]; 3582 struct mem_ctl_info *mci; 3583 struct amd64_pvt *pvt; 3584 3585 /* Remove from EDAC CORE tracking list */ 3586 mci = edac_mc_del_mc(&F3->dev); 3587 if (!mci) 3588 return; 3589 3590 pvt = mci->pvt_info; 3591 3592 restore_ecc_error_reporting(s, nid, F3); 3593 3594 kfree(ecc_stngs[nid]); 3595 ecc_stngs[nid] = NULL; 3596 3597 /* Free the EDAC CORE resources */ 3598 mci->pvt_info = NULL; 3599 3600 hw_info_put(pvt); 3601 kfree(pvt); 3602 edac_mc_free(mci); 3603 } 3604 3605 static void setup_pci_device(void) 3606 { 3607 struct mem_ctl_info *mci; 3608 struct amd64_pvt *pvt; 3609 3610 if (pci_ctl) 3611 return; 3612 3613 mci = edac_mc_find(0); 3614 if (!mci) 3615 return; 3616 3617 pvt = mci->pvt_info; 3618 if (pvt->umc) 3619 pci_ctl = edac_pci_create_generic_ctl(&pvt->F0->dev, EDAC_MOD_STR); 3620 else 3621 pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR); 3622 if (!pci_ctl) { 3623 pr_warn("%s(): Unable to create PCI control\n", __func__); 3624 pr_warn("%s(): PCI error report via EDAC not set\n", __func__); 3625 } 3626 } 3627 3628 static const struct x86_cpu_id amd64_cpuids[] = { 3629 X86_MATCH_VENDOR_FAM(AMD, 0x0F, NULL), 3630 X86_MATCH_VENDOR_FAM(AMD, 0x10, NULL), 3631 X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL), 3632 X86_MATCH_VENDOR_FAM(AMD, 0x16, NULL), 3633 X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL), 3634 X86_MATCH_VENDOR_FAM(HYGON, 0x18, NULL), 3635 X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL), 3636 { } 3637 }; 3638 MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); 3639 3640 static int __init amd64_edac_init(void) 3641 { 3642 const char *owner; 3643 int err = -ENODEV; 3644 int i; 3645 3646 owner = edac_get_owner(); 3647 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) 3648 return -EBUSY; 3649 3650 if (!x86_match_cpu(amd64_cpuids)) 3651 return -ENODEV; 3652 3653 if (amd_cache_northbridges() < 0) 3654 return -ENODEV; 3655 3656 opstate_init(); 3657 3658 err = -ENOMEM; 3659 ecc_stngs = kcalloc(amd_nb_num(), sizeof(ecc_stngs[0]), GFP_KERNEL); 3660 if (!ecc_stngs) 3661 goto err_free; 3662 3663 msrs = msrs_alloc(); 3664 if (!msrs) 3665 goto err_free; 3666 3667 for (i = 0; i < amd_nb_num(); i++) { 3668 err = probe_one_instance(i); 3669 if (err) { 3670 /* unwind properly */ 3671 while (--i >= 0) 3672 remove_one_instance(i); 3673 3674 goto err_pci; 3675 } 3676 } 3677 3678 if (!edac_has_mcs()) { 3679 err = -ENODEV; 3680 goto err_pci; 3681 } 3682 3683 /* register stuff with EDAC MCE */ 3684 if (report_gart_errors) 3685 amd_report_gart_errors(true); 3686 3687 if (boot_cpu_data.x86 >= 0x17) 3688 amd_register_ecc_decoder(decode_umc_error); 3689 else 3690 amd_register_ecc_decoder(decode_bus_error); 3691 3692 setup_pci_device(); 3693 3694 #ifdef CONFIG_X86_32 3695 amd64_err("%s on 32-bit is 
unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR); 3696 #endif 3697 3698 printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION); 3699 3700 return 0; 3701 3702 err_pci: 3703 msrs_free(msrs); 3704 msrs = NULL; 3705 3706 err_free: 3707 kfree(ecc_stngs); 3708 ecc_stngs = NULL; 3709 3710 return err; 3711 } 3712 3713 static void __exit amd64_edac_exit(void) 3714 { 3715 int i; 3716 3717 if (pci_ctl) 3718 edac_pci_release_generic_ctl(pci_ctl); 3719 3720 /* unregister from EDAC MCE */ 3721 amd_report_gart_errors(false); 3722 3723 if (boot_cpu_data.x86 >= 0x17) 3724 amd_unregister_ecc_decoder(decode_umc_error); 3725 else 3726 amd_unregister_ecc_decoder(decode_bus_error); 3727 3728 for (i = 0; i < amd_nb_num(); i++) 3729 remove_one_instance(i); 3730 3731 kfree(ecc_stngs); 3732 ecc_stngs = NULL; 3733 3734 msrs_free(msrs); 3735 msrs = NULL; 3736 } 3737 3738 module_init(amd64_edac_init); 3739 module_exit(amd64_edac_exit); 3740 3741 MODULE_LICENSE("GPL"); 3742 MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, " 3743 "Dave Peterson, Thayne Harbaugh"); 3744 MODULE_DESCRIPTION("MC support for AMD64 memory controllers - " 3745 EDAC_AMD64_VERSION); 3746 3747 module_param(edac_op_state, int, 0444); 3748 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); 3749