/*
 * Intel 3200/3210 Memory Controller kernel module
 * Copyright (C) 2008-2009 Akamai Technologies, Inc.
 * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
 *
 * This file may be distributed under the terms of the
 * GNU General Public License.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include <linux/io.h>
#include "edac_core.h"

#define I3200_REVISION		"1.1"

#define EDAC_MOD_STR		"i3200_edac"

#define PCI_DEVICE_ID_INTEL_3200_HB	0x29f0

#define I3200_RANKS		8
#define I3200_RANKS_PER_CHANNEL	4
#define I3200_CHANNELS		2

/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */

#define I3200_MCHBAR_LOW	0x48	/* MCH Memory Mapped Register BAR */
#define I3200_MCHBAR_HIGH	0x4c
#define I3200_MCHBAR_MASK	0xfffffc000ULL	/* bits 35:14 */
#define I3200_MMR_WINDOW_SIZE	16384

#define I3200_TOM		0xa0	/* Top of Memory (16b)
					 *
					 * 15:10 reserved
					 *  9:0  total populated physical memory
					 */
#define I3200_TOM_MASK		0x3ff	/* bits 9:0 */
#define I3200_TOM_SHIFT		26	/* 64MiB grain */

#define I3200_ERRSTS		0xc8	/* Error Status Register (16b)
					 *
					 * 15    reserved
					 * 14    Isochronous TBWRR Run Behind FIFO Full
					 *       (ITCV)
					 * 13    Isochronous TBWRR Run Behind FIFO Put
					 *       (ITSTV)
					 * 12    reserved
					 * 11    MCH Thermal Sensor Event
					 *       for SMI/SCI/SERR (GTSE)
					 * 10    reserved
					 *  9    LOCK to non-DRAM Memory Flag (LCKF)
					 *  8    reserved
					 *  7    DRAM Throttle Flag (DTF)
					 *  6:2  reserved
					 *  1    Multi-bit DRAM ECC Error Flag (DMERR)
					 *  0    Single-bit DRAM ECC Error Flag (DSERR)
					 */
#define I3200_ERRSTS_UE		0x0002
#define I3200_ERRSTS_CE		0x0001
#define I3200_ERRSTS_BITS	(I3200_ERRSTS_UE | I3200_ERRSTS_CE)


/* Intel MMIO register space - device 0 function 0 - MMR space */

#define I3200_C0DRB	0x200	/* Channel 0 DRAM Rank Boundary (16b x 4)
				 *
				 * 15:10 reserved
				 *  9:0  Channel 0 DRAM Rank Boundary Address
				 */
#define I3200_C1DRB	0x600	/* Channel 1 DRAM Rank Boundary (16b x 4) */
#define I3200_DRB_MASK	0x3ff	/* bits 9:0 */
#define I3200_DRB_SHIFT	26	/* 64MiB grain */

#define I3200_C0ECCERRLOG	0x280	/* Channel 0 ECC Error Log (64b)
					 *
					 * 63:48 Error Column Address (ERRCOL)
					 * 47:32 Error Row Address (ERRROW)
					 * 31:29 Error Bank Address (ERRBANK)
					 * 28:27 Error Rank Address (ERRRANK)
					 * 26:24 reserved
					 * 23:16 Error Syndrome (ERRSYND)
					 * 15:2  reserved
					 *  1    Multiple Bit Error Status (MERRSTS)
					 *  0    Correctable Error Status (CERRSTS)
					 */
#define I3200_C1ECCERRLOG	0x680	/* Chan 1 ECC Error Log (64b) */
#define I3200_ECCERRLOG_CE	0x1
#define I3200_ECCERRLOG_UE	0x2
#define I3200_ECCERRLOG_RANK_BITS	0x18000000
#define I3200_ECCERRLOG_RANK_SHIFT	27
#define I3200_ECCERRLOG_SYNDROME_BITS	0xff0000
#define I3200_ECCERRLOG_SYNDROME_SHIFT	16
#define I3200_CAPID0	0xe0	/* P.95 of spec for details */

struct i3200_priv {
	void __iomem *window;
};

static int nr_channels;

static int how_many_channels(struct pci_dev *pdev)
{
	unsigned char capid0_8b; /* 8th byte of CAPID0 */

	pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
	if (capid0_8b & 0x20) {	/* check DCD: Dual Channel Disable */
		debugf0("In single channel mode.\n");
		return 1;
	} else {
		debugf0("In dual channel mode.\n");
		return 2;
	}
}

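/*
 * Worked example of the ECCERRLOG decode done by the helpers below
 * (the register value is illustrative, not taken from hardware):
 *
 *   log = 0x10420001
 *     bits 28:27 (ERRRANK) = 2
 *     bits 23:16 (ERRSYND) = 0x42
 *     bit  0     (CERRSTS) = 1	-> correctable error
 *
 *   eccerrlog_syndrome(log) == 0x42
 *   eccerrlog_row(1, log)   == 2 | (1 * I3200_RANKS_PER_CHANNEL) == 6,
 *   i.e. the error is attributed to csrow 6 (channel 1, rank 2).
 */
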
static unsigned long eccerrlog_syndrome(u64 log)
{
	return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
		I3200_ECCERRLOG_SYNDROME_SHIFT;
}

static int eccerrlog_row(int channel, u64 log)
{
	u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
		I3200_ECCERRLOG_RANK_SHIFT);
	return rank | (channel * I3200_RANKS_PER_CHANNEL);
}

enum i3200_chips {
	I3200 = 0,
};

struct i3200_dev_info {
	const char *ctl_name;
};

struct i3200_error_info {
	u16 errsts;
	u16 errsts2;
	u64 eccerrlog[I3200_CHANNELS];
};

static const struct i3200_dev_info i3200_devs[] = {
	[I3200] = {
		.ctl_name = "i3200"
	},
};

static struct pci_dev *mci_pdev;
static int i3200_registered = 1;


static void i3200_clear_error_info(struct mem_ctl_info *mci)
{
	struct pci_dev *pdev;

	pdev = to_pci_dev(mci->dev);

	/*
	 * Clear any error bits.
	 * (Yes, we really clear bits by writing 1 to them.)
	 */
	pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
		I3200_ERRSTS_BITS);
}

static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
		struct i3200_error_info *info)
{
	struct pci_dev *pdev;
	struct i3200_priv *priv = mci->pvt_info;
	void __iomem *window = priv->window;

	pdev = to_pci_dev(mci->dev);

	/*
	 * This is a mess because there is no atomic way to read all the
	 * registers at once, and a CE in the log can be overwritten by a
	 * UE between reads.
	 */
	pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
	if (!(info->errsts & I3200_ERRSTS_BITS))
		return;

	info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
	if (nr_channels == 2)
		info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);

	pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);

	/*
	 * If the error is the same for both reads then the first set
	 * of reads is valid.  If there is a change then there is a CE
	 * with no info and the second set of reads is valid and
	 * should be UE info.
	 */
	if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
		info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
		if (nr_channels == 2)
			info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
	}

	i3200_clear_error_info(mci);
}

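/*
 * Example of the double-read logic above (the values are hypothetical):
 * if the first ERRSTS read returns 0x0001 (CE only) and the second
 * returns 0x0003 (CE | UE), then
 *
 *   (errsts ^ errsts2) & I3200_ERRSTS_BITS == 0x0002 != 0
 *
 * so the ECCERRLOG registers are read again (they now describe the UE),
 * and i3200_process_error_info() below reports the earlier CE via
 * edac_mc_handle_ce_no_info() before handling the UE from the fresh log.
 */
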
static void i3200_process_error_info(struct mem_ctl_info *mci,
		struct i3200_error_info *info)
{
	int channel;
	u64 log;

	if (!(info->errsts & I3200_ERRSTS_BITS))
		return;

	if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
		edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
		info->errsts = info->errsts2;
	}

	for (channel = 0; channel < nr_channels; channel++) {
		log = info->eccerrlog[channel];
		if (log & I3200_ECCERRLOG_UE) {
			edac_mc_handle_ue(mci, 0, 0,
				eccerrlog_row(channel, log),
				"i3200 UE");
		} else if (log & I3200_ECCERRLOG_CE) {
			edac_mc_handle_ce(mci, 0, 0,
				eccerrlog_syndrome(log),
				eccerrlog_row(channel, log), 0,
				"i3200 CE");
		}
	}
}

static void i3200_check(struct mem_ctl_info *mci)
{
	struct i3200_error_info info;

	debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
	i3200_get_and_clear_error_info(mci, &info);
	i3200_process_error_info(mci, &info);
}


static void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
{
	union {
		u64 mchbar;
		struct {
			u32 mchbar_low;
			u32 mchbar_high;
		};
	} u;
	void __iomem *window;

	pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
	pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
	u.mchbar &= I3200_MCHBAR_MASK;

	if (u.mchbar != (resource_size_t)u.mchbar) {
		printk(KERN_ERR
			"i3200: mmio space beyond accessible range (0x%llx)\n",
			(unsigned long long)u.mchbar);
		return NULL;
	}

	window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
	if (!window)
		printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
			(unsigned long long)u.mchbar);

	return window;
}


static void i3200_get_drbs(void __iomem *window,
	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
{
	int i;

	for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
		drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
		drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
	}
}

static bool i3200_is_stacked(struct pci_dev *pdev,
	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
{
	u16 tom;

	pci_read_config_word(pdev, I3200_TOM, &tom);
	tom &= I3200_TOM_MASK;

	return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
}

static unsigned long drb_to_nr_pages(
	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
	int channel, int rank)
{
	int n;

	n = drbs[channel][rank];
	if (rank > 0)
		n -= drbs[channel][rank - 1];
	if (stacked && (channel == 1) &&
		drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
		n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];

	n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
	return n;
}

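/*
 * Illustration of the DRB arithmetic above, assuming 4 KiB pages
 * (PAGE_SHIFT == 12) and a hypothetical, unstacked channel 0 with
 * DRB values { 0x010, 0x020, 0x020, 0x020 }:
 *
 *   rank 0:    n = 0x010         -> 16 * 64 MiB = 1 GiB
 *   rank 1:    n = 0x020 - 0x010 -> 16 * 64 MiB = 1 GiB
 *   ranks 2-3: n = 0                (unpopulated)
 *
 *   nr_pages for rank 0 = 16 << (26 - 12) = 262144 pages = 1 GiB.
 *
 * When the ranks are stacked (see i3200_is_stacked() above), the last
 * channel 1 boundary equals the top of memory, so channel 0's total is
 * subtracted from the channel 1 rank(s) that share that final boundary.
 */
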
static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
{
	int rc;
	int i;
	struct mem_ctl_info *mci = NULL;
	unsigned long last_page;
	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
	bool stacked;
	void __iomem *window;
	struct i3200_priv *priv;

	debugf0("MC: %s()\n", __func__);

	window = i3200_map_mchbar(pdev);
	if (!window)
		return -ENODEV;

	i3200_get_drbs(window, drbs);
	nr_channels = how_many_channels(pdev);

	mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS,
		nr_channels, 0);
	if (!mci) {
		rc = -ENOMEM;	/* don't leak the MCHBAR mapping */
		goto fail;
	}

	debugf3("MC: %s(): init mci\n", __func__);

	mci->dev = &pdev->dev;
	mci->mtype_cap = MEM_FLAG_DDR2;

	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;

	mci->mod_name = EDAC_MOD_STR;
	mci->mod_ver = I3200_REVISION;
	mci->ctl_name = i3200_devs[dev_idx].ctl_name;
	mci->dev_name = pci_name(pdev);
	mci->edac_check = i3200_check;
	mci->ctl_page_to_phys = NULL;
	priv = mci->pvt_info;
	priv->window = window;

	stacked = i3200_is_stacked(pdev, drbs);

	/*
	 * The dram rank boundary (DRB) reg values are boundary addresses
	 * for each DRAM rank with a granularity of 64MB.  DRB regs are
	 * cumulative; the last one will contain the total memory
	 * contained in all ranks.
	 */
	last_page = -1UL;
	for (i = 0; i < mci->nr_csrows; i++) {
		unsigned long nr_pages;
		struct csrow_info *csrow = &mci->csrows[i];

		nr_pages = drb_to_nr_pages(drbs, stacked,
			i / I3200_RANKS_PER_CHANNEL,
			i % I3200_RANKS_PER_CHANNEL);

		if (nr_pages == 0) {
			csrow->mtype = MEM_EMPTY;
			continue;
		}

		csrow->first_page = last_page + 1;
		last_page += nr_pages;
		csrow->last_page = last_page;
		csrow->nr_pages = nr_pages;

		csrow->grain = nr_pages << PAGE_SHIFT;
		csrow->mtype = MEM_DDR2;
		csrow->dtype = DEV_UNKNOWN;
		csrow->edac_mode = EDAC_UNKNOWN;
	}

	i3200_clear_error_info(mci);

	rc = -ENODEV;
	if (edac_mc_add_mc(mci)) {
		debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
		goto fail;
	}

	/* get this far and it's successful */
	debugf3("MC: %s(): success\n", __func__);
	return 0;

fail:
	iounmap(window);
	if (mci)
		edac_mc_free(mci);

	return rc;
}

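/*
 * Sketch of the csrow layout produced by the loop above for a
 * hypothetical, unstacked system with one 2 GiB rank per channel
 * (4 KiB pages):
 *
 *   csrow 0 (ch 0, rank 0): first_page 0x00000, last_page 0x7ffff
 *   csrows 1-3:             MEM_EMPTY
 *   csrow 4 (ch 1, rank 0): first_page 0x80000, last_page 0xfffff
 *   csrows 5-7:             MEM_EMPTY
 *
 * The csrow index is channel * I3200_RANKS_PER_CHANNEL + rank, matching
 * eccerrlog_row(); the page ranges are assigned sequentially by the
 * loop, independent of how the hardware actually maps addresses.
 */
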
static int __devinit i3200_init_one(struct pci_dev *pdev,
		const struct pci_device_id *ent)
{
	int rc;

	debugf0("MC: %s()\n", __func__);

	if (pci_enable_device(pdev) < 0)
		return -EIO;

	rc = i3200_probe1(pdev, ent->driver_data);
	if (!mci_pdev)
		mci_pdev = pci_dev_get(pdev);

	return rc;
}

static void __devexit i3200_remove_one(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct i3200_priv *priv;

	debugf0("%s()\n", __func__);

	mci = edac_mc_del_mc(&pdev->dev);
	if (!mci)
		return;

	priv = mci->pvt_info;
	iounmap(priv->window);

	edac_mc_free(mci);
}

static const struct pci_device_id i3200_pci_tbl[] __devinitdata = {
	{
		PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
		I3200},
	{
		0,
	}	/* 0 terminated list. */
};

MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);

static struct pci_driver i3200_driver = {
	.name = EDAC_MOD_STR,
	.probe = i3200_init_one,
	.remove = __devexit_p(i3200_remove_one),
	.id_table = i3200_pci_tbl,
};

static int __init i3200_init(void)
{
	int pci_rc;

	debugf3("MC: %s()\n", __func__);

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	pci_rc = pci_register_driver(&i3200_driver);
	if (pci_rc < 0)
		goto fail0;

	if (!mci_pdev) {
		i3200_registered = 0;
		mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			PCI_DEVICE_ID_INTEL_3200_HB, NULL);
		if (!mci_pdev) {
			debugf0("i3200 pci_get_device fail\n");
			pci_rc = -ENODEV;
			goto fail1;
		}

		pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
		if (pci_rc < 0) {
			debugf0("i3200 init fail\n");
			pci_rc = -ENODEV;
			goto fail1;
		}
	}

	return 0;

fail1:
	pci_unregister_driver(&i3200_driver);

fail0:
	if (mci_pdev)
		pci_dev_put(mci_pdev);

	return pci_rc;
}

static void __exit i3200_exit(void)
{
	debugf3("MC: %s()\n", __func__);

	pci_unregister_driver(&i3200_driver);
	if (!i3200_registered) {
		i3200_remove_one(mci_pdev);
		pci_dev_put(mci_pdev);
	}
}

module_init(i3200_init);
module_exit(i3200_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Akamai Technologies, Inc.");
MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
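
/*
 * Typical usage, assuming the EDAC core's standard sysfs layout
 * (paths are illustrative and may differ by kernel version):
 *
 *   # modprobe i3200_edac edac_op_state=0        (polled operation)
 *   # cat /sys/devices/system/edac/mc/mc0/ce_count
 *   # cat /sys/devices/system/edac/mc/mc0/ue_count
 *
 * Per-csrow counters appear under mc0/csrowN/ once the controller has
 * been registered by i3200_probe1().
 */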