1 /* 2 * Intel 5400 class Memory Controllers kernel module (Seaburg) 3 * 4 * This file may be distributed under the terms of the 5 * GNU General Public License. 6 * 7 * Copyright (c) 2008 by: 8 * Ben Woodard <woodard@redhat.com> 9 * Mauro Carvalho Chehab <mchehab@redhat.com> 10 * 11 * Red Hat Inc. http://www.redhat.com 12 * 13 * Forked and adapted from the i5000_edac driver which was 14 * written by Douglas Thompson Linux Networx <norsk5@xmission.com> 15 * 16 * This module is based on the following document: 17 * 18 * Intel 5400 Chipset Memory Controller Hub (MCH) - Datasheet 19 * http://developer.intel.com/design/chipsets/datashts/313070.htm 20 * 21 */ 22 23 #include <linux/module.h> 24 #include <linux/init.h> 25 #include <linux/pci.h> 26 #include <linux/pci_ids.h> 27 #include <linux/slab.h> 28 #include <linux/edac.h> 29 #include <linux/mmzone.h> 30 31 #include "edac_core.h" 32 33 /* 34 * Alter this version for the I5400 module when modifications are made 35 */ 36 #define I5400_REVISION " Ver: 1.0.0" 37 38 #define EDAC_MOD_STR "i5400_edac" 39 40 #define i5400_printk(level, fmt, arg...) \ 41 edac_printk(level, "i5400", fmt, ##arg) 42 43 #define i5400_mc_printk(mci, level, fmt, arg...) \ 44 edac_mc_chipset_printk(mci, level, "i5400", fmt, ##arg) 45 46 /* Limits for i5400 */ 47 #define NUM_MTRS_PER_BRANCH 4 48 #define CHANNELS_PER_BRANCH 2 49 #define MAX_DIMMS_PER_CHANNEL NUM_MTRS_PER_BRANCH 50 #define MAX_CHANNELS 4 51 /* max possible csrows per channel */ 52 #define MAX_CSROWS (MAX_DIMMS_PER_CHANNEL) 53 54 /* Device 16, 55 * Function 0: System Address 56 * Function 1: Memory Branch Map, Control, Errors Register 57 * Function 2: FSB Error Registers 58 * 59 * All 3 functions of Device 16 (0,1,2) share the SAME DID and 60 * uses PCI_DEVICE_ID_INTEL_5400_ERR for device 16 (0,1,2), 61 * PCI_DEVICE_ID_INTEL_5400_FBD0 and PCI_DEVICE_ID_INTEL_5400_FBD1 62 * for device 21 (0,1). 63 */ 64 65 /* OFFSETS for Function 0 */ 66 #define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */ 67 #define MAXCH 0x56 /* Max Channel Number */ 68 #define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */ 69 70 /* OFFSETS for Function 1 */ 71 #define TOLM 0x6C 72 #define REDMEMB 0x7C 73 #define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0] indicate EVEN */ 74 #define MIR0 0x80 75 #define MIR1 0x84 76 #define AMIR0 0x8c 77 #define AMIR1 0x90 78 79 /* Fatal error registers */ 80 #define FERR_FAT_FBD 0x98 /* also called as FERR_FAT_FB_DIMM at datasheet */ 81 #define FERR_FAT_FBDCHAN (3<<28) /* channel index where the highest-order error occurred */ 82 83 #define NERR_FAT_FBD 0x9c 84 #define FERR_NF_FBD 0xa0 /* also called as FERR_NFAT_FB_DIMM at datasheet */ 85 86 /* Non-fatal error register */ 87 #define NERR_NF_FBD 0xa4 88 89 /* Enable error mask */ 90 #define EMASK_FBD 0xa8 91 92 #define ERR0_FBD 0xac 93 #define ERR1_FBD 0xb0 94 #define ERR2_FBD 0xb4 95 #define MCERR_FBD 0xb8 96 97 /* No OFFSETS for Device 16 Function 2 */ 98 99 /* 100 * Device 21, 101 * Function 0: Memory Map Branch 0 102 * 103 * Device 22, 104 * Function 0: Memory Map Branch 1 105 */ 106 107 /* OFFSETS for Function 0 */ 108 #define AMBPRESENT_0 0x64 109 #define AMBPRESENT_1 0x66 110 #define MTR0 0x80 111 #define MTR1 0x82 112 #define MTR2 0x84 113 #define MTR3 0x86 114 115 /* OFFSETS for Function 1 */ 116 #define NRECFGLOG 0x74 117 #define RECFGLOG 0x78 118 #define NRECMEMA 0xbe 119 #define NRECMEMB 0xc0 120 #define NRECFB_DIMMA 0xc4 121 #define NRECFB_DIMMB 0xc8 122 #define NRECFB_DIMMC 0xcc 123 #define NRECFB_DIMMD 0xd0 124 #define NRECFB_DIMME 0xd4 125 #define NRECFB_DIMMF 0xd8 126 #define REDMEMA 0xdC 127 #define RECMEMA 0xf0 128 #define RECMEMB 0xf4 129 #define RECFB_DIMMA 0xf8 130 #define RECFB_DIMMB 0xec 131 #define RECFB_DIMMC 0xf0 132 #define RECFB_DIMMD 0xf4 133 #define RECFB_DIMME 0xf8 134 #define RECFB_DIMMF 0xfC 135 136 /* 137 * Error indicator bits and masks 138 * Error masks are according with Table 5-17 of i5400 datasheet 139 */ 140 141 enum error_mask { 142 EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */ 143 EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */ 144 EMASK_M3 = 1<<2, /* Reserved */ 145 EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */ 146 EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */ 147 EMASK_M6 = 1<<5, /* Unsupported on i5400 */ 148 EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ 149 EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */ 150 EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */ 151 EMASK_M10 = 1<<9, /* Unsupported on i5400 */ 152 EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ 153 EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */ 154 EMASK_M13 = 1<<12, /* Memory Write error on first attempt */ 155 EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */ 156 EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */ 157 EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */ 158 EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */ 159 EMASK_M18 = 1<<17, /* Unsupported on i5400 */ 160 EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */ 161 EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */ 162 EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */ 163 EMASK_M22 = 1<<21, /* SPD protocol Error */ 164 EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */ 165 EMASK_M24 = 1<<23, /* Refresh error */ 166 EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */ 167 EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */ 168 EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */ 169 EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */ 170 EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */ 171 }; 172 173 /* 174 * Names to translate bit error into something useful 175 */ 176 static const char *error_name[] = { 177 [0] = "Memory Write error on non-redundant retry", 178 [1] = "Memory or FB-DIMM configuration CRC read error", 179 /* Reserved */ 180 [3] = "Uncorrectable Data ECC on Replay", 181 [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", 182 /* M6 Unsupported on i5400 */ 183 [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", 184 [7] = "Aliased Uncorrectable Patrol Data ECC", 185 [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", 186 /* M10 Unsupported on i5400 */ 187 [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", 188 [11] = "Non-Aliased Uncorrectable Patrol Data ECC", 189 [12] = "Memory Write error on first attempt", 190 [13] = "FB-DIMM Configuration Write error on first attempt", 191 [14] = "Memory or FB-DIMM configuration CRC read error", 192 [15] = "Channel Failed-Over Occurred", 193 [16] = "Correctable Non-Mirrored Demand Data ECC", 194 /* M18 Unsupported on i5400 */ 195 [18] = "Correctable Resilver- or Spare-Copy Data ECC", 196 [19] = "Correctable Patrol Data ECC", 197 [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status", 198 [21] = "SPD protocol Error", 199 [22] = "Non-Redundant Fast Reset Timeout", 200 [23] = "Refresh error", 201 [24] = "Memory Write error on redundant retry", 202 [25] = "Redundant Fast Reset Timeout", 203 [26] = "Correctable Counter Threshold Exceeded", 204 [27] = "DIMM-Spare Copy Completed", 205 [28] = "DIMM-Isolation Completed", 206 }; 207 208 /* Fatal errors */ 209 #define ERROR_FAT_MASK (EMASK_M1 | \ 210 EMASK_M2 | \ 211 EMASK_M23) 212 213 /* Correctable errors */ 214 #define ERROR_NF_CORRECTABLE (EMASK_M27 | \ 215 EMASK_M20 | \ 216 EMASK_M19 | \ 217 EMASK_M18 | \ 218 EMASK_M17 | \ 219 EMASK_M16) 220 #define ERROR_NF_DIMM_SPARE (EMASK_M29 | \ 221 EMASK_M28) 222 #define ERROR_NF_SPD_PROTOCOL (EMASK_M22) 223 #define ERROR_NF_NORTH_CRC (EMASK_M21) 224 225 /* Recoverable errors */ 226 #define ERROR_NF_RECOVERABLE (EMASK_M26 | \ 227 EMASK_M25 | \ 228 EMASK_M24 | \ 229 EMASK_M15 | \ 230 EMASK_M14 | \ 231 EMASK_M13 | \ 232 EMASK_M12 | \ 233 EMASK_M11 | \ 234 EMASK_M9 | \ 235 EMASK_M8 | \ 236 EMASK_M7 | \ 237 EMASK_M5) 238 239 /* uncorrectable errors */ 240 #define ERROR_NF_UNCORRECTABLE (EMASK_M4) 241 242 /* mask to all non-fatal errors */ 243 #define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \ 244 ERROR_NF_UNCORRECTABLE | \ 245 ERROR_NF_RECOVERABLE | \ 246 ERROR_NF_DIMM_SPARE | \ 247 ERROR_NF_SPD_PROTOCOL | \ 248 ERROR_NF_NORTH_CRC) 249 250 /* 251 * Define error masks for the several registers 252 */ 253 254 /* Enable all fatal and non fatal errors */ 255 #define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK) 256 257 /* mask for fatal error registers */ 258 #define FERR_FAT_MASK ERROR_FAT_MASK 259 260 /* masks for non-fatal error register */ 261 static inline int to_nf_mask(unsigned int mask) 262 { 263 return (mask & EMASK_M29) | (mask >> 3); 264 }; 265 266 static inline int from_nf_ferr(unsigned int mask) 267 { 268 return (mask & EMASK_M29) | /* Bit 28 */ 269 (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */ 270 }; 271 272 #define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK) 273 #define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE) 274 #define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE) 275 #define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL) 276 #define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC) 277 #define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) 278 #define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) 279 280 /* Defines to extract the vaious fields from the 281 * MTRx - Memory Technology Registers 282 */ 283 #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 10)) 284 #define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 9)) 285 #define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 8)) ? 8 : 4) 286 #define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 6)) ? 8 : 4) 287 #define MTR_DRAM_BANKS_ADDR_BITS(mtr) ((MTR_DRAM_BANKS(mtr) == 8) ? 3 : 2) 288 #define MTR_DIMM_RANK(mtr) (((mtr) >> 5) & 0x1) 289 #define MTR_DIMM_RANK_ADDR_BITS(mtr) (MTR_DIMM_RANK(mtr) ? 2 : 1) 290 #define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3) 291 #define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13) 292 #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) 293 #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) 294 295 /* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */ 296 static inline int extract_fbdchan_indx(u32 x) 297 { 298 return (x>>28) & 0x3; 299 } 300 301 #ifdef CONFIG_EDAC_DEBUG 302 /* MTR NUMROW */ 303 static const char *numrow_toString[] = { 304 "8,192 - 13 rows", 305 "16,384 - 14 rows", 306 "32,768 - 15 rows", 307 "65,536 - 16 rows" 308 }; 309 310 /* MTR NUMCOL */ 311 static const char *numcol_toString[] = { 312 "1,024 - 10 columns", 313 "2,048 - 11 columns", 314 "4,096 - 12 columns", 315 "reserved" 316 }; 317 #endif 318 319 /* Device name and register DID (Device ID) */ 320 struct i5400_dev_info { 321 const char *ctl_name; /* name for this device */ 322 u16 fsb_mapping_errors; /* DID for the branchmap,control */ 323 }; 324 325 /* Table of devices attributes supported by this driver */ 326 static const struct i5400_dev_info i5400_devs[] = { 327 { 328 .ctl_name = "I5400", 329 .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_5400_ERR, 330 }, 331 }; 332 333 struct i5400_dimm_info { 334 int megabytes; /* size, 0 means not present */ 335 }; 336 337 /* driver private data structure */ 338 struct i5400_pvt { 339 struct pci_dev *system_address; /* 16.0 */ 340 struct pci_dev *branchmap_werrors; /* 16.1 */ 341 struct pci_dev *fsb_error_regs; /* 16.2 */ 342 struct pci_dev *branch_0; /* 21.0 */ 343 struct pci_dev *branch_1; /* 22.0 */ 344 345 u16 tolm; /* top of low memory */ 346 u64 ambase; /* AMB BAR */ 347 348 u16 mir0, mir1; 349 350 u16 b0_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */ 351 u16 b0_ambpresent0; /* Branch 0, Channel 0 */ 352 u16 b0_ambpresent1; /* Brnach 0, Channel 1 */ 353 354 u16 b1_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */ 355 u16 b1_ambpresent0; /* Branch 1, Channel 8 */ 356 u16 b1_ambpresent1; /* Branch 1, Channel 1 */ 357 358 /* DIMM information matrix, allocating architecture maximums */ 359 struct i5400_dimm_info dimm_info[MAX_CSROWS][MAX_CHANNELS]; 360 361 /* Actual values for this controller */ 362 int maxch; /* Max channels */ 363 int maxdimmperch; /* Max DIMMs per channel */ 364 }; 365 366 /* I5400 MCH error information retrieved from Hardware */ 367 struct i5400_error_info { 368 /* These registers are always read from the MC */ 369 u32 ferr_fat_fbd; /* First Errors Fatal */ 370 u32 nerr_fat_fbd; /* Next Errors Fatal */ 371 u32 ferr_nf_fbd; /* First Errors Non-Fatal */ 372 u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ 373 374 /* These registers are input ONLY if there was a Recoverable Error */ 375 u32 redmemb; /* Recoverable Mem Data Error log B */ 376 u16 recmema; /* Recoverable Mem Error log A */ 377 u32 recmemb; /* Recoverable Mem Error log B */ 378 379 /* These registers are input ONLY if there was a Non-Rec Error */ 380 u16 nrecmema; /* Non-Recoverable Mem log A */ 381 u16 nrecmemb; /* Non-Recoverable Mem log B */ 382 383 }; 384 385 /* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and 386 5400 better to use an inline function than a macro in this case */ 387 static inline int nrec_bank(struct i5400_error_info *info) 388 { 389 return ((info->nrecmema) >> 12) & 0x7; 390 } 391 static inline int nrec_rank(struct i5400_error_info *info) 392 { 393 return ((info->nrecmema) >> 8) & 0xf; 394 } 395 static inline int nrec_buf_id(struct i5400_error_info *info) 396 { 397 return ((info->nrecmema)) & 0xff; 398 } 399 static inline int nrec_rdwr(struct i5400_error_info *info) 400 { 401 return (info->nrecmemb) >> 31; 402 } 403 /* This applies to both NREC and REC string so it can be used with nrec_rdwr 404 and rec_rdwr */ 405 static inline const char *rdwr_str(int rdwr) 406 { 407 return rdwr ? "Write" : "Read"; 408 } 409 static inline int nrec_cas(struct i5400_error_info *info) 410 { 411 return ((info->nrecmemb) >> 16) & 0x1fff; 412 } 413 static inline int nrec_ras(struct i5400_error_info *info) 414 { 415 return (info->nrecmemb) & 0xffff; 416 } 417 static inline int rec_bank(struct i5400_error_info *info) 418 { 419 return ((info->recmema) >> 12) & 0x7; 420 } 421 static inline int rec_rank(struct i5400_error_info *info) 422 { 423 return ((info->recmema) >> 8) & 0xf; 424 } 425 static inline int rec_rdwr(struct i5400_error_info *info) 426 { 427 return (info->recmemb) >> 31; 428 } 429 static inline int rec_cas(struct i5400_error_info *info) 430 { 431 return ((info->recmemb) >> 16) & 0x1fff; 432 } 433 static inline int rec_ras(struct i5400_error_info *info) 434 { 435 return (info->recmemb) & 0xffff; 436 } 437 438 static struct edac_pci_ctl_info *i5400_pci; 439 440 /* 441 * i5400_get_error_info Retrieve the hardware error information from 442 * the hardware and cache it in the 'info' 443 * structure 444 */ 445 static void i5400_get_error_info(struct mem_ctl_info *mci, 446 struct i5400_error_info *info) 447 { 448 struct i5400_pvt *pvt; 449 u32 value; 450 451 pvt = mci->pvt_info; 452 453 /* read in the 1st FATAL error register */ 454 pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value); 455 456 /* Mask only the bits that the doc says are valid 457 */ 458 value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); 459 460 /* If there is an error, then read in the 461 NEXT FATAL error register and the Memory Error Log Register A 462 */ 463 if (value & FERR_FAT_MASK) { 464 info->ferr_fat_fbd = value; 465 466 /* harvest the various error data we need */ 467 pci_read_config_dword(pvt->branchmap_werrors, 468 NERR_FAT_FBD, &info->nerr_fat_fbd); 469 pci_read_config_word(pvt->branchmap_werrors, 470 NRECMEMA, &info->nrecmema); 471 pci_read_config_word(pvt->branchmap_werrors, 472 NRECMEMB, &info->nrecmemb); 473 474 /* Clear the error bits, by writing them back */ 475 pci_write_config_dword(pvt->branchmap_werrors, 476 FERR_FAT_FBD, value); 477 } else { 478 info->ferr_fat_fbd = 0; 479 info->nerr_fat_fbd = 0; 480 info->nrecmema = 0; 481 info->nrecmemb = 0; 482 } 483 484 /* read in the 1st NON-FATAL error register */ 485 pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value); 486 487 /* If there is an error, then read in the 1st NON-FATAL error 488 * register as well */ 489 if (value & FERR_NF_MASK) { 490 info->ferr_nf_fbd = value; 491 492 /* harvest the various error data we need */ 493 pci_read_config_dword(pvt->branchmap_werrors, 494 NERR_NF_FBD, &info->nerr_nf_fbd); 495 pci_read_config_word(pvt->branchmap_werrors, 496 RECMEMA, &info->recmema); 497 pci_read_config_dword(pvt->branchmap_werrors, 498 RECMEMB, &info->recmemb); 499 pci_read_config_dword(pvt->branchmap_werrors, 500 REDMEMB, &info->redmemb); 501 502 /* Clear the error bits, by writing them back */ 503 pci_write_config_dword(pvt->branchmap_werrors, 504 FERR_NF_FBD, value); 505 } else { 506 info->ferr_nf_fbd = 0; 507 info->nerr_nf_fbd = 0; 508 info->recmema = 0; 509 info->recmemb = 0; 510 info->redmemb = 0; 511 } 512 } 513 514 /* 515 * i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci, 516 * struct i5400_error_info *info, 517 * int handle_errors); 518 * 519 * handle the Intel FATAL and unrecoverable errors, if any 520 */ 521 static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci, 522 struct i5400_error_info *info, 523 unsigned long allErrors) 524 { 525 char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; 526 int branch; 527 int channel; 528 int bank; 529 int buf_id; 530 int rank; 531 int rdwr; 532 int ras, cas; 533 int errnum; 534 char *type = NULL; 535 536 if (!allErrors) 537 return; /* if no error, return now */ 538 539 if (allErrors & ERROR_FAT_MASK) 540 type = "FATAL"; 541 else if (allErrors & FERR_NF_UNCORRECTABLE) 542 type = "NON-FATAL uncorrected"; 543 else 544 type = "NON-FATAL recoverable"; 545 546 /* ONLY ONE of the possible error bits will be set, as per the docs */ 547 548 branch = extract_fbdchan_indx(info->ferr_fat_fbd); 549 channel = branch; 550 551 /* Use the NON-Recoverable macros to extract data */ 552 bank = nrec_bank(info); 553 rank = nrec_rank(info); 554 buf_id = nrec_buf_id(info); 555 rdwr = nrec_rdwr(info); 556 ras = nrec_ras(info); 557 cas = nrec_cas(info); 558 559 debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " 560 "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", 561 rank, channel, channel + 1, branch >> 1, bank, 562 buf_id, rdwr_str(rdwr), ras, cas); 563 564 /* Only 1 bit will be on */ 565 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); 566 567 /* Form out message */ 568 snprintf(msg, sizeof(msg), 569 "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " 570 "RAS=%d CAS=%d %s Err=0x%lx (%s))", 571 type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, 572 type, allErrors, error_name[errnum]); 573 574 /* Call the helper to output message */ 575 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); 576 } 577 578 /* 579 * i5400_process_fatal_error_info(struct mem_ctl_info *mci, 580 * struct i5400_error_info *info, 581 * int handle_errors); 582 * 583 * handle the Intel NON-FATAL errors, if any 584 */ 585 static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci, 586 struct i5400_error_info *info) 587 { 588 char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; 589 unsigned long allErrors; 590 int branch; 591 int channel; 592 int bank; 593 int rank; 594 int rdwr; 595 int ras, cas; 596 int errnum; 597 598 /* mask off the Error bits that are possible */ 599 allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK); 600 if (!allErrors) 601 return; /* if no error, return now */ 602 603 /* ONLY ONE of the possible error bits will be set, as per the docs */ 604 605 if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) { 606 i5400_proccess_non_recoverable_info(mci, info, allErrors); 607 return; 608 } 609 610 /* Correctable errors */ 611 if (allErrors & ERROR_NF_CORRECTABLE) { 612 debugf0("\tCorrected bits= 0x%lx\n", allErrors); 613 614 branch = extract_fbdchan_indx(info->ferr_nf_fbd); 615 616 channel = 0; 617 if (REC_ECC_LOCATOR_ODD(info->redmemb)) 618 channel = 1; 619 620 /* Convert channel to be based from zero, instead of 621 * from branch base of 0 */ 622 channel += branch; 623 624 bank = rec_bank(info); 625 rank = rec_rank(info); 626 rdwr = rec_rdwr(info); 627 ras = rec_ras(info); 628 cas = rec_cas(info); 629 630 /* Only 1 bit will be on */ 631 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); 632 633 debugf0("\t\tCSROW= %d Channel= %d (Branch %d " 634 "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", 635 rank, channel, branch >> 1, bank, 636 rdwr_str(rdwr), ras, cas); 637 638 /* Form out message */ 639 snprintf(msg, sizeof(msg), 640 "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s " 641 "RAS=%d CAS=%d, CE Err=0x%lx (%s))", 642 branch >> 1, bank, rdwr_str(rdwr), ras, cas, 643 allErrors, error_name[errnum]); 644 645 /* Call the helper to output message */ 646 edac_mc_handle_fbd_ce(mci, rank, channel, msg); 647 648 return; 649 } 650 651 /* Miscellaneous errors */ 652 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); 653 654 branch = extract_fbdchan_indx(info->ferr_nf_fbd); 655 656 i5400_mc_printk(mci, KERN_EMERG, 657 "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", 658 branch >> 1, allErrors, error_name[errnum]); 659 } 660 661 /* 662 * i5400_process_error_info Process the error info that is 663 * in the 'info' structure, previously retrieved from hardware 664 */ 665 static void i5400_process_error_info(struct mem_ctl_info *mci, 666 struct i5400_error_info *info) 667 { u32 allErrors; 668 669 /* First handle any fatal errors that occurred */ 670 allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); 671 i5400_proccess_non_recoverable_info(mci, info, allErrors); 672 673 /* now handle any non-fatal errors that occurred */ 674 i5400_process_nonfatal_error_info(mci, info); 675 } 676 677 /* 678 * i5400_clear_error Retrieve any error from the hardware 679 * but do NOT process that error. 680 * Used for 'clearing' out of previous errors 681 * Called by the Core module. 682 */ 683 static void i5400_clear_error(struct mem_ctl_info *mci) 684 { 685 struct i5400_error_info info; 686 687 i5400_get_error_info(mci, &info); 688 } 689 690 /* 691 * i5400_check_error Retrieve and process errors reported by the 692 * hardware. Called by the Core module. 693 */ 694 static void i5400_check_error(struct mem_ctl_info *mci) 695 { 696 struct i5400_error_info info; 697 debugf4("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__); 698 i5400_get_error_info(mci, &info); 699 i5400_process_error_info(mci, &info); 700 } 701 702 /* 703 * i5400_put_devices 'put' all the devices that we have 704 * reserved via 'get' 705 */ 706 static void i5400_put_devices(struct mem_ctl_info *mci) 707 { 708 struct i5400_pvt *pvt; 709 710 pvt = mci->pvt_info; 711 712 /* Decrement usage count for devices */ 713 pci_dev_put(pvt->branch_1); 714 pci_dev_put(pvt->branch_0); 715 pci_dev_put(pvt->fsb_error_regs); 716 pci_dev_put(pvt->branchmap_werrors); 717 } 718 719 /* 720 * i5400_get_devices Find and perform 'get' operation on the MCH's 721 * device/functions we want to reference for this driver 722 * 723 * Need to 'get' device 16 func 1 and func 2 724 */ 725 static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) 726 { 727 struct i5400_pvt *pvt; 728 struct pci_dev *pdev; 729 730 pvt = mci->pvt_info; 731 pvt->branchmap_werrors = NULL; 732 pvt->fsb_error_regs = NULL; 733 pvt->branch_0 = NULL; 734 pvt->branch_1 = NULL; 735 736 /* Attempt to 'get' the MCH register we want */ 737 pdev = NULL; 738 while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) { 739 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 740 PCI_DEVICE_ID_INTEL_5400_ERR, pdev); 741 if (!pdev) { 742 /* End of list, leave */ 743 i5400_printk(KERN_ERR, 744 "'system address,Process Bus' " 745 "device not found:" 746 "vendor 0x%x device 0x%x ERR funcs " 747 "(broken BIOS?)\n", 748 PCI_VENDOR_ID_INTEL, 749 PCI_DEVICE_ID_INTEL_5400_ERR); 750 goto error; 751 } 752 753 /* Store device 16 funcs 1 and 2 */ 754 switch (PCI_FUNC(pdev->devfn)) { 755 case 1: 756 pvt->branchmap_werrors = pdev; 757 break; 758 case 2: 759 pvt->fsb_error_regs = pdev; 760 break; 761 } 762 } 763 764 debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", 765 pci_name(pvt->system_address), 766 pvt->system_address->vendor, pvt->system_address->device); 767 debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", 768 pci_name(pvt->branchmap_werrors), 769 pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); 770 debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", 771 pci_name(pvt->fsb_error_regs), 772 pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); 773 774 pvt->branch_0 = pci_get_device(PCI_VENDOR_ID_INTEL, 775 PCI_DEVICE_ID_INTEL_5400_FBD0, NULL); 776 if (!pvt->branch_0) { 777 i5400_printk(KERN_ERR, 778 "MC: 'BRANCH 0' device not found:" 779 "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", 780 PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0); 781 goto error; 782 } 783 784 /* If this device claims to have more than 2 channels then 785 * fetch Branch 1's information 786 */ 787 if (pvt->maxch < CHANNELS_PER_BRANCH) 788 return 0; 789 790 pvt->branch_1 = pci_get_device(PCI_VENDOR_ID_INTEL, 791 PCI_DEVICE_ID_INTEL_5400_FBD1, NULL); 792 if (!pvt->branch_1) { 793 i5400_printk(KERN_ERR, 794 "MC: 'BRANCH 1' device not found:" 795 "vendor 0x%x device 0x%x Func 0 " 796 "(broken BIOS?)\n", 797 PCI_VENDOR_ID_INTEL, 798 PCI_DEVICE_ID_INTEL_5400_FBD1); 799 goto error; 800 } 801 802 return 0; 803 804 error: 805 i5400_put_devices(mci); 806 return -ENODEV; 807 } 808 809 /* 810 * determine_amb_present 811 * 812 * the information is contained in NUM_MTRS_PER_BRANCH different 813 * registers determining which of the NUM_MTRS_PER_BRANCH requires 814 * knowing which channel is in question 815 * 816 * 2 branches, each with 2 channels 817 * b0_ambpresent0 for channel '0' 818 * b0_ambpresent1 for channel '1' 819 * b1_ambpresent0 for channel '2' 820 * b1_ambpresent1 for channel '3' 821 */ 822 static int determine_amb_present_reg(struct i5400_pvt *pvt, int channel) 823 { 824 int amb_present; 825 826 if (channel < CHANNELS_PER_BRANCH) { 827 if (channel & 0x1) 828 amb_present = pvt->b0_ambpresent1; 829 else 830 amb_present = pvt->b0_ambpresent0; 831 } else { 832 if (channel & 0x1) 833 amb_present = pvt->b1_ambpresent1; 834 else 835 amb_present = pvt->b1_ambpresent0; 836 } 837 838 return amb_present; 839 } 840 841 /* 842 * determine_mtr(pvt, csrow, channel) 843 * 844 * return the proper MTR register as determine by the csrow and desired channel 845 */ 846 static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel) 847 { 848 int mtr; 849 int n; 850 851 /* There is one MTR for each slot pair of FB-DIMMs, 852 Each slot pair may be at branch 0 or branch 1. 853 */ 854 n = csrow; 855 856 if (n >= NUM_MTRS_PER_BRANCH) { 857 debugf0("ERROR: trying to access an invalid csrow: %d\n", 858 csrow); 859 return 0; 860 } 861 862 if (channel < CHANNELS_PER_BRANCH) 863 mtr = pvt->b0_mtr[n]; 864 else 865 mtr = pvt->b1_mtr[n]; 866 867 return mtr; 868 } 869 870 /* 871 */ 872 static void decode_mtr(int slot_row, u16 mtr) 873 { 874 int ans; 875 876 ans = MTR_DIMMS_PRESENT(mtr); 877 878 debugf2("\tMTR%d=0x%x: DIMMs are %s\n", slot_row, mtr, 879 ans ? "Present" : "NOT Present"); 880 if (!ans) 881 return; 882 883 debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); 884 885 debugf2("\t\tELECTRICAL THROTTLING is %s\n", 886 MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); 887 888 debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); 889 debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single"); 890 debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); 891 debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); 892 } 893 894 static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel, 895 struct i5400_dimm_info *dinfo) 896 { 897 int mtr; 898 int amb_present_reg; 899 int addrBits; 900 901 mtr = determine_mtr(pvt, csrow, channel); 902 if (MTR_DIMMS_PRESENT(mtr)) { 903 amb_present_reg = determine_amb_present_reg(pvt, channel); 904 905 /* Determine if there is a DIMM present in this DIMM slot */ 906 if (amb_present_reg & (1 << csrow)) { 907 /* Start with the number of bits for a Bank 908 * on the DRAM */ 909 addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr); 910 /* Add thenumber of ROW bits */ 911 addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); 912 /* add the number of COLUMN bits */ 913 addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); 914 /* add the number of RANK bits */ 915 addrBits += MTR_DIMM_RANK(mtr); 916 917 addrBits += 6; /* add 64 bits per DIMM */ 918 addrBits -= 20; /* divide by 2^^20 */ 919 addrBits -= 3; /* 8 bits per bytes */ 920 921 dinfo->megabytes = 1 << addrBits; 922 } 923 } 924 } 925 926 /* 927 * calculate_dimm_size 928 * 929 * also will output a DIMM matrix map, if debug is enabled, for viewing 930 * how the DIMMs are populated 931 */ 932 static void calculate_dimm_size(struct i5400_pvt *pvt) 933 { 934 struct i5400_dimm_info *dinfo; 935 int csrow, max_csrows; 936 char *p, *mem_buffer; 937 int space, n; 938 int channel; 939 940 /* ================= Generate some debug output ================= */ 941 space = PAGE_SIZE; 942 mem_buffer = p = kmalloc(space, GFP_KERNEL); 943 if (p == NULL) { 944 i5400_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n", 945 __FILE__, __func__); 946 return; 947 } 948 949 /* Scan all the actual CSROWS 950 * and calculate the information for each DIMM 951 * Start with the highest csrow first, to display it first 952 * and work toward the 0th csrow 953 */ 954 max_csrows = pvt->maxdimmperch; 955 for (csrow = max_csrows - 1; csrow >= 0; csrow--) { 956 957 /* on an odd csrow, first output a 'boundary' marker, 958 * then reset the message buffer */ 959 if (csrow & 0x1) { 960 n = snprintf(p, space, "---------------------------" 961 "--------------------------------"); 962 p += n; 963 space -= n; 964 debugf2("%s\n", mem_buffer); 965 p = mem_buffer; 966 space = PAGE_SIZE; 967 } 968 n = snprintf(p, space, "csrow %2d ", csrow); 969 p += n; 970 space -= n; 971 972 for (channel = 0; channel < pvt->maxch; channel++) { 973 dinfo = &pvt->dimm_info[csrow][channel]; 974 handle_channel(pvt, csrow, channel, dinfo); 975 n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); 976 p += n; 977 space -= n; 978 } 979 debugf2("%s\n", mem_buffer); 980 p = mem_buffer; 981 space = PAGE_SIZE; 982 } 983 984 /* Output the last bottom 'boundary' marker */ 985 n = snprintf(p, space, "---------------------------" 986 "--------------------------------"); 987 p += n; 988 space -= n; 989 debugf2("%s\n", mem_buffer); 990 p = mem_buffer; 991 space = PAGE_SIZE; 992 993 /* now output the 'channel' labels */ 994 n = snprintf(p, space, " "); 995 p += n; 996 space -= n; 997 for (channel = 0; channel < pvt->maxch; channel++) { 998 n = snprintf(p, space, "channel %d | ", channel); 999 p += n; 1000 space -= n; 1001 } 1002 1003 /* output the last message and free buffer */ 1004 debugf2("%s\n", mem_buffer); 1005 kfree(mem_buffer); 1006 } 1007 1008 /* 1009 * i5400_get_mc_regs read in the necessary registers and 1010 * cache locally 1011 * 1012 * Fills in the private data members 1013 */ 1014 static void i5400_get_mc_regs(struct mem_ctl_info *mci) 1015 { 1016 struct i5400_pvt *pvt; 1017 u32 actual_tolm; 1018 u16 limit; 1019 int slot_row; 1020 int maxch; 1021 int maxdimmperch; 1022 int way0, way1; 1023 1024 pvt = mci->pvt_info; 1025 1026 pci_read_config_dword(pvt->system_address, AMBASE, 1027 (u32 *) &pvt->ambase); 1028 pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32), 1029 ((u32 *) &pvt->ambase) + sizeof(u32)); 1030 1031 maxdimmperch = pvt->maxdimmperch; 1032 maxch = pvt->maxch; 1033 1034 debugf2("AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", 1035 (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch); 1036 1037 /* Get the Branch Map regs */ 1038 pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); 1039 pvt->tolm >>= 12; 1040 debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, 1041 pvt->tolm); 1042 1043 actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28)); 1044 debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n", 1045 actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); 1046 1047 pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0); 1048 pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1); 1049 1050 /* Get the MIR[0-1] regs */ 1051 limit = (pvt->mir0 >> 4) & 0x0fff; 1052 way0 = pvt->mir0 & 0x1; 1053 way1 = pvt->mir0 & 0x2; 1054 debugf2("MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); 1055 limit = (pvt->mir1 >> 4) & 0xfff; 1056 way0 = pvt->mir1 & 0x1; 1057 way1 = pvt->mir1 & 0x2; 1058 debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); 1059 1060 /* Get the set of MTR[0-3] regs by each branch */ 1061 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) { 1062 int where = MTR0 + (slot_row * sizeof(u16)); 1063 1064 /* Branch 0 set of MTR registers */ 1065 pci_read_config_word(pvt->branch_0, where, 1066 &pvt->b0_mtr[slot_row]); 1067 1068 debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where, 1069 pvt->b0_mtr[slot_row]); 1070 1071 if (pvt->maxch < CHANNELS_PER_BRANCH) { 1072 pvt->b1_mtr[slot_row] = 0; 1073 continue; 1074 } 1075 1076 /* Branch 1 set of MTR registers */ 1077 pci_read_config_word(pvt->branch_1, where, 1078 &pvt->b1_mtr[slot_row]); 1079 debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row, where, 1080 pvt->b1_mtr[slot_row]); 1081 } 1082 1083 /* Read and dump branch 0's MTRs */ 1084 debugf2("\nMemory Technology Registers:\n"); 1085 debugf2(" Branch 0:\n"); 1086 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) 1087 decode_mtr(slot_row, pvt->b0_mtr[slot_row]); 1088 1089 pci_read_config_word(pvt->branch_0, AMBPRESENT_0, 1090 &pvt->b0_ambpresent0); 1091 debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0); 1092 pci_read_config_word(pvt->branch_0, AMBPRESENT_1, 1093 &pvt->b0_ambpresent1); 1094 debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); 1095 1096 /* Only if we have 2 branchs (4 channels) */ 1097 if (pvt->maxch < CHANNELS_PER_BRANCH) { 1098 pvt->b1_ambpresent0 = 0; 1099 pvt->b1_ambpresent1 = 0; 1100 } else { 1101 /* Read and dump branch 1's MTRs */ 1102 debugf2(" Branch 1:\n"); 1103 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) 1104 decode_mtr(slot_row, pvt->b1_mtr[slot_row]); 1105 1106 pci_read_config_word(pvt->branch_1, AMBPRESENT_0, 1107 &pvt->b1_ambpresent0); 1108 debugf2("\t\tAMB-Branch 1-present0 0x%x:\n", 1109 pvt->b1_ambpresent0); 1110 pci_read_config_word(pvt->branch_1, AMBPRESENT_1, 1111 &pvt->b1_ambpresent1); 1112 debugf2("\t\tAMB-Branch 1-present1 0x%x:\n", 1113 pvt->b1_ambpresent1); 1114 } 1115 1116 /* Go and determine the size of each DIMM and place in an 1117 * orderly matrix */ 1118 calculate_dimm_size(pvt); 1119 } 1120 1121 /* 1122 * i5400_init_csrows Initialize the 'csrows' table within 1123 * the mci control structure with the 1124 * addressing of memory. 1125 * 1126 * return: 1127 * 0 success 1128 * 1 no actual memory found on this MC 1129 */ 1130 static int i5400_init_csrows(struct mem_ctl_info *mci) 1131 { 1132 struct i5400_pvt *pvt; 1133 struct csrow_info *p_csrow; 1134 int empty, channel_count; 1135 int max_csrows; 1136 int mtr; 1137 int csrow_megs; 1138 int channel; 1139 int csrow; 1140 1141 pvt = mci->pvt_info; 1142 1143 channel_count = pvt->maxch; 1144 max_csrows = pvt->maxdimmperch; 1145 1146 empty = 1; /* Assume NO memory */ 1147 1148 for (csrow = 0; csrow < max_csrows; csrow++) { 1149 p_csrow = &mci->csrows[csrow]; 1150 1151 p_csrow->csrow_idx = csrow; 1152 1153 /* use branch 0 for the basis */ 1154 mtr = determine_mtr(pvt, csrow, 0); 1155 1156 /* if no DIMMS on this row, continue */ 1157 if (!MTR_DIMMS_PRESENT(mtr)) 1158 continue; 1159 1160 /* FAKE OUT VALUES, FIXME */ 1161 p_csrow->first_page = 0 + csrow * 20; 1162 p_csrow->last_page = 9 + csrow * 20; 1163 p_csrow->page_mask = 0xFFF; 1164 1165 p_csrow->grain = 8; 1166 1167 csrow_megs = 0; 1168 for (channel = 0; channel < pvt->maxch; channel++) 1169 csrow_megs += pvt->dimm_info[csrow][channel].megabytes; 1170 1171 p_csrow->nr_pages = csrow_megs << 8; 1172 1173 /* Assume DDR2 for now */ 1174 p_csrow->mtype = MEM_FB_DDR2; 1175 1176 /* ask what device type on this row */ 1177 if (MTR_DRAM_WIDTH(mtr)) 1178 p_csrow->dtype = DEV_X8; 1179 else 1180 p_csrow->dtype = DEV_X4; 1181 1182 p_csrow->edac_mode = EDAC_S8ECD8ED; 1183 1184 empty = 0; 1185 } 1186 1187 return empty; 1188 } 1189 1190 /* 1191 * i5400_enable_error_reporting 1192 * Turn on the memory reporting features of the hardware 1193 */ 1194 static void i5400_enable_error_reporting(struct mem_ctl_info *mci) 1195 { 1196 struct i5400_pvt *pvt; 1197 u32 fbd_error_mask; 1198 1199 pvt = mci->pvt_info; 1200 1201 /* Read the FBD Error Mask Register */ 1202 pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD, 1203 &fbd_error_mask); 1204 1205 /* Enable with a '0' */ 1206 fbd_error_mask &= ~(ENABLE_EMASK_ALL); 1207 1208 pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD, 1209 fbd_error_mask); 1210 } 1211 1212 /* 1213 * i5400_probe1 Probe for ONE instance of device to see if it is 1214 * present. 1215 * return: 1216 * 0 for FOUND a device 1217 * < 0 for error code 1218 */ 1219 static int i5400_probe1(struct pci_dev *pdev, int dev_idx) 1220 { 1221 struct mem_ctl_info *mci; 1222 struct i5400_pvt *pvt; 1223 int num_channels; 1224 int num_dimms_per_channel; 1225 int num_csrows; 1226 1227 if (dev_idx >= ARRAY_SIZE(i5400_devs)) 1228 return -EINVAL; 1229 1230 debugf0("MC: %s: %s(), pdev bus %u dev=0x%x fn=0x%x\n", 1231 __FILE__, __func__, 1232 pdev->bus->number, 1233 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 1234 1235 /* We only are looking for func 0 of the set */ 1236 if (PCI_FUNC(pdev->devfn) != 0) 1237 return -ENODEV; 1238 1239 /* As we don't have a motherboard identification routine to determine 1240 * actual number of slots/dimms per channel, we thus utilize the 1241 * resource as specified by the chipset. Thus, we might have 1242 * have more DIMMs per channel than actually on the mobo, but this 1243 * allows the driver to support up to the chipset max, without 1244 * some fancy mobo determination. 1245 */ 1246 num_dimms_per_channel = MAX_DIMMS_PER_CHANNEL; 1247 num_channels = MAX_CHANNELS; 1248 num_csrows = num_dimms_per_channel; 1249 1250 debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", 1251 __func__, num_channels, num_dimms_per_channel, num_csrows); 1252 1253 /* allocate a new MC control structure */ 1254 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); 1255 1256 if (mci == NULL) 1257 return -ENOMEM; 1258 1259 debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci); 1260 1261 mci->dev = &pdev->dev; /* record ptr to the generic device */ 1262 1263 pvt = mci->pvt_info; 1264 pvt->system_address = pdev; /* Record this device in our private */ 1265 pvt->maxch = num_channels; 1266 pvt->maxdimmperch = num_dimms_per_channel; 1267 1268 /* 'get' the pci devices we want to reserve for our use */ 1269 if (i5400_get_devices(mci, dev_idx)) 1270 goto fail0; 1271 1272 /* Time to get serious */ 1273 i5400_get_mc_regs(mci); /* retrieve the hardware registers */ 1274 1275 mci->mc_idx = 0; 1276 mci->mtype_cap = MEM_FLAG_FB_DDR2; 1277 mci->edac_ctl_cap = EDAC_FLAG_NONE; 1278 mci->edac_cap = EDAC_FLAG_NONE; 1279 mci->mod_name = "i5400_edac.c"; 1280 mci->mod_ver = I5400_REVISION; 1281 mci->ctl_name = i5400_devs[dev_idx].ctl_name; 1282 mci->dev_name = pci_name(pdev); 1283 mci->ctl_page_to_phys = NULL; 1284 1285 /* Set the function pointer to an actual operation function */ 1286 mci->edac_check = i5400_check_error; 1287 1288 /* initialize the MC control structure 'csrows' table 1289 * with the mapping and control information */ 1290 if (i5400_init_csrows(mci)) { 1291 debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" 1292 " because i5400_init_csrows() returned nonzero " 1293 "value\n"); 1294 mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ 1295 } else { 1296 debugf1("MC: Enable error reporting now\n"); 1297 i5400_enable_error_reporting(mci); 1298 } 1299 1300 /* add this new MC control structure to EDAC's list of MCs */ 1301 if (edac_mc_add_mc(mci)) { 1302 debugf0("MC: %s: %s(): failed edac_mc_add_mc()\n", 1303 __FILE__, __func__); 1304 /* FIXME: perhaps some code should go here that disables error 1305 * reporting if we just enabled it 1306 */ 1307 goto fail1; 1308 } 1309 1310 i5400_clear_error(mci); 1311 1312 /* allocating generic PCI control info */ 1313 i5400_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); 1314 if (!i5400_pci) { 1315 printk(KERN_WARNING 1316 "%s(): Unable to create PCI control\n", 1317 __func__); 1318 printk(KERN_WARNING 1319 "%s(): PCI error report via EDAC not setup\n", 1320 __func__); 1321 } 1322 1323 return 0; 1324 1325 /* Error exit unwinding stack */ 1326 fail1: 1327 1328 i5400_put_devices(mci); 1329 1330 fail0: 1331 edac_mc_free(mci); 1332 return -ENODEV; 1333 } 1334 1335 /* 1336 * i5400_init_one constructor for one instance of device 1337 * 1338 * returns: 1339 * negative on error 1340 * count (>= 0) 1341 */ 1342 static int __devinit i5400_init_one(struct pci_dev *pdev, 1343 const struct pci_device_id *id) 1344 { 1345 int rc; 1346 1347 debugf0("MC: %s: %s()\n", __FILE__, __func__); 1348 1349 /* wake up device */ 1350 rc = pci_enable_device(pdev); 1351 if (rc) 1352 return rc; 1353 1354 /* now probe and enable the device */ 1355 return i5400_probe1(pdev, id->driver_data); 1356 } 1357 1358 /* 1359 * i5400_remove_one destructor for one instance of device 1360 * 1361 */ 1362 static void __devexit i5400_remove_one(struct pci_dev *pdev) 1363 { 1364 struct mem_ctl_info *mci; 1365 1366 debugf0("%s: %s()\n", __FILE__, __func__); 1367 1368 if (i5400_pci) 1369 edac_pci_release_generic_ctl(i5400_pci); 1370 1371 mci = edac_mc_del_mc(&pdev->dev); 1372 if (!mci) 1373 return; 1374 1375 /* retrieve references to resources, and free those resources */ 1376 i5400_put_devices(mci); 1377 1378 edac_mc_free(mci); 1379 } 1380 1381 /* 1382 * pci_device_id table for which devices we are looking for 1383 * 1384 * The "E500P" device is the first device supported. 1385 */ 1386 static const struct pci_device_id i5400_pci_tbl[] __devinitdata = { 1387 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)}, 1388 {0,} /* 0 terminated list. */ 1389 }; 1390 1391 MODULE_DEVICE_TABLE(pci, i5400_pci_tbl); 1392 1393 /* 1394 * i5400_driver pci_driver structure for this module 1395 * 1396 */ 1397 static struct pci_driver i5400_driver = { 1398 .name = "i5400_edac", 1399 .probe = i5400_init_one, 1400 .remove = __devexit_p(i5400_remove_one), 1401 .id_table = i5400_pci_tbl, 1402 }; 1403 1404 /* 1405 * i5400_init Module entry function 1406 * Try to initialize this module for its devices 1407 */ 1408 static int __init i5400_init(void) 1409 { 1410 int pci_rc; 1411 1412 debugf2("MC: %s: %s()\n", __FILE__, __func__); 1413 1414 /* Ensure that the OPSTATE is set correctly for POLL or NMI */ 1415 opstate_init(); 1416 1417 pci_rc = pci_register_driver(&i5400_driver); 1418 1419 return (pci_rc < 0) ? pci_rc : 0; 1420 } 1421 1422 /* 1423 * i5400_exit() Module exit function 1424 * Unregister the driver 1425 */ 1426 static void __exit i5400_exit(void) 1427 { 1428 debugf2("MC: %s: %s()\n", __FILE__, __func__); 1429 pci_unregister_driver(&i5400_driver); 1430 } 1431 1432 module_init(i5400_init); 1433 module_exit(i5400_exit); 1434 1435 MODULE_LICENSE("GPL"); 1436 MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>"); 1437 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); 1438 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); 1439 MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - " 1440 I5400_REVISION); 1441 1442 module_param(edac_op_state, int, 0444); 1443 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); 1444