1 /* 2 * Intel 5400 class Memory Controllers kernel module (Seaburg) 3 * 4 * This file may be distributed under the terms of the 5 * GNU General Public License. 6 * 7 * Copyright (c) 2008 by: 8 * Ben Woodard <woodard@redhat.com> 9 * Mauro Carvalho Chehab <mchehab@redhat.com> 10 * 11 * Red Hat Inc. http://www.redhat.com 12 * 13 * Forked and adapted from the i5000_edac driver which was 14 * written by Douglas Thompson Linux Networx <norsk5@xmission.com> 15 * 16 * This module is based on the following document: 17 * 18 * Intel 5400 Chipset Memory Controller Hub (MCH) - Datasheet 19 * http://developer.intel.com/design/chipsets/datashts/313070.htm 20 * 21 */ 22 23 #include <linux/module.h> 24 #include <linux/init.h> 25 #include <linux/pci.h> 26 #include <linux/pci_ids.h> 27 #include <linux/slab.h> 28 #include <linux/edac.h> 29 #include <linux/mmzone.h> 30 31 #include "edac_core.h" 32 33 /* 34 * Alter this version for the I5400 module when modifications are made 35 */ 36 #define I5400_REVISION " Ver: 1.0.0" 37 38 #define EDAC_MOD_STR "i5400_edac" 39 40 #define i5400_printk(level, fmt, arg...) \ 41 edac_printk(level, "i5400", fmt, ##arg) 42 43 #define i5400_mc_printk(mci, level, fmt, arg...) \ 44 edac_mc_chipset_printk(mci, level, "i5400", fmt, ##arg) 45 46 /* Limits for i5400 */ 47 #define NUM_MTRS_PER_BRANCH 4 48 #define CHANNELS_PER_BRANCH 2 49 #define MAX_DIMMS_PER_CHANNEL NUM_MTRS_PER_BRANCH 50 #define MAX_CHANNELS 4 51 /* max possible csrows per channel */ 52 #define MAX_CSROWS (MAX_DIMMS_PER_CHANNEL) 53 54 /* Device 16, 55 * Function 0: System Address 56 * Function 1: Memory Branch Map, Control, Errors Register 57 * Function 2: FSB Error Registers 58 * 59 * All 3 functions of Device 16 (0,1,2) share the SAME DID and 60 * uses PCI_DEVICE_ID_INTEL_5400_ERR for device 16 (0,1,2), 61 * PCI_DEVICE_ID_INTEL_5400_FBD0 and PCI_DEVICE_ID_INTEL_5400_FBD1 62 * for device 21 (0,1). 63 */ 64 65 /* OFFSETS for Function 0 */ 66 #define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */ 67 #define MAXCH 0x56 /* Max Channel Number */ 68 #define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */ 69 70 /* OFFSETS for Function 1 */ 71 #define TOLM 0x6C 72 #define REDMEMB 0x7C 73 #define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0] indicate EVEN */ 74 #define MIR0 0x80 75 #define MIR1 0x84 76 #define AMIR0 0x8c 77 #define AMIR1 0x90 78 79 /* Fatal error registers */ 80 #define FERR_FAT_FBD 0x98 /* also called as FERR_FAT_FB_DIMM at datasheet */ 81 #define FERR_FAT_FBDCHAN (3<<28) /* channel index where the highest-order error occurred */ 82 83 #define NERR_FAT_FBD 0x9c 84 #define FERR_NF_FBD 0xa0 /* also called as FERR_NFAT_FB_DIMM at datasheet */ 85 86 /* Non-fatal error register */ 87 #define NERR_NF_FBD 0xa4 88 89 /* Enable error mask */ 90 #define EMASK_FBD 0xa8 91 92 #define ERR0_FBD 0xac 93 #define ERR1_FBD 0xb0 94 #define ERR2_FBD 0xb4 95 #define MCERR_FBD 0xb8 96 97 /* No OFFSETS for Device 16 Function 2 */ 98 99 /* 100 * Device 21, 101 * Function 0: Memory Map Branch 0 102 * 103 * Device 22, 104 * Function 0: Memory Map Branch 1 105 */ 106 107 /* OFFSETS for Function 0 */ 108 #define AMBPRESENT_0 0x64 109 #define AMBPRESENT_1 0x66 110 #define MTR0 0x80 111 #define MTR1 0x82 112 #define MTR2 0x84 113 #define MTR3 0x86 114 115 /* OFFSETS for Function 1 */ 116 #define NRECFGLOG 0x74 117 #define RECFGLOG 0x78 118 #define NRECMEMA 0xbe 119 #define NRECMEMB 0xc0 120 #define NRECFB_DIMMA 0xc4 121 #define NRECFB_DIMMB 0xc8 122 #define NRECFB_DIMMC 0xcc 123 #define NRECFB_DIMMD 0xd0 124 #define NRECFB_DIMME 0xd4 125 #define NRECFB_DIMMF 0xd8 126 #define REDMEMA 0xdC 127 #define RECMEMA 0xf0 128 #define RECMEMB 0xf4 129 #define RECFB_DIMMA 0xf8 130 #define RECFB_DIMMB 0xec 131 #define RECFB_DIMMC 0xf0 132 #define RECFB_DIMMD 0xf4 133 #define RECFB_DIMME 0xf8 134 #define RECFB_DIMMF 0xfC 135 136 /* 137 * Error indicator bits and masks 138 * Error masks are according with Table 5-17 of i5400 datasheet 139 */ 140 141 enum error_mask { 142 EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */ 143 EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */ 144 EMASK_M3 = 1<<2, /* Reserved */ 145 EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */ 146 EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */ 147 EMASK_M6 = 1<<5, /* Unsupported on i5400 */ 148 EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ 149 EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */ 150 EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */ 151 EMASK_M10 = 1<<9, /* Unsupported on i5400 */ 152 EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ 153 EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */ 154 EMASK_M13 = 1<<12, /* Memory Write error on first attempt */ 155 EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */ 156 EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */ 157 EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */ 158 EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */ 159 EMASK_M18 = 1<<17, /* Unsupported on i5400 */ 160 EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */ 161 EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */ 162 EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */ 163 EMASK_M22 = 1<<21, /* SPD protocol Error */ 164 EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */ 165 EMASK_M24 = 1<<23, /* Refresh error */ 166 EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */ 167 EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */ 168 EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */ 169 EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */ 170 EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */ 171 }; 172 173 /* 174 * Names to translate bit error into something useful 175 */ 176 static const char *error_name[] = { 177 [0] = "Memory Write error on non-redundant retry", 178 [1] = "Memory or FB-DIMM configuration CRC read error", 179 /* Reserved */ 180 [3] = "Uncorrectable Data ECC on Replay", 181 [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", 182 /* M6 Unsupported on i5400 */ 183 [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", 184 [7] = "Aliased Uncorrectable Patrol Data ECC", 185 [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", 186 /* M10 Unsupported on i5400 */ 187 [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", 188 [11] = "Non-Aliased Uncorrectable Patrol Data ECC", 189 [12] = "Memory Write error on first attempt", 190 [13] = "FB-DIMM Configuration Write error on first attempt", 191 [14] = "Memory or FB-DIMM configuration CRC read error", 192 [15] = "Channel Failed-Over Occurred", 193 [16] = "Correctable Non-Mirrored Demand Data ECC", 194 /* M18 Unsupported on i5400 */ 195 [18] = "Correctable Resilver- or Spare-Copy Data ECC", 196 [19] = "Correctable Patrol Data ECC", 197 [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status", 198 [21] = "SPD protocol Error", 199 [22] = "Non-Redundant Fast Reset Timeout", 200 [23] = "Refresh error", 201 [24] = "Memory Write error on redundant retry", 202 [25] = "Redundant Fast Reset Timeout", 203 [26] = "Correctable Counter Threshold Exceeded", 204 [27] = "DIMM-Spare Copy Completed", 205 [28] = "DIMM-Isolation Completed", 206 }; 207 208 /* Fatal errors */ 209 #define ERROR_FAT_MASK (EMASK_M1 | \ 210 EMASK_M2 | \ 211 EMASK_M23) 212 213 /* Correctable errors */ 214 #define ERROR_NF_CORRECTABLE (EMASK_M27 | \ 215 EMASK_M20 | \ 216 EMASK_M19 | \ 217 EMASK_M18 | \ 218 EMASK_M17 | \ 219 EMASK_M16) 220 #define ERROR_NF_DIMM_SPARE (EMASK_M29 | \ 221 EMASK_M28) 222 #define ERROR_NF_SPD_PROTOCOL (EMASK_M22) 223 #define ERROR_NF_NORTH_CRC (EMASK_M21) 224 225 /* Recoverable errors */ 226 #define ERROR_NF_RECOVERABLE (EMASK_M26 | \ 227 EMASK_M25 | \ 228 EMASK_M24 | \ 229 EMASK_M15 | \ 230 EMASK_M14 | \ 231 EMASK_M13 | \ 232 EMASK_M12 | \ 233 EMASK_M11 | \ 234 EMASK_M9 | \ 235 EMASK_M8 | \ 236 EMASK_M7 | \ 237 EMASK_M5) 238 239 /* uncorrectable errors */ 240 #define ERROR_NF_UNCORRECTABLE (EMASK_M4) 241 242 /* mask to all non-fatal errors */ 243 #define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \ 244 ERROR_NF_UNCORRECTABLE | \ 245 ERROR_NF_RECOVERABLE | \ 246 ERROR_NF_DIMM_SPARE | \ 247 ERROR_NF_SPD_PROTOCOL | \ 248 ERROR_NF_NORTH_CRC) 249 250 /* 251 * Define error masks for the several registers 252 */ 253 254 /* Enable all fatal and non fatal errors */ 255 #define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK) 256 257 /* mask for fatal error registers */ 258 #define FERR_FAT_MASK ERROR_FAT_MASK 259 260 /* masks for non-fatal error register */ 261 static inline int to_nf_mask(unsigned int mask) 262 { 263 return (mask & EMASK_M29) | (mask >> 3); 264 }; 265 266 static inline int from_nf_ferr(unsigned int mask) 267 { 268 return (mask & EMASK_M29) | /* Bit 28 */ 269 (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */ 270 }; 271 272 #define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK) 273 #define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE) 274 #define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE) 275 #define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL) 276 #define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC) 277 #define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) 278 #define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) 279 280 /* Defines to extract the vaious fields from the 281 * MTRx - Memory Technology Registers 282 */ 283 #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 10)) 284 #define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 9)) 285 #define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 8)) ? 8 : 4) 286 #define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 6)) ? 8 : 4) 287 #define MTR_DRAM_BANKS_ADDR_BITS(mtr) ((MTR_DRAM_BANKS(mtr) == 8) ? 3 : 2) 288 #define MTR_DIMM_RANK(mtr) (((mtr) >> 5) & 0x1) 289 #define MTR_DIMM_RANK_ADDR_BITS(mtr) (MTR_DIMM_RANK(mtr) ? 2 : 1) 290 #define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3) 291 #define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13) 292 #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) 293 #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) 294 295 /* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */ 296 static inline int extract_fbdchan_indx(u32 x) 297 { 298 return (x>>28) & 0x3; 299 } 300 301 #ifdef CONFIG_EDAC_DEBUG 302 /* MTR NUMROW */ 303 static const char *numrow_toString[] = { 304 "8,192 - 13 rows", 305 "16,384 - 14 rows", 306 "32,768 - 15 rows", 307 "65,536 - 16 rows" 308 }; 309 310 /* MTR NUMCOL */ 311 static const char *numcol_toString[] = { 312 "1,024 - 10 columns", 313 "2,048 - 11 columns", 314 "4,096 - 12 columns", 315 "reserved" 316 }; 317 #endif 318 319 /* Device name and register DID (Device ID) */ 320 struct i5400_dev_info { 321 const char *ctl_name; /* name for this device */ 322 u16 fsb_mapping_errors; /* DID for the branchmap,control */ 323 }; 324 325 /* Table of devices attributes supported by this driver */ 326 static const struct i5400_dev_info i5400_devs[] = { 327 { 328 .ctl_name = "I5400", 329 .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_5400_ERR, 330 }, 331 }; 332 333 struct i5400_dimm_info { 334 int megabytes; /* size, 0 means not present */ 335 }; 336 337 /* driver private data structure */ 338 struct i5400_pvt { 339 struct pci_dev *system_address; /* 16.0 */ 340 struct pci_dev *branchmap_werrors; /* 16.1 */ 341 struct pci_dev *fsb_error_regs; /* 16.2 */ 342 struct pci_dev *branch_0; /* 21.0 */ 343 struct pci_dev *branch_1; /* 22.0 */ 344 345 u16 tolm; /* top of low memory */ 346 u64 ambase; /* AMB BAR */ 347 348 u16 mir0, mir1; 349 350 u16 b0_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */ 351 u16 b0_ambpresent0; /* Branch 0, Channel 0 */ 352 u16 b0_ambpresent1; /* Brnach 0, Channel 1 */ 353 354 u16 b1_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */ 355 u16 b1_ambpresent0; /* Branch 1, Channel 8 */ 356 u16 b1_ambpresent1; /* Branch 1, Channel 1 */ 357 358 /* DIMM information matrix, allocating architecture maximums */ 359 struct i5400_dimm_info dimm_info[MAX_CSROWS][MAX_CHANNELS]; 360 361 /* Actual values for this controller */ 362 int maxch; /* Max channels */ 363 int maxdimmperch; /* Max DIMMs per channel */ 364 }; 365 366 /* I5400 MCH error information retrieved from Hardware */ 367 struct i5400_error_info { 368 /* These registers are always read from the MC */ 369 u32 ferr_fat_fbd; /* First Errors Fatal */ 370 u32 nerr_fat_fbd; /* Next Errors Fatal */ 371 u32 ferr_nf_fbd; /* First Errors Non-Fatal */ 372 u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ 373 374 /* These registers are input ONLY if there was a Recoverable Error */ 375 u32 redmemb; /* Recoverable Mem Data Error log B */ 376 u16 recmema; /* Recoverable Mem Error log A */ 377 u32 recmemb; /* Recoverable Mem Error log B */ 378 379 /* These registers are input ONLY if there was a Non-Rec Error */ 380 u16 nrecmema; /* Non-Recoverable Mem log A */ 381 u16 nrecmemb; /* Non-Recoverable Mem log B */ 382 383 }; 384 385 /* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and 386 5400 better to use an inline function than a macro in this case */ 387 static inline int nrec_bank(struct i5400_error_info *info) 388 { 389 return ((info->nrecmema) >> 12) & 0x7; 390 } 391 static inline int nrec_rank(struct i5400_error_info *info) 392 { 393 return ((info->nrecmema) >> 8) & 0xf; 394 } 395 static inline int nrec_buf_id(struct i5400_error_info *info) 396 { 397 return ((info->nrecmema)) & 0xff; 398 } 399 static inline int nrec_rdwr(struct i5400_error_info *info) 400 { 401 return (info->nrecmemb) >> 31; 402 } 403 /* This applies to both NREC and REC string so it can be used with nrec_rdwr 404 and rec_rdwr */ 405 static inline const char *rdwr_str(int rdwr) 406 { 407 return rdwr ? "Write" : "Read"; 408 } 409 static inline int nrec_cas(struct i5400_error_info *info) 410 { 411 return ((info->nrecmemb) >> 16) & 0x1fff; 412 } 413 static inline int nrec_ras(struct i5400_error_info *info) 414 { 415 return (info->nrecmemb) & 0xffff; 416 } 417 static inline int rec_bank(struct i5400_error_info *info) 418 { 419 return ((info->recmema) >> 12) & 0x7; 420 } 421 static inline int rec_rank(struct i5400_error_info *info) 422 { 423 return ((info->recmema) >> 8) & 0xf; 424 } 425 static inline int rec_rdwr(struct i5400_error_info *info) 426 { 427 return (info->recmemb) >> 31; 428 } 429 static inline int rec_cas(struct i5400_error_info *info) 430 { 431 return ((info->recmemb) >> 16) & 0x1fff; 432 } 433 static inline int rec_ras(struct i5400_error_info *info) 434 { 435 return (info->recmemb) & 0xffff; 436 } 437 438 static struct edac_pci_ctl_info *i5400_pci; 439 440 /* 441 * i5400_get_error_info Retrieve the hardware error information from 442 * the hardware and cache it in the 'info' 443 * structure 444 */ 445 static void i5400_get_error_info(struct mem_ctl_info *mci, 446 struct i5400_error_info *info) 447 { 448 struct i5400_pvt *pvt; 449 u32 value; 450 451 pvt = mci->pvt_info; 452 453 /* read in the 1st FATAL error register */ 454 pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value); 455 456 /* Mask only the bits that the doc says are valid 457 */ 458 value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); 459 460 /* If there is an error, then read in the 461 NEXT FATAL error register and the Memory Error Log Register A 462 */ 463 if (value & FERR_FAT_MASK) { 464 info->ferr_fat_fbd = value; 465 466 /* harvest the various error data we need */ 467 pci_read_config_dword(pvt->branchmap_werrors, 468 NERR_FAT_FBD, &info->nerr_fat_fbd); 469 pci_read_config_word(pvt->branchmap_werrors, 470 NRECMEMA, &info->nrecmema); 471 pci_read_config_word(pvt->branchmap_werrors, 472 NRECMEMB, &info->nrecmemb); 473 474 /* Clear the error bits, by writing them back */ 475 pci_write_config_dword(pvt->branchmap_werrors, 476 FERR_FAT_FBD, value); 477 } else { 478 info->ferr_fat_fbd = 0; 479 info->nerr_fat_fbd = 0; 480 info->nrecmema = 0; 481 info->nrecmemb = 0; 482 } 483 484 /* read in the 1st NON-FATAL error register */ 485 pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value); 486 487 /* If there is an error, then read in the 1st NON-FATAL error 488 * register as well */ 489 if (value & FERR_NF_MASK) { 490 info->ferr_nf_fbd = value; 491 492 /* harvest the various error data we need */ 493 pci_read_config_dword(pvt->branchmap_werrors, 494 NERR_NF_FBD, &info->nerr_nf_fbd); 495 pci_read_config_word(pvt->branchmap_werrors, 496 RECMEMA, &info->recmema); 497 pci_read_config_dword(pvt->branchmap_werrors, 498 RECMEMB, &info->recmemb); 499 pci_read_config_dword(pvt->branchmap_werrors, 500 REDMEMB, &info->redmemb); 501 502 /* Clear the error bits, by writing them back */ 503 pci_write_config_dword(pvt->branchmap_werrors, 504 FERR_NF_FBD, value); 505 } else { 506 info->ferr_nf_fbd = 0; 507 info->nerr_nf_fbd = 0; 508 info->recmema = 0; 509 info->recmemb = 0; 510 info->redmemb = 0; 511 } 512 } 513 514 /* 515 * i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci, 516 * struct i5400_error_info *info, 517 * int handle_errors); 518 * 519 * handle the Intel FATAL and unrecoverable errors, if any 520 */ 521 static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci, 522 struct i5400_error_info *info, 523 unsigned long allErrors) 524 { 525 char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; 526 int branch; 527 int channel; 528 int bank; 529 int buf_id; 530 int rank; 531 int rdwr; 532 int ras, cas; 533 int errnum; 534 char *type = NULL; 535 536 if (!allErrors) 537 return; /* if no error, return now */ 538 539 if (allErrors & ERROR_FAT_MASK) 540 type = "FATAL"; 541 else if (allErrors & FERR_NF_UNCORRECTABLE) 542 type = "NON-FATAL uncorrected"; 543 else 544 type = "NON-FATAL recoverable"; 545 546 /* ONLY ONE of the possible error bits will be set, as per the docs */ 547 548 branch = extract_fbdchan_indx(info->ferr_fat_fbd); 549 channel = branch; 550 551 /* Use the NON-Recoverable macros to extract data */ 552 bank = nrec_bank(info); 553 rank = nrec_rank(info); 554 buf_id = nrec_buf_id(info); 555 rdwr = nrec_rdwr(info); 556 ras = nrec_ras(info); 557 cas = nrec_cas(info); 558 559 debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " 560 "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", 561 rank, channel, channel + 1, branch >> 1, bank, 562 buf_id, rdwr_str(rdwr), ras, cas); 563 564 /* Only 1 bit will be on */ 565 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); 566 567 /* Form out message */ 568 snprintf(msg, sizeof(msg), 569 "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " 570 "RAS=%d CAS=%d %s Err=0x%lx (%s))", 571 type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, 572 type, allErrors, error_name[errnum]); 573 574 /* Call the helper to output message */ 575 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); 576 } 577 578 /* 579 * i5400_process_fatal_error_info(struct mem_ctl_info *mci, 580 * struct i5400_error_info *info, 581 * int handle_errors); 582 * 583 * handle the Intel NON-FATAL errors, if any 584 */ 585 static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci, 586 struct i5400_error_info *info) 587 { 588 char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; 589 unsigned long allErrors; 590 int branch; 591 int channel; 592 int bank; 593 int rank; 594 int rdwr; 595 int ras, cas; 596 int errnum; 597 598 /* mask off the Error bits that are possible */ 599 allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK); 600 if (!allErrors) 601 return; /* if no error, return now */ 602 603 /* ONLY ONE of the possible error bits will be set, as per the docs */ 604 605 if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) { 606 i5400_proccess_non_recoverable_info(mci, info, allErrors); 607 return; 608 } 609 610 /* Correctable errors */ 611 if (allErrors & ERROR_NF_CORRECTABLE) { 612 debugf0("\tCorrected bits= 0x%lx\n", allErrors); 613 614 branch = extract_fbdchan_indx(info->ferr_nf_fbd); 615 616 channel = 0; 617 if (REC_ECC_LOCATOR_ODD(info->redmemb)) 618 channel = 1; 619 620 /* Convert channel to be based from zero, instead of 621 * from branch base of 0 */ 622 channel += branch; 623 624 bank = rec_bank(info); 625 rank = rec_rank(info); 626 rdwr = rec_rdwr(info); 627 ras = rec_ras(info); 628 cas = rec_cas(info); 629 630 /* Only 1 bit will be on */ 631 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); 632 633 debugf0("\t\tCSROW= %d Channel= %d (Branch %d " 634 "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", 635 rank, channel, branch >> 1, bank, 636 rdwr_str(rdwr), ras, cas); 637 638 /* Form out message */ 639 snprintf(msg, sizeof(msg), 640 "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s " 641 "RAS=%d CAS=%d, CE Err=0x%lx (%s))", 642 branch >> 1, bank, rdwr_str(rdwr), ras, cas, 643 allErrors, error_name[errnum]); 644 645 /* Call the helper to output message */ 646 edac_mc_handle_fbd_ce(mci, rank, channel, msg); 647 648 return; 649 } 650 651 /* Miscellaneous errors */ 652 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); 653 654 branch = extract_fbdchan_indx(info->ferr_nf_fbd); 655 656 i5400_mc_printk(mci, KERN_EMERG, 657 "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", 658 branch >> 1, allErrors, error_name[errnum]); 659 } 660 661 /* 662 * i5400_process_error_info Process the error info that is 663 * in the 'info' structure, previously retrieved from hardware 664 */ 665 static void i5400_process_error_info(struct mem_ctl_info *mci, 666 struct i5400_error_info *info) 667 { u32 allErrors; 668 669 /* First handle any fatal errors that occurred */ 670 allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); 671 i5400_proccess_non_recoverable_info(mci, info, allErrors); 672 673 /* now handle any non-fatal errors that occurred */ 674 i5400_process_nonfatal_error_info(mci, info); 675 } 676 677 /* 678 * i5400_clear_error Retrieve any error from the hardware 679 * but do NOT process that error. 680 * Used for 'clearing' out of previous errors 681 * Called by the Core module. 682 */ 683 static void i5400_clear_error(struct mem_ctl_info *mci) 684 { 685 struct i5400_error_info info; 686 687 i5400_get_error_info(mci, &info); 688 } 689 690 /* 691 * i5400_check_error Retrieve and process errors reported by the 692 * hardware. Called by the Core module. 693 */ 694 static void i5400_check_error(struct mem_ctl_info *mci) 695 { 696 struct i5400_error_info info; 697 debugf4("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__); 698 i5400_get_error_info(mci, &info); 699 i5400_process_error_info(mci, &info); 700 } 701 702 /* 703 * i5400_put_devices 'put' all the devices that we have 704 * reserved via 'get' 705 */ 706 static void i5400_put_devices(struct mem_ctl_info *mci) 707 { 708 struct i5400_pvt *pvt; 709 710 pvt = mci->pvt_info; 711 712 /* Decrement usage count for devices */ 713 pci_dev_put(pvt->branch_1); 714 pci_dev_put(pvt->branch_0); 715 pci_dev_put(pvt->fsb_error_regs); 716 pci_dev_put(pvt->branchmap_werrors); 717 } 718 719 /* 720 * i5400_get_devices Find and perform 'get' operation on the MCH's 721 * device/functions we want to reference for this driver 722 * 723 * Need to 'get' device 16 func 1 and func 2 724 */ 725 static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) 726 { 727 struct i5400_pvt *pvt; 728 struct pci_dev *pdev; 729 730 pvt = mci->pvt_info; 731 pvt->branchmap_werrors = NULL; 732 pvt->fsb_error_regs = NULL; 733 pvt->branch_0 = NULL; 734 pvt->branch_1 = NULL; 735 736 /* Attempt to 'get' the MCH register we want */ 737 pdev = NULL; 738 while (1) { 739 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 740 PCI_DEVICE_ID_INTEL_5400_ERR, pdev); 741 if (!pdev) { 742 /* End of list, leave */ 743 i5400_printk(KERN_ERR, 744 "'system address,Process Bus' " 745 "device not found:" 746 "vendor 0x%x device 0x%x ERR func 1 " 747 "(broken BIOS?)\n", 748 PCI_VENDOR_ID_INTEL, 749 PCI_DEVICE_ID_INTEL_5400_ERR); 750 return -ENODEV; 751 } 752 753 /* Store device 16 func 1 */ 754 if (PCI_FUNC(pdev->devfn) == 1) 755 break; 756 } 757 pvt->branchmap_werrors = pdev; 758 759 pdev = NULL; 760 while (1) { 761 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 762 PCI_DEVICE_ID_INTEL_5400_ERR, pdev); 763 if (!pdev) { 764 /* End of list, leave */ 765 i5400_printk(KERN_ERR, 766 "'system address,Process Bus' " 767 "device not found:" 768 "vendor 0x%x device 0x%x ERR func 2 " 769 "(broken BIOS?)\n", 770 PCI_VENDOR_ID_INTEL, 771 PCI_DEVICE_ID_INTEL_5400_ERR); 772 773 pci_dev_put(pvt->branchmap_werrors); 774 return -ENODEV; 775 } 776 777 /* Store device 16 func 2 */ 778 if (PCI_FUNC(pdev->devfn) == 2) 779 break; 780 } 781 pvt->fsb_error_regs = pdev; 782 783 debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", 784 pci_name(pvt->system_address), 785 pvt->system_address->vendor, pvt->system_address->device); 786 debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", 787 pci_name(pvt->branchmap_werrors), 788 pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); 789 debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", 790 pci_name(pvt->fsb_error_regs), 791 pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); 792 793 pvt->branch_0 = pci_get_device(PCI_VENDOR_ID_INTEL, 794 PCI_DEVICE_ID_INTEL_5400_FBD0, NULL); 795 if (!pvt->branch_0) { 796 i5400_printk(KERN_ERR, 797 "MC: 'BRANCH 0' device not found:" 798 "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", 799 PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0); 800 801 pci_dev_put(pvt->fsb_error_regs); 802 pci_dev_put(pvt->branchmap_werrors); 803 return -ENODEV; 804 } 805 806 /* If this device claims to have more than 2 channels then 807 * fetch Branch 1's information 808 */ 809 if (pvt->maxch < CHANNELS_PER_BRANCH) 810 return 0; 811 812 pvt->branch_1 = pci_get_device(PCI_VENDOR_ID_INTEL, 813 PCI_DEVICE_ID_INTEL_5400_FBD1, NULL); 814 if (!pvt->branch_1) { 815 i5400_printk(KERN_ERR, 816 "MC: 'BRANCH 1' device not found:" 817 "vendor 0x%x device 0x%x Func 0 " 818 "(broken BIOS?)\n", 819 PCI_VENDOR_ID_INTEL, 820 PCI_DEVICE_ID_INTEL_5400_FBD1); 821 822 pci_dev_put(pvt->branch_0); 823 pci_dev_put(pvt->fsb_error_regs); 824 pci_dev_put(pvt->branchmap_werrors); 825 return -ENODEV; 826 } 827 828 return 0; 829 } 830 831 /* 832 * determine_amb_present 833 * 834 * the information is contained in NUM_MTRS_PER_BRANCH different 835 * registers determining which of the NUM_MTRS_PER_BRANCH requires 836 * knowing which channel is in question 837 * 838 * 2 branches, each with 2 channels 839 * b0_ambpresent0 for channel '0' 840 * b0_ambpresent1 for channel '1' 841 * b1_ambpresent0 for channel '2' 842 * b1_ambpresent1 for channel '3' 843 */ 844 static int determine_amb_present_reg(struct i5400_pvt *pvt, int channel) 845 { 846 int amb_present; 847 848 if (channel < CHANNELS_PER_BRANCH) { 849 if (channel & 0x1) 850 amb_present = pvt->b0_ambpresent1; 851 else 852 amb_present = pvt->b0_ambpresent0; 853 } else { 854 if (channel & 0x1) 855 amb_present = pvt->b1_ambpresent1; 856 else 857 amb_present = pvt->b1_ambpresent0; 858 } 859 860 return amb_present; 861 } 862 863 /* 864 * determine_mtr(pvt, csrow, channel) 865 * 866 * return the proper MTR register as determine by the csrow and desired channel 867 */ 868 static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel) 869 { 870 int mtr; 871 int n; 872 873 /* There is one MTR for each slot pair of FB-DIMMs, 874 Each slot pair may be at branch 0 or branch 1. 875 */ 876 n = csrow; 877 878 if (n >= NUM_MTRS_PER_BRANCH) { 879 debugf0("ERROR: trying to access an invalid csrow: %d\n", 880 csrow); 881 return 0; 882 } 883 884 if (channel < CHANNELS_PER_BRANCH) 885 mtr = pvt->b0_mtr[n]; 886 else 887 mtr = pvt->b1_mtr[n]; 888 889 return mtr; 890 } 891 892 /* 893 */ 894 static void decode_mtr(int slot_row, u16 mtr) 895 { 896 int ans; 897 898 ans = MTR_DIMMS_PRESENT(mtr); 899 900 debugf2("\tMTR%d=0x%x: DIMMs are %s\n", slot_row, mtr, 901 ans ? "Present" : "NOT Present"); 902 if (!ans) 903 return; 904 905 debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); 906 907 debugf2("\t\tELECTRICAL THROTTLING is %s\n", 908 MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); 909 910 debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); 911 debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single"); 912 debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); 913 debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); 914 } 915 916 static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel, 917 struct i5400_dimm_info *dinfo) 918 { 919 int mtr; 920 int amb_present_reg; 921 int addrBits; 922 923 mtr = determine_mtr(pvt, csrow, channel); 924 if (MTR_DIMMS_PRESENT(mtr)) { 925 amb_present_reg = determine_amb_present_reg(pvt, channel); 926 927 /* Determine if there is a DIMM present in this DIMM slot */ 928 if (amb_present_reg & (1 << csrow)) { 929 /* Start with the number of bits for a Bank 930 * on the DRAM */ 931 addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr); 932 /* Add thenumber of ROW bits */ 933 addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); 934 /* add the number of COLUMN bits */ 935 addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); 936 /* add the number of RANK bits */ 937 addrBits += MTR_DIMM_RANK(mtr); 938 939 addrBits += 6; /* add 64 bits per DIMM */ 940 addrBits -= 20; /* divide by 2^^20 */ 941 addrBits -= 3; /* 8 bits per bytes */ 942 943 dinfo->megabytes = 1 << addrBits; 944 } 945 } 946 } 947 948 /* 949 * calculate_dimm_size 950 * 951 * also will output a DIMM matrix map, if debug is enabled, for viewing 952 * how the DIMMs are populated 953 */ 954 static void calculate_dimm_size(struct i5400_pvt *pvt) 955 { 956 struct i5400_dimm_info *dinfo; 957 int csrow, max_csrows; 958 char *p, *mem_buffer; 959 int space, n; 960 int channel; 961 962 /* ================= Generate some debug output ================= */ 963 space = PAGE_SIZE; 964 mem_buffer = p = kmalloc(space, GFP_KERNEL); 965 if (p == NULL) { 966 i5400_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n", 967 __FILE__, __func__); 968 return; 969 } 970 971 /* Scan all the actual CSROWS 972 * and calculate the information for each DIMM 973 * Start with the highest csrow first, to display it first 974 * and work toward the 0th csrow 975 */ 976 max_csrows = pvt->maxdimmperch; 977 for (csrow = max_csrows - 1; csrow >= 0; csrow--) { 978 979 /* on an odd csrow, first output a 'boundary' marker, 980 * then reset the message buffer */ 981 if (csrow & 0x1) { 982 n = snprintf(p, space, "---------------------------" 983 "--------------------------------"); 984 p += n; 985 space -= n; 986 debugf2("%s\n", mem_buffer); 987 p = mem_buffer; 988 space = PAGE_SIZE; 989 } 990 n = snprintf(p, space, "csrow %2d ", csrow); 991 p += n; 992 space -= n; 993 994 for (channel = 0; channel < pvt->maxch; channel++) { 995 dinfo = &pvt->dimm_info[csrow][channel]; 996 handle_channel(pvt, csrow, channel, dinfo); 997 n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); 998 p += n; 999 space -= n; 1000 } 1001 debugf2("%s\n", mem_buffer); 1002 p = mem_buffer; 1003 space = PAGE_SIZE; 1004 } 1005 1006 /* Output the last bottom 'boundary' marker */ 1007 n = snprintf(p, space, "---------------------------" 1008 "--------------------------------"); 1009 p += n; 1010 space -= n; 1011 debugf2("%s\n", mem_buffer); 1012 p = mem_buffer; 1013 space = PAGE_SIZE; 1014 1015 /* now output the 'channel' labels */ 1016 n = snprintf(p, space, " "); 1017 p += n; 1018 space -= n; 1019 for (channel = 0; channel < pvt->maxch; channel++) { 1020 n = snprintf(p, space, "channel %d | ", channel); 1021 p += n; 1022 space -= n; 1023 } 1024 1025 /* output the last message and free buffer */ 1026 debugf2("%s\n", mem_buffer); 1027 kfree(mem_buffer); 1028 } 1029 1030 /* 1031 * i5400_get_mc_regs read in the necessary registers and 1032 * cache locally 1033 * 1034 * Fills in the private data members 1035 */ 1036 static void i5400_get_mc_regs(struct mem_ctl_info *mci) 1037 { 1038 struct i5400_pvt *pvt; 1039 u32 actual_tolm; 1040 u16 limit; 1041 int slot_row; 1042 int maxch; 1043 int maxdimmperch; 1044 int way0, way1; 1045 1046 pvt = mci->pvt_info; 1047 1048 pci_read_config_dword(pvt->system_address, AMBASE, 1049 (u32 *) &pvt->ambase); 1050 pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32), 1051 ((u32 *) &pvt->ambase) + sizeof(u32)); 1052 1053 maxdimmperch = pvt->maxdimmperch; 1054 maxch = pvt->maxch; 1055 1056 debugf2("AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", 1057 (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch); 1058 1059 /* Get the Branch Map regs */ 1060 pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); 1061 pvt->tolm >>= 12; 1062 debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, 1063 pvt->tolm); 1064 1065 actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28)); 1066 debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n", 1067 actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); 1068 1069 pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0); 1070 pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1); 1071 1072 /* Get the MIR[0-1] regs */ 1073 limit = (pvt->mir0 >> 4) & 0x0fff; 1074 way0 = pvt->mir0 & 0x1; 1075 way1 = pvt->mir0 & 0x2; 1076 debugf2("MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); 1077 limit = (pvt->mir1 >> 4) & 0xfff; 1078 way0 = pvt->mir1 & 0x1; 1079 way1 = pvt->mir1 & 0x2; 1080 debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); 1081 1082 /* Get the set of MTR[0-3] regs by each branch */ 1083 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) { 1084 int where = MTR0 + (slot_row * sizeof(u16)); 1085 1086 /* Branch 0 set of MTR registers */ 1087 pci_read_config_word(pvt->branch_0, where, 1088 &pvt->b0_mtr[slot_row]); 1089 1090 debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where, 1091 pvt->b0_mtr[slot_row]); 1092 1093 if (pvt->maxch < CHANNELS_PER_BRANCH) { 1094 pvt->b1_mtr[slot_row] = 0; 1095 continue; 1096 } 1097 1098 /* Branch 1 set of MTR registers */ 1099 pci_read_config_word(pvt->branch_1, where, 1100 &pvt->b1_mtr[slot_row]); 1101 debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row, where, 1102 pvt->b1_mtr[slot_row]); 1103 } 1104 1105 /* Read and dump branch 0's MTRs */ 1106 debugf2("\nMemory Technology Registers:\n"); 1107 debugf2(" Branch 0:\n"); 1108 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) 1109 decode_mtr(slot_row, pvt->b0_mtr[slot_row]); 1110 1111 pci_read_config_word(pvt->branch_0, AMBPRESENT_0, 1112 &pvt->b0_ambpresent0); 1113 debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0); 1114 pci_read_config_word(pvt->branch_0, AMBPRESENT_1, 1115 &pvt->b0_ambpresent1); 1116 debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); 1117 1118 /* Only if we have 2 branchs (4 channels) */ 1119 if (pvt->maxch < CHANNELS_PER_BRANCH) { 1120 pvt->b1_ambpresent0 = 0; 1121 pvt->b1_ambpresent1 = 0; 1122 } else { 1123 /* Read and dump branch 1's MTRs */ 1124 debugf2(" Branch 1:\n"); 1125 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) 1126 decode_mtr(slot_row, pvt->b1_mtr[slot_row]); 1127 1128 pci_read_config_word(pvt->branch_1, AMBPRESENT_0, 1129 &pvt->b1_ambpresent0); 1130 debugf2("\t\tAMB-Branch 1-present0 0x%x:\n", 1131 pvt->b1_ambpresent0); 1132 pci_read_config_word(pvt->branch_1, AMBPRESENT_1, 1133 &pvt->b1_ambpresent1); 1134 debugf2("\t\tAMB-Branch 1-present1 0x%x:\n", 1135 pvt->b1_ambpresent1); 1136 } 1137 1138 /* Go and determine the size of each DIMM and place in an 1139 * orderly matrix */ 1140 calculate_dimm_size(pvt); 1141 } 1142 1143 /* 1144 * i5400_init_csrows Initialize the 'csrows' table within 1145 * the mci control structure with the 1146 * addressing of memory. 1147 * 1148 * return: 1149 * 0 success 1150 * 1 no actual memory found on this MC 1151 */ 1152 static int i5400_init_csrows(struct mem_ctl_info *mci) 1153 { 1154 struct i5400_pvt *pvt; 1155 struct csrow_info *p_csrow; 1156 int empty, channel_count; 1157 int max_csrows; 1158 int mtr; 1159 int csrow_megs; 1160 int channel; 1161 int csrow; 1162 1163 pvt = mci->pvt_info; 1164 1165 channel_count = pvt->maxch; 1166 max_csrows = pvt->maxdimmperch; 1167 1168 empty = 1; /* Assume NO memory */ 1169 1170 for (csrow = 0; csrow < max_csrows; csrow++) { 1171 p_csrow = &mci->csrows[csrow]; 1172 1173 p_csrow->csrow_idx = csrow; 1174 1175 /* use branch 0 for the basis */ 1176 mtr = determine_mtr(pvt, csrow, 0); 1177 1178 /* if no DIMMS on this row, continue */ 1179 if (!MTR_DIMMS_PRESENT(mtr)) 1180 continue; 1181 1182 /* FAKE OUT VALUES, FIXME */ 1183 p_csrow->first_page = 0 + csrow * 20; 1184 p_csrow->last_page = 9 + csrow * 20; 1185 p_csrow->page_mask = 0xFFF; 1186 1187 p_csrow->grain = 8; 1188 1189 csrow_megs = 0; 1190 for (channel = 0; channel < pvt->maxch; channel++) 1191 csrow_megs += pvt->dimm_info[csrow][channel].megabytes; 1192 1193 p_csrow->nr_pages = csrow_megs << 8; 1194 1195 /* Assume DDR2 for now */ 1196 p_csrow->mtype = MEM_FB_DDR2; 1197 1198 /* ask what device type on this row */ 1199 if (MTR_DRAM_WIDTH(mtr)) 1200 p_csrow->dtype = DEV_X8; 1201 else 1202 p_csrow->dtype = DEV_X4; 1203 1204 p_csrow->edac_mode = EDAC_S8ECD8ED; 1205 1206 empty = 0; 1207 } 1208 1209 return empty; 1210 } 1211 1212 /* 1213 * i5400_enable_error_reporting 1214 * Turn on the memory reporting features of the hardware 1215 */ 1216 static void i5400_enable_error_reporting(struct mem_ctl_info *mci) 1217 { 1218 struct i5400_pvt *pvt; 1219 u32 fbd_error_mask; 1220 1221 pvt = mci->pvt_info; 1222 1223 /* Read the FBD Error Mask Register */ 1224 pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD, 1225 &fbd_error_mask); 1226 1227 /* Enable with a '0' */ 1228 fbd_error_mask &= ~(ENABLE_EMASK_ALL); 1229 1230 pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD, 1231 fbd_error_mask); 1232 } 1233 1234 /* 1235 * i5400_probe1 Probe for ONE instance of device to see if it is 1236 * present. 1237 * return: 1238 * 0 for FOUND a device 1239 * < 0 for error code 1240 */ 1241 static int i5400_probe1(struct pci_dev *pdev, int dev_idx) 1242 { 1243 struct mem_ctl_info *mci; 1244 struct i5400_pvt *pvt; 1245 int num_channels; 1246 int num_dimms_per_channel; 1247 int num_csrows; 1248 1249 if (dev_idx >= ARRAY_SIZE(i5400_devs)) 1250 return -EINVAL; 1251 1252 debugf0("MC: %s: %s(), pdev bus %u dev=0x%x fn=0x%x\n", 1253 __FILE__, __func__, 1254 pdev->bus->number, 1255 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 1256 1257 /* We only are looking for func 0 of the set */ 1258 if (PCI_FUNC(pdev->devfn) != 0) 1259 return -ENODEV; 1260 1261 /* As we don't have a motherboard identification routine to determine 1262 * actual number of slots/dimms per channel, we thus utilize the 1263 * resource as specified by the chipset. Thus, we might have 1264 * have more DIMMs per channel than actually on the mobo, but this 1265 * allows the driver to support up to the chipset max, without 1266 * some fancy mobo determination. 1267 */ 1268 num_dimms_per_channel = MAX_DIMMS_PER_CHANNEL; 1269 num_channels = MAX_CHANNELS; 1270 num_csrows = num_dimms_per_channel; 1271 1272 debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", 1273 __func__, num_channels, num_dimms_per_channel, num_csrows); 1274 1275 /* allocate a new MC control structure */ 1276 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); 1277 1278 if (mci == NULL) 1279 return -ENOMEM; 1280 1281 debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci); 1282 1283 mci->dev = &pdev->dev; /* record ptr to the generic device */ 1284 1285 pvt = mci->pvt_info; 1286 pvt->system_address = pdev; /* Record this device in our private */ 1287 pvt->maxch = num_channels; 1288 pvt->maxdimmperch = num_dimms_per_channel; 1289 1290 /* 'get' the pci devices we want to reserve for our use */ 1291 if (i5400_get_devices(mci, dev_idx)) 1292 goto fail0; 1293 1294 /* Time to get serious */ 1295 i5400_get_mc_regs(mci); /* retrieve the hardware registers */ 1296 1297 mci->mc_idx = 0; 1298 mci->mtype_cap = MEM_FLAG_FB_DDR2; 1299 mci->edac_ctl_cap = EDAC_FLAG_NONE; 1300 mci->edac_cap = EDAC_FLAG_NONE; 1301 mci->mod_name = "i5400_edac.c"; 1302 mci->mod_ver = I5400_REVISION; 1303 mci->ctl_name = i5400_devs[dev_idx].ctl_name; 1304 mci->dev_name = pci_name(pdev); 1305 mci->ctl_page_to_phys = NULL; 1306 1307 /* Set the function pointer to an actual operation function */ 1308 mci->edac_check = i5400_check_error; 1309 1310 /* initialize the MC control structure 'csrows' table 1311 * with the mapping and control information */ 1312 if (i5400_init_csrows(mci)) { 1313 debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" 1314 " because i5400_init_csrows() returned nonzero " 1315 "value\n"); 1316 mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ 1317 } else { 1318 debugf1("MC: Enable error reporting now\n"); 1319 i5400_enable_error_reporting(mci); 1320 } 1321 1322 /* add this new MC control structure to EDAC's list of MCs */ 1323 if (edac_mc_add_mc(mci)) { 1324 debugf0("MC: %s: %s(): failed edac_mc_add_mc()\n", 1325 __FILE__, __func__); 1326 /* FIXME: perhaps some code should go here that disables error 1327 * reporting if we just enabled it 1328 */ 1329 goto fail1; 1330 } 1331 1332 i5400_clear_error(mci); 1333 1334 /* allocating generic PCI control info */ 1335 i5400_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); 1336 if (!i5400_pci) { 1337 printk(KERN_WARNING 1338 "%s(): Unable to create PCI control\n", 1339 __func__); 1340 printk(KERN_WARNING 1341 "%s(): PCI error report via EDAC not setup\n", 1342 __func__); 1343 } 1344 1345 return 0; 1346 1347 /* Error exit unwinding stack */ 1348 fail1: 1349 1350 i5400_put_devices(mci); 1351 1352 fail0: 1353 edac_mc_free(mci); 1354 return -ENODEV; 1355 } 1356 1357 /* 1358 * i5400_init_one constructor for one instance of device 1359 * 1360 * returns: 1361 * negative on error 1362 * count (>= 0) 1363 */ 1364 static int __devinit i5400_init_one(struct pci_dev *pdev, 1365 const struct pci_device_id *id) 1366 { 1367 int rc; 1368 1369 debugf0("MC: %s: %s()\n", __FILE__, __func__); 1370 1371 /* wake up device */ 1372 rc = pci_enable_device(pdev); 1373 if (rc) 1374 return rc; 1375 1376 /* now probe and enable the device */ 1377 return i5400_probe1(pdev, id->driver_data); 1378 } 1379 1380 /* 1381 * i5400_remove_one destructor for one instance of device 1382 * 1383 */ 1384 static void __devexit i5400_remove_one(struct pci_dev *pdev) 1385 { 1386 struct mem_ctl_info *mci; 1387 1388 debugf0("%s: %s()\n", __FILE__, __func__); 1389 1390 if (i5400_pci) 1391 edac_pci_release_generic_ctl(i5400_pci); 1392 1393 mci = edac_mc_del_mc(&pdev->dev); 1394 if (!mci) 1395 return; 1396 1397 /* retrieve references to resources, and free those resources */ 1398 i5400_put_devices(mci); 1399 1400 edac_mc_free(mci); 1401 } 1402 1403 /* 1404 * pci_device_id table for which devices we are looking for 1405 * 1406 * The "E500P" device is the first device supported. 1407 */ 1408 static DEFINE_PCI_DEVICE_TABLE(i5400_pci_tbl) = { 1409 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)}, 1410 {0,} /* 0 terminated list. */ 1411 }; 1412 1413 MODULE_DEVICE_TABLE(pci, i5400_pci_tbl); 1414 1415 /* 1416 * i5400_driver pci_driver structure for this module 1417 * 1418 */ 1419 static struct pci_driver i5400_driver = { 1420 .name = "i5400_edac", 1421 .probe = i5400_init_one, 1422 .remove = __devexit_p(i5400_remove_one), 1423 .id_table = i5400_pci_tbl, 1424 }; 1425 1426 /* 1427 * i5400_init Module entry function 1428 * Try to initialize this module for its devices 1429 */ 1430 static int __init i5400_init(void) 1431 { 1432 int pci_rc; 1433 1434 debugf2("MC: %s: %s()\n", __FILE__, __func__); 1435 1436 /* Ensure that the OPSTATE is set correctly for POLL or NMI */ 1437 opstate_init(); 1438 1439 pci_rc = pci_register_driver(&i5400_driver); 1440 1441 return (pci_rc < 0) ? pci_rc : 0; 1442 } 1443 1444 /* 1445 * i5400_exit() Module exit function 1446 * Unregister the driver 1447 */ 1448 static void __exit i5400_exit(void) 1449 { 1450 debugf2("MC: %s: %s()\n", __FILE__, __func__); 1451 pci_unregister_driver(&i5400_driver); 1452 } 1453 1454 module_init(i5400_init); 1455 module_exit(i5400_exit); 1456 1457 MODULE_LICENSE("GPL"); 1458 MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>"); 1459 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); 1460 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); 1461 MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - " 1462 I5400_REVISION); 1463 1464 module_param(edac_op_state, int, 0444); 1465 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); 1466