1 /* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module 2 * 3 * This driver supports the memory controllers found on the Intel 4 * processor family Sandy Bridge. 5 * 6 * This file may be distributed under the terms of the 7 * GNU General Public License version 2 only. 8 * 9 * Copyright (c) 2011 by: 10 * Mauro Carvalho Chehab 11 */ 12 13 #include <linux/module.h> 14 #include <linux/init.h> 15 #include <linux/pci.h> 16 #include <linux/pci_ids.h> 17 #include <linux/slab.h> 18 #include <linux/delay.h> 19 #include <linux/edac.h> 20 #include <linux/mmzone.h> 21 #include <linux/smp.h> 22 #include <linux/bitmap.h> 23 #include <linux/math64.h> 24 #include <asm/processor.h> 25 #include <asm/mce.h> 26 27 #include "edac_core.h" 28 29 /* Static vars */ 30 static LIST_HEAD(sbridge_edac_list); 31 static DEFINE_MUTEX(sbridge_edac_lock); 32 static int probed; 33 34 /* 35 * Alter this version for the module when modifications are made 36 */ 37 #define SBRIDGE_REVISION " Ver: 1.1.0 " 38 #define EDAC_MOD_STR "sbridge_edac" 39 40 /* 41 * Debug macros 42 */ 43 #define sbridge_printk(level, fmt, arg...) \ 44 edac_printk(level, "sbridge", fmt, ##arg) 45 46 #define sbridge_mc_printk(mci, level, fmt, arg...) \ 47 edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##arg) 48 49 /* 50 * Get a bit field at register value <v>, from bit <lo> to bit <hi> 51 */ 52 #define GET_BITFIELD(v, lo, hi) \ 53 (((v) & GENMASK_ULL(hi, lo)) >> (lo)) 54 55 /* 56 * sbridge Memory Controller Registers 57 */ 58 59 /* 60 * FIXME: For now, let's order by device function, as it makes 61 * easier for driver's development process. This table should be 62 * moved to pci_id.h when submitted upstream 63 */ 64 #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */ 65 #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */ 66 #define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */ 67 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0 0x3ca0 /* 14.0 */ 68 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA 0x3ca8 /* 15.0 */ 69 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */ 70 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0 0x3caa /* 15.2 */ 71 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */ 72 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */ 73 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */ 74 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */ 75 76 /* 77 * Currently, unused, but will be needed in the future 78 * implementations, as they hold the error counters 79 */ 80 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */ 81 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */ 82 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2 0x3c76 /* 16.6 */ 83 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3 0x3c77 /* 16.7 */ 84 85 /* Devices 12 Function 6, Offsets 0x80 to 0xcc */ 86 static const u32 sbridge_dram_rule[] = { 87 0x80, 0x88, 0x90, 0x98, 0xa0, 88 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, 89 }; 90 91 static const u32 ibridge_dram_rule[] = { 92 0x60, 0x68, 0x70, 0x78, 0x80, 93 0x88, 0x90, 0x98, 0xa0, 0xa8, 94 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 95 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, 96 }; 97 98 #define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff) 99 #define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3) 100 #define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1) 101 #define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0) 102 103 static char *get_dram_attr(u32 reg) 104 { 105 switch(DRAM_ATTR(reg)) { 106 case 0: 107 return "DRAM"; 108 case 1: 109 return "MMCFG"; 110 case 2: 111 return "NXM"; 112 default: 113 return "unknown"; 114 } 115 } 116 117 static const u32 sbridge_interleave_list[] = { 118 0x84, 0x8c, 0x94, 0x9c, 0xa4, 119 0xac, 0xb4, 0xbc, 0xc4, 0xcc, 120 }; 121 122 static const u32 ibridge_interleave_list[] = { 123 0x64, 0x6c, 0x74, 0x7c, 0x84, 124 0x8c, 0x94, 0x9c, 0xa4, 0xac, 125 0xb4, 0xbc, 0xc4, 0xcc, 0xd4, 126 0xdc, 0xe4, 0xec, 0xf4, 0xfc, 127 }; 128 129 struct interleave_pkg { 130 unsigned char start; 131 unsigned char end; 132 }; 133 134 static const struct interleave_pkg sbridge_interleave_pkg[] = { 135 { 0, 2 }, 136 { 3, 5 }, 137 { 8, 10 }, 138 { 11, 13 }, 139 { 16, 18 }, 140 { 19, 21 }, 141 { 24, 26 }, 142 { 27, 29 }, 143 }; 144 145 static const struct interleave_pkg ibridge_interleave_pkg[] = { 146 { 0, 3 }, 147 { 4, 7 }, 148 { 8, 11 }, 149 { 12, 15 }, 150 { 16, 19 }, 151 { 20, 23 }, 152 { 24, 27 }, 153 { 28, 31 }, 154 }; 155 156 static inline int sad_pkg(const struct interleave_pkg *table, u32 reg, 157 int interleave) 158 { 159 return GET_BITFIELD(reg, table[interleave].start, 160 table[interleave].end); 161 } 162 163 /* Devices 12 Function 7 */ 164 165 #define TOLM 0x80 166 #define TOHM 0x84 167 168 #define GET_TOLM(reg) ((GET_BITFIELD(reg, 0, 3) << 28) | 0x3ffffff) 169 #define GET_TOHM(reg) ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff) 170 171 /* Device 13 Function 6 */ 172 173 #define SAD_TARGET 0xf0 174 175 #define SOURCE_ID(reg) GET_BITFIELD(reg, 9, 11) 176 177 #define SAD_CONTROL 0xf4 178 179 #define NODE_ID(reg) GET_BITFIELD(reg, 0, 2) 180 181 /* Device 14 function 0 */ 182 183 static const u32 tad_dram_rule[] = { 184 0x40, 0x44, 0x48, 0x4c, 185 0x50, 0x54, 0x58, 0x5c, 186 0x60, 0x64, 0x68, 0x6c, 187 }; 188 #define MAX_TAD ARRAY_SIZE(tad_dram_rule) 189 190 #define TAD_LIMIT(reg) ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff) 191 #define TAD_SOCK(reg) GET_BITFIELD(reg, 10, 11) 192 #define TAD_CH(reg) GET_BITFIELD(reg, 8, 9) 193 #define TAD_TGT3(reg) GET_BITFIELD(reg, 6, 7) 194 #define TAD_TGT2(reg) GET_BITFIELD(reg, 4, 5) 195 #define TAD_TGT1(reg) GET_BITFIELD(reg, 2, 3) 196 #define TAD_TGT0(reg) GET_BITFIELD(reg, 0, 1) 197 198 /* Device 15, function 0 */ 199 200 #define MCMTR 0x7c 201 202 #define IS_ECC_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 2, 2) 203 #define IS_LOCKSTEP_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 1, 1) 204 #define IS_CLOSE_PG(mcmtr) GET_BITFIELD(mcmtr, 0, 0) 205 206 /* Device 15, function 1 */ 207 208 #define RASENABLES 0xac 209 #define IS_MIRROR_ENABLED(reg) GET_BITFIELD(reg, 0, 0) 210 211 /* Device 15, functions 2-5 */ 212 213 static const int mtr_regs[] = { 214 0x80, 0x84, 0x88, 215 }; 216 217 #define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19) 218 #define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14) 219 #define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13) 220 #define RANK_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 2, 4) 221 #define COL_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 0, 1) 222 223 static const u32 tad_ch_nilv_offset[] = { 224 0x90, 0x94, 0x98, 0x9c, 225 0xa0, 0xa4, 0xa8, 0xac, 226 0xb0, 0xb4, 0xb8, 0xbc, 227 }; 228 #define CHN_IDX_OFFSET(reg) GET_BITFIELD(reg, 28, 29) 229 #define TAD_OFFSET(reg) (GET_BITFIELD(reg, 6, 25) << 26) 230 231 static const u32 rir_way_limit[] = { 232 0x108, 0x10c, 0x110, 0x114, 0x118, 233 }; 234 #define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit) 235 236 #define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31) 237 #define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29) 238 #define RIR_LIMIT(reg) ((GET_BITFIELD(reg, 1, 10) << 29)| 0x1fffffff) 239 240 #define MAX_RIR_WAY 8 241 242 static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = { 243 { 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c }, 244 { 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c }, 245 { 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c }, 246 { 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c }, 247 { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc }, 248 }; 249 250 #define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19) 251 #define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14) 252 253 /* Device 16, functions 2-7 */ 254 255 /* 256 * FIXME: Implement the error count reads directly 257 */ 258 259 static const u32 correrrcnt[] = { 260 0x104, 0x108, 0x10c, 0x110, 261 }; 262 263 #define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31) 264 #define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30) 265 #define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15) 266 #define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14) 267 268 static const u32 correrrthrsld[] = { 269 0x11c, 0x120, 0x124, 0x128, 270 }; 271 272 #define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30) 273 #define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14) 274 275 276 /* Device 17, function 0 */ 277 278 #define SB_RANK_CFG_A 0x0328 279 280 #define IB_RANK_CFG_A 0x0320 281 282 #define IS_RDIMM_ENABLED(reg) GET_BITFIELD(reg, 11, 11) 283 284 /* 285 * sbridge structs 286 */ 287 288 #define NUM_CHANNELS 4 289 #define MAX_DIMMS 3 /* Max DIMMS per channel */ 290 291 enum type { 292 SANDY_BRIDGE, 293 IVY_BRIDGE, 294 }; 295 296 struct sbridge_pvt; 297 struct sbridge_info { 298 enum type type; 299 u32 mcmtr; 300 u32 rankcfgr; 301 u64 (*get_tolm)(struct sbridge_pvt *pvt); 302 u64 (*get_tohm)(struct sbridge_pvt *pvt); 303 const u32 *dram_rule; 304 const u32 *interleave_list; 305 const struct interleave_pkg *interleave_pkg; 306 u8 max_sad; 307 u8 max_interleave; 308 }; 309 310 struct sbridge_channel { 311 u32 ranks; 312 u32 dimms; 313 }; 314 315 struct pci_id_descr { 316 int dev; 317 int func; 318 int dev_id; 319 int optional; 320 }; 321 322 struct pci_id_table { 323 const struct pci_id_descr *descr; 324 int n_devs; 325 }; 326 327 struct sbridge_dev { 328 struct list_head list; 329 u8 bus, mc; 330 u8 node_id, source_id; 331 struct pci_dev **pdev; 332 int n_devs; 333 struct mem_ctl_info *mci; 334 }; 335 336 struct sbridge_pvt { 337 struct pci_dev *pci_ta, *pci_ddrio, *pci_ras; 338 struct pci_dev *pci_sad0, *pci_sad1; 339 struct pci_dev *pci_ha0, *pci_ha1; 340 struct pci_dev *pci_br0, *pci_br1; 341 struct pci_dev *pci_tad[NUM_CHANNELS]; 342 343 struct sbridge_dev *sbridge_dev; 344 345 struct sbridge_info info; 346 struct sbridge_channel channel[NUM_CHANNELS]; 347 348 /* Memory type detection */ 349 bool is_mirrored, is_lockstep, is_close_pg; 350 351 /* Fifo double buffers */ 352 struct mce mce_entry[MCE_LOG_LEN]; 353 struct mce mce_outentry[MCE_LOG_LEN]; 354 355 /* Fifo in/out counters */ 356 unsigned mce_in, mce_out; 357 358 /* Count indicator to show errors not got */ 359 unsigned mce_overrun; 360 361 /* Memory description */ 362 u64 tolm, tohm; 363 }; 364 365 #define PCI_DESCR(device, function, device_id, opt) \ 366 .dev = (device), \ 367 .func = (function), \ 368 .dev_id = (device_id), \ 369 .optional = opt 370 371 static const struct pci_id_descr pci_dev_descr_sbridge[] = { 372 /* Processor Home Agent */ 373 { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) }, 374 375 /* Memory controller */ 376 { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) }, 377 { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) }, 378 { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) }, 379 { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) }, 380 { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) }, 381 { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) }, 382 { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) }, 383 384 /* System Address Decoder */ 385 { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) }, 386 { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) }, 387 388 /* Broadcast Registers */ 389 { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) }, 390 }; 391 392 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } 393 static const struct pci_id_table pci_dev_descr_sbridge_table[] = { 394 PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge), 395 {0,} /* 0 terminated list. */ 396 }; 397 398 /* This changes depending if 1HA or 2HA: 399 * 1HA: 400 * 0x0eb8 (17.0) is DDRIO0 401 * 2HA: 402 * 0x0ebc (17.4) is DDRIO0 403 */ 404 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0 0x0eb8 405 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0 0x0ebc 406 407 /* pci ids */ 408 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0 0x0ea0 409 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA 0x0ea8 410 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS 0x0e71 411 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0 0x0eaa 412 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1 0x0eab 413 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2 0x0eac 414 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3 0x0ead 415 #define PCI_DEVICE_ID_INTEL_IBRIDGE_SAD 0x0ec8 416 #define PCI_DEVICE_ID_INTEL_IBRIDGE_BR0 0x0ec9 417 #define PCI_DEVICE_ID_INTEL_IBRIDGE_BR1 0x0eca 418 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1 0x0e60 419 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA 0x0e68 420 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS 0x0e79 421 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 0x0e6a 422 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1 0x0e6b 423 424 static const struct pci_id_descr pci_dev_descr_ibridge[] = { 425 /* Processor Home Agent */ 426 { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0) }, 427 428 /* Memory controller */ 429 { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0) }, 430 { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0) }, 431 { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0) }, 432 { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0) }, 433 { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0) }, 434 { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0) }, 435 436 /* System Address Decoder */ 437 { PCI_DESCR(22, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0) }, 438 439 /* Broadcast Registers */ 440 { PCI_DESCR(22, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1) }, 441 { PCI_DESCR(22, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0) }, 442 443 /* Optional, mode 2HA */ 444 { PCI_DESCR(28, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1) }, 445 #if 0 446 { PCI_DESCR(29, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1) }, 447 { PCI_DESCR(29, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1) }, 448 #endif 449 { PCI_DESCR(29, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1) }, 450 { PCI_DESCR(29, 3, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1) }, 451 452 { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1) }, 453 { PCI_DESCR(17, 4, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1) }, 454 }; 455 456 static const struct pci_id_table pci_dev_descr_ibridge_table[] = { 457 PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge), 458 {0,} /* 0 terminated list. */ 459 }; 460 461 /* 462 * pci_device_id table for which devices we are looking for 463 */ 464 static const struct pci_device_id sbridge_pci_tbl[] = { 465 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)}, 466 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)}, 467 {0,} /* 0 terminated list. */ 468 }; 469 470 471 /**************************************************************************** 472 Ancillary status routines 473 ****************************************************************************/ 474 475 static inline int numrank(u32 mtr) 476 { 477 int ranks = (1 << RANK_CNT_BITS(mtr)); 478 479 if (ranks > 4) { 480 edac_dbg(0, "Invalid number of ranks: %d (max = 4) raw value = %x (%04x)\n", 481 ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr); 482 return -EINVAL; 483 } 484 485 return ranks; 486 } 487 488 static inline int numrow(u32 mtr) 489 { 490 int rows = (RANK_WIDTH_BITS(mtr) + 12); 491 492 if (rows < 13 || rows > 18) { 493 edac_dbg(0, "Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)\n", 494 rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr); 495 return -EINVAL; 496 } 497 498 return 1 << rows; 499 } 500 501 static inline int numcol(u32 mtr) 502 { 503 int cols = (COL_WIDTH_BITS(mtr) + 10); 504 505 if (cols > 12) { 506 edac_dbg(0, "Invalid number of cols: %d (max = 4) raw value = %x (%04x)\n", 507 cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr); 508 return -EINVAL; 509 } 510 511 return 1 << cols; 512 } 513 514 static struct sbridge_dev *get_sbridge_dev(u8 bus) 515 { 516 struct sbridge_dev *sbridge_dev; 517 518 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { 519 if (sbridge_dev->bus == bus) 520 return sbridge_dev; 521 } 522 523 return NULL; 524 } 525 526 static struct sbridge_dev *alloc_sbridge_dev(u8 bus, 527 const struct pci_id_table *table) 528 { 529 struct sbridge_dev *sbridge_dev; 530 531 sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL); 532 if (!sbridge_dev) 533 return NULL; 534 535 sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs, 536 GFP_KERNEL); 537 if (!sbridge_dev->pdev) { 538 kfree(sbridge_dev); 539 return NULL; 540 } 541 542 sbridge_dev->bus = bus; 543 sbridge_dev->n_devs = table->n_devs; 544 list_add_tail(&sbridge_dev->list, &sbridge_edac_list); 545 546 return sbridge_dev; 547 } 548 549 static void free_sbridge_dev(struct sbridge_dev *sbridge_dev) 550 { 551 list_del(&sbridge_dev->list); 552 kfree(sbridge_dev->pdev); 553 kfree(sbridge_dev); 554 } 555 556 static u64 sbridge_get_tolm(struct sbridge_pvt *pvt) 557 { 558 u32 reg; 559 560 /* Address range is 32:28 */ 561 pci_read_config_dword(pvt->pci_sad1, TOLM, ®); 562 return GET_TOLM(reg); 563 } 564 565 static u64 sbridge_get_tohm(struct sbridge_pvt *pvt) 566 { 567 u32 reg; 568 569 pci_read_config_dword(pvt->pci_sad1, TOHM, ®); 570 return GET_TOHM(reg); 571 } 572 573 static u64 ibridge_get_tolm(struct sbridge_pvt *pvt) 574 { 575 u32 reg; 576 577 pci_read_config_dword(pvt->pci_br1, TOLM, ®); 578 579 return GET_TOLM(reg); 580 } 581 582 static u64 ibridge_get_tohm(struct sbridge_pvt *pvt) 583 { 584 u32 reg; 585 586 pci_read_config_dword(pvt->pci_br1, TOHM, ®); 587 588 return GET_TOHM(reg); 589 } 590 591 static inline u8 sad_pkg_socket(u8 pkg) 592 { 593 /* on Ivy Bridge, nodeID is SASS, where A is HA and S is node id */ 594 return (pkg >> 3) | (pkg & 0x3); 595 } 596 597 static inline u8 sad_pkg_ha(u8 pkg) 598 { 599 return (pkg >> 2) & 0x1; 600 } 601 602 /**************************************************************************** 603 Memory check routines 604 ****************************************************************************/ 605 static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot, 606 unsigned func) 607 { 608 struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus); 609 int i; 610 611 if (!sbridge_dev) 612 return NULL; 613 614 for (i = 0; i < sbridge_dev->n_devs; i++) { 615 if (!sbridge_dev->pdev[i]) 616 continue; 617 618 if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot && 619 PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) { 620 edac_dbg(1, "Associated %02x.%02x.%d with %p\n", 621 bus, slot, func, sbridge_dev->pdev[i]); 622 return sbridge_dev->pdev[i]; 623 } 624 } 625 626 return NULL; 627 } 628 629 /** 630 * check_if_ecc_is_active() - Checks if ECC is active 631 * bus: Device bus 632 */ 633 static int check_if_ecc_is_active(const u8 bus) 634 { 635 struct pci_dev *pdev = NULL; 636 u32 mcmtr; 637 638 pdev = get_pdev_slot_func(bus, 15, 0); 639 if (!pdev) { 640 sbridge_printk(KERN_ERR, "Couldn't find PCI device " 641 "%2x.%02d.%d!!!\n", 642 bus, 15, 0); 643 return -ENODEV; 644 } 645 646 pci_read_config_dword(pdev, MCMTR, &mcmtr); 647 if (!IS_ECC_ENABLED(mcmtr)) { 648 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n"); 649 return -ENODEV; 650 } 651 return 0; 652 } 653 654 static int get_dimm_config(struct mem_ctl_info *mci) 655 { 656 struct sbridge_pvt *pvt = mci->pvt_info; 657 struct dimm_info *dimm; 658 unsigned i, j, banks, ranks, rows, cols, npages; 659 u64 size; 660 u32 reg; 661 enum edac_type mode; 662 enum mem_type mtype; 663 664 pci_read_config_dword(pvt->pci_br0, SAD_TARGET, ®); 665 pvt->sbridge_dev->source_id = SOURCE_ID(reg); 666 667 pci_read_config_dword(pvt->pci_br0, SAD_CONTROL, ®); 668 pvt->sbridge_dev->node_id = NODE_ID(reg); 669 edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n", 670 pvt->sbridge_dev->mc, 671 pvt->sbridge_dev->node_id, 672 pvt->sbridge_dev->source_id); 673 674 pci_read_config_dword(pvt->pci_ras, RASENABLES, ®); 675 if (IS_MIRROR_ENABLED(reg)) { 676 edac_dbg(0, "Memory mirror is enabled\n"); 677 pvt->is_mirrored = true; 678 } else { 679 edac_dbg(0, "Memory mirror is disabled\n"); 680 pvt->is_mirrored = false; 681 } 682 683 pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr); 684 if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) { 685 edac_dbg(0, "Lockstep is enabled\n"); 686 mode = EDAC_S8ECD8ED; 687 pvt->is_lockstep = true; 688 } else { 689 edac_dbg(0, "Lockstep is disabled\n"); 690 mode = EDAC_S4ECD4ED; 691 pvt->is_lockstep = false; 692 } 693 if (IS_CLOSE_PG(pvt->info.mcmtr)) { 694 edac_dbg(0, "address map is on closed page mode\n"); 695 pvt->is_close_pg = true; 696 } else { 697 edac_dbg(0, "address map is on open page mode\n"); 698 pvt->is_close_pg = false; 699 } 700 701 if (pvt->pci_ddrio) { 702 pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr, 703 ®); 704 if (IS_RDIMM_ENABLED(reg)) { 705 /* FIXME: Can also be LRDIMM */ 706 edac_dbg(0, "Memory is registered\n"); 707 mtype = MEM_RDDR3; 708 } else { 709 edac_dbg(0, "Memory is unregistered\n"); 710 mtype = MEM_DDR3; 711 } 712 } else { 713 edac_dbg(0, "Cannot determine memory type\n"); 714 mtype = MEM_UNKNOWN; 715 } 716 717 /* On all supported DDR3 DIMM types, there are 8 banks available */ 718 banks = 8; 719 720 for (i = 0; i < NUM_CHANNELS; i++) { 721 u32 mtr; 722 723 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { 724 dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, 725 i, j, 0); 726 pci_read_config_dword(pvt->pci_tad[i], 727 mtr_regs[j], &mtr); 728 edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr); 729 if (IS_DIMM_PRESENT(mtr)) { 730 pvt->channel[i].dimms++; 731 732 ranks = numrank(mtr); 733 rows = numrow(mtr); 734 cols = numcol(mtr); 735 736 /* DDR3 has 8 I/O banks */ 737 size = ((u64)rows * cols * banks * ranks) >> (20 - 3); 738 npages = MiB_TO_PAGES(size); 739 740 edac_dbg(0, "mc#%d: channel %d, dimm %d, %Ld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", 741 pvt->sbridge_dev->mc, i, j, 742 size, npages, 743 banks, ranks, rows, cols); 744 745 dimm->nr_pages = npages; 746 dimm->grain = 32; 747 dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4; 748 dimm->mtype = mtype; 749 dimm->edac_mode = mode; 750 snprintf(dimm->label, sizeof(dimm->label), 751 "CPU_SrcID#%u_Channel#%u_DIMM#%u", 752 pvt->sbridge_dev->source_id, i, j); 753 } 754 } 755 } 756 757 return 0; 758 } 759 760 static void get_memory_layout(const struct mem_ctl_info *mci) 761 { 762 struct sbridge_pvt *pvt = mci->pvt_info; 763 int i, j, k, n_sads, n_tads, sad_interl; 764 u32 reg; 765 u64 limit, prv = 0; 766 u64 tmp_mb; 767 u32 mb, kb; 768 u32 rir_way; 769 770 /* 771 * Step 1) Get TOLM/TOHM ranges 772 */ 773 774 pvt->tolm = pvt->info.get_tolm(pvt); 775 tmp_mb = (1 + pvt->tolm) >> 20; 776 777 mb = div_u64_rem(tmp_mb, 1000, &kb); 778 edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm); 779 780 /* Address range is already 45:25 */ 781 pvt->tohm = pvt->info.get_tohm(pvt); 782 tmp_mb = (1 + pvt->tohm) >> 20; 783 784 mb = div_u64_rem(tmp_mb, 1000, &kb); 785 edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); 786 787 /* 788 * Step 2) Get SAD range and SAD Interleave list 789 * TAD registers contain the interleave wayness. However, it 790 * seems simpler to just discover it indirectly, with the 791 * algorithm bellow. 792 */ 793 prv = 0; 794 for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) { 795 /* SAD_LIMIT Address range is 45:26 */ 796 pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads], 797 ®); 798 limit = SAD_LIMIT(reg); 799 800 if (!DRAM_RULE_ENABLE(reg)) 801 continue; 802 803 if (limit <= prv) 804 break; 805 806 tmp_mb = (limit + 1) >> 20; 807 mb = div_u64_rem(tmp_mb, 1000, &kb); 808 edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n", 809 n_sads, 810 get_dram_attr(reg), 811 mb, kb, 812 ((u64)tmp_mb) << 20L, 813 INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]", 814 reg); 815 prv = limit; 816 817 pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads], 818 ®); 819 sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0); 820 for (j = 0; j < 8; j++) { 821 u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, j); 822 if (j > 0 && sad_interl == pkg) 823 break; 824 825 edac_dbg(0, "SAD#%d, interleave #%d: %d\n", 826 n_sads, j, pkg); 827 } 828 } 829 830 /* 831 * Step 3) Get TAD range 832 */ 833 prv = 0; 834 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) { 835 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads], 836 ®); 837 limit = TAD_LIMIT(reg); 838 if (limit <= prv) 839 break; 840 tmp_mb = (limit + 1) >> 20; 841 842 mb = div_u64_rem(tmp_mb, 1000, &kb); 843 edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", 844 n_tads, mb, kb, 845 ((u64)tmp_mb) << 20L, 846 (u32)TAD_SOCK(reg), 847 (u32)TAD_CH(reg), 848 (u32)TAD_TGT0(reg), 849 (u32)TAD_TGT1(reg), 850 (u32)TAD_TGT2(reg), 851 (u32)TAD_TGT3(reg), 852 reg); 853 prv = limit; 854 } 855 856 /* 857 * Step 4) Get TAD offsets, per each channel 858 */ 859 for (i = 0; i < NUM_CHANNELS; i++) { 860 if (!pvt->channel[i].dimms) 861 continue; 862 for (j = 0; j < n_tads; j++) { 863 pci_read_config_dword(pvt->pci_tad[i], 864 tad_ch_nilv_offset[j], 865 ®); 866 tmp_mb = TAD_OFFSET(reg) >> 20; 867 mb = div_u64_rem(tmp_mb, 1000, &kb); 868 edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n", 869 i, j, 870 mb, kb, 871 ((u64)tmp_mb) << 20L, 872 reg); 873 } 874 } 875 876 /* 877 * Step 6) Get RIR Wayness/Limit, per each channel 878 */ 879 for (i = 0; i < NUM_CHANNELS; i++) { 880 if (!pvt->channel[i].dimms) 881 continue; 882 for (j = 0; j < MAX_RIR_RANGES; j++) { 883 pci_read_config_dword(pvt->pci_tad[i], 884 rir_way_limit[j], 885 ®); 886 887 if (!IS_RIR_VALID(reg)) 888 continue; 889 890 tmp_mb = RIR_LIMIT(reg) >> 20; 891 rir_way = 1 << RIR_WAY(reg); 892 mb = div_u64_rem(tmp_mb, 1000, &kb); 893 edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n", 894 i, j, 895 mb, kb, 896 ((u64)tmp_mb) << 20L, 897 rir_way, 898 reg); 899 900 for (k = 0; k < rir_way; k++) { 901 pci_read_config_dword(pvt->pci_tad[i], 902 rir_offset[j][k], 903 ®); 904 tmp_mb = RIR_OFFSET(reg) << 6; 905 906 mb = div_u64_rem(tmp_mb, 1000, &kb); 907 edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", 908 i, j, k, 909 mb, kb, 910 ((u64)tmp_mb) << 20L, 911 (u32)RIR_RNK_TGT(reg), 912 reg); 913 } 914 } 915 } 916 } 917 918 static struct mem_ctl_info *get_mci_for_node_id(u8 node_id) 919 { 920 struct sbridge_dev *sbridge_dev; 921 922 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { 923 if (sbridge_dev->node_id == node_id) 924 return sbridge_dev->mci; 925 } 926 return NULL; 927 } 928 929 static int get_memory_error_data(struct mem_ctl_info *mci, 930 u64 addr, 931 u8 *socket, 932 long *channel_mask, 933 u8 *rank, 934 char **area_type, char *msg) 935 { 936 struct mem_ctl_info *new_mci; 937 struct sbridge_pvt *pvt = mci->pvt_info; 938 struct pci_dev *pci_ha; 939 int n_rir, n_sads, n_tads, sad_way, sck_xch; 940 int sad_interl, idx, base_ch; 941 int interleave_mode; 942 unsigned sad_interleave[pvt->info.max_interleave]; 943 u32 reg; 944 u8 ch_way, sck_way, pkg, sad_ha = 0; 945 u32 tad_offset; 946 u32 rir_way; 947 u32 mb, kb; 948 u64 ch_addr, offset, limit = 0, prv = 0; 949 950 951 /* 952 * Step 0) Check if the address is at special memory ranges 953 * The check bellow is probably enough to fill all cases where 954 * the error is not inside a memory, except for the legacy 955 * range (e. g. VGA addresses). It is unlikely, however, that the 956 * memory controller would generate an error on that range. 957 */ 958 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) { 959 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr); 960 return -EINVAL; 961 } 962 if (addr >= (u64)pvt->tohm) { 963 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr); 964 return -EINVAL; 965 } 966 967 /* 968 * Step 1) Get socket 969 */ 970 for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) { 971 pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads], 972 ®); 973 974 if (!DRAM_RULE_ENABLE(reg)) 975 continue; 976 977 limit = SAD_LIMIT(reg); 978 if (limit <= prv) { 979 sprintf(msg, "Can't discover the memory socket"); 980 return -EINVAL; 981 } 982 if (addr <= limit) 983 break; 984 prv = limit; 985 } 986 if (n_sads == pvt->info.max_sad) { 987 sprintf(msg, "Can't discover the memory socket"); 988 return -EINVAL; 989 } 990 *area_type = get_dram_attr(reg); 991 interleave_mode = INTERLEAVE_MODE(reg); 992 993 pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads], 994 ®); 995 996 if (pvt->info.type == SANDY_BRIDGE) { 997 sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0); 998 for (sad_way = 0; sad_way < 8; sad_way++) { 999 u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, sad_way); 1000 if (sad_way > 0 && sad_interl == pkg) 1001 break; 1002 sad_interleave[sad_way] = pkg; 1003 edac_dbg(0, "SAD interleave #%d: %d\n", 1004 sad_way, sad_interleave[sad_way]); 1005 } 1006 edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", 1007 pvt->sbridge_dev->mc, 1008 n_sads, 1009 addr, 1010 limit, 1011 sad_way + 7, 1012 !interleave_mode ? "" : "XOR[18:16]"); 1013 if (interleave_mode) 1014 idx = ((addr >> 6) ^ (addr >> 16)) & 7; 1015 else 1016 idx = (addr >> 6) & 7; 1017 switch (sad_way) { 1018 case 1: 1019 idx = 0; 1020 break; 1021 case 2: 1022 idx = idx & 1; 1023 break; 1024 case 4: 1025 idx = idx & 3; 1026 break; 1027 case 8: 1028 break; 1029 default: 1030 sprintf(msg, "Can't discover socket interleave"); 1031 return -EINVAL; 1032 } 1033 *socket = sad_interleave[idx]; 1034 edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n", 1035 idx, sad_way, *socket); 1036 } else { 1037 /* Ivy Bridge's SAD mode doesn't support XOR interleave mode */ 1038 idx = (addr >> 6) & 7; 1039 pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx); 1040 *socket = sad_pkg_socket(pkg); 1041 sad_ha = sad_pkg_ha(pkg); 1042 edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n", 1043 idx, *socket, sad_ha); 1044 } 1045 1046 /* 1047 * Move to the proper node structure, in order to access the 1048 * right PCI registers 1049 */ 1050 new_mci = get_mci_for_node_id(*socket); 1051 if (!new_mci) { 1052 sprintf(msg, "Struct for socket #%u wasn't initialized", 1053 *socket); 1054 return -EINVAL; 1055 } 1056 mci = new_mci; 1057 pvt = mci->pvt_info; 1058 1059 /* 1060 * Step 2) Get memory channel 1061 */ 1062 prv = 0; 1063 if (pvt->info.type == SANDY_BRIDGE) 1064 pci_ha = pvt->pci_ha0; 1065 else { 1066 if (sad_ha) 1067 pci_ha = pvt->pci_ha1; 1068 else 1069 pci_ha = pvt->pci_ha0; 1070 } 1071 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) { 1072 pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], ®); 1073 limit = TAD_LIMIT(reg); 1074 if (limit <= prv) { 1075 sprintf(msg, "Can't discover the memory channel"); 1076 return -EINVAL; 1077 } 1078 if (addr <= limit) 1079 break; 1080 prv = limit; 1081 } 1082 if (n_tads == MAX_TAD) { 1083 sprintf(msg, "Can't discover the memory channel"); 1084 return -EINVAL; 1085 } 1086 1087 ch_way = TAD_CH(reg) + 1; 1088 sck_way = TAD_SOCK(reg) + 1; 1089 1090 if (ch_way == 3) 1091 idx = addr >> 6; 1092 else 1093 idx = addr >> (6 + sck_way); 1094 idx = idx % ch_way; 1095 1096 /* 1097 * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ??? 1098 */ 1099 switch (idx) { 1100 case 0: 1101 base_ch = TAD_TGT0(reg); 1102 break; 1103 case 1: 1104 base_ch = TAD_TGT1(reg); 1105 break; 1106 case 2: 1107 base_ch = TAD_TGT2(reg); 1108 break; 1109 case 3: 1110 base_ch = TAD_TGT3(reg); 1111 break; 1112 default: 1113 sprintf(msg, "Can't discover the TAD target"); 1114 return -EINVAL; 1115 } 1116 *channel_mask = 1 << base_ch; 1117 1118 pci_read_config_dword(pvt->pci_tad[base_ch], 1119 tad_ch_nilv_offset[n_tads], 1120 &tad_offset); 1121 1122 if (pvt->is_mirrored) { 1123 *channel_mask |= 1 << ((base_ch + 2) % 4); 1124 switch(ch_way) { 1125 case 2: 1126 case 4: 1127 sck_xch = 1 << sck_way * (ch_way >> 1); 1128 break; 1129 default: 1130 sprintf(msg, "Invalid mirror set. Can't decode addr"); 1131 return -EINVAL; 1132 } 1133 } else 1134 sck_xch = (1 << sck_way) * ch_way; 1135 1136 if (pvt->is_lockstep) 1137 *channel_mask |= 1 << ((base_ch + 1) % 4); 1138 1139 offset = TAD_OFFSET(tad_offset); 1140 1141 edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n", 1142 n_tads, 1143 addr, 1144 limit, 1145 (u32)TAD_SOCK(reg), 1146 ch_way, 1147 offset, 1148 idx, 1149 base_ch, 1150 *channel_mask); 1151 1152 /* Calculate channel address */ 1153 /* Remove the TAD offset */ 1154 1155 if (offset > addr) { 1156 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!", 1157 offset, addr); 1158 return -EINVAL; 1159 } 1160 addr -= offset; 1161 /* Store the low bits [0:6] of the addr */ 1162 ch_addr = addr & 0x7f; 1163 /* Remove socket wayness and remove 6 bits */ 1164 addr >>= 6; 1165 addr = div_u64(addr, sck_xch); 1166 #if 0 1167 /* Divide by channel way */ 1168 addr = addr / ch_way; 1169 #endif 1170 /* Recover the last 6 bits */ 1171 ch_addr |= addr << 6; 1172 1173 /* 1174 * Step 3) Decode rank 1175 */ 1176 for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) { 1177 pci_read_config_dword(pvt->pci_tad[base_ch], 1178 rir_way_limit[n_rir], 1179 ®); 1180 1181 if (!IS_RIR_VALID(reg)) 1182 continue; 1183 1184 limit = RIR_LIMIT(reg); 1185 mb = div_u64_rem(limit >> 20, 1000, &kb); 1186 edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n", 1187 n_rir, 1188 mb, kb, 1189 limit, 1190 1 << RIR_WAY(reg)); 1191 if (ch_addr <= limit) 1192 break; 1193 } 1194 if (n_rir == MAX_RIR_RANGES) { 1195 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx", 1196 ch_addr); 1197 return -EINVAL; 1198 } 1199 rir_way = RIR_WAY(reg); 1200 if (pvt->is_close_pg) 1201 idx = (ch_addr >> 6); 1202 else 1203 idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */ 1204 idx %= 1 << rir_way; 1205 1206 pci_read_config_dword(pvt->pci_tad[base_ch], 1207 rir_offset[n_rir][idx], 1208 ®); 1209 *rank = RIR_RNK_TGT(reg); 1210 1211 edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", 1212 n_rir, 1213 ch_addr, 1214 limit, 1215 rir_way, 1216 idx); 1217 1218 return 0; 1219 } 1220 1221 /**************************************************************************** 1222 Device initialization routines: put/get, init/exit 1223 ****************************************************************************/ 1224 1225 /* 1226 * sbridge_put_all_devices 'put' all the devices that we have 1227 * reserved via 'get' 1228 */ 1229 static void sbridge_put_devices(struct sbridge_dev *sbridge_dev) 1230 { 1231 int i; 1232 1233 edac_dbg(0, "\n"); 1234 for (i = 0; i < sbridge_dev->n_devs; i++) { 1235 struct pci_dev *pdev = sbridge_dev->pdev[i]; 1236 if (!pdev) 1237 continue; 1238 edac_dbg(0, "Removing dev %02x:%02x.%d\n", 1239 pdev->bus->number, 1240 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 1241 pci_dev_put(pdev); 1242 } 1243 } 1244 1245 static void sbridge_put_all_devices(void) 1246 { 1247 struct sbridge_dev *sbridge_dev, *tmp; 1248 1249 list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) { 1250 sbridge_put_devices(sbridge_dev); 1251 free_sbridge_dev(sbridge_dev); 1252 } 1253 } 1254 1255 static int sbridge_get_onedevice(struct pci_dev **prev, 1256 u8 *num_mc, 1257 const struct pci_id_table *table, 1258 const unsigned devno) 1259 { 1260 struct sbridge_dev *sbridge_dev; 1261 const struct pci_id_descr *dev_descr = &table->descr[devno]; 1262 1263 struct pci_dev *pdev = NULL; 1264 u8 bus = 0; 1265 1266 sbridge_printk(KERN_DEBUG, 1267 "Seeking for: dev %02x.%d PCI ID %04x:%04x\n", 1268 dev_descr->dev, dev_descr->func, 1269 PCI_VENDOR_ID_INTEL, dev_descr->dev_id); 1270 1271 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 1272 dev_descr->dev_id, *prev); 1273 1274 if (!pdev) { 1275 if (*prev) { 1276 *prev = pdev; 1277 return 0; 1278 } 1279 1280 if (dev_descr->optional) 1281 return 0; 1282 1283 if (devno == 0) 1284 return -ENODEV; 1285 1286 sbridge_printk(KERN_INFO, 1287 "Device not found: dev %02x.%d PCI ID %04x:%04x\n", 1288 dev_descr->dev, dev_descr->func, 1289 PCI_VENDOR_ID_INTEL, dev_descr->dev_id); 1290 1291 /* End of list, leave */ 1292 return -ENODEV; 1293 } 1294 bus = pdev->bus->number; 1295 1296 sbridge_dev = get_sbridge_dev(bus); 1297 if (!sbridge_dev) { 1298 sbridge_dev = alloc_sbridge_dev(bus, table); 1299 if (!sbridge_dev) { 1300 pci_dev_put(pdev); 1301 return -ENOMEM; 1302 } 1303 (*num_mc)++; 1304 } 1305 1306 if (sbridge_dev->pdev[devno]) { 1307 sbridge_printk(KERN_ERR, 1308 "Duplicated device for " 1309 "dev %02x:%d.%d PCI ID %04x:%04x\n", 1310 bus, dev_descr->dev, dev_descr->func, 1311 PCI_VENDOR_ID_INTEL, dev_descr->dev_id); 1312 pci_dev_put(pdev); 1313 return -ENODEV; 1314 } 1315 1316 sbridge_dev->pdev[devno] = pdev; 1317 1318 /* Sanity check */ 1319 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev || 1320 PCI_FUNC(pdev->devfn) != dev_descr->func)) { 1321 sbridge_printk(KERN_ERR, 1322 "Device PCI ID %04x:%04x " 1323 "has dev %02x:%d.%d instead of dev %02x:%02x.%d\n", 1324 PCI_VENDOR_ID_INTEL, dev_descr->dev_id, 1325 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), 1326 bus, dev_descr->dev, dev_descr->func); 1327 return -ENODEV; 1328 } 1329 1330 /* Be sure that the device is enabled */ 1331 if (unlikely(pci_enable_device(pdev) < 0)) { 1332 sbridge_printk(KERN_ERR, 1333 "Couldn't enable " 1334 "dev %02x:%d.%d PCI ID %04x:%04x\n", 1335 bus, dev_descr->dev, dev_descr->func, 1336 PCI_VENDOR_ID_INTEL, dev_descr->dev_id); 1337 return -ENODEV; 1338 } 1339 1340 edac_dbg(0, "Detected dev %02x:%d.%d PCI ID %04x:%04x\n", 1341 bus, dev_descr->dev, dev_descr->func, 1342 PCI_VENDOR_ID_INTEL, dev_descr->dev_id); 1343 1344 /* 1345 * As stated on drivers/pci/search.c, the reference count for 1346 * @from is always decremented if it is not %NULL. So, as we need 1347 * to get all devices up to null, we need to do a get for the device 1348 */ 1349 pci_dev_get(pdev); 1350 1351 *prev = pdev; 1352 1353 return 0; 1354 } 1355 1356 /* 1357 * sbridge_get_all_devices - Find and perform 'get' operation on the MCH's 1358 * device/functions we want to reference for this driver. 1359 * Need to 'get' device 16 func 1 and func 2. 1360 * @num_mc: pointer to the memory controllers count, to be incremented in case 1361 * of success. 1362 * @table: model specific table 1363 * 1364 * returns 0 in case of success or error code 1365 */ 1366 static int sbridge_get_all_devices(u8 *num_mc, 1367 const struct pci_id_table *table) 1368 { 1369 int i, rc; 1370 struct pci_dev *pdev = NULL; 1371 1372 while (table && table->descr) { 1373 for (i = 0; i < table->n_devs; i++) { 1374 pdev = NULL; 1375 do { 1376 rc = sbridge_get_onedevice(&pdev, num_mc, 1377 table, i); 1378 if (rc < 0) { 1379 if (i == 0) { 1380 i = table->n_devs; 1381 break; 1382 } 1383 sbridge_put_all_devices(); 1384 return -ENODEV; 1385 } 1386 } while (pdev); 1387 } 1388 table++; 1389 } 1390 1391 return 0; 1392 } 1393 1394 static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, 1395 struct sbridge_dev *sbridge_dev) 1396 { 1397 struct sbridge_pvt *pvt = mci->pvt_info; 1398 struct pci_dev *pdev; 1399 int i, func, slot; 1400 1401 for (i = 0; i < sbridge_dev->n_devs; i++) { 1402 pdev = sbridge_dev->pdev[i]; 1403 if (!pdev) 1404 continue; 1405 slot = PCI_SLOT(pdev->devfn); 1406 func = PCI_FUNC(pdev->devfn); 1407 switch (slot) { 1408 case 12: 1409 switch (func) { 1410 case 6: 1411 pvt->pci_sad0 = pdev; 1412 break; 1413 case 7: 1414 pvt->pci_sad1 = pdev; 1415 break; 1416 default: 1417 goto error; 1418 } 1419 break; 1420 case 13: 1421 switch (func) { 1422 case 6: 1423 pvt->pci_br0 = pdev; 1424 break; 1425 default: 1426 goto error; 1427 } 1428 break; 1429 case 14: 1430 switch (func) { 1431 case 0: 1432 pvt->pci_ha0 = pdev; 1433 break; 1434 default: 1435 goto error; 1436 } 1437 break; 1438 case 15: 1439 switch (func) { 1440 case 0: 1441 pvt->pci_ta = pdev; 1442 break; 1443 case 1: 1444 pvt->pci_ras = pdev; 1445 break; 1446 case 2: 1447 case 3: 1448 case 4: 1449 case 5: 1450 pvt->pci_tad[func - 2] = pdev; 1451 break; 1452 default: 1453 goto error; 1454 } 1455 break; 1456 case 17: 1457 switch (func) { 1458 case 0: 1459 pvt->pci_ddrio = pdev; 1460 break; 1461 default: 1462 goto error; 1463 } 1464 break; 1465 default: 1466 goto error; 1467 } 1468 1469 edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", 1470 sbridge_dev->bus, 1471 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), 1472 pdev); 1473 } 1474 1475 /* Check if everything were registered */ 1476 if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 || 1477 !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta) 1478 goto enodev; 1479 1480 for (i = 0; i < NUM_CHANNELS; i++) { 1481 if (!pvt->pci_tad[i]) 1482 goto enodev; 1483 } 1484 return 0; 1485 1486 enodev: 1487 sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); 1488 return -ENODEV; 1489 1490 error: 1491 sbridge_printk(KERN_ERR, "Device %d, function %d " 1492 "is out of the expected range\n", 1493 slot, func); 1494 return -EINVAL; 1495 } 1496 1497 static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, 1498 struct sbridge_dev *sbridge_dev) 1499 { 1500 struct sbridge_pvt *pvt = mci->pvt_info; 1501 struct pci_dev *pdev, *tmp; 1502 int i, func, slot; 1503 bool mode_2ha = false; 1504 1505 tmp = pci_get_device(PCI_VENDOR_ID_INTEL, 1506 PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, NULL); 1507 if (tmp) { 1508 mode_2ha = true; 1509 pci_dev_put(tmp); 1510 } 1511 1512 for (i = 0; i < sbridge_dev->n_devs; i++) { 1513 pdev = sbridge_dev->pdev[i]; 1514 if (!pdev) 1515 continue; 1516 slot = PCI_SLOT(pdev->devfn); 1517 func = PCI_FUNC(pdev->devfn); 1518 1519 switch (slot) { 1520 case 14: 1521 if (func == 0) { 1522 pvt->pci_ha0 = pdev; 1523 break; 1524 } 1525 goto error; 1526 case 15: 1527 switch (func) { 1528 case 0: 1529 pvt->pci_ta = pdev; 1530 break; 1531 case 1: 1532 pvt->pci_ras = pdev; 1533 break; 1534 case 4: 1535 case 5: 1536 /* if we have 2 HAs active, channels 2 and 3 1537 * are in other device */ 1538 if (mode_2ha) 1539 break; 1540 /* fall through */ 1541 case 2: 1542 case 3: 1543 pvt->pci_tad[func - 2] = pdev; 1544 break; 1545 default: 1546 goto error; 1547 } 1548 break; 1549 case 17: 1550 if (func == 4) { 1551 pvt->pci_ddrio = pdev; 1552 break; 1553 } else if (func == 0) { 1554 if (!mode_2ha) 1555 pvt->pci_ddrio = pdev; 1556 break; 1557 } 1558 goto error; 1559 case 22: 1560 switch (func) { 1561 case 0: 1562 pvt->pci_sad0 = pdev; 1563 break; 1564 case 1: 1565 pvt->pci_br0 = pdev; 1566 break; 1567 case 2: 1568 pvt->pci_br1 = pdev; 1569 break; 1570 default: 1571 goto error; 1572 } 1573 break; 1574 case 28: 1575 if (func == 0) { 1576 pvt->pci_ha1 = pdev; 1577 break; 1578 } 1579 goto error; 1580 case 29: 1581 /* we shouldn't have this device if we have just one 1582 * HA present */ 1583 WARN_ON(!mode_2ha); 1584 if (func == 2 || func == 3) { 1585 pvt->pci_tad[func] = pdev; 1586 break; 1587 } 1588 goto error; 1589 default: 1590 goto error; 1591 } 1592 1593 edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", 1594 sbridge_dev->bus, 1595 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), 1596 pdev); 1597 } 1598 1599 /* Check if everything were registered */ 1600 if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 || 1601 !pvt->pci_br1 || !pvt->pci_tad || !pvt->pci_ras || 1602 !pvt->pci_ta) 1603 goto enodev; 1604 1605 for (i = 0; i < NUM_CHANNELS; i++) { 1606 if (!pvt->pci_tad[i]) 1607 goto enodev; 1608 } 1609 return 0; 1610 1611 enodev: 1612 sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); 1613 return -ENODEV; 1614 1615 error: 1616 sbridge_printk(KERN_ERR, 1617 "Device %d, function %d is out of the expected range\n", 1618 slot, func); 1619 return -EINVAL; 1620 } 1621 1622 /**************************************************************************** 1623 Error check routines 1624 ****************************************************************************/ 1625 1626 /* 1627 * While Sandy Bridge has error count registers, SMI BIOS read values from 1628 * and resets the counters. So, they are not reliable for the OS to read 1629 * from them. So, we have no option but to just trust on whatever MCE is 1630 * telling us about the errors. 1631 */ 1632 static void sbridge_mce_output_error(struct mem_ctl_info *mci, 1633 const struct mce *m) 1634 { 1635 struct mem_ctl_info *new_mci; 1636 struct sbridge_pvt *pvt = mci->pvt_info; 1637 enum hw_event_mc_err_type tp_event; 1638 char *type, *optype, msg[256]; 1639 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); 1640 bool overflow = GET_BITFIELD(m->status, 62, 62); 1641 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); 1642 bool recoverable; 1643 u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); 1644 u32 mscod = GET_BITFIELD(m->status, 16, 31); 1645 u32 errcode = GET_BITFIELD(m->status, 0, 15); 1646 u32 channel = GET_BITFIELD(m->status, 0, 3); 1647 u32 optypenum = GET_BITFIELD(m->status, 4, 6); 1648 long channel_mask, first_channel; 1649 u8 rank, socket; 1650 int rc, dimm; 1651 char *area_type = NULL; 1652 1653 if (pvt->info.type == IVY_BRIDGE) 1654 recoverable = true; 1655 else 1656 recoverable = GET_BITFIELD(m->status, 56, 56); 1657 1658 if (uncorrected_error) { 1659 if (ripv) { 1660 type = "FATAL"; 1661 tp_event = HW_EVENT_ERR_FATAL; 1662 } else { 1663 type = "NON_FATAL"; 1664 tp_event = HW_EVENT_ERR_UNCORRECTED; 1665 } 1666 } else { 1667 type = "CORRECTED"; 1668 tp_event = HW_EVENT_ERR_CORRECTED; 1669 } 1670 1671 /* 1672 * According with Table 15-9 of the Intel Architecture spec vol 3A, 1673 * memory errors should fit in this mask: 1674 * 000f 0000 1mmm cccc (binary) 1675 * where: 1676 * f = Correction Report Filtering Bit. If 1, subsequent errors 1677 * won't be shown 1678 * mmm = error type 1679 * cccc = channel 1680 * If the mask doesn't match, report an error to the parsing logic 1681 */ 1682 if (! ((errcode & 0xef80) == 0x80)) { 1683 optype = "Can't parse: it is not a mem"; 1684 } else { 1685 switch (optypenum) { 1686 case 0: 1687 optype = "generic undef request error"; 1688 break; 1689 case 1: 1690 optype = "memory read error"; 1691 break; 1692 case 2: 1693 optype = "memory write error"; 1694 break; 1695 case 3: 1696 optype = "addr/cmd error"; 1697 break; 1698 case 4: 1699 optype = "memory scrubbing error"; 1700 break; 1701 default: 1702 optype = "reserved"; 1703 break; 1704 } 1705 } 1706 1707 /* Only decode errors with an valid address (ADDRV) */ 1708 if (!GET_BITFIELD(m->status, 58, 58)) 1709 return; 1710 1711 rc = get_memory_error_data(mci, m->addr, &socket, 1712 &channel_mask, &rank, &area_type, msg); 1713 if (rc < 0) 1714 goto err_parsing; 1715 new_mci = get_mci_for_node_id(socket); 1716 if (!new_mci) { 1717 strcpy(msg, "Error: socket got corrupted!"); 1718 goto err_parsing; 1719 } 1720 mci = new_mci; 1721 pvt = mci->pvt_info; 1722 1723 first_channel = find_first_bit(&channel_mask, NUM_CHANNELS); 1724 1725 if (rank < 4) 1726 dimm = 0; 1727 else if (rank < 8) 1728 dimm = 1; 1729 else 1730 dimm = 2; 1731 1732 1733 /* 1734 * FIXME: On some memory configurations (mirror, lockstep), the 1735 * Memory Controller can't point the error to a single DIMM. The 1736 * EDAC core should be handling the channel mask, in order to point 1737 * to the group of dimm's where the error may be happening. 1738 */ 1739 snprintf(msg, sizeof(msg), 1740 "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d", 1741 overflow ? " OVERFLOW" : "", 1742 (uncorrected_error && recoverable) ? " recoverable" : "", 1743 area_type, 1744 mscod, errcode, 1745 socket, 1746 channel_mask, 1747 rank); 1748 1749 edac_dbg(0, "%s\n", msg); 1750 1751 /* FIXME: need support for channel mask */ 1752 1753 /* Call the helper to output message */ 1754 edac_mc_handle_error(tp_event, mci, core_err_cnt, 1755 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, 1756 channel, dimm, -1, 1757 optype, msg); 1758 return; 1759 err_parsing: 1760 edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, 1761 -1, -1, -1, 1762 msg, ""); 1763 1764 } 1765 1766 /* 1767 * sbridge_check_error Retrieve and process errors reported by the 1768 * hardware. Called by the Core module. 1769 */ 1770 static void sbridge_check_error(struct mem_ctl_info *mci) 1771 { 1772 struct sbridge_pvt *pvt = mci->pvt_info; 1773 int i; 1774 unsigned count = 0; 1775 struct mce *m; 1776 1777 /* 1778 * MCE first step: Copy all mce errors into a temporary buffer 1779 * We use a double buffering here, to reduce the risk of 1780 * loosing an error. 1781 */ 1782 smp_rmb(); 1783 count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) 1784 % MCE_LOG_LEN; 1785 if (!count) 1786 return; 1787 1788 m = pvt->mce_outentry; 1789 if (pvt->mce_in + count > MCE_LOG_LEN) { 1790 unsigned l = MCE_LOG_LEN - pvt->mce_in; 1791 1792 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); 1793 smp_wmb(); 1794 pvt->mce_in = 0; 1795 count -= l; 1796 m += l; 1797 } 1798 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); 1799 smp_wmb(); 1800 pvt->mce_in += count; 1801 1802 smp_rmb(); 1803 if (pvt->mce_overrun) { 1804 sbridge_printk(KERN_ERR, "Lost %d memory errors\n", 1805 pvt->mce_overrun); 1806 smp_wmb(); 1807 pvt->mce_overrun = 0; 1808 } 1809 1810 /* 1811 * MCE second step: parse errors and display 1812 */ 1813 for (i = 0; i < count; i++) 1814 sbridge_mce_output_error(mci, &pvt->mce_outentry[i]); 1815 } 1816 1817 /* 1818 * sbridge_mce_check_error Replicates mcelog routine to get errors 1819 * This routine simply queues mcelog errors, and 1820 * return. The error itself should be handled later 1821 * by sbridge_check_error. 1822 * WARNING: As this routine should be called at NMI time, extra care should 1823 * be taken to avoid deadlocks, and to be as fast as possible. 1824 */ 1825 static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, 1826 void *data) 1827 { 1828 struct mce *mce = (struct mce *)data; 1829 struct mem_ctl_info *mci; 1830 struct sbridge_pvt *pvt; 1831 char *type; 1832 1833 if (get_edac_report_status() == EDAC_REPORTING_DISABLED) 1834 return NOTIFY_DONE; 1835 1836 mci = get_mci_for_node_id(mce->socketid); 1837 if (!mci) 1838 return NOTIFY_BAD; 1839 pvt = mci->pvt_info; 1840 1841 /* 1842 * Just let mcelog handle it if the error is 1843 * outside the memory controller. A memory error 1844 * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0. 1845 * bit 12 has an special meaning. 1846 */ 1847 if ((mce->status & 0xefff) >> 7 != 1) 1848 return NOTIFY_DONE; 1849 1850 if (mce->mcgstatus & MCG_STATUS_MCIP) 1851 type = "Exception"; 1852 else 1853 type = "Event"; 1854 1855 sbridge_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); 1856 1857 sbridge_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx " 1858 "Bank %d: %016Lx\n", mce->extcpu, type, 1859 mce->mcgstatus, mce->bank, mce->status); 1860 sbridge_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc); 1861 sbridge_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr); 1862 sbridge_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc); 1863 1864 sbridge_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET " 1865 "%u APIC %x\n", mce->cpuvendor, mce->cpuid, 1866 mce->time, mce->socketid, mce->apicid); 1867 1868 /* Only handle if it is the right mc controller */ 1869 if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc) 1870 return NOTIFY_DONE; 1871 1872 smp_rmb(); 1873 if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { 1874 smp_wmb(); 1875 pvt->mce_overrun++; 1876 return NOTIFY_DONE; 1877 } 1878 1879 /* Copy memory error at the ringbuffer */ 1880 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); 1881 smp_wmb(); 1882 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; 1883 1884 /* Handle fatal errors immediately */ 1885 if (mce->mcgstatus & 1) 1886 sbridge_check_error(mci); 1887 1888 /* Advice mcelog that the error were handled */ 1889 return NOTIFY_STOP; 1890 } 1891 1892 static struct notifier_block sbridge_mce_dec = { 1893 .notifier_call = sbridge_mce_check_error, 1894 }; 1895 1896 /**************************************************************************** 1897 EDAC register/unregister logic 1898 ****************************************************************************/ 1899 1900 static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) 1901 { 1902 struct mem_ctl_info *mci = sbridge_dev->mci; 1903 struct sbridge_pvt *pvt; 1904 1905 if (unlikely(!mci || !mci->pvt_info)) { 1906 edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev); 1907 1908 sbridge_printk(KERN_ERR, "Couldn't find mci handler\n"); 1909 return; 1910 } 1911 1912 pvt = mci->pvt_info; 1913 1914 edac_dbg(0, "MC: mci = %p, dev = %p\n", 1915 mci, &sbridge_dev->pdev[0]->dev); 1916 1917 /* Remove MC sysfs nodes */ 1918 edac_mc_del_mc(mci->pdev); 1919 1920 edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); 1921 kfree(mci->ctl_name); 1922 edac_mc_free(mci); 1923 sbridge_dev->mci = NULL; 1924 } 1925 1926 static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) 1927 { 1928 struct mem_ctl_info *mci; 1929 struct edac_mc_layer layers[2]; 1930 struct sbridge_pvt *pvt; 1931 struct pci_dev *pdev = sbridge_dev->pdev[0]; 1932 int rc; 1933 1934 /* Check the number of active and not disabled channels */ 1935 rc = check_if_ecc_is_active(sbridge_dev->bus); 1936 if (unlikely(rc < 0)) 1937 return rc; 1938 1939 /* allocate a new MC control structure */ 1940 layers[0].type = EDAC_MC_LAYER_CHANNEL; 1941 layers[0].size = NUM_CHANNELS; 1942 layers[0].is_virt_csrow = false; 1943 layers[1].type = EDAC_MC_LAYER_SLOT; 1944 layers[1].size = MAX_DIMMS; 1945 layers[1].is_virt_csrow = true; 1946 mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers, 1947 sizeof(*pvt)); 1948 1949 if (unlikely(!mci)) 1950 return -ENOMEM; 1951 1952 edac_dbg(0, "MC: mci = %p, dev = %p\n", 1953 mci, &pdev->dev); 1954 1955 pvt = mci->pvt_info; 1956 memset(pvt, 0, sizeof(*pvt)); 1957 1958 /* Associate sbridge_dev and mci for future usage */ 1959 pvt->sbridge_dev = sbridge_dev; 1960 sbridge_dev->mci = mci; 1961 1962 mci->mtype_cap = MEM_FLAG_DDR3; 1963 mci->edac_ctl_cap = EDAC_FLAG_NONE; 1964 mci->edac_cap = EDAC_FLAG_NONE; 1965 mci->mod_name = "sbridge_edac.c"; 1966 mci->mod_ver = SBRIDGE_REVISION; 1967 mci->dev_name = pci_name(pdev); 1968 mci->ctl_page_to_phys = NULL; 1969 1970 /* Set the function pointer to an actual operation function */ 1971 mci->edac_check = sbridge_check_error; 1972 1973 pvt->info.type = type; 1974 if (type == IVY_BRIDGE) { 1975 pvt->info.rankcfgr = IB_RANK_CFG_A; 1976 pvt->info.get_tolm = ibridge_get_tolm; 1977 pvt->info.get_tohm = ibridge_get_tohm; 1978 pvt->info.dram_rule = ibridge_dram_rule; 1979 pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); 1980 pvt->info.interleave_list = ibridge_interleave_list; 1981 pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); 1982 pvt->info.interleave_pkg = ibridge_interleave_pkg; 1983 mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx); 1984 1985 /* Store pci devices at mci for faster access */ 1986 rc = ibridge_mci_bind_devs(mci, sbridge_dev); 1987 if (unlikely(rc < 0)) 1988 goto fail0; 1989 } else { 1990 pvt->info.rankcfgr = SB_RANK_CFG_A; 1991 pvt->info.get_tolm = sbridge_get_tolm; 1992 pvt->info.get_tohm = sbridge_get_tohm; 1993 pvt->info.dram_rule = sbridge_dram_rule; 1994 pvt->info.max_sad = ARRAY_SIZE(sbridge_dram_rule); 1995 pvt->info.interleave_list = sbridge_interleave_list; 1996 pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list); 1997 pvt->info.interleave_pkg = sbridge_interleave_pkg; 1998 mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx); 1999 2000 /* Store pci devices at mci for faster access */ 2001 rc = sbridge_mci_bind_devs(mci, sbridge_dev); 2002 if (unlikely(rc < 0)) 2003 goto fail0; 2004 } 2005 2006 2007 /* Get dimm basic config and the memory layout */ 2008 get_dimm_config(mci); 2009 get_memory_layout(mci); 2010 2011 /* record ptr to the generic device */ 2012 mci->pdev = &pdev->dev; 2013 2014 /* add this new MC control structure to EDAC's list of MCs */ 2015 if (unlikely(edac_mc_add_mc(mci))) { 2016 edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); 2017 rc = -EINVAL; 2018 goto fail0; 2019 } 2020 2021 return 0; 2022 2023 fail0: 2024 kfree(mci->ctl_name); 2025 edac_mc_free(mci); 2026 sbridge_dev->mci = NULL; 2027 return rc; 2028 } 2029 2030 /* 2031 * sbridge_probe Probe for ONE instance of device to see if it is 2032 * present. 2033 * return: 2034 * 0 for FOUND a device 2035 * < 0 for error code 2036 */ 2037 2038 static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) 2039 { 2040 int rc; 2041 u8 mc, num_mc = 0; 2042 struct sbridge_dev *sbridge_dev; 2043 enum type type; 2044 2045 /* get the pci devices we want to reserve for our use */ 2046 mutex_lock(&sbridge_edac_lock); 2047 2048 /* 2049 * All memory controllers are allocated at the first pass. 2050 */ 2051 if (unlikely(probed >= 1)) { 2052 mutex_unlock(&sbridge_edac_lock); 2053 return -ENODEV; 2054 } 2055 probed++; 2056 2057 if (pdev->device == PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA) { 2058 rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_ibridge_table); 2059 type = IVY_BRIDGE; 2060 } else { 2061 rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_sbridge_table); 2062 type = SANDY_BRIDGE; 2063 } 2064 if (unlikely(rc < 0)) 2065 goto fail0; 2066 mc = 0; 2067 2068 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { 2069 edac_dbg(0, "Registering MC#%d (%d of %d)\n", 2070 mc, mc + 1, num_mc); 2071 sbridge_dev->mc = mc++; 2072 rc = sbridge_register_mci(sbridge_dev, type); 2073 if (unlikely(rc < 0)) 2074 goto fail1; 2075 } 2076 2077 sbridge_printk(KERN_INFO, "Driver loaded.\n"); 2078 2079 mutex_unlock(&sbridge_edac_lock); 2080 return 0; 2081 2082 fail1: 2083 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) 2084 sbridge_unregister_mci(sbridge_dev); 2085 2086 sbridge_put_all_devices(); 2087 fail0: 2088 mutex_unlock(&sbridge_edac_lock); 2089 return rc; 2090 } 2091 2092 /* 2093 * sbridge_remove destructor for one instance of device 2094 * 2095 */ 2096 static void sbridge_remove(struct pci_dev *pdev) 2097 { 2098 struct sbridge_dev *sbridge_dev; 2099 2100 edac_dbg(0, "\n"); 2101 2102 /* 2103 * we have a trouble here: pdev value for removal will be wrong, since 2104 * it will point to the X58 register used to detect that the machine 2105 * is a Nehalem or upper design. However, due to the way several PCI 2106 * devices are grouped together to provide MC functionality, we need 2107 * to use a different method for releasing the devices 2108 */ 2109 2110 mutex_lock(&sbridge_edac_lock); 2111 2112 if (unlikely(!probed)) { 2113 mutex_unlock(&sbridge_edac_lock); 2114 return; 2115 } 2116 2117 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) 2118 sbridge_unregister_mci(sbridge_dev); 2119 2120 /* Release PCI resources */ 2121 sbridge_put_all_devices(); 2122 2123 probed--; 2124 2125 mutex_unlock(&sbridge_edac_lock); 2126 } 2127 2128 MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl); 2129 2130 /* 2131 * sbridge_driver pci_driver structure for this module 2132 * 2133 */ 2134 static struct pci_driver sbridge_driver = { 2135 .name = "sbridge_edac", 2136 .probe = sbridge_probe, 2137 .remove = sbridge_remove, 2138 .id_table = sbridge_pci_tbl, 2139 }; 2140 2141 /* 2142 * sbridge_init Module entry function 2143 * Try to initialize this module for its devices 2144 */ 2145 static int __init sbridge_init(void) 2146 { 2147 int pci_rc; 2148 2149 edac_dbg(2, "\n"); 2150 2151 /* Ensure that the OPSTATE is set correctly for POLL or NMI */ 2152 opstate_init(); 2153 2154 pci_rc = pci_register_driver(&sbridge_driver); 2155 if (pci_rc >= 0) { 2156 mce_register_decode_chain(&sbridge_mce_dec); 2157 if (get_edac_report_status() == EDAC_REPORTING_DISABLED) 2158 sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); 2159 return 0; 2160 } 2161 2162 sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", 2163 pci_rc); 2164 2165 return pci_rc; 2166 } 2167 2168 /* 2169 * sbridge_exit() Module exit function 2170 * Unregister the driver 2171 */ 2172 static void __exit sbridge_exit(void) 2173 { 2174 edac_dbg(2, "\n"); 2175 pci_unregister_driver(&sbridge_driver); 2176 mce_unregister_decode_chain(&sbridge_mce_dec); 2177 } 2178 2179 module_init(sbridge_init); 2180 module_exit(sbridge_exit); 2181 2182 module_param(edac_op_state, int, 0444); 2183 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); 2184 2185 MODULE_LICENSE("GPL"); 2186 MODULE_AUTHOR("Mauro Carvalho Chehab"); 2187 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); 2188 MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - " 2189 SBRIDGE_REVISION); 2190