1 /* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module 2 * 3 * This driver supports the memory controllers found on the Intel 4 * processor family Sandy Bridge. 5 * 6 * This file may be distributed under the terms of the 7 * GNU General Public License version 2 only. 8 * 9 * Copyright (c) 2011 by: 10 * Mauro Carvalho Chehab 11 */ 12 13 #include <linux/module.h> 14 #include <linux/init.h> 15 #include <linux/pci.h> 16 #include <linux/pci_ids.h> 17 #include <linux/slab.h> 18 #include <linux/delay.h> 19 #include <linux/edac.h> 20 #include <linux/mmzone.h> 21 #include <linux/smp.h> 22 #include <linux/bitmap.h> 23 #include <linux/math64.h> 24 #include <asm/processor.h> 25 #include <asm/mce.h> 26 27 #include "edac_core.h" 28 29 /* Static vars */ 30 static LIST_HEAD(sbridge_edac_list); 31 static DEFINE_MUTEX(sbridge_edac_lock); 32 static int probed; 33 34 /* 35 * Alter this version for the module when modifications are made 36 */ 37 #define SBRIDGE_REVISION " Ver: 1.1.1 " 38 #define EDAC_MOD_STR "sbridge_edac" 39 40 /* 41 * Debug macros 42 */ 43 #define sbridge_printk(level, fmt, arg...) \ 44 edac_printk(level, "sbridge", fmt, ##arg) 45 46 #define sbridge_mc_printk(mci, level, fmt, arg...) \ 47 edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##arg) 48 49 /* 50 * Get a bit field at register value <v>, from bit <lo> to bit <hi> 51 */ 52 #define GET_BITFIELD(v, lo, hi) \ 53 (((v) & GENMASK_ULL(hi, lo)) >> (lo)) 54 55 /* Devices 12 Function 6, Offsets 0x80 to 0xcc */ 56 static const u32 sbridge_dram_rule[] = { 57 0x80, 0x88, 0x90, 0x98, 0xa0, 58 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, 59 }; 60 61 static const u32 ibridge_dram_rule[] = { 62 0x60, 0x68, 0x70, 0x78, 0x80, 63 0x88, 0x90, 0x98, 0xa0, 0xa8, 64 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 65 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, 66 }; 67 68 #define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff) 69 #define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3) 70 #define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1) 71 #define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0) 72 #define A7MODE(reg) GET_BITFIELD(reg, 26, 26) 73 74 static char *get_dram_attr(u32 reg) 75 { 76 switch(DRAM_ATTR(reg)) { 77 case 0: 78 return "DRAM"; 79 case 1: 80 return "MMCFG"; 81 case 2: 82 return "NXM"; 83 default: 84 return "unknown"; 85 } 86 } 87 88 static const u32 sbridge_interleave_list[] = { 89 0x84, 0x8c, 0x94, 0x9c, 0xa4, 90 0xac, 0xb4, 0xbc, 0xc4, 0xcc, 91 }; 92 93 static const u32 ibridge_interleave_list[] = { 94 0x64, 0x6c, 0x74, 0x7c, 0x84, 95 0x8c, 0x94, 0x9c, 0xa4, 0xac, 96 0xb4, 0xbc, 0xc4, 0xcc, 0xd4, 97 0xdc, 0xe4, 0xec, 0xf4, 0xfc, 98 }; 99 100 struct interleave_pkg { 101 unsigned char start; 102 unsigned char end; 103 }; 104 105 static const struct interleave_pkg sbridge_interleave_pkg[] = { 106 { 0, 2 }, 107 { 3, 5 }, 108 { 8, 10 }, 109 { 11, 13 }, 110 { 16, 18 }, 111 { 19, 21 }, 112 { 24, 26 }, 113 { 27, 29 }, 114 }; 115 116 static const struct interleave_pkg ibridge_interleave_pkg[] = { 117 { 0, 3 }, 118 { 4, 7 }, 119 { 8, 11 }, 120 { 12, 15 }, 121 { 16, 19 }, 122 { 20, 23 }, 123 { 24, 27 }, 124 { 28, 31 }, 125 }; 126 127 static inline int sad_pkg(const struct interleave_pkg *table, u32 reg, 128 int interleave) 129 { 130 return GET_BITFIELD(reg, table[interleave].start, 131 table[interleave].end); 132 } 133 134 /* Devices 12 Function 7 */ 135 136 #define TOLM 0x80 137 #define TOHM 0x84 138 #define HASWELL_TOLM 0xd0 139 #define HASWELL_TOHM_0 0xd4 140 #define HASWELL_TOHM_1 0xd8 141 142 #define GET_TOLM(reg) ((GET_BITFIELD(reg, 0, 3) << 28) | 0x3ffffff) 143 #define GET_TOHM(reg) ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff) 144 145 /* Device 13 Function 6 */ 146 147 #define SAD_TARGET 0xf0 148 149 #define SOURCE_ID(reg) GET_BITFIELD(reg, 9, 11) 150 151 #define SAD_CONTROL 0xf4 152 153 /* Device 14 function 0 */ 154 155 static const u32 tad_dram_rule[] = { 156 0x40, 0x44, 0x48, 0x4c, 157 0x50, 0x54, 0x58, 0x5c, 158 0x60, 0x64, 0x68, 0x6c, 159 }; 160 #define MAX_TAD ARRAY_SIZE(tad_dram_rule) 161 162 #define TAD_LIMIT(reg) ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff) 163 #define TAD_SOCK(reg) GET_BITFIELD(reg, 10, 11) 164 #define TAD_CH(reg) GET_BITFIELD(reg, 8, 9) 165 #define TAD_TGT3(reg) GET_BITFIELD(reg, 6, 7) 166 #define TAD_TGT2(reg) GET_BITFIELD(reg, 4, 5) 167 #define TAD_TGT1(reg) GET_BITFIELD(reg, 2, 3) 168 #define TAD_TGT0(reg) GET_BITFIELD(reg, 0, 1) 169 170 /* Device 15, function 0 */ 171 172 #define MCMTR 0x7c 173 174 #define IS_ECC_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 2, 2) 175 #define IS_LOCKSTEP_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 1, 1) 176 #define IS_CLOSE_PG(mcmtr) GET_BITFIELD(mcmtr, 0, 0) 177 178 /* Device 15, function 1 */ 179 180 #define RASENABLES 0xac 181 #define IS_MIRROR_ENABLED(reg) GET_BITFIELD(reg, 0, 0) 182 183 /* Device 15, functions 2-5 */ 184 185 static const int mtr_regs[] = { 186 0x80, 0x84, 0x88, 187 }; 188 189 #define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19) 190 #define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14) 191 #define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13) 192 #define RANK_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 2, 4) 193 #define COL_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 0, 1) 194 195 static const u32 tad_ch_nilv_offset[] = { 196 0x90, 0x94, 0x98, 0x9c, 197 0xa0, 0xa4, 0xa8, 0xac, 198 0xb0, 0xb4, 0xb8, 0xbc, 199 }; 200 #define CHN_IDX_OFFSET(reg) GET_BITFIELD(reg, 28, 29) 201 #define TAD_OFFSET(reg) (GET_BITFIELD(reg, 6, 25) << 26) 202 203 static const u32 rir_way_limit[] = { 204 0x108, 0x10c, 0x110, 0x114, 0x118, 205 }; 206 #define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit) 207 208 #define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31) 209 #define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29) 210 211 #define MAX_RIR_WAY 8 212 213 static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = { 214 { 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c }, 215 { 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c }, 216 { 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c }, 217 { 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c }, 218 { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc }, 219 }; 220 221 #define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19) 222 #define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14) 223 224 /* Device 16, functions 2-7 */ 225 226 /* 227 * FIXME: Implement the error count reads directly 228 */ 229 230 static const u32 correrrcnt[] = { 231 0x104, 0x108, 0x10c, 0x110, 232 }; 233 234 #define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31) 235 #define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30) 236 #define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15) 237 #define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14) 238 239 static const u32 correrrthrsld[] = { 240 0x11c, 0x120, 0x124, 0x128, 241 }; 242 243 #define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30) 244 #define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14) 245 246 247 /* Device 17, function 0 */ 248 249 #define SB_RANK_CFG_A 0x0328 250 251 #define IB_RANK_CFG_A 0x0320 252 253 /* 254 * sbridge structs 255 */ 256 257 #define NUM_CHANNELS 8 /* 2MC per socket, four chan per MC */ 258 #define MAX_DIMMS 3 /* Max DIMMS per channel */ 259 #define CHANNEL_UNSPECIFIED 0xf /* Intel IA32 SDM 15-14 */ 260 261 enum type { 262 SANDY_BRIDGE, 263 IVY_BRIDGE, 264 HASWELL, 265 BROADWELL, 266 }; 267 268 struct sbridge_pvt; 269 struct sbridge_info { 270 enum type type; 271 u32 mcmtr; 272 u32 rankcfgr; 273 u64 (*get_tolm)(struct sbridge_pvt *pvt); 274 u64 (*get_tohm)(struct sbridge_pvt *pvt); 275 u64 (*rir_limit)(u32 reg); 276 const u32 *dram_rule; 277 const u32 *interleave_list; 278 const struct interleave_pkg *interleave_pkg; 279 u8 max_sad; 280 u8 max_interleave; 281 u8 (*get_node_id)(struct sbridge_pvt *pvt); 282 enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt); 283 enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr); 284 struct pci_dev *pci_vtd; 285 }; 286 287 struct sbridge_channel { 288 u32 ranks; 289 u32 dimms; 290 }; 291 292 struct pci_id_descr { 293 int dev_id; 294 int optional; 295 }; 296 297 struct pci_id_table { 298 const struct pci_id_descr *descr; 299 int n_devs; 300 }; 301 302 struct sbridge_dev { 303 struct list_head list; 304 u8 bus, mc; 305 u8 node_id, source_id; 306 struct pci_dev **pdev; 307 int n_devs; 308 struct mem_ctl_info *mci; 309 }; 310 311 struct sbridge_pvt { 312 struct pci_dev *pci_ta, *pci_ddrio, *pci_ras; 313 struct pci_dev *pci_sad0, *pci_sad1; 314 struct pci_dev *pci_ha0, *pci_ha1; 315 struct pci_dev *pci_br0, *pci_br1; 316 struct pci_dev *pci_ha1_ta; 317 struct pci_dev *pci_tad[NUM_CHANNELS]; 318 319 struct sbridge_dev *sbridge_dev; 320 321 struct sbridge_info info; 322 struct sbridge_channel channel[NUM_CHANNELS]; 323 324 /* Memory type detection */ 325 bool is_mirrored, is_lockstep, is_close_pg; 326 327 /* Fifo double buffers */ 328 struct mce mce_entry[MCE_LOG_LEN]; 329 struct mce mce_outentry[MCE_LOG_LEN]; 330 331 /* Fifo in/out counters */ 332 unsigned mce_in, mce_out; 333 334 /* Count indicator to show errors not got */ 335 unsigned mce_overrun; 336 337 /* Memory description */ 338 u64 tolm, tohm; 339 }; 340 341 #define PCI_DESCR(device_id, opt) \ 342 .dev_id = (device_id), \ 343 .optional = opt 344 345 static const struct pci_id_descr pci_dev_descr_sbridge[] = { 346 /* Processor Home Agent */ 347 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) }, 348 349 /* Memory controller */ 350 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) }, 351 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) }, 352 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) }, 353 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) }, 354 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) }, 355 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) }, 356 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) }, 357 358 /* System Address Decoder */ 359 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) }, 360 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) }, 361 362 /* Broadcast Registers */ 363 { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) }, 364 }; 365 366 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } 367 static const struct pci_id_table pci_dev_descr_sbridge_table[] = { 368 PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge), 369 {0,} /* 0 terminated list. */ 370 }; 371 372 /* This changes depending if 1HA or 2HA: 373 * 1HA: 374 * 0x0eb8 (17.0) is DDRIO0 375 * 2HA: 376 * 0x0ebc (17.4) is DDRIO0 377 */ 378 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0 0x0eb8 379 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0 0x0ebc 380 381 /* pci ids */ 382 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0 0x0ea0 383 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA 0x0ea8 384 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS 0x0e71 385 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0 0x0eaa 386 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1 0x0eab 387 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2 0x0eac 388 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3 0x0ead 389 #define PCI_DEVICE_ID_INTEL_IBRIDGE_SAD 0x0ec8 390 #define PCI_DEVICE_ID_INTEL_IBRIDGE_BR0 0x0ec9 391 #define PCI_DEVICE_ID_INTEL_IBRIDGE_BR1 0x0eca 392 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1 0x0e60 393 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA 0x0e68 394 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS 0x0e79 395 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 0x0e6a 396 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1 0x0e6b 397 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2 0x0e6c 398 #define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3 0x0e6d 399 400 static const struct pci_id_descr pci_dev_descr_ibridge[] = { 401 /* Processor Home Agent */ 402 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0) }, 403 404 /* Memory controller */ 405 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0) }, 406 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0) }, 407 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0) }, 408 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0) }, 409 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0) }, 410 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0) }, 411 412 /* System Address Decoder */ 413 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0) }, 414 415 /* Broadcast Registers */ 416 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1) }, 417 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0) }, 418 419 /* Optional, mode 2HA */ 420 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1) }, 421 #if 0 422 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1) }, 423 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1) }, 424 #endif 425 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1) }, 426 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1) }, 427 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2, 1) }, 428 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3, 1) }, 429 430 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1) }, 431 { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1) }, 432 }; 433 434 static const struct pci_id_table pci_dev_descr_ibridge_table[] = { 435 PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge), 436 {0,} /* 0 terminated list. */ 437 }; 438 439 /* Haswell support */ 440 /* EN processor: 441 * - 1 IMC 442 * - 3 DDR3 channels, 2 DPC per channel 443 * EP processor: 444 * - 1 or 2 IMC 445 * - 4 DDR4 channels, 3 DPC per channel 446 * EP 4S processor: 447 * - 2 IMC 448 * - 4 DDR4 channels, 3 DPC per channel 449 * EX processor: 450 * - 2 IMC 451 * - each IMC interfaces with a SMI 2 channel 452 * - each SMI channel interfaces with a scalable memory buffer 453 * - each scalable memory buffer supports 4 DDR3/DDR4 channels, 3 DPC 454 */ 455 #define HASWELL_DDRCRCLKCONTROLS 0xa10 /* Ditto on Broadwell */ 456 #define HASWELL_HASYSDEFEATURE2 0x84 457 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_VTD_MISC 0x2f28 458 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0 0x2fa0 459 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1 0x2f60 460 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA 0x2fa8 461 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL 0x2f71 462 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA 0x2f68 463 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL 0x2f79 464 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0 0x2ffc 465 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1 0x2ffd 466 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0 0x2faa 467 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1 0x2fab 468 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2 0x2fac 469 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3 0x2fad 470 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0 0x2f6a 471 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1 0x2f6b 472 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2 0x2f6c 473 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3 0x2f6d 474 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0 0x2fbd 475 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1 0x2fbf 476 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2 0x2fb9 477 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3 0x2fbb 478 static const struct pci_id_descr pci_dev_descr_haswell[] = { 479 /* first item must be the HA */ 480 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0, 0) }, 481 482 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0) }, 483 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0) }, 484 485 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1, 1) }, 486 487 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA, 0) }, 488 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL, 0) }, 489 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0) }, 490 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0) }, 491 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1) }, 492 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1) }, 493 494 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0, 1) }, 495 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1, 1) }, 496 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2, 1) }, 497 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3, 1) }, 498 499 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA, 1) }, 500 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL, 1) }, 501 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1) }, 502 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1) }, 503 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1) }, 504 { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1) }, 505 }; 506 507 static const struct pci_id_table pci_dev_descr_haswell_table[] = { 508 PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell), 509 {0,} /* 0 terminated list. */ 510 }; 511 512 /* 513 * Broadwell support 514 * 515 * DE processor: 516 * - 1 IMC 517 * - 2 DDR3 channels, 2 DPC per channel 518 * EP processor: 519 * - 1 or 2 IMC 520 * - 4 DDR4 channels, 3 DPC per channel 521 * EP 4S processor: 522 * - 2 IMC 523 * - 4 DDR4 channels, 3 DPC per channel 524 * EX processor: 525 * - 2 IMC 526 * - each IMC interfaces with a SMI 2 channel 527 * - each SMI channel interfaces with a scalable memory buffer 528 * - each scalable memory buffer supports 4 DDR3/DDR4 channels, 3 DPC 529 */ 530 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_VTD_MISC 0x6f28 531 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0 0x6fa0 532 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1 0x6f60 533 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA 0x6fa8 534 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL 0x6f71 535 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA 0x6f68 536 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL 0x6f79 537 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0 0x6ffc 538 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1 0x6ffd 539 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0 0x6faa 540 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1 0x6fab 541 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2 0x6fac 542 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3 0x6fad 543 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0 0x6f6a 544 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1 0x6f6b 545 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2 0x6f6c 546 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3 0x6f6d 547 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0 0x6faf 548 549 static const struct pci_id_descr pci_dev_descr_broadwell[] = { 550 /* first item must be the HA */ 551 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0, 0) }, 552 553 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0) }, 554 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0) }, 555 556 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1, 1) }, 557 558 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA, 0) }, 559 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL, 0) }, 560 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0) }, 561 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0) }, 562 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1) }, 563 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1) }, 564 565 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0, 1) }, 566 567 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA, 1) }, 568 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL, 1) }, 569 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1) }, 570 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1) }, 571 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1) }, 572 { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1) }, 573 }; 574 575 static const struct pci_id_table pci_dev_descr_broadwell_table[] = { 576 PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell), 577 {0,} /* 0 terminated list. */ 578 }; 579 580 /* 581 * pci_device_id table for which devices we are looking for 582 */ 583 static const struct pci_device_id sbridge_pci_tbl[] = { 584 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0)}, 585 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)}, 586 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0)}, 587 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0)}, 588 {0,} /* 0 terminated list. */ 589 }; 590 591 592 /**************************************************************************** 593 Ancillary status routines 594 ****************************************************************************/ 595 596 static inline int numrank(enum type type, u32 mtr) 597 { 598 int ranks = (1 << RANK_CNT_BITS(mtr)); 599 int max = 4; 600 601 if (type == HASWELL || type == BROADWELL) 602 max = 8; 603 604 if (ranks > max) { 605 edac_dbg(0, "Invalid number of ranks: %d (max = %i) raw value = %x (%04x)\n", 606 ranks, max, (unsigned int)RANK_CNT_BITS(mtr), mtr); 607 return -EINVAL; 608 } 609 610 return ranks; 611 } 612 613 static inline int numrow(u32 mtr) 614 { 615 int rows = (RANK_WIDTH_BITS(mtr) + 12); 616 617 if (rows < 13 || rows > 18) { 618 edac_dbg(0, "Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)\n", 619 rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr); 620 return -EINVAL; 621 } 622 623 return 1 << rows; 624 } 625 626 static inline int numcol(u32 mtr) 627 { 628 int cols = (COL_WIDTH_BITS(mtr) + 10); 629 630 if (cols > 12) { 631 edac_dbg(0, "Invalid number of cols: %d (max = 4) raw value = %x (%04x)\n", 632 cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr); 633 return -EINVAL; 634 } 635 636 return 1 << cols; 637 } 638 639 static struct sbridge_dev *get_sbridge_dev(u8 bus) 640 { 641 struct sbridge_dev *sbridge_dev; 642 643 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { 644 if (sbridge_dev->bus == bus) 645 return sbridge_dev; 646 } 647 648 return NULL; 649 } 650 651 static struct sbridge_dev *alloc_sbridge_dev(u8 bus, 652 const struct pci_id_table *table) 653 { 654 struct sbridge_dev *sbridge_dev; 655 656 sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL); 657 if (!sbridge_dev) 658 return NULL; 659 660 sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs, 661 GFP_KERNEL); 662 if (!sbridge_dev->pdev) { 663 kfree(sbridge_dev); 664 return NULL; 665 } 666 667 sbridge_dev->bus = bus; 668 sbridge_dev->n_devs = table->n_devs; 669 list_add_tail(&sbridge_dev->list, &sbridge_edac_list); 670 671 return sbridge_dev; 672 } 673 674 static void free_sbridge_dev(struct sbridge_dev *sbridge_dev) 675 { 676 list_del(&sbridge_dev->list); 677 kfree(sbridge_dev->pdev); 678 kfree(sbridge_dev); 679 } 680 681 static u64 sbridge_get_tolm(struct sbridge_pvt *pvt) 682 { 683 u32 reg; 684 685 /* Address range is 32:28 */ 686 pci_read_config_dword(pvt->pci_sad1, TOLM, ®); 687 return GET_TOLM(reg); 688 } 689 690 static u64 sbridge_get_tohm(struct sbridge_pvt *pvt) 691 { 692 u32 reg; 693 694 pci_read_config_dword(pvt->pci_sad1, TOHM, ®); 695 return GET_TOHM(reg); 696 } 697 698 static u64 ibridge_get_tolm(struct sbridge_pvt *pvt) 699 { 700 u32 reg; 701 702 pci_read_config_dword(pvt->pci_br1, TOLM, ®); 703 704 return GET_TOLM(reg); 705 } 706 707 static u64 ibridge_get_tohm(struct sbridge_pvt *pvt) 708 { 709 u32 reg; 710 711 pci_read_config_dword(pvt->pci_br1, TOHM, ®); 712 713 return GET_TOHM(reg); 714 } 715 716 static u64 rir_limit(u32 reg) 717 { 718 return ((u64)GET_BITFIELD(reg, 1, 10) << 29) | 0x1fffffff; 719 } 720 721 static enum mem_type get_memory_type(struct sbridge_pvt *pvt) 722 { 723 u32 reg; 724 enum mem_type mtype; 725 726 if (pvt->pci_ddrio) { 727 pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr, 728 ®); 729 if (GET_BITFIELD(reg, 11, 11)) 730 /* FIXME: Can also be LRDIMM */ 731 mtype = MEM_RDDR3; 732 else 733 mtype = MEM_DDR3; 734 } else 735 mtype = MEM_UNKNOWN; 736 737 return mtype; 738 } 739 740 static enum mem_type haswell_get_memory_type(struct sbridge_pvt *pvt) 741 { 742 u32 reg; 743 bool registered = false; 744 enum mem_type mtype = MEM_UNKNOWN; 745 746 if (!pvt->pci_ddrio) 747 goto out; 748 749 pci_read_config_dword(pvt->pci_ddrio, 750 HASWELL_DDRCRCLKCONTROLS, ®); 751 /* Is_Rdimm */ 752 if (GET_BITFIELD(reg, 16, 16)) 753 registered = true; 754 755 pci_read_config_dword(pvt->pci_ta, MCMTR, ®); 756 if (GET_BITFIELD(reg, 14, 14)) { 757 if (registered) 758 mtype = MEM_RDDR4; 759 else 760 mtype = MEM_DDR4; 761 } else { 762 if (registered) 763 mtype = MEM_RDDR3; 764 else 765 mtype = MEM_DDR3; 766 } 767 768 out: 769 return mtype; 770 } 771 772 static enum dev_type sbridge_get_width(struct sbridge_pvt *pvt, u32 mtr) 773 { 774 /* there's no way to figure out */ 775 return DEV_UNKNOWN; 776 } 777 778 static enum dev_type __ibridge_get_width(u32 mtr) 779 { 780 enum dev_type type; 781 782 switch (mtr) { 783 case 3: 784 type = DEV_UNKNOWN; 785 break; 786 case 2: 787 type = DEV_X16; 788 break; 789 case 1: 790 type = DEV_X8; 791 break; 792 case 0: 793 type = DEV_X4; 794 break; 795 } 796 797 return type; 798 } 799 800 static enum dev_type ibridge_get_width(struct sbridge_pvt *pvt, u32 mtr) 801 { 802 /* 803 * ddr3_width on the documentation but also valid for DDR4 on 804 * Haswell 805 */ 806 return __ibridge_get_width(GET_BITFIELD(mtr, 7, 8)); 807 } 808 809 static enum dev_type broadwell_get_width(struct sbridge_pvt *pvt, u32 mtr) 810 { 811 /* ddr3_width on the documentation but also valid for DDR4 */ 812 return __ibridge_get_width(GET_BITFIELD(mtr, 8, 9)); 813 } 814 815 static u8 get_node_id(struct sbridge_pvt *pvt) 816 { 817 u32 reg; 818 pci_read_config_dword(pvt->pci_br0, SAD_CONTROL, ®); 819 return GET_BITFIELD(reg, 0, 2); 820 } 821 822 static u8 haswell_get_node_id(struct sbridge_pvt *pvt) 823 { 824 u32 reg; 825 826 pci_read_config_dword(pvt->pci_sad1, SAD_CONTROL, ®); 827 return GET_BITFIELD(reg, 0, 3); 828 } 829 830 static u64 haswell_get_tolm(struct sbridge_pvt *pvt) 831 { 832 u32 reg; 833 834 pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOLM, ®); 835 return (GET_BITFIELD(reg, 26, 31) << 26) | 0x3ffffff; 836 } 837 838 static u64 haswell_get_tohm(struct sbridge_pvt *pvt) 839 { 840 u64 rc; 841 u32 reg; 842 843 pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOHM_0, ®); 844 rc = GET_BITFIELD(reg, 26, 31); 845 pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOHM_1, ®); 846 rc = ((reg << 6) | rc) << 26; 847 848 return rc | 0x1ffffff; 849 } 850 851 static u64 haswell_rir_limit(u32 reg) 852 { 853 return (((u64)GET_BITFIELD(reg, 1, 11) + 1) << 29) - 1; 854 } 855 856 static inline u8 sad_pkg_socket(u8 pkg) 857 { 858 /* on Ivy Bridge, nodeID is SASS, where A is HA and S is node id */ 859 return ((pkg >> 3) << 2) | (pkg & 0x3); 860 } 861 862 static inline u8 sad_pkg_ha(u8 pkg) 863 { 864 return (pkg >> 2) & 0x1; 865 } 866 867 /**************************************************************************** 868 Memory check routines 869 ****************************************************************************/ 870 static struct pci_dev *get_pdev_same_bus(u8 bus, u32 id) 871 { 872 struct pci_dev *pdev = NULL; 873 874 do { 875 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, pdev); 876 if (pdev && pdev->bus->number == bus) 877 break; 878 } while (pdev); 879 880 return pdev; 881 } 882 883 /** 884 * check_if_ecc_is_active() - Checks if ECC is active 885 * @bus: Device bus 886 * @type: Memory controller type 887 * returns: 0 in case ECC is active, -ENODEV if it can't be determined or 888 * disabled 889 */ 890 static int check_if_ecc_is_active(const u8 bus, enum type type) 891 { 892 struct pci_dev *pdev = NULL; 893 u32 mcmtr, id; 894 895 switch (type) { 896 case IVY_BRIDGE: 897 id = PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA; 898 break; 899 case HASWELL: 900 id = PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA; 901 break; 902 case SANDY_BRIDGE: 903 id = PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA; 904 break; 905 case BROADWELL: 906 id = PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA; 907 break; 908 default: 909 return -ENODEV; 910 } 911 912 pdev = get_pdev_same_bus(bus, id); 913 if (!pdev) { 914 sbridge_printk(KERN_ERR, "Couldn't find PCI device " 915 "%04x:%04x! on bus %02d\n", 916 PCI_VENDOR_ID_INTEL, id, bus); 917 return -ENODEV; 918 } 919 920 pci_read_config_dword(pdev, MCMTR, &mcmtr); 921 if (!IS_ECC_ENABLED(mcmtr)) { 922 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n"); 923 return -ENODEV; 924 } 925 return 0; 926 } 927 928 static int get_dimm_config(struct mem_ctl_info *mci) 929 { 930 struct sbridge_pvt *pvt = mci->pvt_info; 931 struct dimm_info *dimm; 932 unsigned i, j, banks, ranks, rows, cols, npages; 933 u64 size; 934 u32 reg; 935 enum edac_type mode; 936 enum mem_type mtype; 937 938 if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) 939 pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, ®); 940 else 941 pci_read_config_dword(pvt->pci_br0, SAD_TARGET, ®); 942 943 pvt->sbridge_dev->source_id = SOURCE_ID(reg); 944 945 pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt); 946 edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n", 947 pvt->sbridge_dev->mc, 948 pvt->sbridge_dev->node_id, 949 pvt->sbridge_dev->source_id); 950 951 pci_read_config_dword(pvt->pci_ras, RASENABLES, ®); 952 if (IS_MIRROR_ENABLED(reg)) { 953 edac_dbg(0, "Memory mirror is enabled\n"); 954 pvt->is_mirrored = true; 955 } else { 956 edac_dbg(0, "Memory mirror is disabled\n"); 957 pvt->is_mirrored = false; 958 } 959 960 pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr); 961 if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) { 962 edac_dbg(0, "Lockstep is enabled\n"); 963 mode = EDAC_S8ECD8ED; 964 pvt->is_lockstep = true; 965 } else { 966 edac_dbg(0, "Lockstep is disabled\n"); 967 mode = EDAC_S4ECD4ED; 968 pvt->is_lockstep = false; 969 } 970 if (IS_CLOSE_PG(pvt->info.mcmtr)) { 971 edac_dbg(0, "address map is on closed page mode\n"); 972 pvt->is_close_pg = true; 973 } else { 974 edac_dbg(0, "address map is on open page mode\n"); 975 pvt->is_close_pg = false; 976 } 977 978 mtype = pvt->info.get_memory_type(pvt); 979 if (mtype == MEM_RDDR3 || mtype == MEM_RDDR4) 980 edac_dbg(0, "Memory is registered\n"); 981 else if (mtype == MEM_UNKNOWN) 982 edac_dbg(0, "Cannot determine memory type\n"); 983 else 984 edac_dbg(0, "Memory is unregistered\n"); 985 986 if (mtype == MEM_DDR4 || mtype == MEM_RDDR4) 987 banks = 16; 988 else 989 banks = 8; 990 991 for (i = 0; i < NUM_CHANNELS; i++) { 992 u32 mtr; 993 994 if (!pvt->pci_tad[i]) 995 continue; 996 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { 997 dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, 998 i, j, 0); 999 pci_read_config_dword(pvt->pci_tad[i], 1000 mtr_regs[j], &mtr); 1001 edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr); 1002 if (IS_DIMM_PRESENT(mtr)) { 1003 pvt->channel[i].dimms++; 1004 1005 ranks = numrank(pvt->info.type, mtr); 1006 rows = numrow(mtr); 1007 cols = numcol(mtr); 1008 1009 size = ((u64)rows * cols * banks * ranks) >> (20 - 3); 1010 npages = MiB_TO_PAGES(size); 1011 1012 edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", 1013 pvt->sbridge_dev->mc, i/4, i%4, j, 1014 size, npages, 1015 banks, ranks, rows, cols); 1016 1017 dimm->nr_pages = npages; 1018 dimm->grain = 32; 1019 dimm->dtype = pvt->info.get_width(pvt, mtr); 1020 dimm->mtype = mtype; 1021 dimm->edac_mode = mode; 1022 snprintf(dimm->label, sizeof(dimm->label), 1023 "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u", 1024 pvt->sbridge_dev->source_id, i/4, i%4, j); 1025 } 1026 } 1027 } 1028 1029 return 0; 1030 } 1031 1032 static void get_memory_layout(const struct mem_ctl_info *mci) 1033 { 1034 struct sbridge_pvt *pvt = mci->pvt_info; 1035 int i, j, k, n_sads, n_tads, sad_interl; 1036 u32 reg; 1037 u64 limit, prv = 0; 1038 u64 tmp_mb; 1039 u32 gb, mb; 1040 u32 rir_way; 1041 1042 /* 1043 * Step 1) Get TOLM/TOHM ranges 1044 */ 1045 1046 pvt->tolm = pvt->info.get_tolm(pvt); 1047 tmp_mb = (1 + pvt->tolm) >> 20; 1048 1049 gb = div_u64_rem(tmp_mb, 1024, &mb); 1050 edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", 1051 gb, (mb*1000)/1024, (u64)pvt->tolm); 1052 1053 /* Address range is already 45:25 */ 1054 pvt->tohm = pvt->info.get_tohm(pvt); 1055 tmp_mb = (1 + pvt->tohm) >> 20; 1056 1057 gb = div_u64_rem(tmp_mb, 1024, &mb); 1058 edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", 1059 gb, (mb*1000)/1024, (u64)pvt->tohm); 1060 1061 /* 1062 * Step 2) Get SAD range and SAD Interleave list 1063 * TAD registers contain the interleave wayness. However, it 1064 * seems simpler to just discover it indirectly, with the 1065 * algorithm bellow. 1066 */ 1067 prv = 0; 1068 for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) { 1069 /* SAD_LIMIT Address range is 45:26 */ 1070 pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads], 1071 ®); 1072 limit = SAD_LIMIT(reg); 1073 1074 if (!DRAM_RULE_ENABLE(reg)) 1075 continue; 1076 1077 if (limit <= prv) 1078 break; 1079 1080 tmp_mb = (limit + 1) >> 20; 1081 gb = div_u64_rem(tmp_mb, 1024, &mb); 1082 edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n", 1083 n_sads, 1084 get_dram_attr(reg), 1085 gb, (mb*1000)/1024, 1086 ((u64)tmp_mb) << 20L, 1087 INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]", 1088 reg); 1089 prv = limit; 1090 1091 pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads], 1092 ®); 1093 sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0); 1094 for (j = 0; j < 8; j++) { 1095 u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, j); 1096 if (j > 0 && sad_interl == pkg) 1097 break; 1098 1099 edac_dbg(0, "SAD#%d, interleave #%d: %d\n", 1100 n_sads, j, pkg); 1101 } 1102 } 1103 1104 /* 1105 * Step 3) Get TAD range 1106 */ 1107 prv = 0; 1108 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) { 1109 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads], 1110 ®); 1111 limit = TAD_LIMIT(reg); 1112 if (limit <= prv) 1113 break; 1114 tmp_mb = (limit + 1) >> 20; 1115 1116 gb = div_u64_rem(tmp_mb, 1024, &mb); 1117 edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", 1118 n_tads, gb, (mb*1000)/1024, 1119 ((u64)tmp_mb) << 20L, 1120 (u32)TAD_SOCK(reg), 1121 (u32)TAD_CH(reg), 1122 (u32)TAD_TGT0(reg), 1123 (u32)TAD_TGT1(reg), 1124 (u32)TAD_TGT2(reg), 1125 (u32)TAD_TGT3(reg), 1126 reg); 1127 prv = limit; 1128 } 1129 1130 /* 1131 * Step 4) Get TAD offsets, per each channel 1132 */ 1133 for (i = 0; i < NUM_CHANNELS; i++) { 1134 if (!pvt->channel[i].dimms) 1135 continue; 1136 for (j = 0; j < n_tads; j++) { 1137 pci_read_config_dword(pvt->pci_tad[i], 1138 tad_ch_nilv_offset[j], 1139 ®); 1140 tmp_mb = TAD_OFFSET(reg) >> 20; 1141 gb = div_u64_rem(tmp_mb, 1024, &mb); 1142 edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n", 1143 i, j, 1144 gb, (mb*1000)/1024, 1145 ((u64)tmp_mb) << 20L, 1146 reg); 1147 } 1148 } 1149 1150 /* 1151 * Step 6) Get RIR Wayness/Limit, per each channel 1152 */ 1153 for (i = 0; i < NUM_CHANNELS; i++) { 1154 if (!pvt->channel[i].dimms) 1155 continue; 1156 for (j = 0; j < MAX_RIR_RANGES; j++) { 1157 pci_read_config_dword(pvt->pci_tad[i], 1158 rir_way_limit[j], 1159 ®); 1160 1161 if (!IS_RIR_VALID(reg)) 1162 continue; 1163 1164 tmp_mb = pvt->info.rir_limit(reg) >> 20; 1165 rir_way = 1 << RIR_WAY(reg); 1166 gb = div_u64_rem(tmp_mb, 1024, &mb); 1167 edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n", 1168 i, j, 1169 gb, (mb*1000)/1024, 1170 ((u64)tmp_mb) << 20L, 1171 rir_way, 1172 reg); 1173 1174 for (k = 0; k < rir_way; k++) { 1175 pci_read_config_dword(pvt->pci_tad[i], 1176 rir_offset[j][k], 1177 ®); 1178 tmp_mb = RIR_OFFSET(reg) << 6; 1179 1180 gb = div_u64_rem(tmp_mb, 1024, &mb); 1181 edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", 1182 i, j, k, 1183 gb, (mb*1000)/1024, 1184 ((u64)tmp_mb) << 20L, 1185 (u32)RIR_RNK_TGT(reg), 1186 reg); 1187 } 1188 } 1189 } 1190 } 1191 1192 static struct mem_ctl_info *get_mci_for_node_id(u8 node_id) 1193 { 1194 struct sbridge_dev *sbridge_dev; 1195 1196 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { 1197 if (sbridge_dev->node_id == node_id) 1198 return sbridge_dev->mci; 1199 } 1200 return NULL; 1201 } 1202 1203 static int get_memory_error_data(struct mem_ctl_info *mci, 1204 u64 addr, 1205 u8 *socket, u8 *ha, 1206 long *channel_mask, 1207 u8 *rank, 1208 char **area_type, char *msg) 1209 { 1210 struct mem_ctl_info *new_mci; 1211 struct sbridge_pvt *pvt = mci->pvt_info; 1212 struct pci_dev *pci_ha; 1213 int n_rir, n_sads, n_tads, sad_way, sck_xch; 1214 int sad_interl, idx, base_ch; 1215 int interleave_mode, shiftup = 0; 1216 unsigned sad_interleave[pvt->info.max_interleave]; 1217 u32 reg, dram_rule; 1218 u8 ch_way, sck_way, pkg, sad_ha = 0, ch_add = 0; 1219 u32 tad_offset; 1220 u32 rir_way; 1221 u32 mb, gb; 1222 u64 ch_addr, offset, limit = 0, prv = 0; 1223 1224 1225 /* 1226 * Step 0) Check if the address is at special memory ranges 1227 * The check bellow is probably enough to fill all cases where 1228 * the error is not inside a memory, except for the legacy 1229 * range (e. g. VGA addresses). It is unlikely, however, that the 1230 * memory controller would generate an error on that range. 1231 */ 1232 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) { 1233 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr); 1234 return -EINVAL; 1235 } 1236 if (addr >= (u64)pvt->tohm) { 1237 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr); 1238 return -EINVAL; 1239 } 1240 1241 /* 1242 * Step 1) Get socket 1243 */ 1244 for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) { 1245 pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads], 1246 ®); 1247 1248 if (!DRAM_RULE_ENABLE(reg)) 1249 continue; 1250 1251 limit = SAD_LIMIT(reg); 1252 if (limit <= prv) { 1253 sprintf(msg, "Can't discover the memory socket"); 1254 return -EINVAL; 1255 } 1256 if (addr <= limit) 1257 break; 1258 prv = limit; 1259 } 1260 if (n_sads == pvt->info.max_sad) { 1261 sprintf(msg, "Can't discover the memory socket"); 1262 return -EINVAL; 1263 } 1264 dram_rule = reg; 1265 *area_type = get_dram_attr(dram_rule); 1266 interleave_mode = INTERLEAVE_MODE(dram_rule); 1267 1268 pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads], 1269 ®); 1270 1271 if (pvt->info.type == SANDY_BRIDGE) { 1272 sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0); 1273 for (sad_way = 0; sad_way < 8; sad_way++) { 1274 u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, sad_way); 1275 if (sad_way > 0 && sad_interl == pkg) 1276 break; 1277 sad_interleave[sad_way] = pkg; 1278 edac_dbg(0, "SAD interleave #%d: %d\n", 1279 sad_way, sad_interleave[sad_way]); 1280 } 1281 edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", 1282 pvt->sbridge_dev->mc, 1283 n_sads, 1284 addr, 1285 limit, 1286 sad_way + 7, 1287 !interleave_mode ? "" : "XOR[18:16]"); 1288 if (interleave_mode) 1289 idx = ((addr >> 6) ^ (addr >> 16)) & 7; 1290 else 1291 idx = (addr >> 6) & 7; 1292 switch (sad_way) { 1293 case 1: 1294 idx = 0; 1295 break; 1296 case 2: 1297 idx = idx & 1; 1298 break; 1299 case 4: 1300 idx = idx & 3; 1301 break; 1302 case 8: 1303 break; 1304 default: 1305 sprintf(msg, "Can't discover socket interleave"); 1306 return -EINVAL; 1307 } 1308 *socket = sad_interleave[idx]; 1309 edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n", 1310 idx, sad_way, *socket); 1311 } else if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) { 1312 int bits, a7mode = A7MODE(dram_rule); 1313 1314 if (a7mode) { 1315 /* A7 mode swaps P9 with P6 */ 1316 bits = GET_BITFIELD(addr, 7, 8) << 1; 1317 bits |= GET_BITFIELD(addr, 9, 9); 1318 } else 1319 bits = GET_BITFIELD(addr, 6, 8); 1320 1321 if (interleave_mode == 0) { 1322 /* interleave mode will XOR {8,7,6} with {18,17,16} */ 1323 idx = GET_BITFIELD(addr, 16, 18); 1324 idx ^= bits; 1325 } else 1326 idx = bits; 1327 1328 pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx); 1329 *socket = sad_pkg_socket(pkg); 1330 sad_ha = sad_pkg_ha(pkg); 1331 if (sad_ha) 1332 ch_add = 4; 1333 1334 if (a7mode) { 1335 /* MCChanShiftUpEnable */ 1336 pci_read_config_dword(pvt->pci_ha0, 1337 HASWELL_HASYSDEFEATURE2, ®); 1338 shiftup = GET_BITFIELD(reg, 22, 22); 1339 } 1340 1341 edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %i, shiftup: %i\n", 1342 idx, *socket, sad_ha, shiftup); 1343 } else { 1344 /* Ivy Bridge's SAD mode doesn't support XOR interleave mode */ 1345 idx = (addr >> 6) & 7; 1346 pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx); 1347 *socket = sad_pkg_socket(pkg); 1348 sad_ha = sad_pkg_ha(pkg); 1349 if (sad_ha) 1350 ch_add = 4; 1351 edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n", 1352 idx, *socket, sad_ha); 1353 } 1354 1355 *ha = sad_ha; 1356 1357 /* 1358 * Move to the proper node structure, in order to access the 1359 * right PCI registers 1360 */ 1361 new_mci = get_mci_for_node_id(*socket); 1362 if (!new_mci) { 1363 sprintf(msg, "Struct for socket #%u wasn't initialized", 1364 *socket); 1365 return -EINVAL; 1366 } 1367 mci = new_mci; 1368 pvt = mci->pvt_info; 1369 1370 /* 1371 * Step 2) Get memory channel 1372 */ 1373 prv = 0; 1374 if (pvt->info.type == SANDY_BRIDGE) 1375 pci_ha = pvt->pci_ha0; 1376 else { 1377 if (sad_ha) 1378 pci_ha = pvt->pci_ha1; 1379 else 1380 pci_ha = pvt->pci_ha0; 1381 } 1382 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) { 1383 pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], ®); 1384 limit = TAD_LIMIT(reg); 1385 if (limit <= prv) { 1386 sprintf(msg, "Can't discover the memory channel"); 1387 return -EINVAL; 1388 } 1389 if (addr <= limit) 1390 break; 1391 prv = limit; 1392 } 1393 if (n_tads == MAX_TAD) { 1394 sprintf(msg, "Can't discover the memory channel"); 1395 return -EINVAL; 1396 } 1397 1398 ch_way = TAD_CH(reg) + 1; 1399 sck_way = TAD_SOCK(reg) + 1; 1400 1401 if (ch_way == 3) 1402 idx = addr >> 6; 1403 else 1404 idx = (addr >> (6 + sck_way + shiftup)) & 0x3; 1405 idx = idx % ch_way; 1406 1407 /* 1408 * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ??? 1409 */ 1410 switch (idx) { 1411 case 0: 1412 base_ch = TAD_TGT0(reg); 1413 break; 1414 case 1: 1415 base_ch = TAD_TGT1(reg); 1416 break; 1417 case 2: 1418 base_ch = TAD_TGT2(reg); 1419 break; 1420 case 3: 1421 base_ch = TAD_TGT3(reg); 1422 break; 1423 default: 1424 sprintf(msg, "Can't discover the TAD target"); 1425 return -EINVAL; 1426 } 1427 *channel_mask = 1 << base_ch; 1428 1429 pci_read_config_dword(pvt->pci_tad[ch_add + base_ch], 1430 tad_ch_nilv_offset[n_tads], 1431 &tad_offset); 1432 1433 if (pvt->is_mirrored) { 1434 *channel_mask |= 1 << ((base_ch + 2) % 4); 1435 switch(ch_way) { 1436 case 2: 1437 case 4: 1438 sck_xch = 1 << sck_way * (ch_way >> 1); 1439 break; 1440 default: 1441 sprintf(msg, "Invalid mirror set. Can't decode addr"); 1442 return -EINVAL; 1443 } 1444 } else 1445 sck_xch = (1 << sck_way) * ch_way; 1446 1447 if (pvt->is_lockstep) 1448 *channel_mask |= 1 << ((base_ch + 1) % 4); 1449 1450 offset = TAD_OFFSET(tad_offset); 1451 1452 edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n", 1453 n_tads, 1454 addr, 1455 limit, 1456 (u32)TAD_SOCK(reg), 1457 ch_way, 1458 offset, 1459 idx, 1460 base_ch, 1461 *channel_mask); 1462 1463 /* Calculate channel address */ 1464 /* Remove the TAD offset */ 1465 1466 if (offset > addr) { 1467 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!", 1468 offset, addr); 1469 return -EINVAL; 1470 } 1471 addr -= offset; 1472 /* Store the low bits [0:6] of the addr */ 1473 ch_addr = addr & 0x7f; 1474 /* Remove socket wayness and remove 6 bits */ 1475 addr >>= 6; 1476 addr = div_u64(addr, sck_xch); 1477 #if 0 1478 /* Divide by channel way */ 1479 addr = addr / ch_way; 1480 #endif 1481 /* Recover the last 6 bits */ 1482 ch_addr |= addr << 6; 1483 1484 /* 1485 * Step 3) Decode rank 1486 */ 1487 for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) { 1488 pci_read_config_dword(pvt->pci_tad[ch_add + base_ch], 1489 rir_way_limit[n_rir], 1490 ®); 1491 1492 if (!IS_RIR_VALID(reg)) 1493 continue; 1494 1495 limit = pvt->info.rir_limit(reg); 1496 gb = div_u64_rem(limit >> 20, 1024, &mb); 1497 edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n", 1498 n_rir, 1499 gb, (mb*1000)/1024, 1500 limit, 1501 1 << RIR_WAY(reg)); 1502 if (ch_addr <= limit) 1503 break; 1504 } 1505 if (n_rir == MAX_RIR_RANGES) { 1506 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx", 1507 ch_addr); 1508 return -EINVAL; 1509 } 1510 rir_way = RIR_WAY(reg); 1511 1512 if (pvt->is_close_pg) 1513 idx = (ch_addr >> 6); 1514 else 1515 idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */ 1516 idx %= 1 << rir_way; 1517 1518 pci_read_config_dword(pvt->pci_tad[ch_add + base_ch], 1519 rir_offset[n_rir][idx], 1520 ®); 1521 *rank = RIR_RNK_TGT(reg); 1522 1523 edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", 1524 n_rir, 1525 ch_addr, 1526 limit, 1527 rir_way, 1528 idx); 1529 1530 return 0; 1531 } 1532 1533 /**************************************************************************** 1534 Device initialization routines: put/get, init/exit 1535 ****************************************************************************/ 1536 1537 /* 1538 * sbridge_put_all_devices 'put' all the devices that we have 1539 * reserved via 'get' 1540 */ 1541 static void sbridge_put_devices(struct sbridge_dev *sbridge_dev) 1542 { 1543 int i; 1544 1545 edac_dbg(0, "\n"); 1546 for (i = 0; i < sbridge_dev->n_devs; i++) { 1547 struct pci_dev *pdev = sbridge_dev->pdev[i]; 1548 if (!pdev) 1549 continue; 1550 edac_dbg(0, "Removing dev %02x:%02x.%d\n", 1551 pdev->bus->number, 1552 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 1553 pci_dev_put(pdev); 1554 } 1555 } 1556 1557 static void sbridge_put_all_devices(void) 1558 { 1559 struct sbridge_dev *sbridge_dev, *tmp; 1560 1561 list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) { 1562 sbridge_put_devices(sbridge_dev); 1563 free_sbridge_dev(sbridge_dev); 1564 } 1565 } 1566 1567 static int sbridge_get_onedevice(struct pci_dev **prev, 1568 u8 *num_mc, 1569 const struct pci_id_table *table, 1570 const unsigned devno) 1571 { 1572 struct sbridge_dev *sbridge_dev; 1573 const struct pci_id_descr *dev_descr = &table->descr[devno]; 1574 struct pci_dev *pdev = NULL; 1575 u8 bus = 0; 1576 1577 sbridge_printk(KERN_DEBUG, 1578 "Seeking for: PCI ID %04x:%04x\n", 1579 PCI_VENDOR_ID_INTEL, dev_descr->dev_id); 1580 1581 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 1582 dev_descr->dev_id, *prev); 1583 1584 if (!pdev) { 1585 if (*prev) { 1586 *prev = pdev; 1587 return 0; 1588 } 1589 1590 if (dev_descr->optional) 1591 return 0; 1592 1593 /* if the HA wasn't found */ 1594 if (devno == 0) 1595 return -ENODEV; 1596 1597 sbridge_printk(KERN_INFO, 1598 "Device not found: %04x:%04x\n", 1599 PCI_VENDOR_ID_INTEL, dev_descr->dev_id); 1600 1601 /* End of list, leave */ 1602 return -ENODEV; 1603 } 1604 bus = pdev->bus->number; 1605 1606 sbridge_dev = get_sbridge_dev(bus); 1607 if (!sbridge_dev) { 1608 sbridge_dev = alloc_sbridge_dev(bus, table); 1609 if (!sbridge_dev) { 1610 pci_dev_put(pdev); 1611 return -ENOMEM; 1612 } 1613 (*num_mc)++; 1614 } 1615 1616 if (sbridge_dev->pdev[devno]) { 1617 sbridge_printk(KERN_ERR, 1618 "Duplicated device for %04x:%04x\n", 1619 PCI_VENDOR_ID_INTEL, dev_descr->dev_id); 1620 pci_dev_put(pdev); 1621 return -ENODEV; 1622 } 1623 1624 sbridge_dev->pdev[devno] = pdev; 1625 1626 /* Be sure that the device is enabled */ 1627 if (unlikely(pci_enable_device(pdev) < 0)) { 1628 sbridge_printk(KERN_ERR, 1629 "Couldn't enable %04x:%04x\n", 1630 PCI_VENDOR_ID_INTEL, dev_descr->dev_id); 1631 return -ENODEV; 1632 } 1633 1634 edac_dbg(0, "Detected %04x:%04x\n", 1635 PCI_VENDOR_ID_INTEL, dev_descr->dev_id); 1636 1637 /* 1638 * As stated on drivers/pci/search.c, the reference count for 1639 * @from is always decremented if it is not %NULL. So, as we need 1640 * to get all devices up to null, we need to do a get for the device 1641 */ 1642 pci_dev_get(pdev); 1643 1644 *prev = pdev; 1645 1646 return 0; 1647 } 1648 1649 /* 1650 * sbridge_get_all_devices - Find and perform 'get' operation on the MCH's 1651 * devices we want to reference for this driver. 1652 * @num_mc: pointer to the memory controllers count, to be incremented in case 1653 * of success. 1654 * @table: model specific table 1655 * 1656 * returns 0 in case of success or error code 1657 */ 1658 static int sbridge_get_all_devices(u8 *num_mc, 1659 const struct pci_id_table *table) 1660 { 1661 int i, rc; 1662 struct pci_dev *pdev = NULL; 1663 1664 while (table && table->descr) { 1665 for (i = 0; i < table->n_devs; i++) { 1666 pdev = NULL; 1667 do { 1668 rc = sbridge_get_onedevice(&pdev, num_mc, 1669 table, i); 1670 if (rc < 0) { 1671 if (i == 0) { 1672 i = table->n_devs; 1673 break; 1674 } 1675 sbridge_put_all_devices(); 1676 return -ENODEV; 1677 } 1678 } while (pdev); 1679 } 1680 table++; 1681 } 1682 1683 return 0; 1684 } 1685 1686 static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, 1687 struct sbridge_dev *sbridge_dev) 1688 { 1689 struct sbridge_pvt *pvt = mci->pvt_info; 1690 struct pci_dev *pdev; 1691 u8 saw_chan_mask = 0; 1692 int i; 1693 1694 for (i = 0; i < sbridge_dev->n_devs; i++) { 1695 pdev = sbridge_dev->pdev[i]; 1696 if (!pdev) 1697 continue; 1698 1699 switch (pdev->device) { 1700 case PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0: 1701 pvt->pci_sad0 = pdev; 1702 break; 1703 case PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1: 1704 pvt->pci_sad1 = pdev; 1705 break; 1706 case PCI_DEVICE_ID_INTEL_SBRIDGE_BR: 1707 pvt->pci_br0 = pdev; 1708 break; 1709 case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0: 1710 pvt->pci_ha0 = pdev; 1711 break; 1712 case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA: 1713 pvt->pci_ta = pdev; 1714 break; 1715 case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS: 1716 pvt->pci_ras = pdev; 1717 break; 1718 case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0: 1719 case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1: 1720 case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2: 1721 case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3: 1722 { 1723 int id = pdev->device - PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0; 1724 pvt->pci_tad[id] = pdev; 1725 saw_chan_mask |= 1 << id; 1726 } 1727 break; 1728 case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO: 1729 pvt->pci_ddrio = pdev; 1730 break; 1731 default: 1732 goto error; 1733 } 1734 1735 edac_dbg(0, "Associated PCI %02x:%02x, bus %d with dev = %p\n", 1736 pdev->vendor, pdev->device, 1737 sbridge_dev->bus, 1738 pdev); 1739 } 1740 1741 /* Check if everything were registered */ 1742 if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 || 1743 !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta) 1744 goto enodev; 1745 1746 if (saw_chan_mask != 0x0f) 1747 goto enodev; 1748 return 0; 1749 1750 enodev: 1751 sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); 1752 return -ENODEV; 1753 1754 error: 1755 sbridge_printk(KERN_ERR, "Unexpected device %02x:%02x\n", 1756 PCI_VENDOR_ID_INTEL, pdev->device); 1757 return -EINVAL; 1758 } 1759 1760 static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, 1761 struct sbridge_dev *sbridge_dev) 1762 { 1763 struct sbridge_pvt *pvt = mci->pvt_info; 1764 struct pci_dev *pdev; 1765 u8 saw_chan_mask = 0; 1766 int i; 1767 1768 for (i = 0; i < sbridge_dev->n_devs; i++) { 1769 pdev = sbridge_dev->pdev[i]; 1770 if (!pdev) 1771 continue; 1772 1773 switch (pdev->device) { 1774 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0: 1775 pvt->pci_ha0 = pdev; 1776 break; 1777 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA: 1778 pvt->pci_ta = pdev; 1779 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS: 1780 pvt->pci_ras = pdev; 1781 break; 1782 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0: 1783 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1: 1784 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2: 1785 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3: 1786 { 1787 int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0; 1788 pvt->pci_tad[id] = pdev; 1789 saw_chan_mask |= 1 << id; 1790 } 1791 break; 1792 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0: 1793 pvt->pci_ddrio = pdev; 1794 break; 1795 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0: 1796 pvt->pci_ddrio = pdev; 1797 break; 1798 case PCI_DEVICE_ID_INTEL_IBRIDGE_SAD: 1799 pvt->pci_sad0 = pdev; 1800 break; 1801 case PCI_DEVICE_ID_INTEL_IBRIDGE_BR0: 1802 pvt->pci_br0 = pdev; 1803 break; 1804 case PCI_DEVICE_ID_INTEL_IBRIDGE_BR1: 1805 pvt->pci_br1 = pdev; 1806 break; 1807 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1: 1808 pvt->pci_ha1 = pdev; 1809 break; 1810 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0: 1811 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1: 1812 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2: 1813 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3: 1814 { 1815 int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 + 4; 1816 pvt->pci_tad[id] = pdev; 1817 saw_chan_mask |= 1 << id; 1818 } 1819 break; 1820 default: 1821 goto error; 1822 } 1823 1824 edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", 1825 sbridge_dev->bus, 1826 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), 1827 pdev); 1828 } 1829 1830 /* Check if everything were registered */ 1831 if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 || 1832 !pvt->pci_br1 || !pvt->pci_tad || !pvt->pci_ras || 1833 !pvt->pci_ta) 1834 goto enodev; 1835 1836 if (saw_chan_mask != 0x0f && /* -EN */ 1837 saw_chan_mask != 0x33 && /* -EP */ 1838 saw_chan_mask != 0xff) /* -EX */ 1839 goto enodev; 1840 return 0; 1841 1842 enodev: 1843 sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); 1844 return -ENODEV; 1845 1846 error: 1847 sbridge_printk(KERN_ERR, 1848 "Unexpected device %02x:%02x\n", PCI_VENDOR_ID_INTEL, 1849 pdev->device); 1850 return -EINVAL; 1851 } 1852 1853 static int haswell_mci_bind_devs(struct mem_ctl_info *mci, 1854 struct sbridge_dev *sbridge_dev) 1855 { 1856 struct sbridge_pvt *pvt = mci->pvt_info; 1857 struct pci_dev *pdev; 1858 u8 saw_chan_mask = 0; 1859 int i; 1860 1861 /* there's only one device per system; not tied to any bus */ 1862 if (pvt->info.pci_vtd == NULL) 1863 /* result will be checked later */ 1864 pvt->info.pci_vtd = pci_get_device(PCI_VENDOR_ID_INTEL, 1865 PCI_DEVICE_ID_INTEL_HASWELL_IMC_VTD_MISC, 1866 NULL); 1867 1868 for (i = 0; i < sbridge_dev->n_devs; i++) { 1869 pdev = sbridge_dev->pdev[i]; 1870 if (!pdev) 1871 continue; 1872 1873 switch (pdev->device) { 1874 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0: 1875 pvt->pci_sad0 = pdev; 1876 break; 1877 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1: 1878 pvt->pci_sad1 = pdev; 1879 break; 1880 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0: 1881 pvt->pci_ha0 = pdev; 1882 break; 1883 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA: 1884 pvt->pci_ta = pdev; 1885 break; 1886 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL: 1887 pvt->pci_ras = pdev; 1888 break; 1889 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0: 1890 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1: 1891 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2: 1892 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3: 1893 { 1894 int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0; 1895 1896 pvt->pci_tad[id] = pdev; 1897 saw_chan_mask |= 1 << id; 1898 } 1899 break; 1900 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0: 1901 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1: 1902 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2: 1903 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3: 1904 { 1905 int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0 + 4; 1906 1907 pvt->pci_tad[id] = pdev; 1908 saw_chan_mask |= 1 << id; 1909 } 1910 break; 1911 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0: 1912 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1: 1913 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2: 1914 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3: 1915 if (!pvt->pci_ddrio) 1916 pvt->pci_ddrio = pdev; 1917 break; 1918 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1: 1919 pvt->pci_ha1 = pdev; 1920 break; 1921 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA: 1922 pvt->pci_ha1_ta = pdev; 1923 break; 1924 default: 1925 break; 1926 } 1927 1928 edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", 1929 sbridge_dev->bus, 1930 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), 1931 pdev); 1932 } 1933 1934 /* Check if everything were registered */ 1935 if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 || 1936 !pvt->pci_ras || !pvt->pci_ta || !pvt->info.pci_vtd) 1937 goto enodev; 1938 1939 if (saw_chan_mask != 0x0f && /* -EN */ 1940 saw_chan_mask != 0x33 && /* -EP */ 1941 saw_chan_mask != 0xff) /* -EX */ 1942 goto enodev; 1943 return 0; 1944 1945 enodev: 1946 sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); 1947 return -ENODEV; 1948 } 1949 1950 static int broadwell_mci_bind_devs(struct mem_ctl_info *mci, 1951 struct sbridge_dev *sbridge_dev) 1952 { 1953 struct sbridge_pvt *pvt = mci->pvt_info; 1954 struct pci_dev *pdev; 1955 u8 saw_chan_mask = 0; 1956 int i; 1957 1958 /* there's only one device per system; not tied to any bus */ 1959 if (pvt->info.pci_vtd == NULL) 1960 /* result will be checked later */ 1961 pvt->info.pci_vtd = pci_get_device(PCI_VENDOR_ID_INTEL, 1962 PCI_DEVICE_ID_INTEL_BROADWELL_IMC_VTD_MISC, 1963 NULL); 1964 1965 for (i = 0; i < sbridge_dev->n_devs; i++) { 1966 pdev = sbridge_dev->pdev[i]; 1967 if (!pdev) 1968 continue; 1969 1970 switch (pdev->device) { 1971 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0: 1972 pvt->pci_sad0 = pdev; 1973 break; 1974 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1: 1975 pvt->pci_sad1 = pdev; 1976 break; 1977 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0: 1978 pvt->pci_ha0 = pdev; 1979 break; 1980 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA: 1981 pvt->pci_ta = pdev; 1982 break; 1983 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL: 1984 pvt->pci_ras = pdev; 1985 break; 1986 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0: 1987 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1: 1988 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2: 1989 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3: 1990 { 1991 int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0; 1992 pvt->pci_tad[id] = pdev; 1993 saw_chan_mask |= 1 << id; 1994 } 1995 break; 1996 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0: 1997 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1: 1998 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2: 1999 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3: 2000 { 2001 int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0 + 4; 2002 pvt->pci_tad[id] = pdev; 2003 saw_chan_mask |= 1 << id; 2004 } 2005 break; 2006 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0: 2007 pvt->pci_ddrio = pdev; 2008 break; 2009 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1: 2010 pvt->pci_ha1 = pdev; 2011 break; 2012 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA: 2013 pvt->pci_ha1_ta = pdev; 2014 break; 2015 default: 2016 break; 2017 } 2018 2019 edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", 2020 sbridge_dev->bus, 2021 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), 2022 pdev); 2023 } 2024 2025 /* Check if everything were registered */ 2026 if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 || 2027 !pvt->pci_ras || !pvt->pci_ta || !pvt->info.pci_vtd) 2028 goto enodev; 2029 2030 if (saw_chan_mask != 0x0f && /* -EN */ 2031 saw_chan_mask != 0x33 && /* -EP */ 2032 saw_chan_mask != 0xff) /* -EX */ 2033 goto enodev; 2034 return 0; 2035 2036 enodev: 2037 sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); 2038 return -ENODEV; 2039 } 2040 2041 /**************************************************************************** 2042 Error check routines 2043 ****************************************************************************/ 2044 2045 /* 2046 * While Sandy Bridge has error count registers, SMI BIOS read values from 2047 * and resets the counters. So, they are not reliable for the OS to read 2048 * from them. So, we have no option but to just trust on whatever MCE is 2049 * telling us about the errors. 2050 */ 2051 static void sbridge_mce_output_error(struct mem_ctl_info *mci, 2052 const struct mce *m) 2053 { 2054 struct mem_ctl_info *new_mci; 2055 struct sbridge_pvt *pvt = mci->pvt_info; 2056 enum hw_event_mc_err_type tp_event; 2057 char *type, *optype, msg[256]; 2058 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); 2059 bool overflow = GET_BITFIELD(m->status, 62, 62); 2060 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); 2061 bool recoverable; 2062 u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); 2063 u32 mscod = GET_BITFIELD(m->status, 16, 31); 2064 u32 errcode = GET_BITFIELD(m->status, 0, 15); 2065 u32 channel = GET_BITFIELD(m->status, 0, 3); 2066 u32 optypenum = GET_BITFIELD(m->status, 4, 6); 2067 long channel_mask, first_channel; 2068 u8 rank, socket, ha; 2069 int rc, dimm; 2070 char *area_type = NULL; 2071 2072 if (pvt->info.type != SANDY_BRIDGE) 2073 recoverable = true; 2074 else 2075 recoverable = GET_BITFIELD(m->status, 56, 56); 2076 2077 if (uncorrected_error) { 2078 if (ripv) { 2079 type = "FATAL"; 2080 tp_event = HW_EVENT_ERR_FATAL; 2081 } else { 2082 type = "NON_FATAL"; 2083 tp_event = HW_EVENT_ERR_UNCORRECTED; 2084 } 2085 } else { 2086 type = "CORRECTED"; 2087 tp_event = HW_EVENT_ERR_CORRECTED; 2088 } 2089 2090 /* 2091 * According with Table 15-9 of the Intel Architecture spec vol 3A, 2092 * memory errors should fit in this mask: 2093 * 000f 0000 1mmm cccc (binary) 2094 * where: 2095 * f = Correction Report Filtering Bit. If 1, subsequent errors 2096 * won't be shown 2097 * mmm = error type 2098 * cccc = channel 2099 * If the mask doesn't match, report an error to the parsing logic 2100 */ 2101 if (! ((errcode & 0xef80) == 0x80)) { 2102 optype = "Can't parse: it is not a mem"; 2103 } else { 2104 switch (optypenum) { 2105 case 0: 2106 optype = "generic undef request error"; 2107 break; 2108 case 1: 2109 optype = "memory read error"; 2110 break; 2111 case 2: 2112 optype = "memory write error"; 2113 break; 2114 case 3: 2115 optype = "addr/cmd error"; 2116 break; 2117 case 4: 2118 optype = "memory scrubbing error"; 2119 break; 2120 default: 2121 optype = "reserved"; 2122 break; 2123 } 2124 } 2125 2126 /* Only decode errors with an valid address (ADDRV) */ 2127 if (!GET_BITFIELD(m->status, 58, 58)) 2128 return; 2129 2130 rc = get_memory_error_data(mci, m->addr, &socket, &ha, 2131 &channel_mask, &rank, &area_type, msg); 2132 if (rc < 0) 2133 goto err_parsing; 2134 new_mci = get_mci_for_node_id(socket); 2135 if (!new_mci) { 2136 strcpy(msg, "Error: socket got corrupted!"); 2137 goto err_parsing; 2138 } 2139 mci = new_mci; 2140 pvt = mci->pvt_info; 2141 2142 first_channel = find_first_bit(&channel_mask, NUM_CHANNELS); 2143 2144 if (rank < 4) 2145 dimm = 0; 2146 else if (rank < 8) 2147 dimm = 1; 2148 else 2149 dimm = 2; 2150 2151 2152 /* 2153 * FIXME: On some memory configurations (mirror, lockstep), the 2154 * Memory Controller can't point the error to a single DIMM. The 2155 * EDAC core should be handling the channel mask, in order to point 2156 * to the group of dimm's where the error may be happening. 2157 */ 2158 if (!pvt->is_lockstep && !pvt->is_mirrored && !pvt->is_close_pg) 2159 channel = first_channel; 2160 2161 snprintf(msg, sizeof(msg), 2162 "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d", 2163 overflow ? " OVERFLOW" : "", 2164 (uncorrected_error && recoverable) ? " recoverable" : "", 2165 area_type, 2166 mscod, errcode, 2167 socket, ha, 2168 channel_mask, 2169 rank); 2170 2171 edac_dbg(0, "%s\n", msg); 2172 2173 /* FIXME: need support for channel mask */ 2174 2175 if (channel == CHANNEL_UNSPECIFIED) 2176 channel = -1; 2177 2178 /* Call the helper to output message */ 2179 edac_mc_handle_error(tp_event, mci, core_err_cnt, 2180 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, 2181 4*ha+channel, dimm, -1, 2182 optype, msg); 2183 return; 2184 err_parsing: 2185 edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, 2186 -1, -1, -1, 2187 msg, ""); 2188 2189 } 2190 2191 /* 2192 * sbridge_check_error Retrieve and process errors reported by the 2193 * hardware. Called by the Core module. 2194 */ 2195 static void sbridge_check_error(struct mem_ctl_info *mci) 2196 { 2197 struct sbridge_pvt *pvt = mci->pvt_info; 2198 int i; 2199 unsigned count = 0; 2200 struct mce *m; 2201 2202 /* 2203 * MCE first step: Copy all mce errors into a temporary buffer 2204 * We use a double buffering here, to reduce the risk of 2205 * loosing an error. 2206 */ 2207 smp_rmb(); 2208 count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) 2209 % MCE_LOG_LEN; 2210 if (!count) 2211 return; 2212 2213 m = pvt->mce_outentry; 2214 if (pvt->mce_in + count > MCE_LOG_LEN) { 2215 unsigned l = MCE_LOG_LEN - pvt->mce_in; 2216 2217 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); 2218 smp_wmb(); 2219 pvt->mce_in = 0; 2220 count -= l; 2221 m += l; 2222 } 2223 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); 2224 smp_wmb(); 2225 pvt->mce_in += count; 2226 2227 smp_rmb(); 2228 if (pvt->mce_overrun) { 2229 sbridge_printk(KERN_ERR, "Lost %d memory errors\n", 2230 pvt->mce_overrun); 2231 smp_wmb(); 2232 pvt->mce_overrun = 0; 2233 } 2234 2235 /* 2236 * MCE second step: parse errors and display 2237 */ 2238 for (i = 0; i < count; i++) 2239 sbridge_mce_output_error(mci, &pvt->mce_outentry[i]); 2240 } 2241 2242 /* 2243 * sbridge_mce_check_error Replicates mcelog routine to get errors 2244 * This routine simply queues mcelog errors, and 2245 * return. The error itself should be handled later 2246 * by sbridge_check_error. 2247 * WARNING: As this routine should be called at NMI time, extra care should 2248 * be taken to avoid deadlocks, and to be as fast as possible. 2249 */ 2250 static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, 2251 void *data) 2252 { 2253 struct mce *mce = (struct mce *)data; 2254 struct mem_ctl_info *mci; 2255 struct sbridge_pvt *pvt; 2256 char *type; 2257 2258 if (get_edac_report_status() == EDAC_REPORTING_DISABLED) 2259 return NOTIFY_DONE; 2260 2261 mci = get_mci_for_node_id(mce->socketid); 2262 if (!mci) 2263 return NOTIFY_BAD; 2264 pvt = mci->pvt_info; 2265 2266 /* 2267 * Just let mcelog handle it if the error is 2268 * outside the memory controller. A memory error 2269 * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0. 2270 * bit 12 has an special meaning. 2271 */ 2272 if ((mce->status & 0xefff) >> 7 != 1) 2273 return NOTIFY_DONE; 2274 2275 if (mce->mcgstatus & MCG_STATUS_MCIP) 2276 type = "Exception"; 2277 else 2278 type = "Event"; 2279 2280 sbridge_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); 2281 2282 sbridge_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx " 2283 "Bank %d: %016Lx\n", mce->extcpu, type, 2284 mce->mcgstatus, mce->bank, mce->status); 2285 sbridge_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc); 2286 sbridge_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr); 2287 sbridge_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc); 2288 2289 sbridge_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET " 2290 "%u APIC %x\n", mce->cpuvendor, mce->cpuid, 2291 mce->time, mce->socketid, mce->apicid); 2292 2293 smp_rmb(); 2294 if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { 2295 smp_wmb(); 2296 pvt->mce_overrun++; 2297 return NOTIFY_DONE; 2298 } 2299 2300 /* Copy memory error at the ringbuffer */ 2301 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); 2302 smp_wmb(); 2303 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; 2304 2305 /* Handle fatal errors immediately */ 2306 if (mce->mcgstatus & 1) 2307 sbridge_check_error(mci); 2308 2309 /* Advice mcelog that the error were handled */ 2310 return NOTIFY_STOP; 2311 } 2312 2313 static struct notifier_block sbridge_mce_dec = { 2314 .notifier_call = sbridge_mce_check_error, 2315 }; 2316 2317 /**************************************************************************** 2318 EDAC register/unregister logic 2319 ****************************************************************************/ 2320 2321 static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) 2322 { 2323 struct mem_ctl_info *mci = sbridge_dev->mci; 2324 struct sbridge_pvt *pvt; 2325 2326 if (unlikely(!mci || !mci->pvt_info)) { 2327 edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev); 2328 2329 sbridge_printk(KERN_ERR, "Couldn't find mci handler\n"); 2330 return; 2331 } 2332 2333 pvt = mci->pvt_info; 2334 2335 edac_dbg(0, "MC: mci = %p, dev = %p\n", 2336 mci, &sbridge_dev->pdev[0]->dev); 2337 2338 /* Remove MC sysfs nodes */ 2339 edac_mc_del_mc(mci->pdev); 2340 2341 edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); 2342 kfree(mci->ctl_name); 2343 edac_mc_free(mci); 2344 sbridge_dev->mci = NULL; 2345 } 2346 2347 static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) 2348 { 2349 struct mem_ctl_info *mci; 2350 struct edac_mc_layer layers[2]; 2351 struct sbridge_pvt *pvt; 2352 struct pci_dev *pdev = sbridge_dev->pdev[0]; 2353 int rc; 2354 2355 /* Check the number of active and not disabled channels */ 2356 rc = check_if_ecc_is_active(sbridge_dev->bus, type); 2357 if (unlikely(rc < 0)) 2358 return rc; 2359 2360 /* allocate a new MC control structure */ 2361 layers[0].type = EDAC_MC_LAYER_CHANNEL; 2362 layers[0].size = NUM_CHANNELS; 2363 layers[0].is_virt_csrow = false; 2364 layers[1].type = EDAC_MC_LAYER_SLOT; 2365 layers[1].size = MAX_DIMMS; 2366 layers[1].is_virt_csrow = true; 2367 mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers, 2368 sizeof(*pvt)); 2369 2370 if (unlikely(!mci)) 2371 return -ENOMEM; 2372 2373 edac_dbg(0, "MC: mci = %p, dev = %p\n", 2374 mci, &pdev->dev); 2375 2376 pvt = mci->pvt_info; 2377 memset(pvt, 0, sizeof(*pvt)); 2378 2379 /* Associate sbridge_dev and mci for future usage */ 2380 pvt->sbridge_dev = sbridge_dev; 2381 sbridge_dev->mci = mci; 2382 2383 mci->mtype_cap = MEM_FLAG_DDR3; 2384 mci->edac_ctl_cap = EDAC_FLAG_NONE; 2385 mci->edac_cap = EDAC_FLAG_NONE; 2386 mci->mod_name = "sbridge_edac.c"; 2387 mci->mod_ver = SBRIDGE_REVISION; 2388 mci->dev_name = pci_name(pdev); 2389 mci->ctl_page_to_phys = NULL; 2390 2391 /* Set the function pointer to an actual operation function */ 2392 mci->edac_check = sbridge_check_error; 2393 2394 pvt->info.type = type; 2395 switch (type) { 2396 case IVY_BRIDGE: 2397 pvt->info.rankcfgr = IB_RANK_CFG_A; 2398 pvt->info.get_tolm = ibridge_get_tolm; 2399 pvt->info.get_tohm = ibridge_get_tohm; 2400 pvt->info.dram_rule = ibridge_dram_rule; 2401 pvt->info.get_memory_type = get_memory_type; 2402 pvt->info.get_node_id = get_node_id; 2403 pvt->info.rir_limit = rir_limit; 2404 pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); 2405 pvt->info.interleave_list = ibridge_interleave_list; 2406 pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); 2407 pvt->info.interleave_pkg = ibridge_interleave_pkg; 2408 pvt->info.get_width = ibridge_get_width; 2409 mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx); 2410 2411 /* Store pci devices at mci for faster access */ 2412 rc = ibridge_mci_bind_devs(mci, sbridge_dev); 2413 if (unlikely(rc < 0)) 2414 goto fail0; 2415 break; 2416 case SANDY_BRIDGE: 2417 pvt->info.rankcfgr = SB_RANK_CFG_A; 2418 pvt->info.get_tolm = sbridge_get_tolm; 2419 pvt->info.get_tohm = sbridge_get_tohm; 2420 pvt->info.dram_rule = sbridge_dram_rule; 2421 pvt->info.get_memory_type = get_memory_type; 2422 pvt->info.get_node_id = get_node_id; 2423 pvt->info.rir_limit = rir_limit; 2424 pvt->info.max_sad = ARRAY_SIZE(sbridge_dram_rule); 2425 pvt->info.interleave_list = sbridge_interleave_list; 2426 pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list); 2427 pvt->info.interleave_pkg = sbridge_interleave_pkg; 2428 pvt->info.get_width = sbridge_get_width; 2429 mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx); 2430 2431 /* Store pci devices at mci for faster access */ 2432 rc = sbridge_mci_bind_devs(mci, sbridge_dev); 2433 if (unlikely(rc < 0)) 2434 goto fail0; 2435 break; 2436 case HASWELL: 2437 /* rankcfgr isn't used */ 2438 pvt->info.get_tolm = haswell_get_tolm; 2439 pvt->info.get_tohm = haswell_get_tohm; 2440 pvt->info.dram_rule = ibridge_dram_rule; 2441 pvt->info.get_memory_type = haswell_get_memory_type; 2442 pvt->info.get_node_id = haswell_get_node_id; 2443 pvt->info.rir_limit = haswell_rir_limit; 2444 pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); 2445 pvt->info.interleave_list = ibridge_interleave_list; 2446 pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); 2447 pvt->info.interleave_pkg = ibridge_interleave_pkg; 2448 pvt->info.get_width = ibridge_get_width; 2449 mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell Socket#%d", mci->mc_idx); 2450 2451 /* Store pci devices at mci for faster access */ 2452 rc = haswell_mci_bind_devs(mci, sbridge_dev); 2453 if (unlikely(rc < 0)) 2454 goto fail0; 2455 break; 2456 case BROADWELL: 2457 /* rankcfgr isn't used */ 2458 pvt->info.get_tolm = haswell_get_tolm; 2459 pvt->info.get_tohm = haswell_get_tohm; 2460 pvt->info.dram_rule = ibridge_dram_rule; 2461 pvt->info.get_memory_type = haswell_get_memory_type; 2462 pvt->info.get_node_id = haswell_get_node_id; 2463 pvt->info.rir_limit = haswell_rir_limit; 2464 pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); 2465 pvt->info.interleave_list = ibridge_interleave_list; 2466 pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); 2467 pvt->info.interleave_pkg = ibridge_interleave_pkg; 2468 pvt->info.get_width = broadwell_get_width; 2469 mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell Socket#%d", mci->mc_idx); 2470 2471 /* Store pci devices at mci for faster access */ 2472 rc = broadwell_mci_bind_devs(mci, sbridge_dev); 2473 if (unlikely(rc < 0)) 2474 goto fail0; 2475 break; 2476 } 2477 2478 /* Get dimm basic config and the memory layout */ 2479 get_dimm_config(mci); 2480 get_memory_layout(mci); 2481 2482 /* record ptr to the generic device */ 2483 mci->pdev = &pdev->dev; 2484 2485 /* add this new MC control structure to EDAC's list of MCs */ 2486 if (unlikely(edac_mc_add_mc(mci))) { 2487 edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); 2488 rc = -EINVAL; 2489 goto fail0; 2490 } 2491 2492 return 0; 2493 2494 fail0: 2495 kfree(mci->ctl_name); 2496 edac_mc_free(mci); 2497 sbridge_dev->mci = NULL; 2498 return rc; 2499 } 2500 2501 /* 2502 * sbridge_probe Probe for ONE instance of device to see if it is 2503 * present. 2504 * return: 2505 * 0 for FOUND a device 2506 * < 0 for error code 2507 */ 2508 2509 static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) 2510 { 2511 int rc = -ENODEV; 2512 u8 mc, num_mc = 0; 2513 struct sbridge_dev *sbridge_dev; 2514 enum type type = SANDY_BRIDGE; 2515 2516 /* get the pci devices we want to reserve for our use */ 2517 mutex_lock(&sbridge_edac_lock); 2518 2519 /* 2520 * All memory controllers are allocated at the first pass. 2521 */ 2522 if (unlikely(probed >= 1)) { 2523 mutex_unlock(&sbridge_edac_lock); 2524 return -ENODEV; 2525 } 2526 probed++; 2527 2528 switch (pdev->device) { 2529 case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA: 2530 rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_ibridge_table); 2531 type = IVY_BRIDGE; 2532 break; 2533 case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0: 2534 rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_sbridge_table); 2535 type = SANDY_BRIDGE; 2536 break; 2537 case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0: 2538 rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_haswell_table); 2539 type = HASWELL; 2540 break; 2541 case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0: 2542 rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_broadwell_table); 2543 type = BROADWELL; 2544 break; 2545 } 2546 if (unlikely(rc < 0)) { 2547 edac_dbg(0, "couldn't get all devices for 0x%x\n", pdev->device); 2548 goto fail0; 2549 } 2550 2551 mc = 0; 2552 2553 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { 2554 edac_dbg(0, "Registering MC#%d (%d of %d)\n", 2555 mc, mc + 1, num_mc); 2556 2557 sbridge_dev->mc = mc++; 2558 rc = sbridge_register_mci(sbridge_dev, type); 2559 if (unlikely(rc < 0)) 2560 goto fail1; 2561 } 2562 2563 sbridge_printk(KERN_INFO, "%s\n", SBRIDGE_REVISION); 2564 2565 mutex_unlock(&sbridge_edac_lock); 2566 return 0; 2567 2568 fail1: 2569 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) 2570 sbridge_unregister_mci(sbridge_dev); 2571 2572 sbridge_put_all_devices(); 2573 fail0: 2574 mutex_unlock(&sbridge_edac_lock); 2575 return rc; 2576 } 2577 2578 /* 2579 * sbridge_remove destructor for one instance of device 2580 * 2581 */ 2582 static void sbridge_remove(struct pci_dev *pdev) 2583 { 2584 struct sbridge_dev *sbridge_dev; 2585 2586 edac_dbg(0, "\n"); 2587 2588 /* 2589 * we have a trouble here: pdev value for removal will be wrong, since 2590 * it will point to the X58 register used to detect that the machine 2591 * is a Nehalem or upper design. However, due to the way several PCI 2592 * devices are grouped together to provide MC functionality, we need 2593 * to use a different method for releasing the devices 2594 */ 2595 2596 mutex_lock(&sbridge_edac_lock); 2597 2598 if (unlikely(!probed)) { 2599 mutex_unlock(&sbridge_edac_lock); 2600 return; 2601 } 2602 2603 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) 2604 sbridge_unregister_mci(sbridge_dev); 2605 2606 /* Release PCI resources */ 2607 sbridge_put_all_devices(); 2608 2609 probed--; 2610 2611 mutex_unlock(&sbridge_edac_lock); 2612 } 2613 2614 MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl); 2615 2616 /* 2617 * sbridge_driver pci_driver structure for this module 2618 * 2619 */ 2620 static struct pci_driver sbridge_driver = { 2621 .name = "sbridge_edac", 2622 .probe = sbridge_probe, 2623 .remove = sbridge_remove, 2624 .id_table = sbridge_pci_tbl, 2625 }; 2626 2627 /* 2628 * sbridge_init Module entry function 2629 * Try to initialize this module for its devices 2630 */ 2631 static int __init sbridge_init(void) 2632 { 2633 int pci_rc; 2634 2635 edac_dbg(2, "\n"); 2636 2637 /* Ensure that the OPSTATE is set correctly for POLL or NMI */ 2638 opstate_init(); 2639 2640 pci_rc = pci_register_driver(&sbridge_driver); 2641 if (pci_rc >= 0) { 2642 mce_register_decode_chain(&sbridge_mce_dec); 2643 if (get_edac_report_status() == EDAC_REPORTING_DISABLED) 2644 sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); 2645 return 0; 2646 } 2647 2648 sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", 2649 pci_rc); 2650 2651 return pci_rc; 2652 } 2653 2654 /* 2655 * sbridge_exit() Module exit function 2656 * Unregister the driver 2657 */ 2658 static void __exit sbridge_exit(void) 2659 { 2660 edac_dbg(2, "\n"); 2661 pci_unregister_driver(&sbridge_driver); 2662 mce_unregister_decode_chain(&sbridge_mce_dec); 2663 } 2664 2665 module_init(sbridge_init); 2666 module_exit(sbridge_exit); 2667 2668 module_param(edac_op_state, int, 0444); 2669 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); 2670 2671 MODULE_LICENSE("GPL"); 2672 MODULE_AUTHOR("Mauro Carvalho Chehab"); 2673 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); 2674 MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - " 2675 SBRIDGE_REVISION); 2676