xref: /openbmc/linux/drivers/edac/i7core_edac.c (revision 565d76cb)
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *	 Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *	http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27 
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39 
40 #include "edac_core.h"
41 
42 /* Static vars */
43 static LIST_HEAD(i7core_edac_list);
44 static DEFINE_MUTEX(i7core_edac_lock);
45 static int probed;
46 
47 static int use_pci_fixup;
48 module_param(use_pci_fixup, int, 0444);
49 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
50 /*
51  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
52  * registers start at bus 255, and are not reported by BIOS.
53  * We currently only find devices with up to two sockets. In order to support
54  * more QPI (Quick Path Interconnect) sockets, just increment this number.
55  */
56 #define MAX_SOCKET_BUSES	2
57 
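/*
 * Illustrative sketch (not part of the driver): how the socket number falls
 * out of these hidden buses. On a hypothetical two-socket Nehalem-EP system
 * the non-core devices sit on buses 255 and 254, and the probe code later
 * computes socket = last_bus - bus (see i7core_get_onedevice()).
 */
#if 0
static void example_socket_from_bus(void)
{
	unsigned last_bus = 255;	/* highest PCI bus found by the scan */
	u8 socket0 = last_bus - 255;	/* devices on bus 255 -> socket 0 */
	u8 socket1 = last_bus - 254;	/* devices on bus 254 -> socket 1 */
}
#endif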
58 
59 /*
60  * Alter this version for the module when modifications are made
61  */
62 #define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
63 #define EDAC_MOD_STR      "i7core_edac"
64 
65 /*
66  * Debug macros
67  */
68 #define i7core_printk(level, fmt, arg...)			\
69 	edac_printk(level, "i7core", fmt, ##arg)
70 
71 #define i7core_mc_printk(mci, level, fmt, arg...)		\
72 	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
73 
74 /*
75  * i7core Memory Controller Registers
76  */
77 
78 	/* OFFSETS for Device 0 Function 0 */
79 
80 #define MC_CFG_CONTROL	0x90
81 
82 	/* OFFSETS for Device 3 Function 0 */
83 
84 #define MC_CONTROL	0x48
85 #define MC_STATUS	0x4c
86 #define MC_MAX_DOD	0x64
87 
88 /*
89  * OFFSETS for Device 3 Function 4, as indicated on the Xeon 5500 datasheet:
90  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
91  */
92 
93 #define MC_TEST_ERR_RCV1	0x60
94   #define DIMM2_COR_ERR(r)			((r) & 0x7fff)
95 
96 #define MC_TEST_ERR_RCV0	0x64
97   #define DIMM1_COR_ERR(r)			(((r) >> 16) & 0x7fff)
98   #define DIMM0_COR_ERR(r)			((r) & 0x7fff)
99 
100 /* OFFSETS for Device 3 Function 2, as indicated on the Xeon 5500 datasheet */
101 #define MC_COR_ECC_CNT_0	0x80
102 #define MC_COR_ECC_CNT_1	0x84
103 #define MC_COR_ECC_CNT_2	0x88
104 #define MC_COR_ECC_CNT_3	0x8c
105 #define MC_COR_ECC_CNT_4	0x90
106 #define MC_COR_ECC_CNT_5	0x94
107 
108 #define DIMM_TOP_COR_ERR(r)			(((r) >> 16) & 0x7fff)
109 #define DIMM_BOT_COR_ERR(r)			((r) & 0x7fff)
110 
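/*
 * Illustrative sketch (not part of the driver): each MC_COR_ECC_CNT_x
 * register packs two 15-bit corrected-error counters, split by the macros
 * above. The register value below is hypothetical.
 */
#if 0
static void example_cor_ecc_cnt_decode(void)
{
	u32 rcv = 0x00050003;			/* hypothetical raw value */
	int top = DIMM_TOP_COR_ERR(rcv);	/* (rcv >> 16) & 0x7fff == 5 */
	int bot = DIMM_BOT_COR_ERR(rcv);	/* rcv & 0x7fff == 3 */
}
#endif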
111 
112 	/* OFFSETS for Devices 4,5 and 6 Function 0 */
113 
114 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
115   #define THREE_DIMMS_PRESENT		(1 << 24)
116   #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
117   #define QUAD_RANK_PRESENT		(1 << 22)
118   #define REGISTERED_DIMM		(1 << 15)
119 
120 #define MC_CHANNEL_MAPPER	0x60
121   #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
122   #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)
123 
124 #define MC_CHANNEL_RANK_PRESENT 0x7c
125   #define RANK_PRESENT_MASK		0xffff
126 
127 #define MC_CHANNEL_ADDR_MATCH	0xf0
128 #define MC_CHANNEL_ERROR_MASK	0xf8
129 #define MC_CHANNEL_ERROR_INJECT	0xfc
130   #define INJECT_ADDR_PARITY	0x10
131   #define INJECT_ECC		0x08
132   #define MASK_CACHELINE	0x06
133   #define MASK_FULL_CACHELINE	0x06
134   #define MASK_MSB32_CACHELINE	0x04
135   #define MASK_LSB32_CACHELINE	0x02
136   #define NO_MASK_CACHELINE	0x00
137   #define REPEAT_EN		0x01
138 
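/*
 * Illustrative sketch (not part of the driver): composing a value for
 * MC_CHANNEL_ERROR_INJECT from the bits above. The particular combination
 * (repeated ECC injection on the full cacheline) is a hypothetical example;
 * i7core_inject_enable_store() below shows how the driver really assembles
 * this mask from the sysfs parameters.
 */
#if 0
static void example_injectmask(void)
{
	u32 injectmask = INJECT_ECC | MASK_FULL_CACHELINE | REPEAT_EN;
	/* injectmask == 0x0f: bit 0 repeat, bits 1-2 mask, bit 3 ECC */
}
#endif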
139 	/* OFFSETS for Devices 4,5 and 6 Function 1 */
140 
141 #define MC_DOD_CH_DIMM0		0x48
142 #define MC_DOD_CH_DIMM1		0x4c
143 #define MC_DOD_CH_DIMM2		0x50
144   #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
145   #define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
146   #define DIMM_PRESENT_MASK	(1 << 9)
147   #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
148   #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
149   #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
150   #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
151   #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
152   #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
153   #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
154   #define MC_DOD_NUMCOL_MASK		3
155   #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)
156 
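/*
 * Illustrative sketch (not part of the driver): extracting the raw DIMM
 * geometry fields from an MC_DOD_CH_DIMMx value with the macros above. The
 * register value is hypothetical; the decoded fields are translated into
 * real bank/rank/row/col counts by numbank()/numrank()/numrow()/numcol()
 * further below.
 */
#if 0
static void example_dod_decode(void)
{
	u32 dod = 0x000002a9;		/* hypothetical MC_DOD_CH_DIMM0 value */

	if (DIMM_PRESENT(dod)) {		/* bit 9 set: DIMM populated */
		u32 banks = MC_DOD_NUMBANK(dod);	/* == 1 */
		u32 ranks = MC_DOD_NUMRANK(dod);	/* == 1 */
		u32 rows  = MC_DOD_NUMROW(dod);		/* == 2 */
		u32 cols  = MC_DOD_NUMCOL(dod);		/* == 1 */
	}
}
#endif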
157 #define MC_RANK_PRESENT		0x7c
158 
159 #define MC_SAG_CH_0	0x80
160 #define MC_SAG_CH_1	0x84
161 #define MC_SAG_CH_2	0x88
162 #define MC_SAG_CH_3	0x8c
163 #define MC_SAG_CH_4	0x90
164 #define MC_SAG_CH_5	0x94
165 #define MC_SAG_CH_6	0x98
166 #define MC_SAG_CH_7	0x9c
167 
168 #define MC_RIR_LIMIT_CH_0	0x40
169 #define MC_RIR_LIMIT_CH_1	0x44
170 #define MC_RIR_LIMIT_CH_2	0x48
171 #define MC_RIR_LIMIT_CH_3	0x4C
172 #define MC_RIR_LIMIT_CH_4	0x50
173 #define MC_RIR_LIMIT_CH_5	0x54
174 #define MC_RIR_LIMIT_CH_6	0x58
175 #define MC_RIR_LIMIT_CH_7	0x5C
176 #define MC_RIR_LIMIT_MASK	((1 << 10) - 1)
177 
178 #define MC_RIR_WAY_CH		0x80
179   #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
180   #define MC_RIR_WAY_RANK_MASK		0x7
181 
182 /*
183  * i7core structs
184  */
185 
186 #define NUM_CHANS 3
187 #define MAX_DIMMS 3		/* Max DIMMS per channel */
188 #define MAX_MCR_FUNC  4
189 #define MAX_CHAN_FUNC 3
190 
191 struct i7core_info {
192 	u32	mc_control;
193 	u32	mc_status;
194 	u32	max_dod;
195 	u32	ch_map;
196 };
197 
198 
199 struct i7core_inject {
200 	int	enable;
201 
202 	u32	section;
203 	u32	type;
204 	u32	eccmask;
205 
206 	/* Error address mask */
207 	int channel, dimm, rank, bank, page, col;
208 };
209 
210 struct i7core_channel {
211 	u32		ranks;
212 	u32		dimms;
213 };
214 
215 struct pci_id_descr {
216 	int			dev;
217 	int			func;
218 	int 			dev_id;
219 	int			optional;
220 };
221 
222 struct pci_id_table {
223 	const struct pci_id_descr	*descr;
224 	int				n_devs;
225 };
226 
227 struct i7core_dev {
228 	struct list_head	list;
229 	u8			socket;
230 	struct pci_dev		**pdev;
231 	int			n_devs;
232 	struct mem_ctl_info	*mci;
233 };
234 
235 struct i7core_pvt {
236 	struct pci_dev	*pci_noncore;
237 	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
238 	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
239 
240 	struct i7core_dev *i7core_dev;
241 
242 	struct i7core_info	info;
243 	struct i7core_inject	inject;
244 	struct i7core_channel	channel[NUM_CHANS];
245 
246 	int		ce_count_available;
247 	int 		csrow_map[NUM_CHANS][MAX_DIMMS];
248 
249 			/* ECC corrected error counts per udimm */
250 	unsigned long	udimm_ce_count[MAX_DIMMS];
251 	int		udimm_last_ce_count[MAX_DIMMS];
252 			/* ECC corrected error counts per rdimm */
253 	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
254 	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
255 
256 	unsigned int	is_registered;
257 
258 	/* mcelog glue */
259 	struct edac_mce		edac_mce;
260 
261 	/* Fifo double buffers */
262 	struct mce		mce_entry[MCE_LOG_LEN];
263 	struct mce		mce_outentry[MCE_LOG_LEN];
264 
265 	/* Fifo in/out counters */
266 	unsigned		mce_in, mce_out;
267 
268 	/* Count of errors lost due to FIFO overrun */
269 	unsigned		mce_overrun;
270 
271 	/* Struct to control EDAC polling */
272 	struct edac_pci_ctl_info *i7core_pci;
273 };
274 
275 #define PCI_DESCR(device, function, device_id)	\
276 	.dev = (device),			\
277 	.func = (function),			\
278 	.dev_id = (device_id)
279 
280 static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
281 		/* Memory controller */
282 	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
283 	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
284 
285 		/* Exists only for RDIMM */
286 	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
287 	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
288 
289 		/* Channel 0 */
290 	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
291 	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
292 	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
293 	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
294 
295 		/* Channel 1 */
296 	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
297 	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
298 	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
299 	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
300 
301 		/* Channel 2 */
302 	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
303 	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
304 	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
305 	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
306 };
307 
308 static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
309 	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
310 	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
311 	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
312 
313 	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
314 	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
315 	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
316 	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
317 
318 	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
319 	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
320 	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
321 	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
322 };
323 
324 static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
325 		/* Memory controller */
326 	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
327 	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
328 			/* Exists only for RDIMM */
329 	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
330 	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
331 
332 		/* Channel 0 */
333 	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
334 	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
335 	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
336 	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
337 
338 		/* Channel 1 */
339 	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
340 	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
341 	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
342 	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
343 
344 		/* Channel 2 */
345 	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
346 	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
347 	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
348 	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
349 };
350 
351 #define PCI_ID_TABLE_ENTRY(A) { .descr = A, .n_devs = ARRAY_SIZE(A) }
352 static const struct pci_id_table pci_dev_table[] = {
353 	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
354 	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
355 	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
356 	{0,}			/* 0 terminated list. */
357 };
358 
359 /*
360  *	pci_device_id	table for which devices we are looking for
361  */
362 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
363 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
364 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
365 	{0,}			/* 0 terminated list. */
366 };
367 
368 /****************************************************************************
369 			Ancillary status routines
370  ****************************************************************************/
371 
372 	/* MC_CONTROL bits */
373 #define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + (ch))))
374 #define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))
375 
376 	/* MC_STATUS bits */
377 #define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
378 #define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << (ch)))
379 
380 	/* MC_MAX_DOD read functions */
381 static inline int numdimms(u32 dimms)
382 {
383 	return (dimms & 0x3) + 1;
384 }
385 
386 static inline int numrank(u32 rank)
387 {
388 	static int ranks[4] = { 1, 2, 4, -EINVAL };
389 
390 	return ranks[rank & 0x3];
391 }
392 
393 static inline int numbank(u32 bank)
394 {
395 	static int banks[4] = { 4, 8, 16, -EINVAL };
396 
397 	return banks[bank & 0x3];
398 }
399 
400 static inline int numrow(u32 row)
401 {
402 	static int rows[8] = {
403 		1 << 12, 1 << 13, 1 << 14, 1 << 15,
404 		1 << 16, -EINVAL, -EINVAL, -EINVAL,
405 	};
406 
407 	return rows[row & 0x7];
408 }
409 
410 static inline int numcol(u32 col)
411 {
412 	static int cols[4] = {
413 		1 << 10, 1 << 11, 1 << 12, -EINVAL,
414 	};
415 	return cols[col & 0x3];
416 }
417 
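/*
 * Illustrative sketch (not part of the driver): turning the decoded
 * geometry into a DIMM size in MB, mirroring the computation done later in
 * get_dimm_config(). The field values are hypothetical.
 */
#if 0
static void example_dimm_size(void)
{
	u32 banks = numbank(1);		/* 8 banks */
	u32 ranks = numrank(1);		/* 2 ranks */
	u32 rows  = numrow(2);		/* 1 << 14 rows */
	u32 cols  = numcol(1);		/* 1 << 11 columns */

	/* >> 20 yields mega-units; the driver attributes the extra
	   factor of 8 (the - 3) to DDR3's 8 I/O banks */
	u32 size = (rows * cols * banks * ranks) >> (20 - 3);
	/* (16384 * 2048 * 8 * 2) >> 17 == 4096 MB */
}
#endif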
418 static struct i7core_dev *get_i7core_dev(u8 socket)
419 {
420 	struct i7core_dev *i7core_dev;
421 
422 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
423 		if (i7core_dev->socket == socket)
424 			return i7core_dev;
425 	}
426 
427 	return NULL;
428 }
429 
430 static struct i7core_dev *alloc_i7core_dev(u8 socket,
431 					   const struct pci_id_table *table)
432 {
433 	struct i7core_dev *i7core_dev;
434 
435 	i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
436 	if (!i7core_dev)
437 		return NULL;
438 
439 	i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
440 				   GFP_KERNEL);
441 	if (!i7core_dev->pdev) {
442 		kfree(i7core_dev);
443 		return NULL;
444 	}
445 
446 	i7core_dev->socket = socket;
447 	i7core_dev->n_devs = table->n_devs;
448 	list_add_tail(&i7core_dev->list, &i7core_edac_list);
449 
450 	return i7core_dev;
451 }
452 
453 static void free_i7core_dev(struct i7core_dev *i7core_dev)
454 {
455 	list_del(&i7core_dev->list);
456 	kfree(i7core_dev->pdev);
457 	kfree(i7core_dev);
458 }
459 
460 /****************************************************************************
461 			Memory check routines
462  ****************************************************************************/
463 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
464 					  unsigned func)
465 {
466 	struct i7core_dev *i7core_dev = get_i7core_dev(socket);
467 	int i;
468 
469 	if (!i7core_dev)
470 		return NULL;
471 
472 	for (i = 0; i < i7core_dev->n_devs; i++) {
473 		if (!i7core_dev->pdev[i])
474 			continue;
475 
476 		if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
477 		    PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
478 			return i7core_dev->pdev[i];
479 		}
480 	}
481 
482 	return NULL;
483 }
484 
485 /**
486  * i7core_get_active_channels() - gets the number of channels and csrows
487  * @socket:	Quick Path Interconnect socket
488  * @channels:	Number of channels that will be returned
489  * @csrows:	Number of csrows found
490  *
491  * Since the EDAC core needs to know in advance the number of available
492  * channels and csrows in order to allocate memory for them, detection runs
493  * in two similar steps. The first step, implemented by this function,
494  * counts the csrows/channels present on one socket;
495  * this is used to properly size the mci components.
496  *
497  * It should be noted that none of the currently available datasheets explain
498  * or even mention how csrows are seen by the memory controller. So, we need
499  * to add a fake description for csrows:
500  * this driver attributes one DIMM to one csrow.
501  */
502 static int i7core_get_active_channels(const u8 socket, unsigned *channels,
503 				      unsigned *csrows)
504 {
505 	struct pci_dev *pdev = NULL;
506 	int i, j;
507 	u32 status, control;
508 
509 	*channels = 0;
510 	*csrows = 0;
511 
512 	pdev = get_pdev_slot_func(socket, 3, 0);
513 	if (!pdev) {
514 		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
515 			      socket);
516 		return -ENODEV;
517 	}
518 
519 	/* Device 3 function 0 reads */
520 	pci_read_config_dword(pdev, MC_STATUS, &status);
521 	pci_read_config_dword(pdev, MC_CONTROL, &control);
522 
523 	for (i = 0; i < NUM_CHANS; i++) {
524 		u32 dimm_dod[3];
525 		/* Check if the channel is active */
526 		if (!(control & (1 << (8 + i))))
527 			continue;
528 
529 		/* Check if the channel is disabled */
530 		if (status & (1 << i))
531 			continue;
532 
533 		pdev = get_pdev_slot_func(socket, i + 4, 1);
534 		if (!pdev) {
535 			i7core_printk(KERN_ERR, "Couldn't find socket %d "
536 						"fn %d.%d!!!\n",
537 						socket, i + 4, 1);
538 			return -ENODEV;
539 		}
540 		/* Devices 4-6 function 1 */
541 		pci_read_config_dword(pdev,
542 				MC_DOD_CH_DIMM0, &dimm_dod[0]);
543 		pci_read_config_dword(pdev,
544 				MC_DOD_CH_DIMM1, &dimm_dod[1]);
545 		pci_read_config_dword(pdev,
546 				MC_DOD_CH_DIMM2, &dimm_dod[2]);
547 
548 		(*channels)++;
549 
550 		for (j = 0; j < 3; j++) {
551 			if (!DIMM_PRESENT(dimm_dod[j]))
552 				continue;
553 			(*csrows)++;
554 		}
555 	}
556 
557 	debugf0("Number of active channels on socket %d: %d\n",
558 		socket, *channels);
559 
560 	return 0;
561 }
562 
563 static int get_dimm_config(const struct mem_ctl_info *mci)
564 {
565 	struct i7core_pvt *pvt = mci->pvt_info;
566 	struct csrow_info *csr;
567 	struct pci_dev *pdev;
568 	int i, j;
569 	int csrow = 0;
570 	unsigned long last_page = 0;
571 	enum edac_type mode;
572 	enum mem_type mtype;
573 
574 	/* Get data from the MC register, function 0 */
575 	pdev = pvt->pci_mcr[0];
576 	if (!pdev)
577 		return -ENODEV;
578 
579 	/* Device 3 function 0 reads */
580 	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
581 	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
582 	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
583 	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
584 
585 	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
586 		pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
587 		pvt->info.max_dod, pvt->info.ch_map);
588 
589 	if (ECC_ENABLED(pvt)) {
590 		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
591 		if (ECCx8(pvt))
592 			mode = EDAC_S8ECD8ED;
593 		else
594 			mode = EDAC_S4ECD4ED;
595 	} else {
596 		debugf0("ECC disabled\n");
597 		mode = EDAC_NONE;
598 	}
599 
600 	/* FIXME: need to handle the error codes */
601 	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
602 		"x%x x 0x%x\n",
603 		numdimms(pvt->info.max_dod),
604 		numrank(pvt->info.max_dod >> 2),
605 		numbank(pvt->info.max_dod >> 4),
606 		numrow(pvt->info.max_dod >> 6),
607 		numcol(pvt->info.max_dod >> 9));
608 
609 	for (i = 0; i < NUM_CHANS; i++) {
610 		u32 data, dimm_dod[3], value[8];
611 
612 		if (!pvt->pci_ch[i][0])
613 			continue;
614 
615 		if (!CH_ACTIVE(pvt, i)) {
616 			debugf0("Channel %i is not active\n", i);
617 			continue;
618 		}
619 		if (CH_DISABLED(pvt, i)) {
620 			debugf0("Channel %i is disabled\n", i);
621 			continue;
622 		}
623 
624 		/* Devices 4-6 function 0 */
625 		pci_read_config_dword(pvt->pci_ch[i][0],
626 				MC_CHANNEL_DIMM_INIT_PARAMS, &data);
627 
628 		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
629 						4 : 2;
630 
631 		if (data & REGISTERED_DIMM)
632 			mtype = MEM_RDDR3;
633 		else
634 			mtype = MEM_DDR3;
635 #if 0
636 		if (data & THREE_DIMMS_PRESENT)
637 			pvt->channel[i].dimms = 3;
638 		else if (data & SINGLE_QUAD_RANK_PRESENT)
639 			pvt->channel[i].dimms = 1;
640 		else
641 			pvt->channel[i].dimms = 2;
642 #endif
643 
644 		/* Devices 4-6 function 1 */
645 		pci_read_config_dword(pvt->pci_ch[i][1],
646 				MC_DOD_CH_DIMM0, &dimm_dod[0]);
647 		pci_read_config_dword(pvt->pci_ch[i][1],
648 				MC_DOD_CH_DIMM1, &dimm_dod[1]);
649 		pci_read_config_dword(pvt->pci_ch[i][1],
650 				MC_DOD_CH_DIMM2, &dimm_dod[2]);
651 
652 		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
653 			"%d ranks, %cDIMMs\n",
654 			i,
655 			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
656 			data,
657 			pvt->channel[i].ranks,
658 			(data & REGISTERED_DIMM) ? 'R' : 'U');
659 
660 		for (j = 0; j < 3; j++) {
661 			u32 banks, ranks, rows, cols;
662 			u32 size, npages;
663 
664 			if (!DIMM_PRESENT(dimm_dod[j]))
665 				continue;
666 
667 			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
668 			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
669 			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
670 			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
671 
672 			/* DDR3 has 8 I/O banks */
673 			size = (rows * cols * banks * ranks) >> (20 - 3);
674 
675 			pvt->channel[i].dimms++;
676 
677 			debugf0("\tdimm %d %d MB offset: %x, "
678 				"bank: %d, rank: %d, row: %#x, col: %#x\n",
679 				j, size,
680 				RANKOFFSET(dimm_dod[j]),
681 				banks, ranks, rows, cols);
682 
683 			npages = MiB_TO_PAGES(size);
684 
685 			csr = &mci->csrows[csrow];
686 			csr->first_page = last_page + 1;
687 			last_page += npages;
688 			csr->last_page = last_page;
689 			csr->nr_pages = npages;
690 
691 			csr->page_mask = 0;
692 			csr->grain = 8;
693 			csr->csrow_idx = csrow;
694 			csr->nr_channels = 1;
695 
696 			csr->channels[0].chan_idx = i;
697 			csr->channels[0].ce_count = 0;
698 
699 			pvt->csrow_map[i][j] = csrow;
700 
701 			switch (banks) {
702 			case 4:
703 				csr->dtype = DEV_X4;
704 				break;
705 			case 8:
706 				csr->dtype = DEV_X8;
707 				break;
708 			case 16:
709 				csr->dtype = DEV_X16;
710 				break;
711 			default:
712 				csr->dtype = DEV_UNKNOWN;
713 			}
714 
715 			csr->edac_mode = mode;
716 			csr->mtype = mtype;
717 
718 			csrow++;
719 		}
720 
721 		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
722 		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
723 		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
724 		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
725 		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
726 		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
727 		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
728 		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
729 		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
730 		for (j = 0; j < 8; j++)
731 			debugf1("\t\t%#x\t%#x\t%#x\n",
732 				(value[j] >> 27) & 0x1,
733 				(value[j] >> 24) & 0x7,
734 				(value[j] & ((1 << 24) - 1)));
735 	}
736 
737 	return 0;
738 }
739 
740 /****************************************************************************
741 			Error insertion routines
742  ****************************************************************************/
743 
744 /* The i7core has independent error injection features per channel.
745    However, to keep the code simpler, we don't allow enabling error injection
746    on more than one channel at a time.
747    Also, since a change to an inject parameter is only applied at enable time,
748    we disable error injection on all writes to the sysfs nodes that control
749    the error code injection.
750  */
751 static int disable_inject(const struct mem_ctl_info *mci)
752 {
753 	struct i7core_pvt *pvt = mci->pvt_info;
754 
755 	pvt->inject.enable = 0;
756 
757 	if (!pvt->pci_ch[pvt->inject.channel][0])
758 		return -ENODEV;
759 
760 	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
761 				MC_CHANNEL_ERROR_INJECT, 0);
762 
763 	return 0;
764 }
765 
766 /*
767  * i7core inject.section
768  *
769  *	accept and store error injection inject.section value
770  *	bit 0 - refers to the lower 32-byte half cacheline
771  *	bit 1 - refers to the upper 32-byte half cacheline
772  */
773 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
774 					   const char *data, size_t count)
775 {
776 	struct i7core_pvt *pvt = mci->pvt_info;
777 	unsigned long value;
778 	int rc;
779 
780 	if (pvt->inject.enable)
781 		disable_inject(mci);
782 
783 	rc = strict_strtoul(data, 10, &value);
784 	if ((rc < 0) || (value > 3))
785 		return -EIO;
786 
787 	pvt->inject.section = (u32) value;
788 	return count;
789 }
790 
791 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
792 					      char *data)
793 {
794 	struct i7core_pvt *pvt = mci->pvt_info;
795 	return sprintf(data, "0x%08x\n", pvt->inject.section);
796 }
797 
798 /*
799  * i7core inject.type
800  *
801  *	accept and store error injection inject.type value
802  *	bit 0 - repeat enable - Enable error repetition
803  *	bit 1 - inject ECC error
804  *	bit 2 - inject parity error
805  */
806 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
807 					const char *data, size_t count)
808 {
809 	struct i7core_pvt *pvt = mci->pvt_info;
810 	unsigned long value;
811 	int rc;
812 
813 	if (pvt->inject.enable)
814 		disable_inject(mci);
815 
816 	rc = strict_strtoul(data, 10, &value);
817 	if ((rc < 0) || (value > 7))
818 		return -EIO;
819 
820 	pvt->inject.type = (u32) value;
821 	return count;
822 }
823 
824 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
825 					      char *data)
826 {
827 	struct i7core_pvt *pvt = mci->pvt_info;
828 	return sprintf(data, "0x%08x\n", pvt->inject.type);
829 }
830 
831 /*
832  * i7core inject.eccmask
833  *
834  * The type of error (UE/CE) will depend on the inject.eccmask value:
835  *   Any bits set to a 1 will flip the corresponding ECC bit
836  *   Correctable errors can be injected by flipping 1 bit or the bits within
837  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
838  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
839  *   uncorrectable error to be injected.
840  */
841 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
842 					const char *data, size_t count)
843 {
844 	struct i7core_pvt *pvt = mci->pvt_info;
845 	unsigned long value;
846 	int rc;
847 
848 	if (pvt->inject.enable)
849 		disable_inject(mci);
850 
851 	rc = strict_strtoul(data, 10, &value);
852 	if (rc < 0)
853 		return -EIO;
854 
855 	pvt->inject.eccmask = (u32) value;
856 	return count;
857 }
858 
859 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
860 					      char *data)
861 {
862 	struct i7core_pvt *pvt = mci->pvt_info;
863 	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
864 }
865 
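/*
 * Illustrative sketch (not part of the driver): eccmask values and the
 * error type they should produce, per the comment above. The specific
 * masks here are hypothetical examples.
 */
#if 0
static void example_eccmask(void)
{
	u32 ce_onebit = 0x00000001;	/* one flipped bit: correctable */
	u32 ce_sympair = 0x00000101;	/* bits within symbol pair 15:0: correctable */
	u32 ue_twopairs = 0x00010001;	/* bits in two symbol pairs: uncorrectable */
}
#endif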
866 /*
867  * i7core_addrmatch
868  *
869  * Sysfs nodes that set the address match criteria used by error injection.
870  * Each node (channel, dimm, rank, bank, page, col) restricts injection to
871  * addresses matching the given value; writing "any" (stored as -1) makes
872  * the MCU ignore that field when matching.
875  */
876 
877 #define DECLARE_ADDR_MATCH(param, limit)			\
878 static ssize_t i7core_inject_store_##param(			\
879 		struct mem_ctl_info *mci,			\
880 		const char *data, size_t count)			\
881 {								\
882 	struct i7core_pvt *pvt;					\
883 	long value;						\
884 	int rc;							\
885 								\
886 	debugf1("%s()\n", __func__);				\
887 	pvt = mci->pvt_info;					\
888 								\
889 	if (pvt->inject.enable)					\
890 		disable_inject(mci);				\
891 								\
892 	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
893 		value = -1;					\
894 	else {							\
895 		rc = strict_strtoul(data, 10, &value);		\
896 		if ((rc < 0) || (value >= limit))		\
897 			return -EIO;				\
898 	}							\
899 								\
900 	pvt->inject.param = value;				\
901 								\
902 	return count;						\
903 }								\
904 								\
905 static ssize_t i7core_inject_show_##param(			\
906 		struct mem_ctl_info *mci,			\
907 		char *data)					\
908 {								\
909 	struct i7core_pvt *pvt;					\
910 								\
911 	pvt = mci->pvt_info;					\
912 	debugf1("%s() pvt=%p\n", __func__, pvt);		\
913 	if (pvt->inject.param < 0)				\
914 		return sprintf(data, "any\n");			\
915 	else							\
916 		return sprintf(data, "%d\n", pvt->inject.param);\
917 }
918 
919 #define ATTR_ADDR_MATCH(param)					\
920 	{							\
921 		.attr = {					\
922 			.name = #param,				\
923 			.mode = (S_IRUGO | S_IWUSR)		\
924 		},						\
925 		.show  = i7core_inject_show_##param,		\
926 		.store = i7core_inject_store_##param,		\
927 	}
928 
929 DECLARE_ADDR_MATCH(channel, 3);
930 DECLARE_ADDR_MATCH(dimm, 3);
931 DECLARE_ADDR_MATCH(rank, 4);
932 DECLARE_ADDR_MATCH(bank, 32);
933 DECLARE_ADDR_MATCH(page, 0x10000);
934 DECLARE_ADDR_MATCH(col, 0x4000);
935 
936 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
937 {
938 	u32 read;
939 	int count;
940 
941 	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
942 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
943 		where, val);
944 
945 	for (count = 0; count < 10; count++) {
946 		if (count)
947 			msleep(100);
948 		pci_write_config_dword(dev, where, val);
949 		pci_read_config_dword(dev, where, &read);
950 
951 		if (read == val)
952 			return 0;
953 	}
954 
955 	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
956 		"write=%08x. Read=%08x\n",
957 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
958 		where, val, read);
959 
960 	return -EINVAL;
961 }
962 
963 /*
964  * This routine prepares the Memory Controller for error injection.
965  * The error will be injected when some process tries to write to the
966  * memory that matches the given criteria.
967  * The criteria can be set in terms of a mask where dimm, rank, bank, page
968  * and col can be specified.
969  * A -1 value for any of the mask items makes the MCU ignore that matching
970  * criterion for error injection.
971  *
972  * It should be noted that the error only happens after a write operation
973  * to memory that matches the condition. If REPEAT_EN is not enabled in the
974  * inject mask, just one error is produced. Otherwise, errors repeat until
975  * the inject mask is cleared.
976  *
977  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
978  *    is reliable enough to check if the MC is using the
979  *    three channels. However, this is not clear at the datasheet.
980  */
981 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
982 				       const char *data, size_t count)
983 {
984 	struct i7core_pvt *pvt = mci->pvt_info;
985 	u32 injectmask;
986 	u64 mask = 0;
987 	int  rc;
988 	long enable;
989 
990 	if (!pvt->pci_ch[pvt->inject.channel][0])
991 		return 0;
992 
993 	rc = strict_strtoul(data, 10, &enable);
994 	if (rc < 0)
995 		return 0;
996 
997 	if (enable) {
998 		pvt->inject.enable = 1;
999 	} else {
1000 		disable_inject(mci);
1001 		return count;
1002 	}
1003 
1004 	/* Sets pvt->inject.dimm mask */
1005 	if (pvt->inject.dimm < 0)
1006 		mask |= 1LL << 41;
1007 	else {
1008 		if (pvt->channel[pvt->inject.channel].dimms > 2)
1009 			mask |= (pvt->inject.dimm & 0x3LL) << 35;
1010 		else
1011 			mask |= (pvt->inject.dimm & 0x1LL) << 36;
1012 	}
1013 
1014 	/* Sets pvt->inject.rank mask */
1015 	if (pvt->inject.rank < 0)
1016 		mask |= 1LL << 40;
1017 	else {
1018 		if (pvt->channel[pvt->inject.channel].dimms > 2)
1019 			mask |= (pvt->inject.rank & 0x1LL) << 34;
1020 		else
1021 			mask |= (pvt->inject.rank & 0x3LL) << 34;
1022 	}
1023 
1024 	/* Sets pvt->inject.bank mask */
1025 	if (pvt->inject.bank < 0)
1026 		mask |= 1LL << 39;
1027 	else
1028 		mask |= (pvt->inject.bank & 0x1fLL) << 30;
1029 
1030 	/* Sets pvt->inject.page mask */
1031 	if (pvt->inject.page < 0)
1032 		mask |= 1LL << 38;
1033 	else
1034 		mask |= (pvt->inject.page & 0xffff) << 14;
1035 
1036 	/* Sets pvt->inject.column mask */
1037 	if (pvt->inject.col < 0)
1038 		mask |= 1LL << 37;
1039 	else
1040 		mask |= (pvt->inject.col & 0x3fff);
1041 
1042 	/*
1043 	 * bit    0: REPEAT_EN
1044 	 * bits 1-2: MASK_HALF_CACHELINE
1045 	 * bit    3: INJECT_ECC
1046 	 * bit    4: INJECT_ADDR_PARITY
1047 	 */
1048 
1049 	injectmask = (pvt->inject.type & 1) |
1050 		     (pvt->inject.section & 0x3) << 1 |
1051 		     (pvt->inject.type & 0x6) << (3 - 1);
1052 
1053 	/* Unlock writes to registers - this register is write only */
1054 	pci_write_config_dword(pvt->pci_noncore,
1055 			       MC_CFG_CONTROL, 0x2);
1056 
1057 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1058 			       MC_CHANNEL_ADDR_MATCH, mask);
1059 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1060 			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1061 
1062 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1063 			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1064 
1065 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1066 			       MC_CHANNEL_ERROR_INJECT, injectmask);
1067 
1068 	/*
1069 	 * This is something undocumented, based on my tests: without writing 8
1070 	 * to this register, errors aren't injected. It is not clear why.
1072 	 */
1073 	pci_write_config_dword(pvt->pci_noncore,
1074 			       MC_CFG_CONTROL, 8);
1075 
1076 	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1077 		" inject 0x%08x\n",
1078 		mask, pvt->inject.eccmask, injectmask);
1079 
1080 
1081 	return count;
1082 }
1083 
1084 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1085 					char *data)
1086 {
1087 	struct i7core_pvt *pvt = mci->pvt_info;
1088 	u32 injectmask;
1089 
1090 	if (!pvt->pci_ch[pvt->inject.channel][0])
1091 		return 0;
1092 
1093 	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1094 			       MC_CHANNEL_ERROR_INJECT, &injectmask);
1095 
1096 	debugf0("Inject error read: 0x%08x\n", injectmask);
1097 
1098 	if (injectmask & 0x0c)
1099 		pvt->inject.enable = 1;
1100 
1101 	return sprintf(data, "%d\n", pvt->inject.enable);
1102 }
1103 
1104 #define DECLARE_COUNTER(param)					\
1105 static ssize_t i7core_show_counter_##param(			\
1106 		struct mem_ctl_info *mci,			\
1107 		char *data)					\
1108 {								\
1109 	struct i7core_pvt *pvt = mci->pvt_info;			\
1110 								\
1111 	debugf1("%s() \n", __func__);				\
1112 	if (!pvt->ce_count_available || (pvt->is_registered))	\
1113 		return sprintf(data, "data unavailable\n");	\
1114 	return sprintf(data, "%lu\n",				\
1115 			pvt->udimm_ce_count[param]);		\
1116 }
1117 
1118 #define ATTR_COUNTER(param)					\
1119 	{							\
1120 		.attr = {					\
1121 			.name = __stringify(udimm##param),	\
1122 			.mode = (S_IRUGO | S_IWUSR)		\
1123 		},						\
1124 		.show  = i7core_show_counter_##param		\
1125 	}
1126 
1127 DECLARE_COUNTER(0);
1128 DECLARE_COUNTER(1);
1129 DECLARE_COUNTER(2);
1130 
1131 /*
1132  * Sysfs struct
1133  */
1134 
1135 static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1136 	ATTR_ADDR_MATCH(channel),
1137 	ATTR_ADDR_MATCH(dimm),
1138 	ATTR_ADDR_MATCH(rank),
1139 	ATTR_ADDR_MATCH(bank),
1140 	ATTR_ADDR_MATCH(page),
1141 	ATTR_ADDR_MATCH(col),
1142 	{ } /* End of list */
1143 };
1144 
1145 static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
1146 	.name  = "inject_addrmatch",
1147 	.mcidev_attr = i7core_addrmatch_attrs,
1148 };
1149 
1150 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1151 	ATTR_COUNTER(0),
1152 	ATTR_COUNTER(1),
1153 	ATTR_COUNTER(2),
1154 	{ .attr = { .name = NULL } }
1155 };
1156 
1157 static const struct mcidev_sysfs_group i7core_udimm_counters = {
1158 	.name  = "all_channel_counts",
1159 	.mcidev_attr = i7core_udimm_counters_attrs,
1160 };
1161 
1162 static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
1163 	{
1164 		.attr = {
1165 			.name = "inject_section",
1166 			.mode = (S_IRUGO | S_IWUSR)
1167 		},
1168 		.show  = i7core_inject_section_show,
1169 		.store = i7core_inject_section_store,
1170 	}, {
1171 		.attr = {
1172 			.name = "inject_type",
1173 			.mode = (S_IRUGO | S_IWUSR)
1174 		},
1175 		.show  = i7core_inject_type_show,
1176 		.store = i7core_inject_type_store,
1177 	}, {
1178 		.attr = {
1179 			.name = "inject_eccmask",
1180 			.mode = (S_IRUGO | S_IWUSR)
1181 		},
1182 		.show  = i7core_inject_eccmask_show,
1183 		.store = i7core_inject_eccmask_store,
1184 	}, {
1185 		.grp = &i7core_inject_addrmatch,
1186 	}, {
1187 		.attr = {
1188 			.name = "inject_enable",
1189 			.mode = (S_IRUGO | S_IWUSR)
1190 		},
1191 		.show  = i7core_inject_enable_show,
1192 		.store = i7core_inject_enable_store,
1193 	},
1194 	{ }	/* End of list */
1195 };
1196 
1197 static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
1198 	{
1199 		.attr = {
1200 			.name = "inject_section",
1201 			.mode = (S_IRUGO | S_IWUSR)
1202 		},
1203 		.show  = i7core_inject_section_show,
1204 		.store = i7core_inject_section_store,
1205 	}, {
1206 		.attr = {
1207 			.name = "inject_type",
1208 			.mode = (S_IRUGO | S_IWUSR)
1209 		},
1210 		.show  = i7core_inject_type_show,
1211 		.store = i7core_inject_type_store,
1212 	}, {
1213 		.attr = {
1214 			.name = "inject_eccmask",
1215 			.mode = (S_IRUGO | S_IWUSR)
1216 		},
1217 		.show  = i7core_inject_eccmask_show,
1218 		.store = i7core_inject_eccmask_store,
1219 	}, {
1220 		.grp = &i7core_inject_addrmatch,
1221 	}, {
1222 		.attr = {
1223 			.name = "inject_enable",
1224 			.mode = (S_IRUGO | S_IWUSR)
1225 		},
1226 		.show  = i7core_inject_enable_show,
1227 		.store = i7core_inject_enable_store,
1228 	}, {
1229 		.grp = &i7core_udimm_counters,
1230 	},
1231 	{ }	/* End of list */
1232 };
1233 
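/*
 * Illustrative usage sketch (not part of the driver): injecting one
 * correctable ECC error through the sysfs nodes declared above. The mc0
 * path is an assumption that depends on the EDAC sysfs layout of the
 * running kernel.
 *
 *	# cd /sys/devices/system/edac/mc/mc0
 *	# echo 3 > inject_section		# both 32-byte half cachelines
 *	# echo 2 > inject_type			# ECC error, no repeat
 *	# echo 1 > inject_eccmask		# flip one ECC bit (correctable)
 *	# echo 0 > inject_addrmatch/channel	# match channel 0 only
 *	# echo any > inject_addrmatch/dimm	# any dimm/rank/bank/page/col
 *	# echo 1 > inject_enable
 */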
1234 /****************************************************************************
1235 	Device initialization routines: put/get, init/exit
1236  ****************************************************************************/
1237 
1238 /*
1239  *	i7core_put_all_devices	'put' all the devices that we have
1240  *				reserved via 'get'
1241  */
1242 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1243 {
1244 	int i;
1245 
1246 	debugf0(__FILE__ ": %s()\n", __func__);
1247 	for (i = 0; i < i7core_dev->n_devs; i++) {
1248 		struct pci_dev *pdev = i7core_dev->pdev[i];
1249 		if (!pdev)
1250 			continue;
1251 		debugf0("Removing dev %02x:%02x.%d\n",
1252 			pdev->bus->number,
1253 			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1254 		pci_dev_put(pdev);
1255 	}
1256 }
1257 
1258 static void i7core_put_all_devices(void)
1259 {
1260 	struct i7core_dev *i7core_dev, *tmp;
1261 
1262 	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1263 		i7core_put_devices(i7core_dev);
1264 		free_i7core_dev(i7core_dev);
1265 	}
1266 }
1267 
1268 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1269 {
1270 	struct pci_dev *pdev = NULL;
1271 	int i;
1272 
1273 	/*
1274 	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
1275 	 * aren't announced by ACPI, so we need to use a legacy scan probe
1276 	 * to detect them.
1277 	 */
1278 	while (table && table->descr) {
1279 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1280 		if (unlikely(!pdev)) {
1281 			for (i = 0; i < MAX_SOCKET_BUSES; i++)
1282 				pcibios_scan_specific_bus(255-i);
1283 		}
1284 		pci_dev_put(pdev);
1285 		table++;
1286 	}
1287 }
1288 
1289 static unsigned i7core_pci_lastbus(void)
1290 {
1291 	int last_bus = 0, bus;
1292 	struct pci_bus *b = NULL;
1293 
1294 	while ((b = pci_find_next_bus(b)) != NULL) {
1295 		bus = b->number;
1296 		debugf0("Found bus %d\n", bus);
1297 		if (bus > last_bus)
1298 			last_bus = bus;
1299 	}
1300 
1301 	debugf0("Last bus %d\n", last_bus);
1302 
1303 	return last_bus;
1304 }
1305 
1306 /*
1307  *	i7core_get_onedevice	Find and perform 'get' operation on one of
1308  *			the MCH's devices/functions we want to reference
1309  *			for this driver, as listed in pci_dev_table.
1311  */
1312 static int i7core_get_onedevice(struct pci_dev **prev,
1313 				const struct pci_id_table *table,
1314 				const unsigned devno,
1315 				const unsigned last_bus)
1316 {
1317 	struct i7core_dev *i7core_dev;
1318 	const struct pci_id_descr *dev_descr = &table->descr[devno];
1319 
1320 	struct pci_dev *pdev = NULL;
1321 	u8 bus = 0;
1322 	u8 socket = 0;
1323 
1324 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1325 			      dev_descr->dev_id, *prev);
1326 
1327 	if (!pdev) {
1328 		if (*prev) {
1329 			*prev = pdev;
1330 			return 0;
1331 		}
1332 
1333 		if (dev_descr->optional)
1334 			return 0;
1335 
1336 		if (devno == 0)
1337 			return -ENODEV;
1338 
1339 		i7core_printk(KERN_INFO,
1340 			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1341 			dev_descr->dev, dev_descr->func,
1342 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1343 
1344 		/* End of list, leave */
1345 		return -ENODEV;
1346 	}
1347 	bus = pdev->bus->number;
1348 
1349 	socket = last_bus - bus;
1350 
1351 	i7core_dev = get_i7core_dev(socket);
1352 	if (!i7core_dev) {
1353 		i7core_dev = alloc_i7core_dev(socket, table);
1354 		if (!i7core_dev) {
1355 			pci_dev_put(pdev);
1356 			return -ENOMEM;
1357 		}
1358 	}
1359 
1360 	if (i7core_dev->pdev[devno]) {
1361 		i7core_printk(KERN_ERR,
1362 			"Duplicated device for "
1363 			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
1364 			bus, dev_descr->dev, dev_descr->func,
1365 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1366 		pci_dev_put(pdev);
1367 		return -ENODEV;
1368 	}
1369 
1370 	i7core_dev->pdev[devno] = pdev;
1371 
1372 	/* Sanity check */
1373 	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1374 			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1375 		i7core_printk(KERN_ERR,
1376 			"Device PCI ID %04x:%04x "
1377 			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1378 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1379 			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1380 			bus, dev_descr->dev, dev_descr->func);
1381 		return -ENODEV;
1382 	}
1383 
1384 	/* Be sure that the device is enabled */
1385 	if (unlikely(pci_enable_device(pdev) < 0)) {
1386 		i7core_printk(KERN_ERR,
1387 			"Couldn't enable "
1388 			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
1389 			bus, dev_descr->dev, dev_descr->func,
1390 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1391 		return -ENODEV;
1392 	}
1393 
1394 	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1395 		socket, bus, dev_descr->dev,
1396 		dev_descr->func,
1397 		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1398 
1399 	/*
1400 	 * As stated on drivers/pci/search.c, the reference count for
1401 	 * @from is always decremented if it is not %NULL. So, as we need
1402 	 * to get all devices up to null, we need to do a get for the device
1403 	 */
1404 	pci_dev_get(pdev);
1405 
1406 	*prev = pdev;
1407 
1408 	return 0;
1409 }
1410 
1411 static int i7core_get_all_devices(void)
1412 {
1413 	int i, rc, last_bus;
1414 	struct pci_dev *pdev = NULL;
1415 	const struct pci_id_table *table = pci_dev_table;
1416 
1417 	last_bus = i7core_pci_lastbus();
1418 
1419 	while (table && table->descr) {
1420 		for (i = 0; i < table->n_devs; i++) {
1421 			pdev = NULL;
1422 			do {
1423 				rc = i7core_get_onedevice(&pdev, table, i,
1424 							  last_bus);
1425 				if (rc < 0) {
1426 					if (i == 0) {
1427 						i = table->n_devs;
1428 						break;
1429 					}
1430 					i7core_put_all_devices();
1431 					return -ENODEV;
1432 				}
1433 			} while (pdev);
1434 		}
1435 		table++;
1436 	}
1437 
1438 	return 0;
1439 }
1440 
1441 static int mci_bind_devs(struct mem_ctl_info *mci,
1442 			 struct i7core_dev *i7core_dev)
1443 {
1444 	struct i7core_pvt *pvt = mci->pvt_info;
1445 	struct pci_dev *pdev;
1446 	int i, func, slot;
1447 
1448 	pvt->is_registered = 0;
1449 	for (i = 0; i < i7core_dev->n_devs; i++) {
1450 		pdev = i7core_dev->pdev[i];
1451 		if (!pdev)
1452 			continue;
1453 
1454 		func = PCI_FUNC(pdev->devfn);
1455 		slot = PCI_SLOT(pdev->devfn);
1456 		if (slot == 3) {
1457 			if (unlikely(func > MAX_MCR_FUNC))
1458 				goto error;
1459 			pvt->pci_mcr[func] = pdev;
1460 		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1461 			if (unlikely(func > MAX_CHAN_FUNC))
1462 				goto error;
1463 			pvt->pci_ch[slot - 4][func] = pdev;
1464 		} else if (!slot && !func)
1465 			pvt->pci_noncore = pdev;
1466 		else
1467 			goto error;
1468 
1469 		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1470 			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1471 			pdev, i7core_dev->socket);
1472 
1473 		if (PCI_SLOT(pdev->devfn) == 3 &&
1474 			PCI_FUNC(pdev->devfn) == 2)
1475 			pvt->is_registered = 1;
1476 	}
1477 
1478 	return 0;
1479 
1480 error:
1481 	i7core_printk(KERN_ERR, "Device %d, function %d "
1482 		      "is out of the expected range\n",
1483 		      slot, func);
1484 	return -EINVAL;
1485 }
1486 
1487 /****************************************************************************
1488 			Error check routines
1489  ****************************************************************************/
1490 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1491 				      const int chan,
1492 				      const int dimm,
1493 				      const int add)
1494 {
1495 	char *msg;
1496 	struct i7core_pvt *pvt = mci->pvt_info;
1497 	int row = pvt->csrow_map[chan][dimm], i;
1498 
1499 	for (i = 0; i < add; i++) {
1500 		msg = kasprintf(GFP_KERNEL, "Corrected error "
1501 				"(Socket=%d channel=%d dimm=%d)",
1502 				pvt->i7core_dev->socket, chan, dimm);
1503 
1504 		edac_mc_handle_fbd_ce(mci, row, 0, msg);
1505 		kfree(msg);
1506 	}
1507 }
1508 
1509 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1510 					 const int chan,
1511 					 const int new0,
1512 					 const int new1,
1513 					 const int new2)
1514 {
1515 	struct i7core_pvt *pvt = mci->pvt_info;
1516 	int add0 = 0, add1 = 0, add2 = 0;
1517 	/* Updates CE counters if it is not the first time here */
1518 	if (pvt->ce_count_available) {
1519 		/* Updates CE counters */
1520 
1521 		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1522 		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1523 		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1524 
1525 		if (add2 < 0)
1526 			add2 += 0x7fff;
1527 		pvt->rdimm_ce_count[chan][2] += add2;
1528 
1529 		if (add1 < 0)
1530 			add1 += 0x7fff;
1531 		pvt->rdimm_ce_count[chan][1] += add1;
1532 
1533 		if (add0 < 0)
1534 			add0 += 0x7fff;
1535 		pvt->rdimm_ce_count[chan][0] += add0;
1536 	} else
1537 		pvt->ce_count_available = 1;
1538 
1539 	/* Store the new values */
1540 	pvt->rdimm_last_ce_count[chan][2] = new2;
1541 	pvt->rdimm_last_ce_count[chan][1] = new1;
1542 	pvt->rdimm_last_ce_count[chan][0] = new0;
1543 
1544 	/* Update the EDAC core counters */
1545 	if (add0 != 0)
1546 		i7core_rdimm_update_csrow(mci, chan, 0, add0);
1547 	if (add1 != 0)
1548 		i7core_rdimm_update_csrow(mci, chan, 1, add1);
1549 	if (add2 != 0)
1550 		i7core_rdimm_update_csrow(mci, chan, 2, add2);
1551 
1552 }
1553 
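/*
 * Worked example (hypothetical values) for the wrap handling above: the
 * hardware CE counters are 15 bits wide, so a new reading smaller than the
 * previous one indicates a wrap. With rdimm_last_ce_count = 0x7ff0 and a
 * new reading of 0x0010, add = 0x0010 - 0x7ff0 = -0x7fe0; re-adding 0x7fff
 * gives 0x1f, i.e. 31 corrected errors since the last poll (assuming the
 * counter wraps at its 15-bit limit).
 */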
1554 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1555 {
1556 	struct i7core_pvt *pvt = mci->pvt_info;
1557 	u32 rcv[3][2];
1558 	int i, new0, new1, new2;
1559 
1560 	/* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
1561 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1562 								&rcv[0][0]);
1563 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1564 								&rcv[0][1]);
1565 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1566 								&rcv[1][0]);
1567 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1568 								&rcv[1][1]);
1569 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1570 								&rcv[2][0]);
1571 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1572 								&rcv[2][1]);
1573 	for (i = 0 ; i < 3; i++) {
1574 		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1575 			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1576 		/* If the channel has 3 dimms */
1577 		if (pvt->channel[i].dimms > 2) {
1578 			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1579 			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1580 			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1581 		} else {
1582 			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1583 					DIMM_BOT_COR_ERR(rcv[i][0]);
1584 			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1585 					DIMM_BOT_COR_ERR(rcv[i][1]);
1586 			new2 = 0;
1587 		}
1588 
1589 		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1590 	}
1591 }
1592 
1593 /* This function is based on the device 3 function 4 registers as described on:
1594  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1595  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1596  * also available at:
1597  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1598  */
1599 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1600 {
1601 	struct i7core_pvt *pvt = mci->pvt_info;
1602 	u32 rcv1, rcv0;
1603 	int new0, new1, new2;
1604 
1605 	if (!pvt->pci_mcr[4]) {
1606 		debugf0("%s MCR registers not found\n", __func__);
1607 		return;
1608 	}
1609 
1610 	/* Corrected test errors */
1611 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1612 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1613 
1614 	/* Store the new values */
1615 	new2 = DIMM2_COR_ERR(rcv1);
1616 	new1 = DIMM1_COR_ERR(rcv0);
1617 	new0 = DIMM0_COR_ERR(rcv0);
1618 
1619 	/* Updates CE counters if it is not the first time here */
1620 	if (pvt->ce_count_available) {
1621 		/* Updates CE counters */
1622 		int add0, add1, add2;
1623 
1624 		add2 = new2 - pvt->udimm_last_ce_count[2];
1625 		add1 = new1 - pvt->udimm_last_ce_count[1];
1626 		add0 = new0 - pvt->udimm_last_ce_count[0];
1627 
1628 		if (add2 < 0)
1629 			add2 += 0x7fff;
1630 		pvt->udimm_ce_count[2] += add2;
1631 
1632 		if (add1 < 0)
1633 			add1 += 0x7fff;
1634 		pvt->udimm_ce_count[1] += add1;
1635 
1636 		if (add0 < 0)
1637 			add0 += 0x7fff;
1638 		pvt->udimm_ce_count[0] += add0;
1639 
1640 		if (add0 | add1 | add2)
1641 			i7core_printk(KERN_ERR, "New Corrected error(s): "
1642 				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1643 				      add0, add1, add2);
1644 	} else
1645 		pvt->ce_count_available = 1;
1646 
1647 	/* Store the new values */
1648 	pvt->udimm_last_ce_count[2] = new2;
1649 	pvt->udimm_last_ce_count[1] = new1;
1650 	pvt->udimm_last_ce_count[0] = new0;
1651 }
1652 
1653 /*
1654  * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and IA-32
1655  * Architectures Software Developer’s Manual Volume 3B,
1656  * Nehalem is defined as family 0x06, model 0x1a.
1657  *
1658  * The MCA registers used here are the following ones:
1659  *     struct mce field	MCA Register
1660  *     m->status	MSR_IA32_MC8_STATUS
1661  *     m->addr		MSR_IA32_MC8_ADDR
1662  *     m->misc		MSR_IA32_MC8_MISC
1663  * In the case of Nehalem, the error information is encoded in the .status
1664  * and .misc fields.
1665  */
1666 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1667 				    const struct mce *m)
1668 {
1669 	struct i7core_pvt *pvt = mci->pvt_info;
1670 	char *type, *optype, *err, *msg;
1671 	unsigned long error = m->status & 0x1ff0000l;
1672 	u32 optypenum = (m->status >> 4) & 0x07;
1673 	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1674 	u32 dimm = (m->misc >> 16) & 0x3;
1675 	u32 channel = (m->misc >> 18) & 0x3;
1676 	u32 syndrome = m->misc >> 32;
1677 	u32 errnum = find_first_bit(&error, 32);
1678 	int csrow;
1679 
1680 	if (m->mcgstatus & 1)
1681 		type = "FATAL";
1682 	else
1683 		type = "NON_FATAL";
1684 
1685 	switch (optypenum) {
1686 	case 0:
1687 		optype = "generic undef request";
1688 		break;
1689 	case 1:
1690 		optype = "read error";
1691 		break;
1692 	case 2:
1693 		optype = "write error";
1694 		break;
1695 	case 3:
1696 		optype = "addr/cmd error";
1697 		break;
1698 	case 4:
1699 		optype = "scrubbing error";
1700 		break;
1701 	default:
1702 		optype = "reserved";
1703 		break;
1704 	}
1705 
1706 	switch (errnum) {
1707 	case 16:
1708 		err = "read ECC error";
1709 		break;
1710 	case 17:
1711 		err = "RAS ECC error";
1712 		break;
1713 	case 18:
1714 		err = "write parity error";
1715 		break;
1716 	case 19:
1717 		err = "redundancy loss";
1718 		break;
1719 	case 20:
1720 		err = "reserved";
1721 		break;
1722 	case 21:
1723 		err = "memory range error";
1724 		break;
1725 	case 22:
1726 		err = "RTID out of range";
1727 		break;
1728 	case 23:
1729 		err = "address parity error";
1730 		break;
1731 	case 24:
1732 		err = "byte enable parity error";
1733 		break;
1734 	default:
1735 		err = "unknown";
1736 	}
1737 
1738 	/* FIXME: should convert addr into bank and rank information */
1739 	msg = kasprintf(GFP_ATOMIC,
1740 		"%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1741 		"syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1742 		type, (long long) m->addr, m->cpu, dimm, channel,
1743 		syndrome, core_err_cnt, (long long)m->status,
1744 		(long long)m->misc, optype, err);
1745 
1746 	debugf0("%s", msg);
1747 
1748 	csrow = pvt->csrow_map[channel][dimm];
1749 
1750 	/* Call the helper to output message */
1751 	if (m->mcgstatus & 1)
1752 		edac_mc_handle_fbd_ue(mci, csrow, 0,
1753 				0 /* FIXME: should be channel here */, msg);
1754 	else if (!pvt->is_registered)
1755 		edac_mc_handle_fbd_ce(mci, csrow,
1756 				0 /* FIXME: should be channel here */, msg);
1757 
1758 	kfree(msg);
1759 }
1760 
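/*
 * Worked example (hypothetical MCA values) for the decode above: with
 * m->misc = 0x00090000, dimm = (0x00090000 >> 16) & 0x3 == 1 and
 * channel = (0x00090000 >> 18) & 0x3 == 2, so the error is attributed to
 * csrow_map[2][1].
 */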
1761 /*
1762  *	i7core_check_error	Retrieve and process errors reported by the
1763  *				hardware. Called by the Core module.
1764  */
1765 static void i7core_check_error(struct mem_ctl_info *mci)
1766 {
1767 	struct i7core_pvt *pvt = mci->pvt_info;
1768 	int i;
1769 	unsigned count = 0;
1770 	struct mce *m;
1771 
1772 	/*
1773 	 * MCE first step: Copy all mce errors into a temporary buffer
1774 	 * We use double buffering here, to reduce the risk of
1775 	 * losing an error.
1776 	 */
1777 	smp_rmb();
1778 	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1779 		% MCE_LOG_LEN;
1780 	if (!count)
1781 		goto check_ce_error;
1782 
1783 	m = pvt->mce_outentry;
1784 	if (pvt->mce_in + count > MCE_LOG_LEN) {
1785 		unsigned l = MCE_LOG_LEN - pvt->mce_in;
1786 
1787 		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1788 		smp_wmb();
1789 		pvt->mce_in = 0;
1790 		count -= l;
1791 		m += l;
1792 	}
1793 	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1794 	smp_wmb();
1795 	pvt->mce_in += count;
1796 
1797 	smp_rmb();
1798 	if (pvt->mce_overrun) {
1799 		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1800 			      pvt->mce_overrun);
1801 		smp_wmb();
1802 		pvt->mce_overrun = 0;
1803 	}
1804 
1805 	/*
1806 	 * MCE second step: parse errors and display
1807 	 */
1808 	for (i = 0; i < count; i++)
1809 		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1810 
1811 	/*
1812 	 * Now, let's increment CE error counts
1813 	 */
1814 check_ce_error:
1815 	if (!pvt->is_registered)
1816 		i7core_udimm_check_mc_ecc_err(mci);
1817 	else
1818 		i7core_rdimm_check_mc_ecc_err(mci);
1819 }
1820 
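/*
 * Worked example (hypothetical indices) for the drain above: with
 * MCE_LOG_LEN = 32, mce_in = 30 and mce_out = 2, there are
 * count = (2 + 32 - 30) % 32 = 4 pending entries, and the copy is split
 * into two chunks (entries 30-31, then 0-1) because the valid entries
 * wrap around the end of mce_entry[].
 */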
1821 /*
1822  * i7core_mce_check_error	Replicates mcelog routine to get errors
1823  *				This routine simply queues mcelog errors, and
1824  *				returns. The error itself should be handled later
1825  *				by i7core_check_error.
1826  * WARNING: As this routine should be called at NMI time, extra care should
1827  * be taken to avoid deadlocks, and to be as fast as possible.
1828  */
1829 static int i7core_mce_check_error(void *priv, struct mce *mce)
1830 {
1831 	struct mem_ctl_info *mci = priv;
1832 	struct i7core_pvt *pvt = mci->pvt_info;
1833 
1834 	/*
1835 	 * Just let mcelog handle it if the error is
1836 	 * outside the memory controller
1837 	 */
1838 	if (((mce->status & 0xffff) >> 7) != 1)
1839 		return 0;
1840 
1841 	/* Bank 8 registers are the only ones that we know how to handle */
1842 	if (mce->bank != 8)
1843 		return 0;
1844 
1845 #ifdef CONFIG_SMP
1846 	/* Only handle if it is the right mc controller */
1847 	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
1848 		return 0;
1849 #endif
1850 
1851 	smp_rmb();
1852 	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1853 		smp_wmb();
1854 		pvt->mce_overrun++;
1855 		return 0;
1856 	}
1857 
1858 	/* Copy the memory error into the ring buffer */
1859 	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1860 	smp_wmb();
1861 	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1862 
1863 	/* Handle fatal errors immediately */
1864 	if (mce->mcgstatus & 1)
1865 		i7core_check_error(mci);
1866 
1867 	/* Advise mcelog that the error was handled */
1868 	return 1;
1869 }
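
/*
 * Illustrative sketch, not part of the driver: the status filter above keeps
 * just the MCA error codes with the memory-controller encoding from the
 * Intel SDM -- 0000 0000 1MMM CCCC -- i.e. bit 7 set and bits 15:8 clear,
 * where MMM is the memory transaction type and CCCC the channel. Reduced to
 * hypothetical demo_* helpers:
 */
static inline int demo_is_mem_mcacod(u16 mcacod)
{
	return (mcacod >> 7) == 1;	/* same test as the driver applies */
}

static inline u8 demo_mcacod_channel(u16 mcacod)
{
	return mcacod & 0xf;		/* CCCC; 0xf means "not specified" */
}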
1870 
1871 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
1872 {
1873 	pvt->i7core_pci = edac_pci_create_generic_ctl(
1874 						&pvt->i7core_dev->pdev[0]->dev,
1875 						EDAC_MOD_STR);
1876 	if (unlikely(!pvt->i7core_pci))
1877 		pr_warn("Unable to set up PCI error reporting via EDAC\n");
1878 }
1879 
1880 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
1881 {
1882 	if (likely(pvt->i7core_pci))
1883 		edac_pci_release_generic_ctl(pvt->i7core_pci);
1884 	else
1885 		i7core_printk(KERN_ERR,
1886 				"Couldn't find mem_ctl_info for socket %d\n",
1887 				pvt->i7core_dev->socket);
1888 	pvt->i7core_pci = NULL;
1889 }
1890 
1891 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
1892 {
1893 	struct mem_ctl_info *mci = i7core_dev->mci;
1894 	struct i7core_pvt *pvt;
1895 
1896 	if (unlikely(!mci || !mci->pvt_info)) {
1897 		debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
1898 			__func__, &i7core_dev->pdev[0]->dev);
1899 
1900 		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
1901 		return;
1902 	}
1903 
1904 	pvt = mci->pvt_info;
1905 
1906 	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1907 		__func__, mci, &i7core_dev->pdev[0]->dev);
1908 
1909 	/* Disable MCE NMI handler */
1910 	edac_mce_unregister(&pvt->edac_mce);
1911 
1912 	/* Disable EDAC PCI error polling */
1913 	i7core_pci_ctl_release(pvt);
1914 
1915 	/* Remove MC sysfs nodes */
1916 	edac_mc_del_mc(mci->dev);
1917 
1918 	debugf1("%s: free mci struct\n", mci->ctl_name);
1919 	kfree(mci->ctl_name);
1920 	edac_mc_free(mci);
1921 	i7core_dev->mci = NULL;
1922 }
1923 
1924 static int i7core_register_mci(struct i7core_dev *i7core_dev)
1925 {
1926 	struct mem_ctl_info *mci;
1927 	struct i7core_pvt *pvt;
1928 	int rc, channels, csrows;
1929 
1930 	/* Count the channels that are active and not disabled */
1931 	rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
1932 	if (unlikely(rc < 0))
1933 		return rc;
1934 
1935 	/* allocate a new MC control structure */
1936 	mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
1937 	if (unlikely(!mci))
1938 		return -ENOMEM;
1939 
1940 	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1941 		__func__, mci, &i7core_dev->pdev[0]->dev);
1942 
1943 	pvt = mci->pvt_info;
1944 	memset(pvt, 0, sizeof(*pvt));
1945 
1946 	/* Associate i7core_dev and mci for future use */
1947 	pvt->i7core_dev = i7core_dev;
1948 	i7core_dev->mci = mci;
1949 
1950 	/*
1951 	 * FIXME: how to handle RDDR3 at the MCI level? It is possible to mix
1952 	 * RDDR3 and UDDR3 on Nehalem, provided that they are on different
1953 	 * memory channels.
1954 	 */
1955 	mci->mtype_cap = MEM_FLAG_DDR3;
1956 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
1957 	mci->edac_cap = EDAC_FLAG_NONE;
1958 	mci->mod_name = "i7core_edac.c";
1959 	mci->mod_ver = I7CORE_REVISION;
1960 	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1961 				  i7core_dev->socket);
1962 	mci->dev_name = pci_name(i7core_dev->pdev[0]);
1963 	mci->ctl_page_to_phys = NULL;
1964 
1965 	/* Store PCI devices in mci for faster access */
1966 	rc = mci_bind_devs(mci, i7core_dev);
1967 	if (unlikely(rc < 0))
1968 		goto fail0;
1969 
1970 	if (pvt->is_registered)
1971 		mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
1972 	else
1973 		mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
1974 
1975 	/* Get the basic DIMM configuration */
1976 	get_dimm_config(mci);
1977 	/* record ptr to the generic device */
1978 	mci->dev = &i7core_dev->pdev[0]->dev;
1979 	/* Set the function pointer to an actual operation function */
1980 	mci->edac_check = i7core_check_error;
1981 
1982 	/* add this new MC control structure to EDAC's list of MCs */
1983 	if (unlikely(edac_mc_add_mc(mci))) {
1984 		debugf0("MC: " __FILE__
1985 			": %s(): failed edac_mc_add_mc()\n", __func__);
1986 		/* FIXME: perhaps some code should go here that disables error
1987 		 * reporting if we just enabled it
1988 		 */
1989 
1990 		rc = -EINVAL;
1991 		goto fail0;
1992 	}
1993 
1994 	/* Default injection masks: -1 matches any dimm/rank/bank/page/col */
1995 	pvt->inject.channel = 0;
1996 	pvt->inject.dimm = -1;
1997 	pvt->inject.rank = -1;
1998 	pvt->inject.bank = -1;
1999 	pvt->inject.page = -1;
2000 	pvt->inject.col = -1;
2001 
2002 	/* Allocate the generic PCI control info */
2003 	i7core_pci_ctl_create(pvt);
2004 
2005 	/* Register with edac_mce in order to receive memory errors */
2006 	pvt->edac_mce.priv = mci;
2007 	pvt->edac_mce.check_error = i7core_mce_check_error;
2008 	rc = edac_mce_register(&pvt->edac_mce);
2009 	if (unlikely(rc < 0)) {
2010 		debugf0("MC: " __FILE__
2011 			": %s(): failed edac_mce_register()\n", __func__);
2012 		goto fail1;
2013 	}
2014 
2015 	return 0;
2016 
2017 fail1:
2018 	i7core_pci_ctl_release(pvt);
2019 	edac_mc_del_mc(mci->dev);
2020 fail0:
2021 	kfree(mci->ctl_name);
2022 	edac_mc_free(mci);
2023 	i7core_dev->mci = NULL;
2024 	return rc;
2025 }
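
/*
 * Illustrative note, not part of the driver: the -1 defaults set in
 * i7core_register_mci() act as wildcards for the error injection code --
 * a field constrains the injected error only when given an explicit value.
 * Reduced to a hypothetical predicate:
 */
static inline int demo_inject_field_matches(int mask, int value)
{
	return mask == -1 || mask == value;	/* -1 means "any" */
}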
2026 
2027 /*
2028  *	i7core_probe	Probe for ONE instance of the device to see if it
2029  *			is present.
2030  *	return:
2031  *		0 if a device was found
2032  *		< 0 for an error code
2033  */
2034 
2035 static int __devinit i7core_probe(struct pci_dev *pdev,
2036 				  const struct pci_device_id *id)
2037 {
2038 	int rc;
2039 	struct i7core_dev *i7core_dev;
2040 
2041 	/* get the pci devices we want to reserve for our use */
2042 	mutex_lock(&i7core_edac_lock);
2043 
2044 	/*
2045 	 * All memory controllers are allocated on the first pass.
2046 	 */
2047 	if (unlikely(probed >= 1)) {
2048 		mutex_unlock(&i7core_edac_lock);
2049 		return -ENODEV;
2050 	}
2051 	probed++;
2052 
2053 	rc = i7core_get_all_devices();
2054 	if (unlikely(rc < 0))
2055 		goto fail0;
2056 
2057 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2058 		rc = i7core_register_mci(i7core_dev);
2059 		if (unlikely(rc < 0))
2060 			goto fail1;
2061 	}
2062 
2063 	i7core_printk(KERN_INFO, "Driver loaded.\n");
2064 
2065 	mutex_unlock(&i7core_edac_lock);
2066 	return 0;
2067 
2068 fail1:
2069 	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2070 		i7core_unregister_mci(i7core_dev);
2071 
2072 	i7core_put_all_devices();
2073 fail0:
2074 	mutex_unlock(&i7core_edac_lock);
2075 	return rc;
2076 }
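
/*
 * Illustrative sketch, not part of the driver: because the first
 * i7core_probe() call claims the devices of every socket, later probe calls
 * (for other IDs in the table) must bail out. The guard reduces to a
 * "first caller wins" test under the module mutex (hypothetical demo_* name):
 */
static inline int demo_first_probe(void)
{
	int first;

	mutex_lock(&i7core_edac_lock);
	first = (probed++ == 0);
	mutex_unlock(&i7core_edac_lock);
	return first;
}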
2077 
2078 /*
2079  *	i7core_remove	destructor for one instance of device
2080  *
2081  */
2082 static void __devexit i7core_remove(struct pci_dev *pdev)
2083 {
2084 	struct i7core_dev *i7core_dev;
2085 
2086 	debugf0(__FILE__ ": %s()\n", __func__);
2087 
2088 	/*
2089 	 * We have a problem here: the pdev value for removal will be wrong,
2090 	 * since it points to the X58 register used to detect that the machine
2091 	 * is a Nehalem or later design. However, because several PCI devices
2092 	 * are grouped together to provide the MC functionality, we need to use
2093 	 * a different method for releasing the devices.
2094 	 */
2095 
2096 	mutex_lock(&i7core_edac_lock);
2097 
2098 	if (unlikely(!probed)) {
2099 		mutex_unlock(&i7core_edac_lock);
2100 		return;
2101 	}
2102 
2103 	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2104 		i7core_unregister_mci(i7core_dev);
2105 
2106 	/* Release PCI resources */
2107 	i7core_put_all_devices();
2108 
2109 	probed--;
2110 
2111 	mutex_unlock(&i7core_edac_lock);
2112 }
2113 
2114 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2115 
2116 /*
2117  *	i7core_driver	pci_driver structure for this module
2118  *
2119  */
2120 static struct pci_driver i7core_driver = {
2121 	.name     = "i7core_edac",
2122 	.probe    = i7core_probe,
2123 	.remove   = __devexit_p(i7core_remove),
2124 	.id_table = i7core_pci_tbl,
2125 };
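
/*
 * Illustrative note, not part of the driver: i7core_pci_tbl, defined earlier
 * in this file, is the usual zero-terminated list of PCI IDs the driver
 * binds to. A hypothetical entry (the device ID below is made up) and its
 * terminator look like:
 *
 *	static const struct pci_device_id demo_tbl[] = {
 *		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x1234) },
 *		{ 0, }
 *	};
 */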
2126 
2127 /*
2128  *	i7core_init		Module entry function
2129  *			Try to initialize this module for its devices
2130  */
2131 static int __init i7core_init(void)
2132 {
2133 	int pci_rc;
2134 
2135 	debugf2("MC: " __FILE__ ": %s()\n", __func__);
2136 
2137 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
2138 	opstate_init();
2139 
2140 	if (use_pci_fixup)
2141 		i7core_xeon_pci_fixup(pci_dev_table);
2142 
2143 	pci_rc = pci_register_driver(&i7core_driver);
2144 
2145 	if (pci_rc >= 0)
2146 		return 0;
2147 
2148 	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2149 		      pci_rc);
2150 
2151 	return pci_rc;
2152 }
2153 
2154 /*
2155  *	i7core_exit()	Module exit function
2156  *			Unregister the driver
2157  */
2158 static void __exit i7core_exit(void)
2159 {
2160 	debugf2("MC: " __FILE__ ": %s()\n", __func__);
2161 	pci_unregister_driver(&i7core_driver);
2162 }
2163 
2164 module_init(i7core_init);
2165 module_exit(i7core_exit);
2166 
2167 MODULE_LICENSE("GPL");
2168 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2169 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2170 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2171 		   I7CORE_REVISION);
2172 
2173 module_param(edac_op_state, int, 0444);
2174 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2175