xref: /openbmc/linux/drivers/edac/i7core_edac.c (revision 171f1bc7)
/* Intel i7 core/Nehalem Memory Controller kernel module
 *
 * This driver supports the memory controllers found on the Intel
 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
 * Xeon 55xx and Xeon 56xx, also known as Nehalem, Nehalem-EP, Lynnfield
 * and Westmere-EP.
 *
 * This file may be distributed under the terms of the
 * GNU General Public License version 2 only.
 *
 * Copyright (c) 2009-2010 by:
 *	 Mauro Carvalho Chehab <mchehab@redhat.com>
 *
 * Red Hat Inc. http://www.redhat.com
 *
 * Forked and adapted from the i5400_edac driver
 *
 * Based on the following public Intel datasheets:
 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
 * Datasheet, Volume 2:
 *	http://download.intel.com/design/processor/datashts/320835.pdf
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/dmi.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <asm/mce.h>
#include <asm/processor.h>
#include <asm/div64.h>

#include "edac_core.h"

/* Static vars */
static LIST_HEAD(i7core_edac_list);
static DEFINE_MUTEX(i7core_edac_lock);
static int probed;

static int use_pci_fixup;
module_param(use_pci_fixup, int, 0444);
MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255, and are not reported by the BIOS.
 * We currently handle devices on only 2 sockets. In order to support more
 * QPI (Quick Path Interconnect) sockets, just increment this number.
 */
#define MAX_SOCKET_BUSES	2


/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0"
#define EDAC_MOD_STR      "i7core_edac"

/*
 * Debug macros
 */
#define i7core_printk(level, fmt, arg...)			\
	edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)

/*
 * i7core Memory Controller Registers
 */

	/* OFFSETS for Device 0 Function 0 */

#define MC_CFG_CONTROL	0x90
  #define MC_CFG_UNLOCK		0x02
  #define MC_CFG_LOCK		0x00

	/* OFFSETS for Device 3 Function 0 */

#define MC_CONTROL	0x48
#define MC_STATUS	0x4c
#define MC_MAX_DOD	0x64

/*
 * OFFSETS for Device 3 Function 4, as indicated on the Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#define MC_TEST_ERR_RCV1	0x60
  #define DIMM2_COR_ERR(r)			((r) & 0x7fff)

#define MC_TEST_ERR_RCV0	0x64
  #define DIMM1_COR_ERR(r)			(((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)			((r) & 0x7fff)

/* OFFSETS for Device 3 Function 2, as indicated on the Xeon 5500 datasheet */
#define MC_SSRCONTROL		0x48
  #define SSR_MODE_DISABLE	0x00
  #define SSR_MODE_ENABLE	0x01
  #define SSR_MODE_MASK		0x03

#define MC_SCRUB_CONTROL	0x4c
  #define STARTSCRUB		(1 << 24)
  #define SCRUBINTERVAL_MASK    0xffffff

#define MC_COR_ECC_CNT_0	0x80
#define MC_COR_ECC_CNT_1	0x84
#define MC_COR_ECC_CNT_2	0x88
#define MC_COR_ECC_CNT_3	0x8c
#define MC_COR_ECC_CNT_4	0x90
#define MC_COR_ECC_CNT_5	0x94

#define DIMM_TOP_COR_ERR(r)			(((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)			((r) & 0x7fff)
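/*
 * Each MC_COR_ECC_CNT_* register packs two 15-bit correctable-error
 * counters: bits 14:0 count the "bottom" DIMM and bits 30:16 the "top"
 * one. For example, a raw value of 0x00030001 decodes to 3 corrected
 * errors on the top DIMM and 1 on the bottom. The counters wrap at
 * 0x7fff, which is why the delta logic in the rdimm/udimm check routines
 * adds 0x7fff back whenever a new reading is smaller than the previous one.
 */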


	/* OFFSETS for Devices 4, 5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
  #define THREE_DIMMS_PRESENT		(1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
  #define QUAD_RANK_PRESENT		(1 << 22)
  #define REGISTERED_DIMM		(1 << 15)

#define MC_CHANNEL_MAPPER	0x60
  #define RDLCH(r, ch)		((((r) >> (3 + (ch * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> (ch * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT 0x7c
  #define RANK_PRESENT_MASK		0xffff

#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8
#define MC_CHANNEL_ERROR_INJECT	0xfc
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01

	/* OFFSETS for Devices 4, 5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		((x & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK		3
  #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)
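/*
 * Illustrative decode: dimm_dod = 0x2a5 has DIMM_PRESENT() = 1,
 * MC_DOD_NUMBANK() = 1 (8 banks), MC_DOD_NUMRANK() = 1 (2 ranks),
 * MC_DOD_NUMROW() = 1 (2^13 rows) and MC_DOD_NUMCOL() = 1 (2^11 cols);
 * see the numbank()/numrank()/numrow()/numcol() helpers below for the
 * lookup tables.
 */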

#define MC_RANK_PRESENT		0x7c

#define MC_SAG_CH_0	0x80
#define MC_SAG_CH_1	0x84
#define MC_SAG_CH_2	0x88
#define MC_SAG_CH_3	0x8c
#define MC_SAG_CH_4	0x90
#define MC_SAG_CH_5	0x94
#define MC_SAG_CH_6	0x98
#define MC_SAG_CH_7	0x9c

#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4C
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5C
#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)

#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK		0x7

/*
 * i7core structs
 */

#define NUM_CHANS 3
#define MAX_DIMMS 3		/* Max DIMMS per channel */
#define MAX_MCR_FUNC  4
#define MAX_CHAN_FUNC 3

struct i7core_info {
	u32	mc_control;
	u32	mc_status;
	u32	max_dod;
	u32	ch_map;
};


struct i7core_inject {
	int	enable;

	u32	section;
	u32	type;
	u32	eccmask;

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};

struct i7core_channel {
	u32		ranks;
	u32		dimms;
};

struct pci_id_descr {
	int			dev;
	int			func;
	int			dev_id;
	int			optional;
};

struct pci_id_table {
	const struct pci_id_descr	*descr;
	int				n_devs;
};

struct i7core_dev {
	struct list_head	list;
	u8			socket;
	struct pci_dev		**pdev;
	int			n_devs;
	struct mem_ctl_info	*mci;
};

struct i7core_pvt {
	struct pci_dev	*pci_noncore;
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

	struct i7core_dev *i7core_dev;

	struct i7core_info	info;
	struct i7core_inject	inject;
	struct i7core_channel	channel[NUM_CHANS];

	int		ce_count_available;
	int		csrow_map[NUM_CHANS][MAX_DIMMS];

			/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
			/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	bool		is_registered, enable_scrub;

	/* FIFO double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* FIFO in/out counters */
	unsigned		mce_in, mce_out;

	/* Count of errors that could not be queued and were lost */
	unsigned		mce_overrun;

	/* DCLK Frequency used for computing scrub rate */
	int			dclk_freq;

	/* Struct to control EDAC polling */
	struct edac_pci_ctl_info *i7core_pci;
};

#define PCI_DESCR(device, function, device_id)	\
	.dev = (device),			\
	.func = (function),			\
	.dev_id = (device_id)
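/*
 * For example, PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) expands to the
 * designated initializers .dev = 3, .func = 0,
 * .dev_id = PCI_DEVICE_ID_INTEL_I7_MCR, i.e. PCI device 3, function 0.
 */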

static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41).
	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
	 * the probing code needs to test for the other address in case of
	 * failure of this one.
	 */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },

};

static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

	/*
	 * This PCI device has an alternate address on some
	 * processors, like the Core i7 860.
	 */
	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};

static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1 },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

		/* Generic Non-core registers */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },

};

#define PCI_ID_TABLE_ENTRY(A) { .descr = A, .n_devs = ARRAY_SIZE(A) }
static const struct pci_id_table pci_dev_table[] = {
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
	{0,}			/* 0 terminated list. */
};

/*
 *	pci_device_id	table for which devices we are looking for
 */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
	{0,}			/* 0 terminated list. */
};

/****************************************************************************
			Ancillary status routines
 ****************************************************************************/

	/* MC_CONTROL bits */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + ch)))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

	/* MC_STATUS bits */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << ch))

	/* MC_MAX_DOD read functions */
static inline int numdimms(u32 dimms)
{
	return (dimms & 0x3) + 1;
}

static inline int numrank(u32 rank)
{
	static int ranks[4] = { 1, 2, 4, -EINVAL };

	return ranks[rank & 0x3];
}

static inline int numbank(u32 bank)
{
	static int banks[4] = { 4, 8, 16, -EINVAL };

	return banks[bank & 0x3];
}

static inline int numrow(u32 row)
{
	static int rows[8] = {
		1 << 12, 1 << 13, 1 << 14, 1 << 15,
		1 << 16, -EINVAL, -EINVAL, -EINVAL,
	};

	return rows[row & 0x7];
}

static inline int numcol(u32 col)
{
	static int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};

	return cols[col & 0x3];
}
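/*
 * The helpers above decode the packed MC_MAX_DOD fields as used by
 * get_dimm_config(): bits 1:0 hold the DIMM count, bits 3:2 the rank code,
 * bits 5:4 the bank code, bits 8:6 the row code and bits 10:9 the column
 * code. For instance, max_dod = 0x295 decodes to 2 DIMMs, 2 ranks,
 * 8 banks, 2^14 rows and 2^11 columns.
 */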

static struct i7core_dev *get_i7core_dev(u8 socket)
{
	struct i7core_dev *i7core_dev;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		if (i7core_dev->socket == socket)
			return i7core_dev;
	}

	return NULL;
}

static struct i7core_dev *alloc_i7core_dev(u8 socket,
					   const struct pci_id_table *table)
{
	struct i7core_dev *i7core_dev;

	i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
	if (!i7core_dev)
		return NULL;

	i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
				   GFP_KERNEL);
	if (!i7core_dev->pdev) {
		kfree(i7core_dev);
		return NULL;
	}

	i7core_dev->socket = socket;
	i7core_dev->n_devs = table->n_devs;
	list_add_tail(&i7core_dev->list, &i7core_edac_list);

	return i7core_dev;
}

static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
	list_del(&i7core_dev->list);
	kfree(i7core_dev->pdev);
	kfree(i7core_dev);
}

/****************************************************************************
			Memory check routines
 ****************************************************************************/
static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
					  unsigned func)
{
	struct i7core_dev *i7core_dev = get_i7core_dev(socket);
	int i;

	if (!i7core_dev)
		return NULL;

	for (i = 0; i < i7core_dev->n_devs; i++) {
		if (!i7core_dev->pdev[i])
			continue;

		if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
		    PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
			return i7core_dev->pdev[i];
		}
	}

	return NULL;
}

/**
 * i7core_get_active_channels() - gets the number of channels and csrows
 * @socket:	Quick Path Interconnect socket
 * @channels:	Number of channels that will be returned
 * @csrows:	Number of csrows found
 *
 * Since the EDAC core needs to know in advance the number of available
 * channels and csrows, in order to allocate memory for csrows/channels,
 * two similar steps are needed. The first step, implemented in this
 * function, counts the csrows/channels present on one socket; this is
 * used to properly allocate the size of the mci components.
 *
 * It should be noted that none of the currently available datasheets
 * explain, or even mention, how csrows are seen by the memory controller.
 * So, we need to add a fake description for csrows.
 * This driver attributes one DIMM memory to one csrow.
 */
static int i7core_get_active_channels(const u8 socket, unsigned *channels,
				      unsigned *csrows)
{
	struct pci_dev *pdev = NULL;
	int i, j;
	u32 status, control;

	*channels = 0;
	*csrows = 0;

	pdev = get_pdev_slot_func(socket, 3, 0);
	if (!pdev) {
		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
			      socket);
		return -ENODEV;
	}

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_STATUS, &status);
	pci_read_config_dword(pdev, MC_CONTROL, &control);

	for (i = 0; i < NUM_CHANS; i++) {
		u32 dimm_dod[3];
		/* Check if the channel is active */
		if (!(control & (1 << (8 + i))))
			continue;

		/* Check if the channel is disabled */
		if (status & (1 << i))
			continue;

		pdev = get_pdev_slot_func(socket, i + 4, 1);
		if (!pdev) {
			i7core_printk(KERN_ERR, "Couldn't find socket %d "
						"fn %d.%d!!!\n",
						socket, i + 4, 1);
			return -ENODEV;
		}
		/* Devices 4-6 function 1 */
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		(*channels)++;

		for (j = 0; j < 3; j++) {
			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;
			(*csrows)++;
		}
	}

	debugf0("Number of active channels on socket %d: %d\n",
		socket, *channels);

	return 0;
}

static int get_dimm_config(const struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct csrow_info *csr;
	struct pci_dev *pdev;
	int i, j;
	int csrow = 0;
	unsigned long last_page = 0;
	enum edac_type mode;
	enum mem_type mtype;

	/* Get data from the MC register, function 0 */
	pdev = pvt->pci_mcr[0];
	if (!pdev)
		return -ENODEV;

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
		pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
		pvt->info.max_dod, pvt->info.ch_map);

	if (ECC_ENABLED(pvt)) {
		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
		if (ECCx8(pvt))
			mode = EDAC_S8ECD8ED;
		else
			mode = EDAC_S4ECD4ED;
	} else {
		debugf0("ECC disabled\n");
		mode = EDAC_NONE;
	}

	/* FIXME: need to handle the error codes */
	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
		"x%x x 0x%x\n",
		numdimms(pvt->info.max_dod),
		numrank(pvt->info.max_dod >> 2),
		numbank(pvt->info.max_dod >> 4),
		numrow(pvt->info.max_dod >> 6),
		numcol(pvt->info.max_dod >> 9));

	for (i = 0; i < NUM_CHANS; i++) {
		u32 data, dimm_dod[3], value[8];

		if (!pvt->pci_ch[i][0])
			continue;

		if (!CH_ACTIVE(pvt, i)) {
			debugf0("Channel %i is not active\n", i);
			continue;
		}
		if (CH_DISABLED(pvt, i)) {
			debugf0("Channel %i is disabled\n", i);
			continue;
		}

		/* Devices 4-6 function 0 */
		pci_read_config_dword(pvt->pci_ch[i][0],
				MC_CHANNEL_DIMM_INIT_PARAMS, &data);

		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
						4 : 2;

		if (data & REGISTERED_DIMM)
			mtype = MEM_RDDR3;
		else
			mtype = MEM_DDR3;
#if 0
		if (data & THREE_DIMMS_PRESENT)
			pvt->channel[i].dimms = 3;
		else if (data & SINGLE_QUAD_RANK_PRESENT)
			pvt->channel[i].dimms = 1;
		else
			pvt->channel[i].dimms = 2;
#endif

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
			"%d ranks, %cDIMMs\n",
			i,
			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
			data,
			pvt->channel[i].ranks,
			(data & REGISTERED_DIMM) ? 'R' : 'U');

		for (j = 0; j < 3; j++) {
			u32 banks, ranks, rows, cols;
			u32 size, npages;

			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;

			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

			/* DDR3 has 8 I/O banks */
			size = (rows * cols * banks * ranks) >> (20 - 3);
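			/*
			 * Worked example: 2^14 rows x 2^11 cols x 8 banks x
			 * 2 ranks is 2^29 locations; at 8 bytes per location
			 * (the "- 3" in the shift) that is 4 GiB, so
			 * size = 4096 MB.
			 */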

			pvt->channel[i].dimms++;

			debugf0("\tdimm %d %d MB offset: %x, "
				"bank: %d, rank: %d, row: %#x, col: %#x\n",
				j, size,
				RANKOFFSET(dimm_dod[j]),
				banks, ranks, rows, cols);

			npages = MiB_TO_PAGES(size);

			csr = &mci->csrows[csrow];
			csr->first_page = last_page + 1;
			last_page += npages;
			csr->last_page = last_page;
			csr->nr_pages = npages;

			csr->page_mask = 0;
			csr->grain = 8;
			csr->csrow_idx = csrow;
			csr->nr_channels = 1;

			csr->channels[0].chan_idx = i;
			csr->channels[0].ce_count = 0;

			pvt->csrow_map[i][j] = csrow;

			switch (banks) {
			case 4:
				csr->dtype = DEV_X4;
				break;
			case 8:
				csr->dtype = DEV_X8;
				break;
			case 16:
				csr->dtype = DEV_X16;
				break;
			default:
				csr->dtype = DEV_UNKNOWN;
			}

			csr->edac_mode = mode;
			csr->mtype = mtype;
			snprintf(csr->channels[0].label,
					sizeof(csr->channels[0].label),
					"CPU#%uChannel#%u_DIMM#%u",
					pvt->i7core_dev->socket, i, j);

			csrow++;
		}

		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
		for (j = 0; j < 8; j++)
			debugf1("\t\t%#x\t%#x\t%#x\n",
				(value[j] >> 27) & 0x1,
				(value[j] >> 24) & 0x7,
				(value[j] & ((1 << 24) - 1)));
	}

	return 0;
}

/****************************************************************************
			Error insertion routines
 ****************************************************************************/

/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error
   injection on more than one channel.
   Also, since a change to an inject parameter is only applied at enable
   time, we disable error injection on every write to the sysfs nodes that
   control the error injection.
 */
static int disable_inject(const struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;

	pvt->inject.enable = 0;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return -ENODEV;

	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
				MC_CHANNEL_ERROR_INJECT, 0);

	return 0;
}

/*
 * i7core inject inject.section
 *
 *	accept and store error injection inject.section value
 *	bit 0 - refers to the lower 32-byte half cacheline
 *	bit 1 - refers to the upper 32-byte half cacheline
 */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 3))
		return -EIO;

	pvt->inject.section = (u32) value;
	return count;
}

static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
					      char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.section);
}

/*
 * i7core inject.type
 *
 *	accept and store error injection inject.type value
 *	bit 0 - repeat enable - Enable error repetition
 *	bit 1 - inject ECC error
 *	bit 2 - inject parity error
 */
static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
					const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 7))
		return -EIO;

	pvt->inject.type = (u32) value;
	return count;
}

static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
					      char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.type);
}

/*
 * i7core_inject_eccmask_store
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit.
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
					const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if (rc < 0)
		return -EIO;

	pvt->inject.eccmask = (u32) value;
	return count;
}

static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
					      char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}

/*
 * i7core_addrmatch
 *
 * Defines the sysfs nodes (channel, dimm, rank, bank, page, col) that
 * select which address fields must match for an error to be injected.
 * Writing "any" to a node makes the MCU ignore that field when matching.
 */

#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
		struct mem_ctl_info *mci,			\
		const char *data, size_t count)			\
{								\
	struct i7core_pvt *pvt;					\
	long value;						\
	int rc;							\
								\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		rc = strict_strtoul(data, 10, &value);		\
		if ((rc < 0) || (value >= limit))		\
			return -EIO;				\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}

#define ATTR_ADDR_MATCH(param)					\
	{							\
		.attr = {					\
			.name = #param,				\
			.mode = (S_IRUGO | S_IWUSR)		\
		},						\
		.show  = i7core_inject_show_##param,		\
		.store = i7core_inject_store_##param,		\
	}

DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);

static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
{
	u32 read;
	int count;

	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val);

	for (count = 0; count < 10; count++) {
		if (count)
			msleep(100);
		pci_write_config_dword(dev, where, val);
		pci_read_config_dword(dev, where, &read);

		if (read == val)
			return 0;
	}

	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
		"write=%08x. Read=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val, read);

	return -EINVAL;
}

/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore
 * that matching criteria for error injection.
 *
 * Note that the error will only happen after a write operation to memory
 * that matches the condition. If REPEAT_EN is not enabled in the inject
 * mask, it will produce just one error. Otherwise, it will repeat until
 * the inject mask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 *    is reliable enough to check whether the MC is using all
 *    three channels. However, this is not clear in the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
				       const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;
	u64 mask = 0;
	int  rc;
	long enable;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	rc = strict_strtoul(data, 10, &enable);
	if (rc < 0)
		return 0;

	if (enable) {
		pvt->inject.enable = 1;
	} else {
		disable_inject(mci);
		return count;
	}

	/* Sets pvt->inject.dimm mask */
	if (pvt->inject.dimm < 0)
		mask |= 1LL << 41;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3LL) << 35;
		else
			mask |= (pvt->inject.dimm & 0x1LL) << 36;
	}

	/* Sets pvt->inject.rank mask */
	if (pvt->inject.rank < 0)
		mask |= 1LL << 40;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1LL) << 34;
		else
			mask |= (pvt->inject.rank & 0x3LL) << 34;
	}

	/* Sets pvt->inject.bank mask */
	if (pvt->inject.bank < 0)
		mask |= 1LL << 39;
	else
		mask |= (pvt->inject.bank & 0x15LL) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= 1LL << 38;
	else
		mask |= (pvt->inject.page & 0xffff) << 14;

	/* Sets pvt->inject.column mask */
	if (pvt->inject.col < 0)
		mask |= 1LL << 37;
	else
		mask |= (pvt->inject.col & 0x3fff);

	/*
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */

	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);
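	/*
	 * Example: inject.type = 3 (repeat + ECC) with inject.section = 0
	 * yields injectmask = 0x9, i.e. bit 0 (REPEAT_EN) plus bit 3
	 * (INJECT_ECC), with both half-cacheline mask bits clear.
	 */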

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, injectmask);

	/*
	 * This is something undocumented, based on my tests:
	 * without writing 8 to this register, errors aren't injected.
	 * Not sure why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		" inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);


	return count;
}

static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
					char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, &injectmask);

	debugf0("Inject error read: 0x%08x\n", injectmask);

	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
}

#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt = mci->pvt_info;			\
								\
	debugf1("%s()\n", __func__);				\
	if (!pvt->ce_count_available || (pvt->is_registered))	\
		return sprintf(data, "data unavailable\n");	\
	return sprintf(data, "%lu\n",				\
			pvt->udimm_ce_count[param]);		\
}

#define ATTR_COUNTER(param)					\
	{							\
		.attr = {					\
			.name = __stringify(udimm##param),	\
			.mode = (S_IRUGO | S_IWUSR)		\
		},						\
		.show  = i7core_show_counter_##param		\
	}

DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);

/*
 * Sysfs struct
 */

static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ } /* End of list */
};

static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};

static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
	ATTR_COUNTER(0),
	ATTR_COUNTER(1),
	ATTR_COUNTER(2),
	{ .attr = { .name = NULL } }
};

static const struct mcidev_sysfs_group i7core_udimm_counters = {
	.name  = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,
};

static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ }	/* End of list */
};

static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	}, {
		.grp = &i7core_udimm_counters,
	},
	{ }	/* End of list */
};
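/*
 * With these tables, the EDAC core typically exposes the controls under
 * the mci sysfs directory (e.g. /sys/devices/system/edac/mc/mc0/):
 * inject_section, inject_type, inject_eccmask and inject_enable, plus the
 * inject_addrmatch/ group and, on unregistered-DIMM setups, the
 * all_channel_counts/ group with the udimm0..udimm2 counters.
 */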

/****************************************************************************
	Device initialization routines: put/get, init/exit
 ****************************************************************************/

/*
 *	i7core_put_all_devices	'put' all the devices that we have
 *				reserved via 'get'
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
	int i;

	debugf0(__FILE__ ": %s()\n", __func__);
	for (i = 0; i < i7core_dev->n_devs; i++) {
		struct pci_dev *pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;
		debugf0("Removing dev %02x:%02x.%d\n",
			pdev->bus->number,
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
		pci_dev_put(pdev);
	}
}

static void i7core_put_all_devices(void)
{
	struct i7core_dev *i7core_dev, *tmp;

	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		i7core_put_devices(i7core_dev);
		free_i7core_dev(i7core_dev);
	}
}

static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
{
	struct pci_dev *pdev = NULL;
	int i;

	/*
	 * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core PCI buses
	 * aren't announced by ACPI. So, we need to use a legacy scan probing
	 * to detect them.
	 */
	while (table && table->descr) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
		if (unlikely(!pdev)) {
			for (i = 0; i < MAX_SOCKET_BUSES; i++)
				pcibios_scan_specific_bus(255-i);
		}
		pci_dev_put(pdev);
		table++;
	}
}

static unsigned i7core_pci_lastbus(void)
{
	int last_bus = 0, bus;
	struct pci_bus *b = NULL;

	while ((b = pci_find_next_bus(b)) != NULL) {
		bus = b->number;
		debugf0("Found bus %d\n", bus);
		if (bus > last_bus)
			last_bus = bus;
	}

	debugf0("Last bus %d\n", last_bus);

	return last_bus;
}

/*
 *	i7core_get_onedevice	Find and perform a 'get' operation on one
 *				of the MCH's devices/functions we want to
 *				reference for this driver
 */
static int i7core_get_onedevice(struct pci_dev **prev,
				const struct pci_id_table *table,
				const unsigned devno,
				const unsigned last_bus)
{
	struct i7core_dev *i7core_dev;
	const struct pci_id_descr *dev_descr = &table->descr[devno];

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
	 * are at id 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure
	 */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
				      *prev);

	if (!pdev) {
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		if (devno == 0)
			return -ENODEV;

		i7core_printk(KERN_INFO,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

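	/*
	 * The non-core devices sit on buses numbered down from the last
	 * PCI bus: with last_bus = 255, socket 0 lives on bus 255 and
	 * socket 1 on bus 254, so the socket is recovered below as
	 * last_bus - bus.
	 */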
	socket = last_bus - bus;

	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = alloc_i7core_dev(socket, table);
		if (!i7core_dev) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	i7core_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		socket, bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	/*
	 * As stated in drivers/pci/search.c, the reference count for
	 * @from is always decremented if it is not %NULL. So, as we need
	 * to walk all devices until NULL is returned, we need to take an
	 * extra reference on the device.
	 */
	pci_dev_get(pdev);

	*prev = pdev;

	return 0;
}

static int i7core_get_all_devices(void)
{
	int i, rc, last_bus;
	struct pci_dev *pdev = NULL;
	const struct pci_id_table *table = pci_dev_table;

	last_bus = i7core_pci_lastbus();

	while (table && table->descr) {
		for (i = 0; i < table->n_devs; i++) {
			pdev = NULL;
			do {
				rc = i7core_get_onedevice(&pdev, table, i,
							  last_bus);
				if (rc < 0) {
					if (i == 0) {
						i = table->n_devs;
						break;
					}
					i7core_put_all_devices();
					return -ENODEV;
				}
			} while (pdev);
		}
		table++;
	}

	return 0;
}

static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;
	char *family;

	pvt->is_registered = false;
	pvt->enable_scrub  = false;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func) {
			pvt->pci_noncore = pdev;

			/* Detect the processor family */
			switch (pdev->device) {
			case PCI_DEVICE_ID_INTEL_I7_NONCORE:
				family = "Xeon 35xx/ i7core";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
				family = "i7-800/i5-700";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
				family = "Xeon 34xx";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
				family = "Xeon 55xx";
				pvt->enable_scrub = true;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
				family = "Xeon 56xx / i7-900";
				pvt->enable_scrub = true;
				break;
			default:
				family = "unknown";
				pvt->enable_scrub = false;
			}
			debugf0("Detected processor type %s\n", family);
		} else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = true;
	}

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}

/****************************************************************************
			Error check routines
 ****************************************************************************/
static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
				      const int chan,
				      const int dimm,
				      const int add)
{
	char *msg;
	struct i7core_pvt *pvt = mci->pvt_info;
	int row = pvt->csrow_map[chan][dimm], i;

	for (i = 0; i < add; i++) {
		msg = kasprintf(GFP_KERNEL, "Corrected error "
				"(Socket=%d channel=%d dimm=%d)",
				pvt->i7core_dev->socket, chan, dimm);

		edac_mc_handle_fbd_ce(mci, row, 0, msg);
		kfree(msg);
	}
}

static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 const int chan,
					 const int new0,
					 const int new1,
					 const int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

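		/*
		 * The hardware CE counters are 15 bits wide, so a negative
		 * delta means the counter wrapped around since the last
		 * read and 0x7fff is added back. Illustrative case: last =
		 * 0x7ffe and new = 3 gives add = -32763, corrected to 4.
		 */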
1605 		if (add2 < 0)
1606 			add2 += 0x7fff;
1607 		pvt->rdimm_ce_count[chan][2] += add2;
1608 
1609 		if (add1 < 0)
1610 			add1 += 0x7fff;
1611 		pvt->rdimm_ce_count[chan][1] += add1;
1612 
1613 		if (add0 < 0)
1614 			add0 += 0x7fff;
1615 		pvt->rdimm_ce_count[chan][0] += add0;
1616 	} else
1617 		pvt->ce_count_available = 1;
1618 
1619 	/* Store the new values */
1620 	pvt->rdimm_last_ce_count[chan][2] = new2;
1621 	pvt->rdimm_last_ce_count[chan][1] = new1;
1622 	pvt->rdimm_last_ce_count[chan][0] = new0;
1623 
1624 	/*updated the edac core */
1625 	if (add0 != 0)
1626 		i7core_rdimm_update_csrow(mci, chan, 0, add0);
1627 	if (add1 != 0)
1628 		i7core_rdimm_update_csrow(mci, chan, 1, add1);
1629 	if (add2 != 0)
1630 		i7core_rdimm_update_csrow(mci, chan, 2, add2);
1631 
1632 }
1633 
1634 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1635 {
1636 	struct i7core_pvt *pvt = mci->pvt_info;
1637 	u32 rcv[3][2];
1638 	int i, new0, new1, new2;
1639 
1640 	/*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
1641 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1642 								&rcv[0][0]);
1643 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1644 								&rcv[0][1]);
1645 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1646 								&rcv[1][0]);
1647 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1648 								&rcv[1][1]);
1649 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1650 								&rcv[2][0]);
1651 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1652 								&rcv[2][1]);
1653 	for (i = 0 ; i < 3; i++) {
1654 		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1655 			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1656 		/*if the channel has 3 dimms*/
1657 		if (pvt->channel[i].dimms > 2) {
1658 			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1659 			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1660 			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1661 		} else {
1662 			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1663 					DIMM_BOT_COR_ERR(rcv[i][0]);
1664 			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1665 					DIMM_BOT_COR_ERR(rcv[i][1]);
1666 			new2 = 0;
1667 		}
1668 
1669 		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1670 	}
1671 }
1672 
1673 /* This function is based on the device 3 function 4 registers as described on:
1674  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1675  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1676  * also available at:
1677  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1678  */
1679 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1680 {
1681 	struct i7core_pvt *pvt = mci->pvt_info;
1682 	u32 rcv1, rcv0;
1683 	int new0, new1, new2;
1684 
1685 	if (!pvt->pci_mcr[4]) {
1686 		debugf0("%s MCR registers not found\n", __func__);
1687 		return;
1688 	}
1689 
1690 	/* Corrected test errors */
1691 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1692 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1693 
1694 	/* Store the new values */
1695 	new2 = DIMM2_COR_ERR(rcv1);
1696 	new1 = DIMM1_COR_ERR(rcv0);
1697 	new0 = DIMM0_COR_ERR(rcv0);
1698 
1699 	/* Updates CE counters if it is not the first time here */
1700 	if (pvt->ce_count_available) {
1701 		/* Updates CE counters */
1702 		int add0, add1, add2;
1703 
1704 		add2 = new2 - pvt->udimm_last_ce_count[2];
1705 		add1 = new1 - pvt->udimm_last_ce_count[1];
1706 		add0 = new0 - pvt->udimm_last_ce_count[0];
1707 
1708 		if (add2 < 0)
1709 			add2 += 0x7fff;
1710 		pvt->udimm_ce_count[2] += add2;
1711 
1712 		if (add1 < 0)
1713 			add1 += 0x7fff;
1714 		pvt->udimm_ce_count[1] += add1;
1715 
1716 		if (add0 < 0)
1717 			add0 += 0x7fff;
1718 		pvt->udimm_ce_count[0] += add0;
1719 
1720 		if (add0 | add1 | add2)
1721 			i7core_printk(KERN_ERR, "New Corrected error(s): "
1722 				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1723 				      add0, add1, add2);
1724 	} else
1725 		pvt->ce_count_available = 1;
1726 
1727 	/* Store the new values */
1728 	pvt->udimm_last_ce_count[2] = new2;
1729 	pvt->udimm_last_ce_count[1] = new1;
1730 	pvt->udimm_last_ce_count[0] = new0;
1731 }
1732 
1733 /*
1734  * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1735  * Architectures Software Developer’s Manual Volume 3B.
1736  * Nehalem are defined as family 0x06, model 0x1a
1737  *
1738  * The MCA registers used here are the following ones:
1739  *     struct mce field	MCA Register
1740  *     m->status	MSR_IA32_MC8_STATUS
1741  *     m->addr		MSR_IA32_MC8_ADDR
1742  *     m->misc		MSR_IA32_MC8_MISC
1743  * In the case of Nehalem, the error information is masked at .status and .misc
1744  * fields
1745  */
1746 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1747 				    const struct mce *m)
1748 {
1749 	struct i7core_pvt *pvt = mci->pvt_info;
1750 	char *type, *optype, *err, *msg;
1751 	unsigned long error = m->status & 0x1ff0000l;
1752 	u32 optypenum = (m->status >> 4) & 0x07;
1753 	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1754 	u32 dimm = (m->misc >> 16) & 0x3;
1755 	u32 channel = (m->misc >> 18) & 0x3;
1756 	u32 syndrome = m->misc >> 32;
1757 	u32 errnum = find_first_bit(&error, 32);
1758 	int csrow;
1759 
1760 	if (m->mcgstatus & 1)
1761 		type = "FATAL";
1762 	else
1763 		type = "NON_FATAL";
1764 
1765 	switch (optypenum) {
1766 	case 0:
1767 		optype = "generic undef request";
1768 		break;
1769 	case 1:
1770 		optype = "read error";
1771 		break;
1772 	case 2:
1773 		optype = "write error";
1774 		break;
1775 	case 3:
1776 		optype = "addr/cmd error";
1777 		break;
1778 	case 4:
1779 		optype = "scrubbing error";
1780 		break;
1781 	default:
1782 		optype = "reserved";
1783 		break;
1784 	}
1785 
1786 	switch (errnum) {
1787 	case 16:
1788 		err = "read ECC error";
1789 		break;
1790 	case 17:
1791 		err = "RAS ECC error";
1792 		break;
1793 	case 18:
1794 		err = "write parity error";
1795 		break;
1796 	case 19:
1797 		err = "redundacy loss";
1798 		break;
1799 	case 20:
1800 		err = "reserved";
1801 		break;
1802 	case 21:
1803 		err = "memory range error";
1804 		break;
1805 	case 22:
1806 		err = "RTID out of range";
1807 		break;
1808 	case 23:
1809 		err = "address parity error";
1810 		break;
1811 	case 24:
1812 		err = "byte enable parity error";
1813 		break;
1814 	default:
1815 		err = "unknown";
1816 	}
1817 
1818 	/* FIXME: should convert addr into bank and rank information */
1819 	msg = kasprintf(GFP_ATOMIC,
1820 		"%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1821 		"syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1822 		type, (long long) m->addr, m->cpu, dimm, channel,
1823 		syndrome, core_err_cnt, (long long)m->status,
1824 		(long long)m->misc, optype, err);
1825 
1826 	debugf0("%s", msg);
1827 
1828 	csrow = pvt->csrow_map[channel][dimm];
1829 
1830 	/* Call the helper to output message */
1831 	if (m->mcgstatus & 1)
1832 		edac_mc_handle_fbd_ue(mci, csrow, 0,
1833 				0 /* FIXME: should be channel here */, msg);
1834 	else if (!pvt->is_registered)
1835 		edac_mc_handle_fbd_ce(mci, csrow,
1836 				0 /* FIXME: should be channel here */, msg);
1837 
1838 	kfree(msg);
1839 }
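
/*
 * For illustration (hypothetical register contents): with
 * m->status = 0x10000 (bit 16 set) and m->misc = 0x20000, the
 * extraction above yields:
 *
 *	error   = m->status & 0x1ff0000;	// 0x10000
 *	errnum  = find_first_bit(&error, 32);	// 16 -> "read ECC error"
 *	dimm    = (m->misc >> 16) & 0x3;	// 2
 *	channel = (m->misc >> 18) & 0x3;	// 0
 */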
1840 
1841 /*
1842  *	i7core_check_error	Retrieve and process errors reported by the
1843  *				hardware. Called by the Core module.
1844  */
1845 static void i7core_check_error(struct mem_ctl_info *mci)
1846 {
1847 	struct i7core_pvt *pvt = mci->pvt_info;
1848 	int i;
1849 	unsigned count = 0;
1850 	struct mce *m;
1851 
1852 	/*
1853 	 * MCE first step: Copy all mce errors into a temporary buffer
1854 	 * We use a double buffering here, to reduce the risk of
1855 	 * losing an error.
1856 	 */
1857 	smp_rmb();
1858 	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1859 		% MCE_LOG_LEN;
1860 	if (!count)
1861 		goto check_ce_error;
1862 
1863 	m = pvt->mce_outentry;
1864 	if (pvt->mce_in + count > MCE_LOG_LEN) {
1865 		unsigned l = MCE_LOG_LEN - pvt->mce_in;
1866 
1867 		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1868 		smp_wmb();
1869 		pvt->mce_in = 0;
1870 		count -= l;
1871 		m += l;
1872 	}
1873 	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1874 	smp_wmb();
1875 	pvt->mce_in += count;
1876 
1877 	smp_rmb();
1878 	if (pvt->mce_overrun) {
1879 		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1880 			      pvt->mce_overrun);
1881 		smp_wmb();
1882 		pvt->mce_overrun = 0;
1883 	}
1884 
1885 	/*
1886 	 * MCE second step: parse errors and display
1887 	 */
1888 	for (i = 0; i < count; i++)
1889 		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1890 
1891 	/*
1892 	 * Now, let's increment CE error counts
1893 	 */
1894 check_ce_error:
1895 	if (!pvt->is_registered)
1896 		i7core_udimm_check_mc_ecc_err(mci);
1897 	else
1898 		i7core_rdimm_check_mc_ecc_err(mci);
1899 }
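
/*
 * For illustration, assuming MCE_LOG_LEN is 32 and hypothetical ring
 * indexes mce_in = 30 and mce_out = 2:
 *
 *	count = (2 + 32 - 30) % 32 = 4 pending entries;
 *
 * mce_in + count (34) exceeds MCE_LOG_LEN, so the first memcpy() drains
 * the tail of the ring (entries 30 and 31, l = 2) and resets mce_in to 0;
 * the second memcpy() then drains the remaining entries 0 and 1.
 */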
1900 
1901 /*
1902  * i7core_mce_check_error	Replicates the mcelog routine to get errors.
1903  *				This routine simply queues mcelog errors and
1904  *				returns. The error itself should be handled
1905  *				later by i7core_check_error.
1906  * WARNING: As this routine should be called at NMI time, extra care should
1907  * be taken to avoid deadlocks, and to be as fast as possible.
1908  */
1909 static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
1910 				  void *data)
1911 {
1912 	struct mce *mce = (struct mce *)data;
1913 	struct i7core_dev *i7_dev;
1914 	struct mem_ctl_info *mci;
1915 	struct i7core_pvt *pvt;
1916 
1917 	i7_dev = get_i7core_dev(mce->socketid);
1918 	if (!i7_dev)
1919 		return NOTIFY_BAD;
1920 
1921 	mci = i7_dev->mci;
1922 	pvt = mci->pvt_info;
1923 
1924 	/*
1925 	 * Just let mcelog handle it if the error is
1926 	 * outside the memory controller
1927 	 */
1928 	if (((mce->status & 0xffff) >> 7) != 1)
1929 		return NOTIFY_DONE;
1930 
1931 	/* Bank 8 registers are the only ones that we know how to handle */
1932 	if (mce->bank != 8)
1933 		return NOTIFY_DONE;
1934 
1935 #ifdef CONFIG_SMP
1936 	/* Only handle if it is the right mc controller */
1937 	if (mce->socketid != pvt->i7core_dev->socket)
1938 		return NOTIFY_DONE;
1939 #endif
1940 
1941 	smp_rmb();
1942 	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1943 		smp_wmb();
1944 		pvt->mce_overrun++;
1945 		return NOTIFY_DONE;
1946 	}
1947 
1948 	/* Copy the memory error into the ring buffer */
1949 	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1950 	smp_wmb();
1951 	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1952 
1953 	/* Handle fatal errors immediately */
1954 	if (mce->mcgstatus & 1)
1955 		i7core_check_error(mci);
1956 
1957 	/* Advise mcelog that the errors were handled */
1958 	return NOTIFY_STOP;
1959 }
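
/*
 * Note on the return values above, per the generic notifier-chain
 * contract: NOTIFY_DONE passes the event on to the remaining decoders
 * (the error is not ours), NOTIFY_BAD aborts the chain, and NOTIFY_STOP
 * marks the event as consumed so the default mcelog decoding is skipped.
 */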
1960 
1961 static struct notifier_block i7_mce_dec = {
1962 	.notifier_call	= i7core_mce_check_error,
1963 };
1964 
1965 struct memdev_dmi_entry {
1966 	u8 type;
1967 	u8 length;
1968 	u16 handle;
1969 	u16 phys_mem_array_handle;
1970 	u16 mem_err_info_handle;
1971 	u16 total_width;
1972 	u16 data_width;
1973 	u16 size;
1974 	u8 form;
1975 	u8 device_set;
1976 	u8 device_locator;
1977 	u8 bank_locator;
1978 	u8 memory_type;
1979 	u16 type_detail;
1980 	u16 speed;
1981 	u8 manufacturer;
1982 	u8 serial_number;
1983 	u8 asset_tag;
1984 	u8 part_number;
1985 	u8 attributes;
1986 	u32 extended_size;
1987 	u16 conf_mem_clk_speed;
1988 } __attribute__((__packed__));
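
/*
 * This packed layout mirrors the SMBIOS "Memory Device" (Type 17) record,
 * so each field lands at the offset defined by the SMBIOS specification.
 */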
1989 
1990 
1991 /*
1992  * Decode the DRAM Clock Frequency. Be paranoid: make sure that all
1993  * memory devices report the same speed, and if they don't, consider
1994  * all speeds to be invalid.
1995  */
1996 static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1997 {
1998 	int *dclk_freq = _dclk_freq;
1999 	u16 dmi_mem_clk_speed;
2000 
2001 	if (*dclk_freq == -1)
2002 		return;
2003 
2004 	if (dh->type == DMI_ENTRY_MEM_DEVICE) {
2005 		struct memdev_dmi_entry *memdev_dmi_entry =
2006 			(struct memdev_dmi_entry *)dh;
2007 		unsigned long conf_mem_clk_speed_offset =
2008 			(unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
2009 			(unsigned long)&memdev_dmi_entry->type;
2010 		unsigned long speed_offset =
2011 			(unsigned long)&memdev_dmi_entry->speed -
2012 			(unsigned long)&memdev_dmi_entry->type;
2013 
2014 		/* Check that a DIMM is present */
2015 		if (memdev_dmi_entry->size == 0)
2016 			return;
2017 
2018 		/*
2019 		 * Pick the configured speed if it's available; otherwise
2020 		 * pick the DIMM speed; if neither is present, we have no speed.
2021 		 */
2022 		if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
2023 			dmi_mem_clk_speed =
2024 				memdev_dmi_entry->conf_mem_clk_speed;
2025 		} else if (memdev_dmi_entry->length > speed_offset) {
2026 			dmi_mem_clk_speed = memdev_dmi_entry->speed;
2027 		} else {
2028 			*dclk_freq = -1;
2029 			return;
2030 		}
2031 
2032 		if (*dclk_freq == 0) {
2033 			/* First pass, speed was 0 */
2034 			if (dmi_mem_clk_speed > 0) {
2035 				/* Set speed if a valid speed is read */
2036 				*dclk_freq = dmi_mem_clk_speed;
2037 			} else {
2038 				/* Otherwise we don't have a valid speed */
2039 				*dclk_freq = -1;
2040 			}
2041 		} else if (*dclk_freq > 0 &&
2042 			   *dclk_freq != dmi_mem_clk_speed) {
2043 			/*
2044 			 * If we already have a speed, check that all DIMMs run
2045 			 * at the same speed; otherwise mark the speed as invalid.
2046 			 */
2047 			*dclk_freq = -1;
2048 		}
2049 	}
2050 }
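
/*
 * For illustration: the manual pointer subtractions above are equivalent
 * to offsetof(struct memdev_dmi_entry, ...). With the packed layout,
 * speed sits at offset 0x15 and conf_mem_clk_speed at offset 0x20, so a
 * hypothetical entry with dh->length = 0x22 exposes the configured clock
 * speed (0x22 > 0x20), while a shorter 0x1b-byte entry only exposes the
 * raw DIMM speed (0x1b > 0x15).
 */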
2051 
2052 /*
2053  * The default DCLK frequency is used as a fallback if we
2054  * fail to find anything reliable in the DMI. The value
2055  * is taken straight from the datasheet.
2056  */
2057 #define DEFAULT_DCLK_FREQ 800
2058 
2059 static int get_dclk_freq(void)
2060 {
2061 	int dclk_freq = 0;
2062 
2063 	dmi_walk(decode_dclk, (void *)&dclk_freq);
2064 
2065 	if (dclk_freq < 1)
2066 		return DEFAULT_DCLK_FREQ;
2067 
2068 	return dclk_freq;
2069 }
2070 
2071 /*
2072  * set_sdram_scrub_rate		This routine sets the byte/sec bandwidth scrub
2073  *				rate in hardware according to the SCRUBINTERVAL
2074  *				formula found in the datasheet.
2075  */
2076 static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
2077 {
2078 	struct i7core_pvt *pvt = mci->pvt_info;
2079 	struct pci_dev *pdev;
2080 	u32 dw_scrub;
2081 	u32 dw_ssr;
2082 
2083 	/* Get data from the MC register, function 2 */
2084 	pdev = pvt->pci_mcr[2];
2085 	if (!pdev)
2086 		return -ENODEV;
2087 
2088 	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
2089 
2090 	if (new_bw == 0) {
2091 		/* Prepare to disable patrol scrub */
2092 		dw_scrub &= ~STARTSCRUB;
2093 		/* Stop the patrol scrub engine */
2094 		write_and_test(pdev, MC_SCRUB_CONTROL,
2095 			       dw_scrub & ~SCRUBINTERVAL_MASK);
2096 
2097 		/* Get current status of scrub rate and set bit to disable */
2098 		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2099 		dw_ssr &= ~SSR_MODE_MASK;
2100 		dw_ssr |= SSR_MODE_DISABLE;
2101 	} else {
2102 		const int cache_line_size = 64;
2103 		const u32 freq_dclk_mhz = pvt->dclk_freq;
2104 		unsigned long long scrub_interval;
2105 		/*
2106 		 * Translate the desired scrub rate to a register value and
2107 		 * program the corresponding register value.
2108 		 */
2109 		scrub_interval = (unsigned long long)freq_dclk_mhz *
2110 			cache_line_size * 1000000;
2111 		do_div(scrub_interval, new_bw);
2112 
2113 		if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
2114 			return -EINVAL;
2115 
2116 		dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
2117 
2118 		/* Start the patrol scrub engine */
2119 		pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
2120 				       STARTSCRUB | dw_scrub);
2121 
2122 		/* Get current status of scrub rate and set bit to enable */
2123 		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2124 		dw_ssr &= ~SSR_MODE_MASK;
2125 		dw_ssr |= SSR_MODE_ENABLE;
2126 	}
2127 	/* Disable or enable scrubbing */
2128 	pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
2129 
2130 	return new_bw;
2131 }
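
/*
 * For illustration (hypothetical inputs): with an 800 MHz DCLK, a 64-byte
 * cache line and a requested bandwidth of 5,000,000 bytes/sec:
 *
 *	scrub_interval = 800 * 64 * 1000000 / 5000000 = 10240 (0x2800)
 *
 * i.e. one cache line is scrubbed every 10240 DCLK cycles.
 */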
2132 
2133 /*
2134  * get_sdram_scrub_rate		This routine converts the current scrub rate
2135  *				value into byte/sec bandwidth according to the
2136  *				SCRUBINTERVAL formula found in the datasheet.
2137  */
2138 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2139 {
2140 	struct i7core_pvt *pvt = mci->pvt_info;
2141 	struct pci_dev *pdev;
2142 	const u32 cache_line_size = 64;
2143 	const u32 freq_dclk_mhz = pvt->dclk_freq;
2144 	unsigned long long scrub_rate;
2145 	u32 scrubval;
2146 
2147 	/* Get data from the MC register, function 2 */
2148 	pdev = pvt->pci_mcr[2];
2149 	if (!pdev)
2150 		return -ENODEV;
2151 
2152 	/* Get current scrub control data */
2153 	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2154 
2155 	/* Mask the highest 8 bits to 0 */
2156 	scrubval &=  SCRUBINTERVAL_MASK;
2157 	if (!scrubval)
2158 		return 0;
2159 
2160 	/* Convert the scrub rate value into byte/sec bandwidth */
2161 	scrub_rate =  (unsigned long long)freq_dclk_mhz *
2162 		1000000 * cache_line_size;
2163 	do_div(scrub_rate, scrubval);
2164 	return (int)scrub_rate;
2165 }
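
/*
 * This is the inverse of the computation in set_sdram_scrub_rate().
 * Continuing the hypothetical example above, scrubval = 10240 at 800 MHz
 * reads back as 800 * 1000000 * 64 / 10240 = 5,000,000 bytes/sec, i.e.
 * the bandwidth that was programmed.
 */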
2166 
2167 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2168 {
2169 	struct i7core_pvt *pvt = mci->pvt_info;
2170 	u32 pci_lock;
2171 
2172 	/* Unlock writes to pci registers */
2173 	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2174 	pci_lock &= ~0x3;
2175 	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2176 			       pci_lock | MC_CFG_UNLOCK);
2177 
2178 	mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2179 	mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2180 }
2181 
2182 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2183 {
2184 	struct i7core_pvt *pvt = mci->pvt_info;
2185 	u32 pci_lock;
2186 
2187 	/* Lock writes to pci registers */
2188 	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2189 	pci_lock &= ~0x3;
2190 	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2191 			       pci_lock | MC_CFG_LOCK);
2192 }
2193 
2194 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2195 {
2196 	pvt->i7core_pci = edac_pci_create_generic_ctl(
2197 						&pvt->i7core_dev->pdev[0]->dev,
2198 						EDAC_MOD_STR);
2199 	if (unlikely(!pvt->i7core_pci))
2200 		i7core_printk(KERN_WARNING,
2201 			      "Unable to setup PCI error report via EDAC\n");
2202 }
2203 
2204 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2205 {
2206 	if (likely(pvt->i7core_pci))
2207 		edac_pci_release_generic_ctl(pvt->i7core_pci);
2208 	else
2209 		i7core_printk(KERN_ERR,
2210 				"Couldn't find mem_ctl_info for socket %d\n",
2211 				pvt->i7core_dev->socket);
2212 	pvt->i7core_pci = NULL;
2213 }
2214 
2215 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2216 {
2217 	struct mem_ctl_info *mci = i7core_dev->mci;
2218 	struct i7core_pvt *pvt;
2219 
2220 	if (unlikely(!mci || !mci->pvt_info)) {
2221 		debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
2222 			__func__, &i7core_dev->pdev[0]->dev);
2223 
2224 		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2225 		return;
2226 	}
2227 
2228 	pvt = mci->pvt_info;
2229 
2230 	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2231 		__func__, mci, &i7core_dev->pdev[0]->dev);
2232 
2233 	/* Disable scrubrate setting */
2234 	if (pvt->enable_scrub)
2235 		disable_sdram_scrub_setting(mci);
2236 
2237 	atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec);
2238 
2239 	/* Disable EDAC polling */
2240 	i7core_pci_ctl_release(pvt);
2241 
2242 	/* Remove MC sysfs nodes */
2243 	edac_mc_del_mc(mci->dev);
2244 
2245 	debugf1("%s: free mci struct\n", mci->ctl_name);
2246 	kfree(mci->ctl_name);
2247 	edac_mc_free(mci);
2248 	i7core_dev->mci = NULL;
2249 }
2250 
2251 static int i7core_register_mci(struct i7core_dev *i7core_dev)
2252 {
2253 	struct mem_ctl_info *mci;
2254 	struct i7core_pvt *pvt;
2255 	int rc, channels, csrows;
2256 
2257 	/* Check the number of active and not disabled channels */
2258 	rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
2259 	if (unlikely(rc < 0))
2260 		return rc;
2261 
2262 	/* allocate a new MC control structure */
2263 	mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
2264 	if (unlikely(!mci))
2265 		return -ENOMEM;
2266 
2267 	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2268 		__func__, mci, &i7core_dev->pdev[0]->dev);
2269 
2270 	pvt = mci->pvt_info;
2271 	memset(pvt, 0, sizeof(*pvt));
2272 
2273 	/* Associates i7core_dev and mci for future usage */
2274 	pvt->i7core_dev = i7core_dev;
2275 	i7core_dev->mci = mci;
2276 
2277 	/*
2278 	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2279 	 * mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2280 	 * memory channels
2281 	 */
2282 	mci->mtype_cap = MEM_FLAG_DDR3;
2283 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
2284 	mci->edac_cap = EDAC_FLAG_NONE;
2285 	mci->mod_name = "i7core_edac.c";
2286 	mci->mod_ver = I7CORE_REVISION;
2287 	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
2288 				  i7core_dev->socket);
2289 	mci->dev_name = pci_name(i7core_dev->pdev[0]);
2290 	mci->ctl_page_to_phys = NULL;
2291 
2292 	/* Store pci devices at mci for faster access */
2293 	rc = mci_bind_devs(mci, i7core_dev);
2294 	if (unlikely(rc < 0))
2295 		goto fail0;
2296 
2297 	if (pvt->is_registered)
2298 		mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
2299 	else
2300 		mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
2301 
2302 	/* Get dimm basic config */
2303 	get_dimm_config(mci);
2304 	/* record ptr to the generic device */
2305 	mci->dev = &i7core_dev->pdev[0]->dev;
2306 	/* Set the function pointer to an actual operation function */
2307 	mci->edac_check = i7core_check_error;
2308 
2309 	/* Enable scrubrate setting */
2310 	if (pvt->enable_scrub)
2311 		enable_sdram_scrub_setting(mci);
2312 
2313 	/* add this new MC control structure to EDAC's list of MCs */
2314 	if (unlikely(edac_mc_add_mc(mci))) {
2315 		debugf0("MC: " __FILE__
2316 			": %s(): failed edac_mc_add_mc()\n", __func__);
2317 		/* FIXME: perhaps some code should go here that disables error
2318 		 * reporting if we just enabled it
2319 		 */
2320 
2321 		rc = -EINVAL;
2322 		goto fail0;
2323 	}
2324 
2325 	/* Default error mask is any memory */
2326 	pvt->inject.channel = 0;
2327 	pvt->inject.dimm = -1;
2328 	pvt->inject.rank = -1;
2329 	pvt->inject.bank = -1;
2330 	pvt->inject.page = -1;
2331 	pvt->inject.col = -1;
2332 
2333 	/* allocating generic PCI control info */
2334 	i7core_pci_ctl_create(pvt);
2335 
2336 	/* DCLK for scrub rate setting */
2337 	pvt->dclk_freq = get_dclk_freq();
2338 
2339 	atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec);
2340 
2341 	return 0;
2342 
2343 fail0:
2344 	kfree(mci->ctl_name);
2345 	edac_mc_free(mci);
2346 	i7core_dev->mci = NULL;
2347 	return rc;
2348 }
2349 
2350 /*
2351  *	i7core_probe	Probe for ONE instance of device to see if it is
2352  *			present.
2353  *	return:
2354  *		0 if a device was found
2355  *		< 0 for an error code
2356  */
2357 
2358 static int __devinit i7core_probe(struct pci_dev *pdev,
2359 				  const struct pci_device_id *id)
2360 {
2361 	int rc, count = 0;
2362 	struct i7core_dev *i7core_dev;
2363 
2364 	/* get the pci devices we want to reserve for our use */
2365 	mutex_lock(&i7core_edac_lock);
2366 
2367 	/*
2368 	 * All memory controllers are allocated at the first pass.
2369 	 */
2370 	if (unlikely(probed >= 1)) {
2371 		mutex_unlock(&i7core_edac_lock);
2372 		return -ENODEV;
2373 	}
2374 	probed++;
2375 
2376 	rc = i7core_get_all_devices();
2377 	if (unlikely(rc < 0))
2378 		goto fail0;
2379 
2380 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2381 		count++;
2382 		rc = i7core_register_mci(i7core_dev);
2383 		if (unlikely(rc < 0))
2384 			goto fail1;
2385 	}
2386 
2387 	/*
2388 	 * Nehalem-EX uses a different memory controller. However, as the
2389 	 * memory controller is not visible on some Nehalem/Nehalem-EP, we
2390 	 * need to indirectly probe via a X58 PCI device. The same devices
2391  * need to probe indirectly via an X58 PCI device. The same devices
2392 	 * probe routine needs to return -ENODEV, as the actual Memory
2393 	 * Controller registers won't be detected.
2394 	 */
2395 	if (!count) {
2396 		rc = -ENODEV;
2397 		goto fail1;
2398 	}
2399 
2400 	i7core_printk(KERN_INFO,
2401 		      "Driver loaded, %d memory controller(s) found.\n",
2402 		      count);
2403 
2404 	mutex_unlock(&i7core_edac_lock);
2405 	return 0;
2406 
2407 fail1:
2408 	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2409 		i7core_unregister_mci(i7core_dev);
2410 
2411 	i7core_put_all_devices();
2412 fail0:
2413 	mutex_unlock(&i7core_edac_lock);
2414 	return rc;
2415 }
2416 
2417 /*
2418  *	i7core_remove	destructor for one instance of device
2419  *
2420  */
2421 static void __devexit i7core_remove(struct pci_dev *pdev)
2422 {
2423 	struct i7core_dev *i7core_dev;
2424 
2425 	debugf0(__FILE__ ": %s()\n", __func__);
2426 
2427 	/*
2428 	 * we have a problem here: the pdev value for removal will be wrong, since
2429 	 * it will point to the X58 register used to detect that the machine
2430 	 * is a Nehalem or newer design. However, due to the way several PCI
2431 	 * devices are grouped together to provide MC functionality, we need
2432 	 * to use a different method for releasing the devices
2433 	 */
2434 
2435 	mutex_lock(&i7core_edac_lock);
2436 
2437 	if (unlikely(!probed)) {
2438 		mutex_unlock(&i7core_edac_lock);
2439 		return;
2440 	}
2441 
2442 	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2443 		i7core_unregister_mci(i7core_dev);
2444 
2445 	/* Release PCI resources */
2446 	i7core_put_all_devices();
2447 
2448 	probed--;
2449 
2450 	mutex_unlock(&i7core_edac_lock);
2451 }
2452 
2453 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2454 
2455 /*
2456  *	i7core_driver	pci_driver structure for this module
2457  *
2458  */
2459 static struct pci_driver i7core_driver = {
2460 	.name     = "i7core_edac",
2461 	.probe    = i7core_probe,
2462 	.remove   = __devexit_p(i7core_remove),
2463 	.id_table = i7core_pci_tbl,
2464 };
2465 
2466 /*
2467  *	i7core_init		Module entry function
2468  *			Try to initialize this module for its devices
2469  */
2470 static int __init i7core_init(void)
2471 {
2472 	int pci_rc;
2473 
2474 	debugf2("MC: " __FILE__ ": %s()\n", __func__);
2475 
2476 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
2477 	opstate_init();
2478 
2479 	if (use_pci_fixup)
2480 		i7core_xeon_pci_fixup(pci_dev_table);
2481 
2482 	pci_rc = pci_register_driver(&i7core_driver);
2483 
2484 	if (pci_rc >= 0)
2485 		return 0;
2486 
2487 	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2488 		      pci_rc);
2489 
2490 	return pci_rc;
2491 }
2492 
2493 /*
2494  *	i7core_exit()	Module exit function
2495  *			Unregister the driver
2496  */
2497 static void __exit i7core_exit(void)
2498 {
2499 	debugf2("MC: " __FILE__ ": %s()\n", __func__);
2500 	pci_unregister_driver(&i7core_driver);
2501 }
2502 
2503 module_init(i7core_init);
2504 module_exit(i7core_exit);
2505 
2506 MODULE_LICENSE("GPL");
2507 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2508 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2509 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2510 		   I7CORE_REVISION);
2511 
2512 module_param(edac_op_state, int, 0444);
2513 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2514