xref: /openbmc/linux/drivers/edac/i7core_edac.c (revision e97d7e38)
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *	 Mauro Carvalho Chehab
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *	http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27 
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/dmi.h>
35 #include <linux/edac.h>
36 #include <linux/mmzone.h>
37 #include <linux/smp.h>
38 #include <asm/mce.h>
39 #include <asm/processor.h>
40 #include <asm/div64.h>
41 
42 #include "edac_core.h"
43 
/* Static vars */
/* Global list of i7core_dev entries; alloc_i7core_dev() appends to it */
static LIST_HEAD(i7core_edac_list);
static DEFINE_MUTEX(i7core_edac_lock);
static int probed;

/* Module parameter exposed with mode 0444; zero unless set at load time */
static int use_pci_fixup;
module_param(use_pci_fixup, int, 0444);
MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255, and are not reported by BIOS.
 * So far only devices with up to 2 sockets have been found. To support
 * systems with more QPI (Quick Path Interconnect) sockets, just increase
 * this number.
 */
#define MAX_SOCKET_BUSES	2
59 
60 
/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0"
#define EDAC_MOD_STR      "i7core_edac"

/*
 * Debug macros
 *
 * Thin wrappers that forward to the EDAC core print helpers with the
 * fixed "i7core" prefix string.
 */
#define i7core_printk(level, fmt, arg...)			\
	edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
75 
/*
 * i7core Memory Controller Registers
 *
 * Throughout this section an unindented define is a PCI config-space
 * offset; the indented defines that follow it are values, bit flags or
 * field accessors for that register.
 */

	/* OFFSETS for Device 0 Function 0 */

#define MC_CFG_CONTROL	0x90
  #define MC_CFG_UNLOCK		0x02
  #define MC_CFG_LOCK		0x00

	/* OFFSETS for Device 3 Function 0 */

/* Read by get_dimm_config() into struct i7core_info */
#define MC_CONTROL	0x48
#define MC_STATUS	0x4c
#define MC_MAX_DOD	0x64
91 
/*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 *
 * Each accessor extracts a 15-bit corrected-error count for one DIMM.
 */

#define MC_TEST_ERR_RCV1	0x60
  #define DIMM2_COR_ERR(r)			((r) & 0x7fff)

#define MC_TEST_ERR_RCV0	0x64
  #define DIMM1_COR_ERR(r)			(((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)			((r) & 0x7fff)
103 
/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
#define MC_SSRCONTROL		0x48
  #define SSR_MODE_DISABLE	0x00
  #define SSR_MODE_ENABLE	0x01
  #define SSR_MODE_MASK		0x03

#define MC_SCRUB_CONTROL	0x4c
  #define STARTSCRUB		(1 << 24)
  #define SCRUBINTERVAL_MASK    0xffffff

/* Six consecutive 32-bit corrected-ECC counter registers */
#define MC_COR_ECC_CNT_0	0x80
#define MC_COR_ECC_CNT_1	0x84
#define MC_COR_ECC_CNT_2	0x88
#define MC_COR_ECC_CNT_3	0x8c
#define MC_COR_ECC_CNT_4	0x90
#define MC_COR_ECC_CNT_5	0x94

/* Each counter register packs two 15-bit counts: bits 30:16 and bits 14:0 */
#define DIMM_TOP_COR_ERR(r)			(((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)			((r) & 0x7fff)
123 
124 
	/* OFFSETS for Devices 4,5 and 6 Function 0 */

/* Flags tested by get_dimm_config() to fill struct i7core_channel and mtype */
#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
  #define THREE_DIMMS_PRESENT		(1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
  #define QUAD_RANK_PRESENT		(1 << 22)
  #define REGISTERED_DIMM		(1 << 15)
132 
/*
 * MC_CHANNEL_MAPPER holds one 6-bit group per channel: a 3-bit write field
 * in the low half and a 3-bit read field in the high half. Both accessors
 * return the stored field minus one.
 * The channel argument is parenthesized so that expression arguments
 * (e.g. "i + 1") expand with the intended precedence.
 */
#define MC_CHANNEL_MAPPER	0x60
  #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)
136 
#define MC_CHANNEL_RANK_PRESENT 0x7c
  #define RANK_PRESENT_MASK		0xffff

/*
 * Error-injection registers programmed by i7core_inject_enable_store():
 * a 64-bit address match (two dwords at 0xf0/0xf4), the ECC bit-flip mask
 * and the injection control register.
 */
#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8
#define MC_CHANNEL_ERROR_INJECT	0xfc
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  /* NOTE: MASK_CACHELINE and MASK_FULL_CACHELINE share the value 0x06 */
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01
151 
152 	/* OFFSETS for Devices 4,5 and 6 Function 1 */
153 
154 #define MC_DOD_CH_DIMM0		0x48
155 #define MC_DOD_CH_DIMM1		0x4c
156 #define MC_DOD_CH_DIMM2		0x50
157   #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
158   #define RANKOFFSET(x)		((x & RANKOFFSET_MASK) >> 10)
159   #define DIMM_PRESENT_MASK	(1 << 9)
160   #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
161   #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
162   #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
163   #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
164   #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
165   #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
166   #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
167   #define MC_DOD_NUMCOL_MASK		3
168   #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)
169 
#define MC_RANK_PRESENT		0x7c

/* Eight consecutive 32-bit SAG registers, dumped by get_dimm_config() */
#define MC_SAG_CH_0	0x80
#define MC_SAG_CH_1	0x84
#define MC_SAG_CH_2	0x88
#define MC_SAG_CH_3	0x8c
#define MC_SAG_CH_4	0x90
#define MC_SAG_CH_5	0x94
#define MC_SAG_CH_6	0x98
#define MC_SAG_CH_7	0x9c

/* Eight consecutive 32-bit RIR limit registers; the limit is the low 10 bits */
#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4C
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5C
#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)

/* RIR way register: rank in bits 2:0, offset in bits 13:3 */
#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK		0x7
194 
/*
 * i7core structs
 */

#define NUM_CHANS 3		/* Channels per memory controller */
#define MAX_DIMMS 3		/* Max DIMMS per channel */
#define MAX_MCR_FUNC  4		/* Sizes i7core_pvt::pci_mcr (MAX_MCR_FUNC + 1 slots) */
#define MAX_CHAN_FUNC 3		/* Sizes i7core_pvt::pci_ch (MAX_CHAN_FUNC + 1 slots) */
203 
/* Raw register snapshots taken by get_dimm_config() */
struct i7core_info {
	u32	mc_control;	/* MC_CONTROL */
	u32	mc_status;	/* MC_STATUS */
	u32	max_dod;	/* MC_MAX_DOD */
	u32	ch_map;		/* MC_CHANNEL_MAPPER */
};
210 
211 
/* Error-injection parameters, set through the inject sysfs attributes */
struct i7core_inject {
	int	enable;		/* non-zero while injection is armed */

	u32	section;	/* half-cacheline selection, 0..3 */
	u32	type;		/* bit 0 repeat, bit 1 ECC, bit 2 parity */
	u32	eccmask;	/* written to MC_CHANNEL_ERROR_MASK */

	/* Error address mask; a negative value means "any" */
	int channel, dimm, rank, bank, page, col;
};
222 
/* Per-channel flags decoded from MC_CHANNEL_DIMM_INIT_PARAMS */
struct i7core_channel {
	bool		is_3dimms_present;	/* THREE_DIMMS_PRESENT */
	bool		is_single_4rank;	/* SINGLE_QUAD_RANK_PRESENT */
	bool		has_4rank;		/* QUAD_RANK_PRESENT */
	/*
	 * Compared against 2 by i7core_inject_enable_store() to pick the
	 * dimm/rank bit layout. NOTE(review): no assignment to this field is
	 * visible in this part of the file - confirm where it is populated.
	 */
	u32		dimms;
};
229 
/* One expected PCI function; normally initialized through PCI_DESCR() */
struct pci_id_descr {
	int			dev;		/* PCI device (slot) number */
	int			func;		/* PCI function number */
	int 			dev_id;		/* PCI device ID */
	int			optional;	/* non-zero if the function may be absent */
};
236 
/* A descriptor array together with its element count */
struct pci_id_table {
	const struct pci_id_descr	*descr;
	int				n_devs;		/* number of entries in descr */
};
241 
/* Per-socket bookkeeping, linked into i7core_edac_list */
struct i7core_dev {
	struct list_head	list;
	u8			socket;		/* lookup key used by get_i7core_dev() */
	struct pci_dev		**pdev;		/* array of n_devs pointers */
	int			n_devs;
	struct mem_ctl_info	*mci;
};
249 
/* Driver-private state reached through mem_ctl_info::pvt_info */
struct i7core_pvt {
	struct device *addrmatch_dev, *chancounts_dev;

	/* Target of the MC_CFG_CONTROL writes during error injection */
	struct pci_dev	*pci_noncore;
	/* pci_mcr[0] is the device get_dimm_config() reads MC_CONTROL from */
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
	/* Indexed as [channel][function] */
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

	struct i7core_dev *i7core_dev;

	struct i7core_info	info;
	struct i7core_inject	inject;
	struct i7core_channel	channel[NUM_CHANS];

	int		ce_count_available;

			/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
			/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	bool		is_registered, enable_scrub;

	/* Fifo double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned		mce_in, mce_out;

	/* Count indicator to show errors not got */
	unsigned		mce_overrun;

	/* DCLK Frequency used for computing scrub rate */
	int			dclk_freq;

	/* Struct to control EDAC polling */
	struct edac_pci_ctl_info *i7core_pci;
};
290 
/*
 * Designated-initializer fragment for struct pci_id_descr. It expands to
 * the .dev, .func and .dev_id members only, so a caller may append
 * further members (e.g. ".optional = 1") inside the same braces.
 */
#define PCI_DESCR(device, function, device_id)	\
	.dev = (device),			\
	.func = (function),			\
	.dev_id = (device_id)
295 
/* PCI functions expected for the PCI_DEVICE_ID_INTEL_I7_* family */
static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
	 * the probing code needs to test for the other address in case of
	 * failure of this one
	 */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },

};
332 
/*
 * PCI functions expected for the PCI_DEVICE_ID_INTEL_LYNNFIELD_* family.
 * Only channels 0 and 1 (devices 4 and 5) are listed, and there is no
 * device 3 function 2 entry.
 */
static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

	/*
	 * This PCI device has an alternate address on some
	 * processors, such as the Core i7 860
	 */
	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};
354 
/* PCI functions expected for the PCI_DEVICE_ID_INTEL_LYNNFIELD_*_REV2 family */
static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

		/* Generic Non-core registers */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },

};
385 
386 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
387 static const struct pci_id_table pci_dev_table[] = {
388 	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
389 	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
390 	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
391 	{0,}			/* 0 terminated list. */
392 };
393 
/*
 *	pci_device_id table listing the devices we are looking for;
 *	the list ends with an all-zero entry.
 */
static const struct pci_device_id i7core_pci_tbl[] = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
	{0,}			/* 0 terminated list. */
};
402 
403 /****************************************************************************
404 			Ancillary status routines
405  ****************************************************************************/
406 
407 	/* MC_CONTROL bits */
408 #define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + ch)))
409 #define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))
410 
411 	/* MC_STATUS bits */
412 #define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
413 #define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << ch))
414 
415 	/* MC_MAX_DOD read functions */
416 static inline int numdimms(u32 dimms)
417 {
418 	return (dimms & 0x3) + 1;
419 }
420 
/*
 * Decode the 2-bit rank field: codes 0..2 mean 1, 2 and 4 ranks;
 * code 3 yields -EINVAL.
 */
static inline int numrank(u32 rank)
{
	const u32 code = rank & 0x3;

	if (code == 3)
		return -EINVAL;

	return 1 << code;
}
427 
/*
 * Decode the 2-bit bank field: codes 0..2 mean 4, 8 and 16 banks;
 * code 3 yields -EINVAL.
 */
static inline int numbank(u32 bank)
{
	const u32 code = bank & 0x3;

	if (code == 3)
		return -EINVAL;

	return 4 << code;
}
434 
/*
 * Decode the 3-bit row field: codes 0..4 mean 2^12 .. 2^16 rows;
 * codes 5..7 yield -EINVAL.
 */
static inline int numrow(u32 row)
{
	const u32 code = row & 0x7;

	if (code > 4)
		return -EINVAL;

	return 1 << (12 + code);
}
444 
/*
 * Decode the 2-bit column field: codes 0..2 mean 2^10 .. 2^12 columns;
 * code 3 yields -EINVAL.
 */
static inline int numcol(u32 col)
{
	const u32 code = col & 0x3;

	if (code == 3)
		return -EINVAL;

	return 1 << (10 + code);
}
452 
453 static struct i7core_dev *get_i7core_dev(u8 socket)
454 {
455 	struct i7core_dev *i7core_dev;
456 
457 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
458 		if (i7core_dev->socket == socket)
459 			return i7core_dev;
460 	}
461 
462 	return NULL;
463 }
464 
465 static struct i7core_dev *alloc_i7core_dev(u8 socket,
466 					   const struct pci_id_table *table)
467 {
468 	struct i7core_dev *i7core_dev;
469 
470 	i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
471 	if (!i7core_dev)
472 		return NULL;
473 
474 	i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
475 				   GFP_KERNEL);
476 	if (!i7core_dev->pdev) {
477 		kfree(i7core_dev);
478 		return NULL;
479 	}
480 
481 	i7core_dev->socket = socket;
482 	i7core_dev->n_devs = table->n_devs;
483 	list_add_tail(&i7core_dev->list, &i7core_edac_list);
484 
485 	return i7core_dev;
486 }
487 
488 static void free_i7core_dev(struct i7core_dev *i7core_dev)
489 {
490 	list_del(&i7core_dev->list);
491 	kfree(i7core_dev->pdev);
492 	kfree(i7core_dev);
493 }
494 
495 /****************************************************************************
496 			Memory check routines
497  ****************************************************************************/
498 
499 static int get_dimm_config(struct mem_ctl_info *mci)
500 {
501 	struct i7core_pvt *pvt = mci->pvt_info;
502 	struct pci_dev *pdev;
503 	int i, j;
504 	enum edac_type mode;
505 	enum mem_type mtype;
506 	struct dimm_info *dimm;
507 
508 	/* Get data from the MC register, function 0 */
509 	pdev = pvt->pci_mcr[0];
510 	if (!pdev)
511 		return -ENODEV;
512 
513 	/* Device 3 function 0 reads */
514 	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
515 	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
516 	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
517 	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
518 
519 	edac_dbg(0, "QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
520 		 pvt->i7core_dev->socket, pvt->info.mc_control,
521 		 pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map);
522 
523 	if (ECC_ENABLED(pvt)) {
524 		edac_dbg(0, "ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
525 		if (ECCx8(pvt))
526 			mode = EDAC_S8ECD8ED;
527 		else
528 			mode = EDAC_S4ECD4ED;
529 	} else {
530 		edac_dbg(0, "ECC disabled\n");
531 		mode = EDAC_NONE;
532 	}
533 
534 	/* FIXME: need to handle the error codes */
535 	edac_dbg(0, "DOD Max limits: DIMMS: %d, %d-ranked, %d-banked x%x x 0x%x\n",
536 		 numdimms(pvt->info.max_dod),
537 		 numrank(pvt->info.max_dod >> 2),
538 		 numbank(pvt->info.max_dod >> 4),
539 		 numrow(pvt->info.max_dod >> 6),
540 		 numcol(pvt->info.max_dod >> 9));
541 
542 	for (i = 0; i < NUM_CHANS; i++) {
543 		u32 data, dimm_dod[3], value[8];
544 
545 		if (!pvt->pci_ch[i][0])
546 			continue;
547 
548 		if (!CH_ACTIVE(pvt, i)) {
549 			edac_dbg(0, "Channel %i is not active\n", i);
550 			continue;
551 		}
552 		if (CH_DISABLED(pvt, i)) {
553 			edac_dbg(0, "Channel %i is disabled\n", i);
554 			continue;
555 		}
556 
557 		/* Devices 4-6 function 0 */
558 		pci_read_config_dword(pvt->pci_ch[i][0],
559 				MC_CHANNEL_DIMM_INIT_PARAMS, &data);
560 
561 
562 		if (data & THREE_DIMMS_PRESENT)
563 			pvt->channel[i].is_3dimms_present = true;
564 
565 		if (data & SINGLE_QUAD_RANK_PRESENT)
566 			pvt->channel[i].is_single_4rank = true;
567 
568 		if (data & QUAD_RANK_PRESENT)
569 			pvt->channel[i].has_4rank = true;
570 
571 		if (data & REGISTERED_DIMM)
572 			mtype = MEM_RDDR3;
573 		else
574 			mtype = MEM_DDR3;
575 
576 		/* Devices 4-6 function 1 */
577 		pci_read_config_dword(pvt->pci_ch[i][1],
578 				MC_DOD_CH_DIMM0, &dimm_dod[0]);
579 		pci_read_config_dword(pvt->pci_ch[i][1],
580 				MC_DOD_CH_DIMM1, &dimm_dod[1]);
581 		pci_read_config_dword(pvt->pci_ch[i][1],
582 				MC_DOD_CH_DIMM2, &dimm_dod[2]);
583 
584 		edac_dbg(0, "Ch%d phy rd%d, wr%d (0x%08x): %s%s%s%cDIMMs\n",
585 			 i,
586 			 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
587 			 data,
588 			 pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
589 			 pvt->channel[i].is_3dimms_present ? "SINGLE_4R " : "",
590 			 pvt->channel[i].has_4rank ? "HAS_4R " : "",
591 			 (data & REGISTERED_DIMM) ? 'R' : 'U');
592 
593 		for (j = 0; j < 3; j++) {
594 			u32 banks, ranks, rows, cols;
595 			u32 size, npages;
596 
597 			if (!DIMM_PRESENT(dimm_dod[j]))
598 				continue;
599 
600 			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
601 				       i, j, 0);
602 			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
603 			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
604 			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
605 			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
606 
607 			/* DDR3 has 8 I/O banks */
608 			size = (rows * cols * banks * ranks) >> (20 - 3);
609 
610 			edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
611 				 j, size,
612 				 RANKOFFSET(dimm_dod[j]),
613 				 banks, ranks, rows, cols);
614 
615 			npages = MiB_TO_PAGES(size);
616 
617 			dimm->nr_pages = npages;
618 
619 			switch (banks) {
620 			case 4:
621 				dimm->dtype = DEV_X4;
622 				break;
623 			case 8:
624 				dimm->dtype = DEV_X8;
625 				break;
626 			case 16:
627 				dimm->dtype = DEV_X16;
628 				break;
629 			default:
630 				dimm->dtype = DEV_UNKNOWN;
631 			}
632 
633 			snprintf(dimm->label, sizeof(dimm->label),
634 				 "CPU#%uChannel#%u_DIMM#%u",
635 				 pvt->i7core_dev->socket, i, j);
636 			dimm->grain = 8;
637 			dimm->edac_mode = mode;
638 			dimm->mtype = mtype;
639 		}
640 
641 		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
642 		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
643 		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
644 		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
645 		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
646 		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
647 		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
648 		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
649 		edac_dbg(1, "\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
650 		for (j = 0; j < 8; j++)
651 			edac_dbg(1, "\t\t%#x\t%#x\t%#x\n",
652 				 (value[j] >> 27) & 0x1,
653 				 (value[j] >> 24) & 0x7,
654 				 (value[j] & ((1 << 24) - 1)));
655 	}
656 
657 	return 0;
658 }
659 
660 /****************************************************************************
661 			Error insertion routines
662  ****************************************************************************/
663 
/* Map a pointer to the 'dev' member of a mem_ctl_info back to its container */
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
665 
666 /* The i7core has independent error injection features per channel.
667    However, to have a simpler code, we don't allow enabling error injection
668    on more than one channel.
669    Also, since a change at an inject parameter will be applied only at enable,
670    we're disabling error injection on all write calls to the sysfs nodes that
671    controls the error code injection.
672  */
673 static int disable_inject(const struct mem_ctl_info *mci)
674 {
675 	struct i7core_pvt *pvt = mci->pvt_info;
676 
677 	pvt->inject.enable = 0;
678 
679 	if (!pvt->pci_ch[pvt->inject.channel][0])
680 		return -ENODEV;
681 
682 	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
683 				MC_CHANNEL_ERROR_INJECT, 0);
684 
685 	return 0;
686 }
687 
688 /*
689  * i7core inject inject.section
690  *
691  *	accept and store error injection inject.section value
692  *	bit 0 - refers to the lower 32-byte half cacheline
693  *	bit 1 - refers to the upper 32-byte half cacheline
694  */
695 static ssize_t i7core_inject_section_store(struct device *dev,
696 					   struct device_attribute *mattr,
697 					   const char *data, size_t count)
698 {
699 	struct mem_ctl_info *mci = to_mci(dev);
700 	struct i7core_pvt *pvt = mci->pvt_info;
701 	unsigned long value;
702 	int rc;
703 
704 	if (pvt->inject.enable)
705 		disable_inject(mci);
706 
707 	rc = kstrtoul(data, 10, &value);
708 	if ((rc < 0) || (value > 3))
709 		return -EIO;
710 
711 	pvt->inject.section = (u32) value;
712 	return count;
713 }
714 
715 static ssize_t i7core_inject_section_show(struct device *dev,
716 					  struct device_attribute *mattr,
717 					  char *data)
718 {
719 	struct mem_ctl_info *mci = to_mci(dev);
720 	struct i7core_pvt *pvt = mci->pvt_info;
721 	return sprintf(data, "0x%08x\n", pvt->inject.section);
722 }
723 
724 /*
725  * i7core inject.type
726  *
727  *	accept and store error injection inject.section value
728  *	bit 0 - repeat enable - Enable error repetition
729  *	bit 1 - inject ECC error
730  *	bit 2 - inject parity error
731  */
732 static ssize_t i7core_inject_type_store(struct device *dev,
733 					struct device_attribute *mattr,
734 					const char *data, size_t count)
735 {
736 	struct mem_ctl_info *mci = to_mci(dev);
737 struct i7core_pvt *pvt = mci->pvt_info;
738 	unsigned long value;
739 	int rc;
740 
741 	if (pvt->inject.enable)
742 		disable_inject(mci);
743 
744 	rc = kstrtoul(data, 10, &value);
745 	if ((rc < 0) || (value > 7))
746 		return -EIO;
747 
748 	pvt->inject.type = (u32) value;
749 	return count;
750 }
751 
752 static ssize_t i7core_inject_type_show(struct device *dev,
753 				       struct device_attribute *mattr,
754 				       char *data)
755 {
756 	struct mem_ctl_info *mci = to_mci(dev);
757 	struct i7core_pvt *pvt = mci->pvt_info;
758 
759 	return sprintf(data, "0x%08x\n", pvt->inject.type);
760 }
761 
762 /*
763  * i7core_inject_inject.eccmask_store
764  *
765  * The type of error (UE/CE) will depend on the inject.eccmask value:
766  *   Any bits set to a 1 will flip the corresponding ECC bit
767  *   Correctable errors can be injected by flipping 1 bit or the bits within
768  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
769  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
770  *   uncorrectable error to be injected.
771  */
772 static ssize_t i7core_inject_eccmask_store(struct device *dev,
773 					   struct device_attribute *mattr,
774 					   const char *data, size_t count)
775 {
776 	struct mem_ctl_info *mci = to_mci(dev);
777 	struct i7core_pvt *pvt = mci->pvt_info;
778 	unsigned long value;
779 	int rc;
780 
781 	if (pvt->inject.enable)
782 		disable_inject(mci);
783 
784 	rc = kstrtoul(data, 10, &value);
785 	if (rc < 0)
786 		return -EIO;
787 
788 	pvt->inject.eccmask = (u32) value;
789 	return count;
790 }
791 
792 static ssize_t i7core_inject_eccmask_show(struct device *dev,
793 					  struct device_attribute *mattr,
794 					  char *data)
795 {
796 	struct mem_ctl_info *mci = to_mci(dev);
797 	struct i7core_pvt *pvt = mci->pvt_info;
798 
799 	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
800 }
801 
/*
 * i7core_addrmatch
 *
 * DECLARE_ADDR_MATCH(param, limit) generates the sysfs store/show pair
 * for one address-match criterion (pvt->inject.<param>).
 *
 * store: accepts "any" (case-insensitive, optional trailing newline),
 *   stored as -1, or a decimal number strictly below @limit. Any armed
 *   injection is disabled first. Returns @count on success, -EIO on bad
 *   input.
 *   The number is parsed into an unsigned long so that kstrtoul() gets
 *   the pointer type it expects, and so that huge inputs cannot wrap to
 *   a negative value and slip past the limit check.
 * show: prints "any" for a negative value, else the value in decimal.
 */

#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
	struct device *dev,					\
	struct device_attribute *mattr,				\
	const char *data, size_t count)				\
{								\
	struct mem_ctl_info *mci = dev_get_drvdata(dev);	\
	struct i7core_pvt *pvt;					\
	unsigned long parsed;					\
	long value;						\
	int rc;							\
								\
	edac_dbg(1, "\n");					\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		rc = kstrtoul(data, 10, &parsed);		\
		if ((rc < 0) || (parsed >= (limit)))		\
			return -EIO;				\
		value = parsed;					\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
	struct device *dev,					\
	struct device_attribute *mattr,				\
	char *data)						\
{								\
	struct mem_ctl_info *mci = dev_get_drvdata(dev);	\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	edac_dbg(1, "pvt=%p\n", pvt);				\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}
858 
/*
 * Declare the device attribute for one address-match criterion, wired to
 * the show/store pair generated by DECLARE_ADDR_MATCH(). Mode is
 * S_IRUGO | S_IWUSR.
 */
#define ATTR_ADDR_MATCH(param)					\
	static DEVICE_ATTR(param, S_IRUGO | S_IWUSR,		\
		    i7core_inject_show_##param,			\
		    i7core_inject_store_##param)

/* The second argument is the exclusive upper bound accepted by the store */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);

ATTR_ADDR_MATCH(channel);
ATTR_ADDR_MATCH(dimm);
ATTR_ADDR_MATCH(rank);
ATTR_ADDR_MATCH(bank);
ATTR_ADDR_MATCH(page);
ATTR_ADDR_MATCH(col);
877 
878 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
879 {
880 	u32 read;
881 	int count;
882 
883 	edac_dbg(0, "setting pci %02x:%02x.%x reg=%02x value=%08x\n",
884 		 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
885 		 where, val);
886 
887 	for (count = 0; count < 10; count++) {
888 		if (count)
889 			msleep(100);
890 		pci_write_config_dword(dev, where, val);
891 		pci_read_config_dword(dev, where, &read);
892 
893 		if (read == val)
894 			return 0;
895 	}
896 
897 	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
898 		"write=%08x. Read=%08x\n",
899 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
900 		where, val, read);
901 
902 	return -EINVAL;
903 }
904 
905 /*
906  * This routine prepares the Memory Controller for error injection.
907  * The error will be injected when some process tries to write to the
908  * memory that matches the given criteria.
909  * The criteria can be set in terms of a mask where dimm, rank, bank, page
910  * and col can be specified.
911  * A -1 value for any of the mask items will make the MCU to ignore
912  * that matching criteria for error injection.
913  *
914  * It should be noticed that the error will only happen after a write operation
915  * on a memory that matches the condition. if REPEAT_EN is not enabled at
916  * inject mask, then it will produce just one error. Otherwise, it will repeat
917  * until the injectmask would be cleaned.
918  *
919  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
920  *    is reliable enough to check if the MC is using the
921  *    three channels. However, this is not clear at the datasheet.
922  */
923 static ssize_t i7core_inject_enable_store(struct device *dev,
924 					  struct device_attribute *mattr,
925 					  const char *data, size_t count)
926 {
927 	struct mem_ctl_info *mci = to_mci(dev);
928 	struct i7core_pvt *pvt = mci->pvt_info;
929 	u32 injectmask;
930 	u64 mask = 0;
931 	int  rc;
932 	long enable;
933 
934 	if (!pvt->pci_ch[pvt->inject.channel][0])
935 		return 0;
936 
937 	rc = kstrtoul(data, 10, &enable);
938 	if ((rc < 0))
939 		return 0;
940 
941 	if (enable) {
942 		pvt->inject.enable = 1;
943 	} else {
944 		disable_inject(mci);
945 		return count;
946 	}
947 
948 	/* Sets pvt->inject.dimm mask */
949 	if (pvt->inject.dimm < 0)
950 		mask |= 1LL << 41;
951 	else {
952 		if (pvt->channel[pvt->inject.channel].dimms > 2)
953 			mask |= (pvt->inject.dimm & 0x3LL) << 35;
954 		else
955 			mask |= (pvt->inject.dimm & 0x1LL) << 36;
956 	}
957 
958 	/* Sets pvt->inject.rank mask */
959 	if (pvt->inject.rank < 0)
960 		mask |= 1LL << 40;
961 	else {
962 		if (pvt->channel[pvt->inject.channel].dimms > 2)
963 			mask |= (pvt->inject.rank & 0x1LL) << 34;
964 		else
965 			mask |= (pvt->inject.rank & 0x3LL) << 34;
966 	}
967 
968 	/* Sets pvt->inject.bank mask */
969 	if (pvt->inject.bank < 0)
970 		mask |= 1LL << 39;
971 	else
972 		mask |= (pvt->inject.bank & 0x15LL) << 30;
973 
974 	/* Sets pvt->inject.page mask */
975 	if (pvt->inject.page < 0)
976 		mask |= 1LL << 38;
977 	else
978 		mask |= (pvt->inject.page & 0xffff) << 14;
979 
980 	/* Sets pvt->inject.column mask */
981 	if (pvt->inject.col < 0)
982 		mask |= 1LL << 37;
983 	else
984 		mask |= (pvt->inject.col & 0x3fff);
985 
986 	/*
987 	 * bit    0: REPEAT_EN
988 	 * bits 1-2: MASK_HALF_CACHELINE
989 	 * bit    3: INJECT_ECC
990 	 * bit    4: INJECT_ADDR_PARITY
991 	 */
992 
993 	injectmask = (pvt->inject.type & 1) |
994 		     (pvt->inject.section & 0x3) << 1 |
995 		     (pvt->inject.type & 0x6) << (3 - 1);
996 
997 	/* Unlock writes to registers - this register is write only */
998 	pci_write_config_dword(pvt->pci_noncore,
999 			       MC_CFG_CONTROL, 0x2);
1000 
1001 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1002 			       MC_CHANNEL_ADDR_MATCH, mask);
1003 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1004 			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1005 
1006 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1007 			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1008 
1009 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1010 			       MC_CHANNEL_ERROR_INJECT, injectmask);
1011 
1012 	/*
1013 	 * This is something undocumented, based on my tests
1014 	 * Without writing 8 to this register, errors aren't injected. Not sure
1015 	 * why.
1016 	 */
1017 	pci_write_config_dword(pvt->pci_noncore,
1018 			       MC_CFG_CONTROL, 8);
1019 
1020 	edac_dbg(0, "Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n",
1021 		 mask, pvt->inject.eccmask, injectmask);
1022 
1023 
1024 	return count;
1025 }
1026 
1027 static ssize_t i7core_inject_enable_show(struct device *dev,
1028 					 struct device_attribute *mattr,
1029 					 char *data)
1030 {
1031 	struct mem_ctl_info *mci = to_mci(dev);
1032 	struct i7core_pvt *pvt = mci->pvt_info;
1033 	u32 injectmask;
1034 
1035 	if (!pvt->pci_ch[pvt->inject.channel][0])
1036 		return 0;
1037 
1038 	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1039 			       MC_CHANNEL_ERROR_INJECT, &injectmask);
1040 
1041 	edac_dbg(0, "Inject error read: 0x%018x\n", injectmask);
1042 
1043 	if (injectmask & 0x0c)
1044 		pvt->inject.enable = 1;
1045 
1046 	return sprintf(data, "%d\n", pvt->inject.enable);
1047 }
1048 
/*
 * DECLARE_COUNTER(n) emits i7core_show_counter_<n>(), a sysfs show handler
 * that prints pvt->udimm_ce_count[n]. The value is only printed once counts
 * are available and the controller is not flagged as registered; otherwise
 * the handler prints "data unavailable".
 */
#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
	struct device *dev,					\
	struct device_attribute *mattr,				\
	char *data)						\
{								\
	struct mem_ctl_info *mci = dev_get_drvdata(dev);	\
	struct i7core_pvt *pvt = mci->pvt_info;			\
								\
	edac_dbg(1, "\n");					\
	if (pvt->ce_count_available && !pvt->is_registered)	\
		return sprintf(data, "%lu\n",			\
			       pvt->udimm_ce_count[param]);	\
	return sprintf(data, "data unavailable\n");		\
}
1064 
/*
 * ATTR_COUNTER(n) declares dev_attr_udimm<n>, backed by
 * i7core_show_counter_<n>(). The attribute has no store method, so it is
 * exposed read-only rather than advertising a write bit that can never
 * succeed.
 */
#define ATTR_COUNTER(param)					\
	static DEVICE_ATTR(udimm##param, S_IRUGO,		\
		    i7core_show_counter_##param,		\
		    NULL)
1069 
/* Show handlers i7core_show_counter_0..2, one per udimm_ce_count[] slot */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);

/* Matching device attributes dev_attr_udimm0..2 */
ATTR_COUNTER(0);
ATTR_COUNTER(1);
ATTR_COUNTER(2);
1077 
1078 /*
1079  * inject_addrmatch device sysfs struct
1080  */
1081 
/* Address-match attributes grouped under addrmatch_grp (NULL terminated) */
static struct attribute *i7core_addrmatch_attrs[] = {
	&dev_attr_channel.attr,
	&dev_attr_dimm.attr,
	&dev_attr_rank.attr,
	&dev_attr_bank.attr,
	&dev_attr_page.attr,
	&dev_attr_col.attr,
	NULL
};

/* Unnamed group wrapping the address-match attributes */
static struct attribute_group addrmatch_grp = {
	.attrs	= i7core_addrmatch_attrs,
};

/* NULL-terminated group list referenced by addrmatch_type.groups */
static const struct attribute_group *addrmatch_groups[] = {
	&addrmatch_grp,
	NULL
};
1100 
/*
 * Release callback of addrmatch_type: logs the device name and frees the
 * struct device itself.
 */
static void addrmatch_release(struct device *device)
{
	const char *devname = dev_name(device);

	edac_dbg(1, "Releasing device %s\n", devname);
	kfree(device);
}
1106 
/*
 * Device type assigned to pvt->addrmatch_dev: supplies the address-match
 * attribute groups and the release callback that frees the device.
 */
static struct device_type addrmatch_type = {
	.groups		= addrmatch_groups,
	.release	= addrmatch_release,
};
1111 
1112 /*
1113  * all_channel_counts sysfs struct
1114  */
1115 
/* Per-DIMM corrected-error counter attributes (NULL terminated) */
static struct attribute *i7core_udimm_counters_attrs[] = {
	&dev_attr_udimm0.attr,
	&dev_attr_udimm1.attr,
	&dev_attr_udimm2.attr,
	NULL
};

/* Unnamed group wrapping the udimm counter attributes */
static struct attribute_group all_channel_counts_grp = {
	.attrs	= i7core_udimm_counters_attrs,
};

/* NULL-terminated group list referenced by all_channel_counts_type.groups */
static const struct attribute_group *all_channel_counts_groups[] = {
	&all_channel_counts_grp,
	NULL
};
1131 
/*
 * Release callback of all_channel_counts_type: logs the device name and
 * frees the struct device itself.
 */
static void all_channel_counts_release(struct device *device)
{
	const char *devname = dev_name(device);

	edac_dbg(1, "Releasing device %s\n", devname);
	kfree(device);
}
1137 
/*
 * Device type assigned to pvt->chancounts_dev: supplies the udimm counter
 * attribute groups and the release callback that frees the device.
 */
static struct device_type all_channel_counts_type = {
	.groups		= all_channel_counts_groups,
	.release	= all_channel_counts_release,
};
1142 
1143 /*
1144  * inject sysfs attributes
1145  */
1146 
/*
 * Read/write attributes created directly on the mci device by
 * i7core_create_sysfs_devices(); each pairs a show and a store handler.
 */
static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
		   i7core_inject_section_show, i7core_inject_section_store);

static DEVICE_ATTR(inject_type, S_IRUGO | S_IWUSR,
		   i7core_inject_type_show, i7core_inject_type_store);


static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR,
		   i7core_inject_eccmask_show, i7core_inject_eccmask_store);

/* Writing this attribute programs (or disables) the injection registers */
static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR,
		   i7core_inject_enable_show, i7core_inject_enable_store);
1159 
1160 static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
1161 {
1162 	struct i7core_pvt *pvt = mci->pvt_info;
1163 	int rc;
1164 
1165 	rc = device_create_file(&mci->dev, &dev_attr_inject_section);
1166 	if (rc < 0)
1167 		return rc;
1168 	rc = device_create_file(&mci->dev, &dev_attr_inject_type);
1169 	if (rc < 0)
1170 		return rc;
1171 	rc = device_create_file(&mci->dev, &dev_attr_inject_eccmask);
1172 	if (rc < 0)
1173 		return rc;
1174 	rc = device_create_file(&mci->dev, &dev_attr_inject_enable);
1175 	if (rc < 0)
1176 		return rc;
1177 
1178 	pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL);
1179 	if (!pvt->addrmatch_dev)
1180 		return -ENOMEM;
1181 
1182 	pvt->addrmatch_dev->type = &addrmatch_type;
1183 	pvt->addrmatch_dev->bus = mci->dev.bus;
1184 	device_initialize(pvt->addrmatch_dev);
1185 	pvt->addrmatch_dev->parent = &mci->dev;
1186 	dev_set_name(pvt->addrmatch_dev, "inject_addrmatch");
1187 	dev_set_drvdata(pvt->addrmatch_dev, mci);
1188 
1189 	edac_dbg(1, "creating %s\n", dev_name(pvt->addrmatch_dev));
1190 
1191 	rc = device_add(pvt->addrmatch_dev);
1192 	if (rc < 0)
1193 		return rc;
1194 
1195 	if (!pvt->is_registered) {
1196 		pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev),
1197 					      GFP_KERNEL);
1198 		if (!pvt->chancounts_dev) {
1199 			put_device(pvt->addrmatch_dev);
1200 			device_del(pvt->addrmatch_dev);
1201 			return -ENOMEM;
1202 		}
1203 
1204 		pvt->chancounts_dev->type = &all_channel_counts_type;
1205 		pvt->chancounts_dev->bus = mci->dev.bus;
1206 		device_initialize(pvt->chancounts_dev);
1207 		pvt->chancounts_dev->parent = &mci->dev;
1208 		dev_set_name(pvt->chancounts_dev, "all_channel_counts");
1209 		dev_set_drvdata(pvt->chancounts_dev, mci);
1210 
1211 		edac_dbg(1, "creating %s\n", dev_name(pvt->chancounts_dev));
1212 
1213 		rc = device_add(pvt->chancounts_dev);
1214 		if (rc < 0)
1215 			return rc;
1216 	}
1217 	return 0;
1218 }
1219 
1220 static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci)
1221 {
1222 	struct i7core_pvt *pvt = mci->pvt_info;
1223 
1224 	edac_dbg(1, "\n");
1225 
1226 	device_remove_file(&mci->dev, &dev_attr_inject_section);
1227 	device_remove_file(&mci->dev, &dev_attr_inject_type);
1228 	device_remove_file(&mci->dev, &dev_attr_inject_eccmask);
1229 	device_remove_file(&mci->dev, &dev_attr_inject_enable);
1230 
1231 	if (!pvt->is_registered) {
1232 		put_device(pvt->chancounts_dev);
1233 		device_del(pvt->chancounts_dev);
1234 	}
1235 	put_device(pvt->addrmatch_dev);
1236 	device_del(pvt->addrmatch_dev);
1237 }
1238 
1239 /****************************************************************************
1240 	Device initialization routines: put/get, init/exit
1241  ****************************************************************************/
1242 
1243 /*
1244  *	i7core_put_all_devices	'put' all the devices that we have
1245  *				reserved via 'get'
1246  */
1247 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1248 {
1249 	int i;
1250 
1251 	edac_dbg(0, "\n");
1252 	for (i = 0; i < i7core_dev->n_devs; i++) {
1253 		struct pci_dev *pdev = i7core_dev->pdev[i];
1254 		if (!pdev)
1255 			continue;
1256 		edac_dbg(0, "Removing dev %02x:%02x.%d\n",
1257 			 pdev->bus->number,
1258 			 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1259 		pci_dev_put(pdev);
1260 	}
1261 }
1262 
1263 static void i7core_put_all_devices(void)
1264 {
1265 	struct i7core_dev *i7core_dev, *tmp;
1266 
1267 	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1268 		i7core_put_devices(i7core_dev);
1269 		free_i7core_dev(i7core_dev);
1270 	}
1271 }
1272 
1273 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1274 {
1275 	struct pci_dev *pdev = NULL;
1276 	int i;
1277 
1278 	/*
1279 	 * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
1280 	 * aren't announced by acpi. So, we need to use a legacy scan probing
1281 	 * to detect them
1282 	 */
1283 	while (table && table->descr) {
1284 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1285 		if (unlikely(!pdev)) {
1286 			for (i = 0; i < MAX_SOCKET_BUSES; i++)
1287 				pcibios_scan_specific_bus(255-i);
1288 		}
1289 		pci_dev_put(pdev);
1290 		table++;
1291 	}
1292 }
1293 
1294 static unsigned i7core_pci_lastbus(void)
1295 {
1296 	int last_bus = 0, bus;
1297 	struct pci_bus *b = NULL;
1298 
1299 	while ((b = pci_find_next_bus(b)) != NULL) {
1300 		bus = b->number;
1301 		edac_dbg(0, "Found bus %d\n", bus);
1302 		if (bus > last_bus)
1303 			last_bus = bus;
1304 	}
1305 
1306 	edac_dbg(0, "Last bus %d\n", last_bus);
1307 
1308 	return last_bus;
1309 }
1310 
/*
 *	i7core_get_onedevice	Find the next PCI device matching entry
 *			@devno of @table and record it in the i7core_dev
 *			of its socket.
 *
 *	@prev:		in: device to continue the search from (NULL to
 *			start); out: the device found, or NULL once the
 *			search that started from a non-NULL *prev is exhausted
 *	@table:		PCI ID table being processed
 *	@devno:		index into table->descr and into i7core_dev->pdev[]
 *	@last_bus:	highest PCI bus number; socket = last_bus - bus
 *
 *	Returns 0 on success (including "no more devices" and a missing
 *	optional device), -ENODEV when a device is missing, duplicated,
 *	sits at an unexpected slot/function or cannot be enabled, and
 *	-ENOMEM when the per-socket structure cannot be allocated.
 */
static int i7core_get_onedevice(struct pci_dev **prev,
				const struct pci_id_table *table,
				const unsigned devno,
				const unsigned last_bus)
{
	struct i7core_dev *i7core_dev;
	const struct pci_id_descr *dev_descr = &table->descr[devno];

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
	 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure
	 */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) {
		pci_dev_get(*prev);	/* pci_get_device will put it */
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
	}

	/* Same fallback for the Lynnfield non-core device ID */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE &&
	    !pdev) {
		pci_dev_get(*prev);	/* pci_get_device will put it */
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
				      *prev);
	}

	if (!pdev) {
		/* At least one device was found earlier: end of iteration */
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		/* First entry of the table missing: fail without a message */
		if (devno == 0)
			return -ENODEV;

		i7core_printk(KERN_INFO,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	/* Sockets are numbered downwards from the last bus */
	socket = last_bus - bus;

	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = alloc_i7core_dev(socket, table);
		if (!i7core_dev) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	/*
	 * From here on the reference is owned by i7core_dev->pdev[devno];
	 * the error returns below leave it stored there.
	 */
	i7core_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	edac_dbg(0, "Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		 socket, bus, dev_descr->dev,
		 dev_descr->func,
		 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	/*
	 * As stated on drivers/pci/search.c, the reference count for
	 * @from is always decremented if it is not %NULL. So, as we need
	 * to get all devices up to null, we need to do a get for the device
	 */
	pci_dev_get(pdev);

	*prev = pdev;

	return 0;
}
1434 
1435 static int i7core_get_all_devices(void)
1436 {
1437 	int i, rc, last_bus;
1438 	struct pci_dev *pdev = NULL;
1439 	const struct pci_id_table *table = pci_dev_table;
1440 
1441 	last_bus = i7core_pci_lastbus();
1442 
1443 	while (table && table->descr) {
1444 		for (i = 0; i < table->n_devs; i++) {
1445 			pdev = NULL;
1446 			do {
1447 				rc = i7core_get_onedevice(&pdev, table, i,
1448 							  last_bus);
1449 				if (rc < 0) {
1450 					if (i == 0) {
1451 						i = table->n_devs;
1452 						break;
1453 					}
1454 					i7core_put_all_devices();
1455 					return -ENODEV;
1456 				}
1457 			} while (pdev);
1458 		}
1459 		table++;
1460 	}
1461 
1462 	return 0;
1463 }
1464 
1465 static int mci_bind_devs(struct mem_ctl_info *mci,
1466 			 struct i7core_dev *i7core_dev)
1467 {
1468 	struct i7core_pvt *pvt = mci->pvt_info;
1469 	struct pci_dev *pdev;
1470 	int i, func, slot;
1471 	char *family;
1472 
1473 	pvt->is_registered = false;
1474 	pvt->enable_scrub  = false;
1475 	for (i = 0; i < i7core_dev->n_devs; i++) {
1476 		pdev = i7core_dev->pdev[i];
1477 		if (!pdev)
1478 			continue;
1479 
1480 		func = PCI_FUNC(pdev->devfn);
1481 		slot = PCI_SLOT(pdev->devfn);
1482 		if (slot == 3) {
1483 			if (unlikely(func > MAX_MCR_FUNC))
1484 				goto error;
1485 			pvt->pci_mcr[func] = pdev;
1486 		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1487 			if (unlikely(func > MAX_CHAN_FUNC))
1488 				goto error;
1489 			pvt->pci_ch[slot - 4][func] = pdev;
1490 		} else if (!slot && !func) {
1491 			pvt->pci_noncore = pdev;
1492 
1493 			/* Detect the processor family */
1494 			switch (pdev->device) {
1495 			case PCI_DEVICE_ID_INTEL_I7_NONCORE:
1496 				family = "Xeon 35xx/ i7core";
1497 				pvt->enable_scrub = false;
1498 				break;
1499 			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
1500 				family = "i7-800/i5-700";
1501 				pvt->enable_scrub = false;
1502 				break;
1503 			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
1504 				family = "Xeon 34xx";
1505 				pvt->enable_scrub = false;
1506 				break;
1507 			case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
1508 				family = "Xeon 55xx";
1509 				pvt->enable_scrub = true;
1510 				break;
1511 			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
1512 				family = "Xeon 56xx / i7-900";
1513 				pvt->enable_scrub = true;
1514 				break;
1515 			default:
1516 				family = "unknown";
1517 				pvt->enable_scrub = false;
1518 			}
1519 			edac_dbg(0, "Detected a processor type %s\n", family);
1520 		} else
1521 			goto error;
1522 
1523 		edac_dbg(0, "Associated fn %d.%d, dev = %p, socket %d\n",
1524 			 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1525 			 pdev, i7core_dev->socket);
1526 
1527 		if (PCI_SLOT(pdev->devfn) == 3 &&
1528 			PCI_FUNC(pdev->devfn) == 2)
1529 			pvt->is_registered = true;
1530 	}
1531 
1532 	return 0;
1533 
1534 error:
1535 	i7core_printk(KERN_ERR, "Device %d, function %d "
1536 		      "is out of the expected range\n",
1537 		      slot, func);
1538 	return -EINVAL;
1539 }
1540 
1541 /****************************************************************************
1542 			Error check routines
1543  ****************************************************************************/
1544 
1545 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1546 					 const int chan,
1547 					 const int new0,
1548 					 const int new1,
1549 					 const int new2)
1550 {
1551 	struct i7core_pvt *pvt = mci->pvt_info;
1552 	int add0 = 0, add1 = 0, add2 = 0;
1553 	/* Updates CE counters if it is not the first time here */
1554 	if (pvt->ce_count_available) {
1555 		/* Updates CE counters */
1556 
1557 		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1558 		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1559 		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1560 
1561 		if (add2 < 0)
1562 			add2 += 0x7fff;
1563 		pvt->rdimm_ce_count[chan][2] += add2;
1564 
1565 		if (add1 < 0)
1566 			add1 += 0x7fff;
1567 		pvt->rdimm_ce_count[chan][1] += add1;
1568 
1569 		if (add0 < 0)
1570 			add0 += 0x7fff;
1571 		pvt->rdimm_ce_count[chan][0] += add0;
1572 	} else
1573 		pvt->ce_count_available = 1;
1574 
1575 	/* Store the new values */
1576 	pvt->rdimm_last_ce_count[chan][2] = new2;
1577 	pvt->rdimm_last_ce_count[chan][1] = new1;
1578 	pvt->rdimm_last_ce_count[chan][0] = new0;
1579 
1580 	/*updated the edac core */
1581 	if (add0 != 0)
1582 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add0,
1583 				     0, 0, 0,
1584 				     chan, 0, -1, "error", "");
1585 	if (add1 != 0)
1586 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add1,
1587 				     0, 0, 0,
1588 				     chan, 1, -1, "error", "");
1589 	if (add2 != 0)
1590 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add2,
1591 				     0, 0, 0,
1592 				     chan, 2, -1, "error", "");
1593 }
1594 
1595 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1596 {
1597 	struct i7core_pvt *pvt = mci->pvt_info;
1598 	u32 rcv[3][2];
1599 	int i, new0, new1, new2;
1600 
1601 	/*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
1602 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1603 								&rcv[0][0]);
1604 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1605 								&rcv[0][1]);
1606 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1607 								&rcv[1][0]);
1608 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1609 								&rcv[1][1]);
1610 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1611 								&rcv[2][0]);
1612 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1613 								&rcv[2][1]);
1614 	for (i = 0 ; i < 3; i++) {
1615 		edac_dbg(3, "MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1616 			 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1617 		/*if the channel has 3 dimms*/
1618 		if (pvt->channel[i].dimms > 2) {
1619 			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1620 			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1621 			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1622 		} else {
1623 			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1624 					DIMM_BOT_COR_ERR(rcv[i][0]);
1625 			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1626 					DIMM_BOT_COR_ERR(rcv[i][1]);
1627 			new2 = 0;
1628 		}
1629 
1630 		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1631 	}
1632 }
1633 
1634 /* This function is based on the device 3 function 4 registers as described on:
1635  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1636  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1637  * also available at:
1638  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1639  */
1640 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1641 {
1642 	struct i7core_pvt *pvt = mci->pvt_info;
1643 	u32 rcv1, rcv0;
1644 	int new0, new1, new2;
1645 
1646 	if (!pvt->pci_mcr[4]) {
1647 		edac_dbg(0, "MCR registers not found\n");
1648 		return;
1649 	}
1650 
1651 	/* Corrected test errors */
1652 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1653 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1654 
1655 	/* Store the new values */
1656 	new2 = DIMM2_COR_ERR(rcv1);
1657 	new1 = DIMM1_COR_ERR(rcv0);
1658 	new0 = DIMM0_COR_ERR(rcv0);
1659 
1660 	/* Updates CE counters if it is not the first time here */
1661 	if (pvt->ce_count_available) {
1662 		/* Updates CE counters */
1663 		int add0, add1, add2;
1664 
1665 		add2 = new2 - pvt->udimm_last_ce_count[2];
1666 		add1 = new1 - pvt->udimm_last_ce_count[1];
1667 		add0 = new0 - pvt->udimm_last_ce_count[0];
1668 
1669 		if (add2 < 0)
1670 			add2 += 0x7fff;
1671 		pvt->udimm_ce_count[2] += add2;
1672 
1673 		if (add1 < 0)
1674 			add1 += 0x7fff;
1675 		pvt->udimm_ce_count[1] += add1;
1676 
1677 		if (add0 < 0)
1678 			add0 += 0x7fff;
1679 		pvt->udimm_ce_count[0] += add0;
1680 
1681 		if (add0 | add1 | add2)
1682 			i7core_printk(KERN_ERR, "New Corrected error(s): "
1683 				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1684 				      add0, add1, add2);
1685 	} else
1686 		pvt->ce_count_available = 1;
1687 
1688 	/* Store the new values */
1689 	pvt->udimm_last_ce_count[2] = new2;
1690 	pvt->udimm_last_ce_count[1] = new1;
1691 	pvt->udimm_last_ce_count[0] = new0;
1692 }
1693 
1694 /*
1695  * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1696  * Architectures Software Developer’s Manual Volume 3B.
1697  * Nehalem are defined as family 0x06, model 0x1a
1698  *
1699  * The MCA registers used here are the following ones:
1700  *     struct mce field	MCA Register
1701  *     m->status	MSR_IA32_MC8_STATUS
1702  *     m->addr		MSR_IA32_MC8_ADDR
1703  *     m->misc		MSR_IA32_MC8_MISC
1704  * In the case of Nehalem, the error information is masked at .status and .misc
1705  * fields
1706  */
1707 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1708 				    const struct mce *m)
1709 {
1710 	struct i7core_pvt *pvt = mci->pvt_info;
1711 	char *optype, *err;
1712 	enum hw_event_mc_err_type tp_event;
1713 	unsigned long error = m->status & 0x1ff0000l;
1714 	bool uncorrected_error = m->mcgstatus & 1ll << 61;
1715 	bool ripv = m->mcgstatus & 1;
1716 	u32 optypenum = (m->status >> 4) & 0x07;
1717 	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1718 	u32 dimm = (m->misc >> 16) & 0x3;
1719 	u32 channel = (m->misc >> 18) & 0x3;
1720 	u32 syndrome = m->misc >> 32;
1721 	u32 errnum = find_first_bit(&error, 32);
1722 
1723 	if (uncorrected_error) {
1724 		if (ripv)
1725 			tp_event = HW_EVENT_ERR_FATAL;
1726 		else
1727 			tp_event = HW_EVENT_ERR_UNCORRECTED;
1728 	} else {
1729 		tp_event = HW_EVENT_ERR_CORRECTED;
1730 	}
1731 
1732 	switch (optypenum) {
1733 	case 0:
1734 		optype = "generic undef request";
1735 		break;
1736 	case 1:
1737 		optype = "read error";
1738 		break;
1739 	case 2:
1740 		optype = "write error";
1741 		break;
1742 	case 3:
1743 		optype = "addr/cmd error";
1744 		break;
1745 	case 4:
1746 		optype = "scrubbing error";
1747 		break;
1748 	default:
1749 		optype = "reserved";
1750 		break;
1751 	}
1752 
1753 	switch (errnum) {
1754 	case 16:
1755 		err = "read ECC error";
1756 		break;
1757 	case 17:
1758 		err = "RAS ECC error";
1759 		break;
1760 	case 18:
1761 		err = "write parity error";
1762 		break;
1763 	case 19:
1764 		err = "redundacy loss";
1765 		break;
1766 	case 20:
1767 		err = "reserved";
1768 		break;
1769 	case 21:
1770 		err = "memory range error";
1771 		break;
1772 	case 22:
1773 		err = "RTID out of range";
1774 		break;
1775 	case 23:
1776 		err = "address parity error";
1777 		break;
1778 	case 24:
1779 		err = "byte enable parity error";
1780 		break;
1781 	default:
1782 		err = "unknown";
1783 	}
1784 
1785 	/*
1786 	 * Call the helper to output message
1787 	 * FIXME: what to do if core_err_cnt > 1? Currently, it generates
1788 	 * only one event
1789 	 */
1790 	if (uncorrected_error || !pvt->is_registered)
1791 		edac_mc_handle_error(tp_event, mci, core_err_cnt,
1792 				     m->addr >> PAGE_SHIFT,
1793 				     m->addr & ~PAGE_MASK,
1794 				     syndrome,
1795 				     channel, dimm, -1,
1796 				     err, optype);
1797 }
1798 
1799 /*
1800  *	i7core_check_error	Retrieve and process errors reported by the
1801  *				hardware. Called by the Core module.
1802  */
1803 static void i7core_check_error(struct mem_ctl_info *mci)
1804 {
1805 	struct i7core_pvt *pvt = mci->pvt_info;
1806 	int i;
1807 	unsigned count = 0;
1808 	struct mce *m;
1809 
1810 	/*
1811 	 * MCE first step: Copy all mce errors into a temporary buffer
1812 	 * We use a double buffering here, to reduce the risk of
1813 	 * losing an error.
1814 	 */
1815 	smp_rmb();
1816 	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1817 		% MCE_LOG_LEN;
1818 	if (!count)
1819 		goto check_ce_error;
1820 
1821 	m = pvt->mce_outentry;
1822 	if (pvt->mce_in + count > MCE_LOG_LEN) {
1823 		unsigned l = MCE_LOG_LEN - pvt->mce_in;
1824 
1825 		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1826 		smp_wmb();
1827 		pvt->mce_in = 0;
1828 		count -= l;
1829 		m += l;
1830 	}
1831 	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1832 	smp_wmb();
1833 	pvt->mce_in += count;
1834 
1835 	smp_rmb();
1836 	if (pvt->mce_overrun) {
1837 		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1838 			      pvt->mce_overrun);
1839 		smp_wmb();
1840 		pvt->mce_overrun = 0;
1841 	}
1842 
1843 	/*
1844 	 * MCE second step: parse errors and display
1845 	 */
1846 	for (i = 0; i < count; i++)
1847 		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1848 
1849 	/*
1850 	 * Now, let's increment CE error counts
1851 	 */
1852 check_ce_error:
1853 	if (!pvt->is_registered)
1854 		i7core_udimm_check_mc_ecc_err(mci);
1855 	else
1856 		i7core_rdimm_check_mc_ecc_err(mci);
1857 }
1858 
/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				return. The error itself should be handled later
 *				by i7core_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 *
 * Notifier callback (see i7_mce_dec). @data points to the struct mce being
 * reported; @nb and @val are not used.
 *
 * Returns NOTIFY_BAD when no i7core_dev exists for the socket of the error,
 * NOTIFY_DONE when the error is not handled here (not a memory controller
 * error, not bank 8, or the ring buffer is full) and NOTIFY_STOP once the
 * error has been queued.
 */
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
				  void *data)
{
	struct mce *mce = (struct mce *)data;
	struct i7core_dev *i7_dev;
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;

	i7_dev = get_i7core_dev(mce->socketid);
	if (!i7_dev)
		return NOTIFY_BAD;

	/* NOTE(review): i7_dev->mci is dereferenced unchecked - confirm it is
	 * always set before this notifier can run */
	mci = i7_dev->mci;
	pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return NOTIFY_DONE;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return NOTIFY_DONE;

	/* Ring full (one slot is kept free): count the loss and bail out */
	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return NOTIFY_DONE;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	/* Publish the entry before advancing the producer index */
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the errors were handled */
	return NOTIFY_STOP;
}
1912 
/* Notifier block whose callback queues machine check records for decoding */
static struct notifier_block i7_mce_dec = {
	.notifier_call	= i7core_mce_check_error,
};
1916 
/*
 * Packed layout that decode_dclk() overlays on a DMI entry whose type is
 * DMI_ENTRY_MEM_DEVICE. Only length, size, speed and conf_mem_clk_speed are
 * read by this file; the remaining members keep the later fields at their
 * proper offsets.
 */
struct memdev_dmi_entry {
	u8 type;
	u8 length;		/* compared against field offsets before use */
	u16 handle;
	u16 phys_mem_array_handle;
	u16 mem_err_info_handle;
	u16 total_width;
	u16 data_width;
	u16 size;		/* 0 is treated as "no DIMM present" */
	u8 form;
	u8 device_set;
	u8 device_locator;
	u8 bank_locator;
	u8 memory_type;
	u16 type_detail;
	u16 speed;		/* fallback when no configured speed exists */
	u8 manufacturer;
	u8 serial_number;
	u8 asset_tag;
	u8 part_number;
	u8 attributes;
	u32 extended_size;
	u16 conf_mem_clk_speed;	/* preferred source of the clock speed */
} __attribute__((__packed__));
1941 
1942 
1943 /*
1944  * Decode the DRAM Clock Frequency, be paranoid, make sure that all
1945  * memory devices show the same speed, and if they don't then consider
1946  * all speeds to be invalid.
1947  */
1948 static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1949 {
1950 	int *dclk_freq = _dclk_freq;
1951 	u16 dmi_mem_clk_speed;
1952 
1953 	if (*dclk_freq == -1)
1954 		return;
1955 
1956 	if (dh->type == DMI_ENTRY_MEM_DEVICE) {
1957 		struct memdev_dmi_entry *memdev_dmi_entry =
1958 			(struct memdev_dmi_entry *)dh;
1959 		unsigned long conf_mem_clk_speed_offset =
1960 			(unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
1961 			(unsigned long)&memdev_dmi_entry->type;
1962 		unsigned long speed_offset =
1963 			(unsigned long)&memdev_dmi_entry->speed -
1964 			(unsigned long)&memdev_dmi_entry->type;
1965 
1966 		/* Check that a DIMM is present */
1967 		if (memdev_dmi_entry->size == 0)
1968 			return;
1969 
1970 		/*
1971 		 * Pick the configured speed if it's available, otherwise
1972 		 * pick the DIMM speed, or we don't have a speed.
1973 		 */
1974 		if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
1975 			dmi_mem_clk_speed =
1976 				memdev_dmi_entry->conf_mem_clk_speed;
1977 		} else if (memdev_dmi_entry->length > speed_offset) {
1978 			dmi_mem_clk_speed = memdev_dmi_entry->speed;
1979 		} else {
1980 			*dclk_freq = -1;
1981 			return;
1982 		}
1983 
1984 		if (*dclk_freq == 0) {
1985 			/* First pass, speed was 0 */
1986 			if (dmi_mem_clk_speed > 0) {
1987 				/* Set speed if a valid speed is read */
1988 				*dclk_freq = dmi_mem_clk_speed;
1989 			} else {
1990 				/* Otherwise we don't have a valid speed */
1991 				*dclk_freq = -1;
1992 			}
1993 		} else if (*dclk_freq > 0 &&
1994 			   *dclk_freq != dmi_mem_clk_speed) {
1995 			/*
1996 			 * If we have a speed, check that all DIMMS are the same
1997 			 * speed, otherwise set the speed as invalid.
1998 			 */
1999 			*dclk_freq = -1;
2000 		}
2001 	}
2002 }
2003 
2004 /*
2005  * The default DCLK frequency is used as a fallback if we
2006  * fail to find anything reliable in the DMI. The value
2007  * is taken straight from the datasheet.
2008  */
2009 #define DEFAULT_DCLK_FREQ 800
2010 
2011 static int get_dclk_freq(void)
2012 {
2013 	int dclk_freq = 0;
2014 
2015 	dmi_walk(decode_dclk, (void *)&dclk_freq);
2016 
2017 	if (dclk_freq < 1)
2018 		return DEFAULT_DCLK_FREQ;
2019 
2020 	return dclk_freq;
2021 }
2022 
2023 /*
2024  * set_sdram_scrub_rate		This routine sets byte/sec bandwidth scrub rate
2025  *				to hardware according to SCRUBINTERVAL formula
2026  *				found in datasheet.
2027  */
2028 static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
2029 {
2030 	struct i7core_pvt *pvt = mci->pvt_info;
2031 	struct pci_dev *pdev;
2032 	u32 dw_scrub;
2033 	u32 dw_ssr;
2034 
2035 	/* Get data from the MC register, function 2 */
2036 	pdev = pvt->pci_mcr[2];
2037 	if (!pdev)
2038 		return -ENODEV;
2039 
2040 	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
2041 
2042 	if (new_bw == 0) {
2043 		/* Prepare to disable petrol scrub */
2044 		dw_scrub &= ~STARTSCRUB;
2045 		/* Stop the patrol scrub engine */
2046 		write_and_test(pdev, MC_SCRUB_CONTROL,
2047 			       dw_scrub & ~SCRUBINTERVAL_MASK);
2048 
2049 		/* Get current status of scrub rate and set bit to disable */
2050 		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2051 		dw_ssr &= ~SSR_MODE_MASK;
2052 		dw_ssr |= SSR_MODE_DISABLE;
2053 	} else {
2054 		const int cache_line_size = 64;
2055 		const u32 freq_dclk_mhz = pvt->dclk_freq;
2056 		unsigned long long scrub_interval;
2057 		/*
2058 		 * Translate the desired scrub rate to a register value and
2059 		 * program the corresponding register value.
2060 		 */
2061 		scrub_interval = (unsigned long long)freq_dclk_mhz *
2062 			cache_line_size * 1000000;
2063 		do_div(scrub_interval, new_bw);
2064 
2065 		if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
2066 			return -EINVAL;
2067 
2068 		dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
2069 
2070 		/* Start the patrol scrub engine */
2071 		pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
2072 				       STARTSCRUB | dw_scrub);
2073 
2074 		/* Get current status of scrub rate and set bit to enable */
2075 		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2076 		dw_ssr &= ~SSR_MODE_MASK;
2077 		dw_ssr |= SSR_MODE_ENABLE;
2078 	}
2079 	/* Disable or enable scrubbing */
2080 	pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
2081 
2082 	return new_bw;
2083 }
2084 
2085 /*
2086  * get_sdram_scrub_rate		This routine convert current scrub rate value
2087  *				into byte/sec bandwidth according to
2088  *				SCRUBINTERVAL formula found in datasheet.
2089  */
2090 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2091 {
2092 	struct i7core_pvt *pvt = mci->pvt_info;
2093 	struct pci_dev *pdev;
2094 	const u32 cache_line_size = 64;
2095 	const u32 freq_dclk_mhz = pvt->dclk_freq;
2096 	unsigned long long scrub_rate;
2097 	u32 scrubval;
2098 
2099 	/* Get data from the MC register, function 2 */
2100 	pdev = pvt->pci_mcr[2];
2101 	if (!pdev)
2102 		return -ENODEV;
2103 
2104 	/* Get current scrub control data */
2105 	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2106 
2107 	/* Mask highest 8-bits to 0 */
2108 	scrubval &=  SCRUBINTERVAL_MASK;
2109 	if (!scrubval)
2110 		return 0;
2111 
2112 	/* Calculate scrub rate value into byte/sec bandwidth */
2113 	scrub_rate =  (unsigned long long)freq_dclk_mhz *
2114 		1000000 * cache_line_size;
2115 	do_div(scrub_rate, scrubval);
2116 	return (int)scrub_rate;
2117 }
2118 
2119 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2120 {
2121 	struct i7core_pvt *pvt = mci->pvt_info;
2122 	u32 pci_lock;
2123 
2124 	/* Unlock writes to pci registers */
2125 	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2126 	pci_lock &= ~0x3;
2127 	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2128 			       pci_lock | MC_CFG_UNLOCK);
2129 
2130 	mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2131 	mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2132 }
2133 
2134 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2135 {
2136 	struct i7core_pvt *pvt = mci->pvt_info;
2137 	u32 pci_lock;
2138 
2139 	/* Lock writes to pci registers */
2140 	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2141 	pci_lock &= ~0x3;
2142 	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2143 			       pci_lock | MC_CFG_LOCK);
2144 }
2145 
2146 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2147 {
2148 	pvt->i7core_pci = edac_pci_create_generic_ctl(
2149 						&pvt->i7core_dev->pdev[0]->dev,
2150 						EDAC_MOD_STR);
2151 	if (unlikely(!pvt->i7core_pci))
2152 		i7core_printk(KERN_WARNING,
2153 			      "Unable to setup PCI error report via EDAC\n");
2154 }
2155 
2156 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2157 {
2158 	if (likely(pvt->i7core_pci))
2159 		edac_pci_release_generic_ctl(pvt->i7core_pci);
2160 	else
2161 		i7core_printk(KERN_ERR,
2162 				"Couldn't find mem_ctl_info for socket %d\n",
2163 				pvt->i7core_dev->socket);
2164 	pvt->i7core_pci = NULL;
2165 }
2166 
2167 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2168 {
2169 	struct mem_ctl_info *mci = i7core_dev->mci;
2170 	struct i7core_pvt *pvt;
2171 
2172 	if (unlikely(!mci || !mci->pvt_info)) {
2173 		edac_dbg(0, "MC: dev = %p\n", &i7core_dev->pdev[0]->dev);
2174 
2175 		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2176 		return;
2177 	}
2178 
2179 	pvt = mci->pvt_info;
2180 
2181 	edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2182 
2183 	/* Disable scrubrate setting */
2184 	if (pvt->enable_scrub)
2185 		disable_sdram_scrub_setting(mci);
2186 
2187 	/* Disable EDAC polling */
2188 	i7core_pci_ctl_release(pvt);
2189 
2190 	/* Remove MC sysfs nodes */
2191 	i7core_delete_sysfs_devices(mci);
2192 	edac_mc_del_mc(mci->pdev);
2193 
2194 	edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
2195 	kfree(mci->ctl_name);
2196 	edac_mc_free(mci);
2197 	i7core_dev->mci = NULL;
2198 }
2199 
2200 static int i7core_register_mci(struct i7core_dev *i7core_dev)
2201 {
2202 	struct mem_ctl_info *mci;
2203 	struct i7core_pvt *pvt;
2204 	int rc;
2205 	struct edac_mc_layer layers[2];
2206 
2207 	/* allocate a new MC control structure */
2208 
2209 	layers[0].type = EDAC_MC_LAYER_CHANNEL;
2210 	layers[0].size = NUM_CHANS;
2211 	layers[0].is_virt_csrow = false;
2212 	layers[1].type = EDAC_MC_LAYER_SLOT;
2213 	layers[1].size = MAX_DIMMS;
2214 	layers[1].is_virt_csrow = true;
2215 	mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
2216 			    sizeof(*pvt));
2217 	if (unlikely(!mci))
2218 		return -ENOMEM;
2219 
2220 	edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2221 
2222 	pvt = mci->pvt_info;
2223 	memset(pvt, 0, sizeof(*pvt));
2224 
2225 	/* Associates i7core_dev and mci for future usage */
2226 	pvt->i7core_dev = i7core_dev;
2227 	i7core_dev->mci = mci;
2228 
2229 	/*
2230 	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2231 	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2232 	 * memory channels
2233 	 */
2234 	mci->mtype_cap = MEM_FLAG_DDR3;
2235 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
2236 	mci->edac_cap = EDAC_FLAG_NONE;
2237 	mci->mod_name = "i7core_edac.c";
2238 	mci->mod_ver = I7CORE_REVISION;
2239 	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
2240 				  i7core_dev->socket);
2241 	mci->dev_name = pci_name(i7core_dev->pdev[0]);
2242 	mci->ctl_page_to_phys = NULL;
2243 
2244 	/* Store pci devices at mci for faster access */
2245 	rc = mci_bind_devs(mci, i7core_dev);
2246 	if (unlikely(rc < 0))
2247 		goto fail0;
2248 
2249 
2250 	/* Get dimm basic config */
2251 	get_dimm_config(mci);
2252 	/* record ptr to the generic device */
2253 	mci->pdev = &i7core_dev->pdev[0]->dev;
2254 	/* Set the function pointer to an actual operation function */
2255 	mci->edac_check = i7core_check_error;
2256 
2257 	/* Enable scrubrate setting */
2258 	if (pvt->enable_scrub)
2259 		enable_sdram_scrub_setting(mci);
2260 
2261 	/* add this new MC control structure to EDAC's list of MCs */
2262 	if (unlikely(edac_mc_add_mc(mci))) {
2263 		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
2264 		/* FIXME: perhaps some code should go here that disables error
2265 		 * reporting if we just enabled it
2266 		 */
2267 
2268 		rc = -EINVAL;
2269 		goto fail0;
2270 	}
2271 	if (i7core_create_sysfs_devices(mci)) {
2272 		edac_dbg(0, "MC: failed to create sysfs nodes\n");
2273 		edac_mc_del_mc(mci->pdev);
2274 		rc = -EINVAL;
2275 		goto fail0;
2276 	}
2277 
2278 	/* Default error mask is any memory */
2279 	pvt->inject.channel = 0;
2280 	pvt->inject.dimm = -1;
2281 	pvt->inject.rank = -1;
2282 	pvt->inject.bank = -1;
2283 	pvt->inject.page = -1;
2284 	pvt->inject.col = -1;
2285 
2286 	/* allocating generic PCI control info */
2287 	i7core_pci_ctl_create(pvt);
2288 
2289 	/* DCLK for scrub rate setting */
2290 	pvt->dclk_freq = get_dclk_freq();
2291 
2292 	return 0;
2293 
2294 fail0:
2295 	kfree(mci->ctl_name);
2296 	edac_mc_free(mci);
2297 	i7core_dev->mci = NULL;
2298 	return rc;
2299 }
2300 
2301 /*
2302  *	i7core_probe	Probe for ONE instance of device to see if it is
2303  *			present.
2304  *	return:
2305  *		0 for FOUND a device
2306  *		< 0 for error code
2307  */
2308 
2309 static int i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2310 {
2311 	int rc, count = 0;
2312 	struct i7core_dev *i7core_dev;
2313 
2314 	/* get the pci devices we want to reserve for our use */
2315 	mutex_lock(&i7core_edac_lock);
2316 
2317 	/*
2318 	 * All memory controllers are allocated at the first pass.
2319 	 */
2320 	if (unlikely(probed >= 1)) {
2321 		mutex_unlock(&i7core_edac_lock);
2322 		return -ENODEV;
2323 	}
2324 	probed++;
2325 
2326 	rc = i7core_get_all_devices();
2327 	if (unlikely(rc < 0))
2328 		goto fail0;
2329 
2330 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2331 		count++;
2332 		rc = i7core_register_mci(i7core_dev);
2333 		if (unlikely(rc < 0))
2334 			goto fail1;
2335 	}
2336 
2337 	/*
2338 	 * Nehalem-EX uses a different memory controller. However, as the
2339 	 * memory controller is not visible on some Nehalem/Nehalem-EP, we
2340 	 * need to indirectly probe via a X58 PCI device. The same devices
2341 	 * are found on (some) Nehalem-EX. So, on those machines, the
2342 	 * probe routine needs to return -ENODEV, as the actual Memory
2343 	 * Controller registers won't be detected.
2344 	 */
2345 	if (!count) {
2346 		rc = -ENODEV;
2347 		goto fail1;
2348 	}
2349 
2350 	i7core_printk(KERN_INFO,
2351 		      "Driver loaded, %d memory controller(s) found.\n",
2352 		      count);
2353 
2354 	mutex_unlock(&i7core_edac_lock);
2355 	return 0;
2356 
2357 fail1:
2358 	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2359 		i7core_unregister_mci(i7core_dev);
2360 
2361 	i7core_put_all_devices();
2362 fail0:
2363 	mutex_unlock(&i7core_edac_lock);
2364 	return rc;
2365 }
2366 
2367 /*
2368  *	i7core_remove	destructor for one instance of device
2369  *
2370  */
2371 static void i7core_remove(struct pci_dev *pdev)
2372 {
2373 	struct i7core_dev *i7core_dev;
2374 
2375 	edac_dbg(0, "\n");
2376 
2377 	/*
2378 	 * we have a trouble here: pdev value for removal will be wrong, since
2379 	 * it will point to the X58 register used to detect that the machine
2380 	 * is a Nehalem or upper design. However, due to the way several PCI
2381 	 * devices are grouped together to provide MC functionality, we need
2382 	 * to use a different method for releasing the devices
2383 	 */
2384 
2385 	mutex_lock(&i7core_edac_lock);
2386 
2387 	if (unlikely(!probed)) {
2388 		mutex_unlock(&i7core_edac_lock);
2389 		return;
2390 	}
2391 
2392 	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2393 		i7core_unregister_mci(i7core_dev);
2394 
2395 	/* Release PCI resources */
2396 	i7core_put_all_devices();
2397 
2398 	probed--;
2399 
2400 	mutex_unlock(&i7core_edac_lock);
2401 }
2402 
2403 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2404 
2405 /*
2406  *	i7core_driver	pci_driver structure for this module
2407  *
2408  */
2409 static struct pci_driver i7core_driver = {
2410 	.name     = "i7core_edac",
2411 	.probe    = i7core_probe,
2412 	.remove   = i7core_remove,
2413 	.id_table = i7core_pci_tbl,
2414 };
2415 
2416 /*
2417  *	i7core_init		Module entry function
2418  *			Try to initialize this module for its devices
2419  */
2420 static int __init i7core_init(void)
2421 {
2422 	int pci_rc;
2423 
2424 	edac_dbg(2, "\n");
2425 
2426 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
2427 	opstate_init();
2428 
2429 	if (use_pci_fixup)
2430 		i7core_xeon_pci_fixup(pci_dev_table);
2431 
2432 	pci_rc = pci_register_driver(&i7core_driver);
2433 
2434 	if (pci_rc >= 0) {
2435 		mce_register_decode_chain(&i7_mce_dec);
2436 		return 0;
2437 	}
2438 
2439 	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2440 		      pci_rc);
2441 
2442 	return pci_rc;
2443 }
2444 
2445 /*
2446  *	i7core_exit()	Module exit function
2447  *			Unregister the driver
2448  */
2449 static void __exit i7core_exit(void)
2450 {
2451 	edac_dbg(2, "\n");
2452 	pci_unregister_driver(&i7core_driver);
2453 	mce_unregister_decode_chain(&i7_mce_dec);
2454 }
2455 
/* Module entry and exit points */
module_init(i7core_init);
module_exit(i7core_exit);

/* Module metadata; the description string ends with the driver revision */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* Read-only (0444) parameter selecting how EDAC errors are reported */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2467