xref: /openbmc/linux/drivers/edac/i7core_edac.c (revision d0b73b48)
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *	 Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *	http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27 
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/dmi.h>
35 #include <linux/edac.h>
36 #include <linux/mmzone.h>
37 #include <linux/smp.h>
38 #include <asm/mce.h>
39 #include <asm/processor.h>
40 #include <asm/div64.h>
41 
42 #include "edac_core.h"
43 
44 /* Static vars */
45 static LIST_HEAD(i7core_edac_list);
46 static DEFINE_MUTEX(i7core_edac_lock);
47 static int probed;
48 
49 static int use_pci_fixup;
50 module_param(use_pci_fixup, int, 0444);
51 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255 and are not reported by the BIOS.
 * So far only devices with 2 sockets have been found. To support more
 * QPI (Quick Path Interconnect) sockets, just increment this number.
 */
58 #define MAX_SOCKET_BUSES	2
59 
60 
61 /*
62  * Alter this version for the module when modifications are made
63  */
64 #define I7CORE_REVISION    " Ver: 1.0.0"
65 #define EDAC_MOD_STR      "i7core_edac"
66 
/*
 * Debug macros
 *
 * Both wrappers forward to the generic EDAC print helpers, always passing
 * the fixed string "i7core" as the driver tag argument.
 */
/* Driver-wide message: level, format string and optional arguments. */
#define i7core_printk(level, fmt, arg...)			\
	edac_printk(level, "i7core", fmt, ##arg)

/* Same, but for a message tied to one memory controller (mci). */
#define i7core_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
75 
76 /*
77  * i7core Memory Controller Registers
78  */
79 
80 	/* OFFSETS for Device 0 Function 0 */
81 
82 #define MC_CFG_CONTROL	0x90
83   #define MC_CFG_UNLOCK		0x02
84   #define MC_CFG_LOCK		0x00
85 
86 	/* OFFSETS for Device 3 Function 0 */
87 
88 #define MC_CONTROL	0x48
89 #define MC_STATUS	0x4c
90 #define MC_MAX_DOD	0x64
91 
92 /*
93  * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
94  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
95  */
96 
97 #define MC_TEST_ERR_RCV1	0x60
98   #define DIMM2_COR_ERR(r)			((r) & 0x7fff)
99 
100 #define MC_TEST_ERR_RCV0	0x64
101   #define DIMM1_COR_ERR(r)			(((r) >> 16) & 0x7fff)
102   #define DIMM0_COR_ERR(r)			((r) & 0x7fff)
103 
104 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
105 #define MC_SSRCONTROL		0x48
106   #define SSR_MODE_DISABLE	0x00
107   #define SSR_MODE_ENABLE	0x01
108   #define SSR_MODE_MASK		0x03
109 
110 #define MC_SCRUB_CONTROL	0x4c
111   #define STARTSCRUB		(1 << 24)
112   #define SCRUBINTERVAL_MASK    0xffffff
113 
114 #define MC_COR_ECC_CNT_0	0x80
115 #define MC_COR_ECC_CNT_1	0x84
116 #define MC_COR_ECC_CNT_2	0x88
117 #define MC_COR_ECC_CNT_3	0x8c
118 #define MC_COR_ECC_CNT_4	0x90
119 #define MC_COR_ECC_CNT_5	0x94
120 
121 #define DIMM_TOP_COR_ERR(r)			(((r) >> 16) & 0x7fff)
122 #define DIMM_BOT_COR_ERR(r)			((r) & 0x7fff)
123 
124 
125 	/* OFFSETS for Devices 4,5 and 6 Function 0 */
126 
127 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
128   #define THREE_DIMMS_PRESENT		(1 << 24)
129   #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
130   #define QUAD_RANK_PRESENT		(1 << 22)
131   #define REGISTERED_DIMM		(1 << 15)
132 
#define MC_CHANNEL_MAPPER	0x60
  /*
   * Extract the 3-bit read (RDLCH) or write (WRLCH) field of channel 'ch'
   * from the MC_CHANNEL_MAPPER value 'r' and subtract one from it.
   * Each channel owns 6 bits: the write field sits in the low 3 bits and
   * the read field in the next 3.  'ch' is parenthesised so that
   * expression arguments (e.g. "i + 1") expand correctly.
   */
  #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)
136 
137 #define MC_CHANNEL_RANK_PRESENT 0x7c
138   #define RANK_PRESENT_MASK		0xffff
139 
140 #define MC_CHANNEL_ADDR_MATCH	0xf0
141 #define MC_CHANNEL_ERROR_MASK	0xf8
142 #define MC_CHANNEL_ERROR_INJECT	0xfc
143   #define INJECT_ADDR_PARITY	0x10
144   #define INJECT_ECC		0x08
145   #define MASK_CACHELINE	0x06
146   #define MASK_FULL_CACHELINE	0x06
147   #define MASK_MSB32_CACHELINE	0x04
148   #define MASK_LSB32_CACHELINE	0x02
149   #define NO_MASK_CACHELINE	0x00
150   #define REPEAT_EN		0x01
151 
152 	/* OFFSETS for Devices 4,5 and 6 Function 1 */
153 
/* Per-DIMM DOD registers, one per DIMM slot of the channel. */
#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  /*
   * Field accessors for a MC_DOD_CH_DIMMx register value 'x'.
   * Layout as encoded by the masks below:
   *   bits 12:10 rank offset, bit 9 DIMM present, bits 8:7 bank code,
   *   bits 6:5 rank code, bits 4:2 row code, bits 1:0 column code.
   * Every accessor parenthesises its argument so that expressions can be
   * passed safely.
   */
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK		3
  #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)
169 
170 #define MC_RANK_PRESENT		0x7c
171 
172 #define MC_SAG_CH_0	0x80
173 #define MC_SAG_CH_1	0x84
174 #define MC_SAG_CH_2	0x88
175 #define MC_SAG_CH_3	0x8c
176 #define MC_SAG_CH_4	0x90
177 #define MC_SAG_CH_5	0x94
178 #define MC_SAG_CH_6	0x98
179 #define MC_SAG_CH_7	0x9c
180 
181 #define MC_RIR_LIMIT_CH_0	0x40
182 #define MC_RIR_LIMIT_CH_1	0x44
183 #define MC_RIR_LIMIT_CH_2	0x48
184 #define MC_RIR_LIMIT_CH_3	0x4C
185 #define MC_RIR_LIMIT_CH_4	0x50
186 #define MC_RIR_LIMIT_CH_5	0x54
187 #define MC_RIR_LIMIT_CH_6	0x58
188 #define MC_RIR_LIMIT_CH_7	0x5C
189 #define MC_RIR_LIMIT_MASK	((1 << 10) - 1)
190 
191 #define MC_RIR_WAY_CH		0x80
192   #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
193   #define MC_RIR_WAY_RANK_MASK		0x7
194 
195 /*
196  * i7core structs
197  */
198 
199 #define NUM_CHANS 3
200 #define MAX_DIMMS 3		/* Max DIMMS per channel */
201 #define MAX_MCR_FUNC  4
202 #define MAX_CHAN_FUNC 3
203 
/*
 * Raw register values captured by get_dimm_config() from the memory
 * controller PCI function (pvt->pci_mcr[0]).
 */
struct i7core_info {
	u32	mc_control;	/* value read at offset MC_CONTROL */
	u32	mc_status;	/* value read at offset MC_STATUS */
	u32	max_dod;	/* value read at offset MC_MAX_DOD */
	u32	ch_map;		/* value read at offset MC_CHANNEL_MAPPER */
};
210 
211 
/* Error injection parameters, configured through the sysfs handlers. */
struct i7core_inject {
	int	enable;		/* non-zero once injection has been armed */

	u32	section;	/* half-cacheline selection, 0..3 */
	u32	type;		/* injection type bits, 0..7 */
	u32	eccmask;	/* written to MC_CHANNEL_ERROR_MASK on enable */

	/* Error address mask */
	/* A negative value means "any": that criterion is not matched. */
	int channel, dimm, rank, bank, page, col;
};
222 
/*
 * Per-channel state.  The three flags are set in get_dimm_config() from the
 * corresponding bits of MC_CHANNEL_DIMM_INIT_PARAMS.
 */
struct i7core_channel {
	bool		is_3dimms_present;	/* THREE_DIMMS_PRESENT bit */
	bool		is_single_4rank;	/* SINGLE_QUAD_RANK_PRESENT bit */
	bool		has_4rank;		/* QUAD_RANK_PRESENT bit */
	/*
	 * Compared against 2 when building the injection address mask.
	 * NOTE(review): presumably the number of DIMMs found on the channel;
	 * confirm where it is filled in.
	 */
	u32		dimms;
};
229 
/* Describes one PCI function the driver looks for. */
struct pci_id_descr {
	int			dev;		/* PCI device (slot) number */
	int			func;		/* PCI function number */
	int 			dev_id;		/* PCI device ID */
	int			optional;	/* non-zero for entries flagged .optional = 1 in the tables */
};
236 
/* One descriptor array together with its element count. */
struct pci_id_table {
	const struct pci_id_descr	*descr;		/* array of descriptors */
	int				n_devs;		/* number of entries in descr */
};
241 
/* Per-socket bookkeeping; instances are linked on i7core_edac_list. */
struct i7core_dev {
	struct list_head	list;	/* node in i7core_edac_list */
	u8			socket;	/* socket number, the lookup key in get_i7core_dev() */
	struct pci_dev		**pdev;	/* array of n_devs pointers, allocated in alloc_i7core_dev() */
	int			n_devs;	/* number of slots in pdev */
	struct mem_ctl_info	*mci;
};
249 
/* Driver private data, reached through mci->pvt_info. */
struct i7core_pvt {
	struct device *addrmatch_dev, *chancounts_dev;

	/* PCI functions used by the driver */
	struct pci_dev	*pci_noncore;
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

	struct i7core_dev *i7core_dev;

	struct i7core_info	info;
	struct i7core_inject	inject;
	struct i7core_channel	channel[NUM_CHANS];

	int		ce_count_available;

			/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
			/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	bool		is_registered, enable_scrub;

	/* Fifo double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned		mce_in, mce_out;

	/* Count indicator to show errors not got */
	unsigned		mce_overrun;

	/* DCLK Frequency used for computing scrub rate */
	int			dclk_freq;

	/* Struct to control EDAC polling */
	struct edac_pci_ctl_info *i7core_pci;
};
290 
/*
 * Designated-initializer helper for struct pci_id_descr: fills in the PCI
 * device number, function number and device ID of one table entry.
 */
#define PCI_DESCR(device, function, device_id)	\
	.dev = (device),			\
	.func = (function),			\
	.dev_id = (device_id)
295 
/*
 * PCI functions looked up with the PCI_DEVICE_ID_INTEL_I7_* IDs:
 * device 3 (memory controller), devices 4-6 (channels 0-2) and the
 * non-core device 0.
 */
static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
	 * the probing code needs to test for the other address in case of
	 * failure of this one
	 */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },

};
332 
/*
 * PCI functions looked up with the PCI_DEVICE_ID_INTEL_LYNNFIELD_* IDs.
 * Only devices 4 and 5 (channels 0 and 1) are listed, and there is no
 * device 3 function 2 entry.
 */
static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
	/* Memory controller */
	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

	/* Channel 0 */
	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

	/* Channel 1 */
	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

	/*
	 * This PCI device has an alternate address on some processors,
	 * such as the Core i7 860
	 */
	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};
354 
/*
 * PCI functions looked up with the PCI_DEVICE_ID_INTEL_LYNNFIELD_*_REV2
 * IDs; same device/function layout as the Nehalem table.
 */
static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

		/* Generic Non-core registers */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },

};
385 
/* Build a struct pci_id_table entry from a descriptor array and its size. */
#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
/* All descriptor arrays defined above, ended by an all-zero entry. */
static const struct pci_id_table pci_dev_table[] = {
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
	{0,}			/* 0 terminated list. */
};
393 
/*
 *	pci_device_id	table for which devices we are looking for
 *
 *	Two Intel device IDs are listed: the X58 hub management device and
 *	the Lynnfield QPI link 0 device.
 */
static DEFINE_PCI_DEVICE_TABLE(i7core_pci_tbl) = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
	{0,}			/* 0 terminated list. */
};
402 
403 /****************************************************************************
404 			Ancillary status routines
405  ****************************************************************************/
406 
407 	/* MC_CONTROL bits */
/*
 * Test helpers for the cached MC_CONTROL value: bit (8 + ch) is tested by
 * CH_ACTIVE, bit 1 by ECCx8.  'ch' is parenthesised so that an expression
 * argument cannot change the shift count.
 */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + (ch))))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

	/* MC_STATUS bits: bit 4 is tested by ECC_ENABLED, bit ch by CH_DISABLED */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << (ch)))
414 
415 	/* MC_MAX_DOD read functions */
/* Decode the 2-bit DIMM count field: the stored code plus one (1..4). */
static inline int numdimms(u32 dimms)
{
	const u32 code = dimms & 0x3;

	return code + 1;
}
420 
/*
 * Decode the 2-bit rank field: codes 0, 1 and 2 give 1, 2 and 4 ranks,
 * code 3 yields -EINVAL.
 */
static inline int numrank(u32 rank)
{
	switch (rank & 0x3) {
	case 0:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	default:
		return -EINVAL;
	}
}
427 
/*
 * Decode the 2-bit bank field: codes 0, 1 and 2 give 4, 8 and 16 banks,
 * code 3 yields -EINVAL.
 */
static inline int numbank(u32 bank)
{
	const u32 code = bank & 0x3;

	return (code == 3) ? -EINVAL : (4 << code);
}
434 
/*
 * Decode the 3-bit row field: codes 0..4 give 2^12 .. 2^16 rows, codes 5..7
 * yield -EINVAL.
 */
static inline int numrow(u32 row)
{
	const u32 code = row & 0x7;

	if (code > 4)
		return -EINVAL;

	return 1 << (12 + code);
}
444 
/*
 * Decode the 2-bit column field: codes 0, 1 and 2 give 2^10, 2^11 and 2^12
 * columns, code 3 yields -EINVAL.
 *
 * The lookup table has exactly one entry per possible code (the index is
 * masked with 0x3) and is read-only.
 */
static inline int numcol(u32 col)
{
	static const int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};

	return cols[col & 0x3];
}
452 
453 static struct i7core_dev *get_i7core_dev(u8 socket)
454 {
455 	struct i7core_dev *i7core_dev;
456 
457 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
458 		if (i7core_dev->socket == socket)
459 			return i7core_dev;
460 	}
461 
462 	return NULL;
463 }
464 
465 static struct i7core_dev *alloc_i7core_dev(u8 socket,
466 					   const struct pci_id_table *table)
467 {
468 	struct i7core_dev *i7core_dev;
469 
470 	i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
471 	if (!i7core_dev)
472 		return NULL;
473 
474 	i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
475 				   GFP_KERNEL);
476 	if (!i7core_dev->pdev) {
477 		kfree(i7core_dev);
478 		return NULL;
479 	}
480 
481 	i7core_dev->socket = socket;
482 	i7core_dev->n_devs = table->n_devs;
483 	list_add_tail(&i7core_dev->list, &i7core_edac_list);
484 
485 	return i7core_dev;
486 }
487 
488 static void free_i7core_dev(struct i7core_dev *i7core_dev)
489 {
490 	list_del(&i7core_dev->list);
491 	kfree(i7core_dev->pdev);
492 	kfree(i7core_dev);
493 }
494 
495 /****************************************************************************
496 			Memory check routines
497  ****************************************************************************/
498 
499 static int get_dimm_config(struct mem_ctl_info *mci)
500 {
501 	struct i7core_pvt *pvt = mci->pvt_info;
502 	struct pci_dev *pdev;
503 	int i, j;
504 	enum edac_type mode;
505 	enum mem_type mtype;
506 	struct dimm_info *dimm;
507 
508 	/* Get data from the MC register, function 0 */
509 	pdev = pvt->pci_mcr[0];
510 	if (!pdev)
511 		return -ENODEV;
512 
513 	/* Device 3 function 0 reads */
514 	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
515 	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
516 	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
517 	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
518 
519 	edac_dbg(0, "QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
520 		 pvt->i7core_dev->socket, pvt->info.mc_control,
521 		 pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map);
522 
523 	if (ECC_ENABLED(pvt)) {
524 		edac_dbg(0, "ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
525 		if (ECCx8(pvt))
526 			mode = EDAC_S8ECD8ED;
527 		else
528 			mode = EDAC_S4ECD4ED;
529 	} else {
530 		edac_dbg(0, "ECC disabled\n");
531 		mode = EDAC_NONE;
532 	}
533 
534 	/* FIXME: need to handle the error codes */
535 	edac_dbg(0, "DOD Max limits: DIMMS: %d, %d-ranked, %d-banked x%x x 0x%x\n",
536 		 numdimms(pvt->info.max_dod),
537 		 numrank(pvt->info.max_dod >> 2),
538 		 numbank(pvt->info.max_dod >> 4),
539 		 numrow(pvt->info.max_dod >> 6),
540 		 numcol(pvt->info.max_dod >> 9));
541 
542 	for (i = 0; i < NUM_CHANS; i++) {
543 		u32 data, dimm_dod[3], value[8];
544 
545 		if (!pvt->pci_ch[i][0])
546 			continue;
547 
548 		if (!CH_ACTIVE(pvt, i)) {
549 			edac_dbg(0, "Channel %i is not active\n", i);
550 			continue;
551 		}
552 		if (CH_DISABLED(pvt, i)) {
553 			edac_dbg(0, "Channel %i is disabled\n", i);
554 			continue;
555 		}
556 
557 		/* Devices 4-6 function 0 */
558 		pci_read_config_dword(pvt->pci_ch[i][0],
559 				MC_CHANNEL_DIMM_INIT_PARAMS, &data);
560 
561 
562 		if (data & THREE_DIMMS_PRESENT)
563 			pvt->channel[i].is_3dimms_present = true;
564 
565 		if (data & SINGLE_QUAD_RANK_PRESENT)
566 			pvt->channel[i].is_single_4rank = true;
567 
568 		if (data & QUAD_RANK_PRESENT)
569 			pvt->channel[i].has_4rank = true;
570 
571 		if (data & REGISTERED_DIMM)
572 			mtype = MEM_RDDR3;
573 		else
574 			mtype = MEM_DDR3;
575 
576 		/* Devices 4-6 function 1 */
577 		pci_read_config_dword(pvt->pci_ch[i][1],
578 				MC_DOD_CH_DIMM0, &dimm_dod[0]);
579 		pci_read_config_dword(pvt->pci_ch[i][1],
580 				MC_DOD_CH_DIMM1, &dimm_dod[1]);
581 		pci_read_config_dword(pvt->pci_ch[i][1],
582 				MC_DOD_CH_DIMM2, &dimm_dod[2]);
583 
584 		edac_dbg(0, "Ch%d phy rd%d, wr%d (0x%08x): %s%s%s%cDIMMs\n",
585 			 i,
586 			 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
587 			 data,
588 			 pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
589 			 pvt->channel[i].is_3dimms_present ? "SINGLE_4R " : "",
590 			 pvt->channel[i].has_4rank ? "HAS_4R " : "",
591 			 (data & REGISTERED_DIMM) ? 'R' : 'U');
592 
593 		for (j = 0; j < 3; j++) {
594 			u32 banks, ranks, rows, cols;
595 			u32 size, npages;
596 
597 			if (!DIMM_PRESENT(dimm_dod[j]))
598 				continue;
599 
600 			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
601 				       i, j, 0);
602 			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
603 			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
604 			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
605 			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
606 
607 			/* DDR3 has 8 I/O banks */
608 			size = (rows * cols * banks * ranks) >> (20 - 3);
609 
610 			edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
611 				 j, size,
612 				 RANKOFFSET(dimm_dod[j]),
613 				 banks, ranks, rows, cols);
614 
615 			npages = MiB_TO_PAGES(size);
616 
617 			dimm->nr_pages = npages;
618 
619 			switch (banks) {
620 			case 4:
621 				dimm->dtype = DEV_X4;
622 				break;
623 			case 8:
624 				dimm->dtype = DEV_X8;
625 				break;
626 			case 16:
627 				dimm->dtype = DEV_X16;
628 				break;
629 			default:
630 				dimm->dtype = DEV_UNKNOWN;
631 			}
632 
633 			snprintf(dimm->label, sizeof(dimm->label),
634 				 "CPU#%uChannel#%u_DIMM#%u",
635 				 pvt->i7core_dev->socket, i, j);
636 			dimm->grain = 8;
637 			dimm->edac_mode = mode;
638 			dimm->mtype = mtype;
639 		}
640 
641 		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
642 		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
643 		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
644 		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
645 		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
646 		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
647 		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
648 		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
649 		edac_dbg(1, "\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
650 		for (j = 0; j < 8; j++)
651 			edac_dbg(1, "\t\t%#x\t%#x\t%#x\n",
652 				 (value[j] >> 27) & 0x1,
653 				 (value[j] >> 24) & 0x7,
654 				 (value[j] & ((1 << 24) - 1)));
655 	}
656 
657 	return 0;
658 }
659 
660 /****************************************************************************
661 			Error insertion routines
662  ****************************************************************************/
663 
664 #define to_mci(k) container_of(k, struct mem_ctl_info, dev)
665 
666 /* The i7core has independent error injection features per channel.
667    However, to have a simpler code, we don't allow enabling error injection
668    on more than one channel.
669    Also, since a change at an inject parameter will be applied only at enable,
670    we're disabling error injection on all write calls to the sysfs nodes that
671    controls the error code injection.
672  */
673 static int disable_inject(const struct mem_ctl_info *mci)
674 {
675 	struct i7core_pvt *pvt = mci->pvt_info;
676 
677 	pvt->inject.enable = 0;
678 
679 	if (!pvt->pci_ch[pvt->inject.channel][0])
680 		return -ENODEV;
681 
682 	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
683 				MC_CHANNEL_ERROR_INJECT, 0);
684 
685 	return 0;
686 }
687 
688 /*
689  * i7core inject inject.section
690  *
691  *	accept and store error injection inject.section value
692  *	bit 0 - refers to the lower 32-byte half cacheline
693  *	bit 1 - refers to the upper 32-byte half cacheline
694  */
695 static ssize_t i7core_inject_section_store(struct device *dev,
696 					   struct device_attribute *mattr,
697 					   const char *data, size_t count)
698 {
699 	struct mem_ctl_info *mci = to_mci(dev);
700 	struct i7core_pvt *pvt = mci->pvt_info;
701 	unsigned long value;
702 	int rc;
703 
704 	if (pvt->inject.enable)
705 		disable_inject(mci);
706 
707 	rc = strict_strtoul(data, 10, &value);
708 	if ((rc < 0) || (value > 3))
709 		return -EIO;
710 
711 	pvt->inject.section = (u32) value;
712 	return count;
713 }
714 
715 static ssize_t i7core_inject_section_show(struct device *dev,
716 					  struct device_attribute *mattr,
717 					  char *data)
718 {
719 	struct mem_ctl_info *mci = to_mci(dev);
720 	struct i7core_pvt *pvt = mci->pvt_info;
721 	return sprintf(data, "0x%08x\n", pvt->inject.section);
722 }
723 
724 /*
725  * i7core inject.type
726  *
727  *	accept and store error injection inject.section value
728  *	bit 0 - repeat enable - Enable error repetition
729  *	bit 1 - inject ECC error
730  *	bit 2 - inject parity error
731  */
732 static ssize_t i7core_inject_type_store(struct device *dev,
733 					struct device_attribute *mattr,
734 					const char *data, size_t count)
735 {
736 	struct mem_ctl_info *mci = to_mci(dev);
737 struct i7core_pvt *pvt = mci->pvt_info;
738 	unsigned long value;
739 	int rc;
740 
741 	if (pvt->inject.enable)
742 		disable_inject(mci);
743 
744 	rc = strict_strtoul(data, 10, &value);
745 	if ((rc < 0) || (value > 7))
746 		return -EIO;
747 
748 	pvt->inject.type = (u32) value;
749 	return count;
750 }
751 
752 static ssize_t i7core_inject_type_show(struct device *dev,
753 				       struct device_attribute *mattr,
754 				       char *data)
755 {
756 	struct mem_ctl_info *mci = to_mci(dev);
757 	struct i7core_pvt *pvt = mci->pvt_info;
758 
759 	return sprintf(data, "0x%08x\n", pvt->inject.type);
760 }
761 
762 /*
763  * i7core_inject_inject.eccmask_store
764  *
765  * The type of error (UE/CE) will depend on the inject.eccmask value:
766  *   Any bits set to a 1 will flip the corresponding ECC bit
767  *   Correctable errors can be injected by flipping 1 bit or the bits within
768  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
769  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
770  *   uncorrectable error to be injected.
771  */
772 static ssize_t i7core_inject_eccmask_store(struct device *dev,
773 					   struct device_attribute *mattr,
774 					   const char *data, size_t count)
775 {
776 	struct mem_ctl_info *mci = to_mci(dev);
777 	struct i7core_pvt *pvt = mci->pvt_info;
778 	unsigned long value;
779 	int rc;
780 
781 	if (pvt->inject.enable)
782 		disable_inject(mci);
783 
784 	rc = strict_strtoul(data, 10, &value);
785 	if (rc < 0)
786 		return -EIO;
787 
788 	pvt->inject.eccmask = (u32) value;
789 	return count;
790 }
791 
792 static ssize_t i7core_inject_eccmask_show(struct device *dev,
793 					  struct device_attribute *mattr,
794 					  char *data)
795 {
796 	struct mem_ctl_info *mci = to_mci(dev);
797 	struct i7core_pvt *pvt = mci->pvt_info;
798 
799 	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
800 }
801 
/*
 * i7core_addrmatch
 *
 * DECLARE_ADDR_MATCH(param, limit) generates the sysfs store and show
 * handlers for one address-match criterion kept in pvt->inject.<param>.
 *
 * store: switches off any injection that is currently enabled, then accepts
 *	  either the word "any" (stored as -1) or a decimal number smaller
 *	  than 'limit'.  Anything else is rejected with -EIO.
 * show:  prints "any" for a negative stored value and the decimal value
 *	  otherwise.
 *
 * The number is parsed into an unsigned long, which is the type
 * strict_strtoul() writes through, and only then copied to the signed
 * variable that can also hold the "any" marker.
 */

#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
	struct device *dev,					\
	struct device_attribute *mattr,				\
	const char *data, size_t count)				\
{								\
	struct mem_ctl_info *mci = dev_get_drvdata(dev);	\
	struct i7core_pvt *pvt;					\
	long value;						\
	int rc;							\
								\
	edac_dbg(1, "\n");					\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		unsigned long parsed;				\
								\
		rc = strict_strtoul(data, 10, &parsed);		\
		if ((rc < 0) || (parsed >= (limit)))		\
			return -EIO;				\
		value = parsed;					\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
	struct device *dev,					\
	struct device_attribute *mattr,				\
	char *data)						\
{								\
	struct mem_ctl_info *mci = dev_get_drvdata(dev);	\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	edac_dbg(1, "pvt=%p\n", pvt);				\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}
858 
/*
 * Declare the device attribute for one address-match criterion, readable
 * by everybody and writable by the owner, wired to the handlers generated
 * by DECLARE_ADDR_MATCH().
 */
#define ATTR_ADDR_MATCH(param)					\
	static DEVICE_ATTR(param, S_IRUGO | S_IWUSR,		\
		    i7core_inject_show_##param,			\
		    i7core_inject_store_##param)

/* Handlers; the second argument is the exclusive upper bound accepted. */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);

/* Matching device attributes, one per criterion. */
ATTR_ADDR_MATCH(channel);
ATTR_ADDR_MATCH(dimm);
ATTR_ADDR_MATCH(rank);
ATTR_ADDR_MATCH(bank);
ATTR_ADDR_MATCH(page);
ATTR_ADDR_MATCH(col);
877 
878 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
879 {
880 	u32 read;
881 	int count;
882 
883 	edac_dbg(0, "setting pci %02x:%02x.%x reg=%02x value=%08x\n",
884 		 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
885 		 where, val);
886 
887 	for (count = 0; count < 10; count++) {
888 		if (count)
889 			msleep(100);
890 		pci_write_config_dword(dev, where, val);
891 		pci_read_config_dword(dev, where, &read);
892 
893 		if (read == val)
894 			return 0;
895 	}
896 
897 	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
898 		"write=%08x. Read=%08x\n",
899 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
900 		where, val, read);
901 
902 	return -EINVAL;
903 }
904 
905 /*
906  * This routine prepares the Memory Controller for error injection.
907  * The error will be injected when some process tries to write to the
908  * memory that matches the given criteria.
909  * The criteria can be set in terms of a mask where dimm, rank, bank, page
910  * and col can be specified.
911  * A -1 value for any of the mask items will make the MCU to ignore
912  * that matching criteria for error injection.
913  *
914  * It should be noticed that the error will only happen after a write operation
915  * on a memory that matches the condition. if REPEAT_EN is not enabled at
916  * inject mask, then it will produce just one error. Otherwise, it will repeat
917  * until the injectmask would be cleaned.
918  *
919  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
920  *    is reliable enough to check if the MC is using the
921  *    three channels. However, this is not clear at the datasheet.
922  */
923 static ssize_t i7core_inject_enable_store(struct device *dev,
924 					  struct device_attribute *mattr,
925 					  const char *data, size_t count)
926 {
927 	struct mem_ctl_info *mci = to_mci(dev);
928 	struct i7core_pvt *pvt = mci->pvt_info;
929 	u32 injectmask;
930 	u64 mask = 0;
931 	int  rc;
932 	long enable;
933 
934 	if (!pvt->pci_ch[pvt->inject.channel][0])
935 		return 0;
936 
937 	rc = strict_strtoul(data, 10, &enable);
938 	if ((rc < 0))
939 		return 0;
940 
941 	if (enable) {
942 		pvt->inject.enable = 1;
943 	} else {
944 		disable_inject(mci);
945 		return count;
946 	}
947 
948 	/* Sets pvt->inject.dimm mask */
949 	if (pvt->inject.dimm < 0)
950 		mask |= 1LL << 41;
951 	else {
952 		if (pvt->channel[pvt->inject.channel].dimms > 2)
953 			mask |= (pvt->inject.dimm & 0x3LL) << 35;
954 		else
955 			mask |= (pvt->inject.dimm & 0x1LL) << 36;
956 	}
957 
958 	/* Sets pvt->inject.rank mask */
959 	if (pvt->inject.rank < 0)
960 		mask |= 1LL << 40;
961 	else {
962 		if (pvt->channel[pvt->inject.channel].dimms > 2)
963 			mask |= (pvt->inject.rank & 0x1LL) << 34;
964 		else
965 			mask |= (pvt->inject.rank & 0x3LL) << 34;
966 	}
967 
968 	/* Sets pvt->inject.bank mask */
969 	if (pvt->inject.bank < 0)
970 		mask |= 1LL << 39;
971 	else
972 		mask |= (pvt->inject.bank & 0x15LL) << 30;
973 
974 	/* Sets pvt->inject.page mask */
975 	if (pvt->inject.page < 0)
976 		mask |= 1LL << 38;
977 	else
978 		mask |= (pvt->inject.page & 0xffff) << 14;
979 
980 	/* Sets pvt->inject.column mask */
981 	if (pvt->inject.col < 0)
982 		mask |= 1LL << 37;
983 	else
984 		mask |= (pvt->inject.col & 0x3fff);
985 
986 	/*
987 	 * bit    0: REPEAT_EN
988 	 * bits 1-2: MASK_HALF_CACHELINE
989 	 * bit    3: INJECT_ECC
990 	 * bit    4: INJECT_ADDR_PARITY
991 	 */
992 
993 	injectmask = (pvt->inject.type & 1) |
994 		     (pvt->inject.section & 0x3) << 1 |
995 		     (pvt->inject.type & 0x6) << (3 - 1);
996 
997 	/* Unlock writes to registers - this register is write only */
998 	pci_write_config_dword(pvt->pci_noncore,
999 			       MC_CFG_CONTROL, 0x2);
1000 
1001 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1002 			       MC_CHANNEL_ADDR_MATCH, mask);
1003 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1004 			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1005 
1006 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1007 			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1008 
1009 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1010 			       MC_CHANNEL_ERROR_INJECT, injectmask);
1011 
1012 	/*
1013 	 * This is something undocumented, based on my tests
1014 	 * Without writing 8 to this register, errors aren't injected. Not sure
1015 	 * why.
1016 	 */
1017 	pci_write_config_dword(pvt->pci_noncore,
1018 			       MC_CFG_CONTROL, 8);
1019 
1020 	edac_dbg(0, "Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n",
1021 		 mask, pvt->inject.eccmask, injectmask);
1022 
1023 
1024 	return count;
1025 }
1026 
1027 static ssize_t i7core_inject_enable_show(struct device *dev,
1028 					 struct device_attribute *mattr,
1029 					 char *data)
1030 {
1031 	struct mem_ctl_info *mci = to_mci(dev);
1032 	struct i7core_pvt *pvt = mci->pvt_info;
1033 	u32 injectmask;
1034 
1035 	if (!pvt->pci_ch[pvt->inject.channel][0])
1036 		return 0;
1037 
1038 	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1039 			       MC_CHANNEL_ERROR_INJECT, &injectmask);
1040 
1041 	edac_dbg(0, "Inject error read: 0x%018x\n", injectmask);
1042 
1043 	if (injectmask & 0x0c)
1044 		pvt->inject.enable = 1;
1045 
1046 	return sprintf(data, "%d\n", pvt->inject.enable);
1047 }
1048 
/*
 * DECLARE_COUNTER(param) - generate the sysfs read handler
 * i7core_show_counter_<param>().
 *
 * The generated handler fetches the mem_ctl_info stored as driver data of
 * the device and prints pvt->udimm_ce_count[param]. It prints
 * "data unavailable" instead when no count has been collected yet
 * (ce_count_available is zero) or when pvt->is_registered is set.
 */
#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
	struct device *dev,					\
	struct device_attribute *mattr,				\
	char *data)						\
{								\
	struct mem_ctl_info *mci = dev_get_drvdata(dev);	\
	struct i7core_pvt *pvt = mci->pvt_info;			\
								\
	edac_dbg(1, "\n");					\
	if (!pvt->ce_count_available || (pvt->is_registered))	\
		return sprintf(data, "data unavailable\n");	\
	return sprintf(data, "%lu\n",				\
			pvt->udimm_ce_count[param]);		\
}
1064 
/*
 * ATTR_COUNTER(param) - declare the device attribute "udimm<param>" backed
 * by i7core_show_counter_<param>().
 *
 * The attribute has no store callback, so it is declared read-only:
 * advertising write permission on it would only make writes fail.
 */
#define ATTR_COUNTER(param)					\
	static DEVICE_ATTR(udimm##param, S_IRUGO,		\
		    i7core_show_counter_##param,		\
		    NULL)
1069 
/* Read handlers for the three udimm corrected-error counters (index 0..2) */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);

/* Matching device attributes: dev_attr_udimm0 .. dev_attr_udimm2 */
ATTR_COUNTER(0);
ATTR_COUNTER(1);
ATTR_COUNTER(2);
1077 
/*
 * inject_addrmatch device sysfs struct
 *
 * The attribute list, group and NULL-terminated group array below are wired
 * into addrmatch_type, the device type used for the "inject_addrmatch"
 * child device. The dev_attr_* objects are declared outside this section.
 */

/* One attribute per address-match field; the list is NULL-terminated */
static struct attribute *i7core_addrmatch_attrs[] = {
	&dev_attr_channel.attr,
	&dev_attr_dimm.attr,
	&dev_attr_rank.attr,
	&dev_attr_bank.attr,
	&dev_attr_page.attr,
	&dev_attr_col.attr,
	NULL
};

/* Unnamed group: no .name is set */
static struct attribute_group addrmatch_grp = {
	.attrs	= i7core_addrmatch_attrs,
};

/* NULL-terminated array form expected by device_type.groups */
static const struct attribute_group *addrmatch_groups[] = {
	&addrmatch_grp,
	NULL
};
1100 
/*
 * Release callback of addrmatch_type: frees the struct device itself, which
 * i7core_create_sysfs_devices() allocates with kzalloc().
 */
static void addrmatch_release(struct device *device)
{
	edac_dbg(1, "Releasing device %s\n", dev_name(device));
	kfree(device);
}
1106 
/*
 * Device type of the "inject_addrmatch" child device: supplies its sysfs
 * attribute groups and the callback that frees it.
 */
static struct device_type addrmatch_type = {
	.groups		= addrmatch_groups,
	.release	= addrmatch_release,
};
1111 
/*
 * all_channel_counts sysfs struct
 *
 * The attribute list, group and NULL-terminated group array below are wired
 * into all_channel_counts_type, the device type used for the
 * "all_channel_counts" child device.
 */

/* The three udimm counters declared via ATTR_COUNTER(); NULL-terminated */
static struct attribute *i7core_udimm_counters_attrs[] = {
	&dev_attr_udimm0.attr,
	&dev_attr_udimm1.attr,
	&dev_attr_udimm2.attr,
	NULL
};

/* Unnamed group: no .name is set */
static struct attribute_group all_channel_counts_grp = {
	.attrs	= i7core_udimm_counters_attrs,
};

/* NULL-terminated array form expected by device_type.groups */
static const struct attribute_group *all_channel_counts_groups[] = {
	&all_channel_counts_grp,
	NULL
};
1131 
/*
 * Release callback of all_channel_counts_type: frees the struct device
 * itself, which i7core_create_sysfs_devices() allocates with kzalloc().
 */
static void all_channel_counts_release(struct device *device)
{
	edac_dbg(1, "Releasing device %s\n", dev_name(device));
	kfree(device);
}
1137 
/*
 * Device type of the "all_channel_counts" child device: supplies its sysfs
 * attribute groups and the callback that frees it.
 */
static struct device_type all_channel_counts_type = {
	.groups		= all_channel_counts_groups,
	.release	= all_channel_counts_release,
};
1142 
/*
 * inject sysfs attributes
 *
 * These four read/write attributes are created directly on the memory
 * controller device by i7core_create_sysfs_devices() and removed again by
 * i7core_delete_sysfs_devices().
 */

static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
		   i7core_inject_section_show, i7core_inject_section_store);

static DEVICE_ATTR(inject_type, S_IRUGO | S_IWUSR,
		   i7core_inject_type_show, i7core_inject_type_store);


static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR,
		   i7core_inject_eccmask_show, i7core_inject_eccmask_store);

static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR,
		   i7core_inject_enable_show, i7core_inject_enable_store);
1159 
1160 static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
1161 {
1162 	struct i7core_pvt *pvt = mci->pvt_info;
1163 	int rc;
1164 
1165 	rc = device_create_file(&mci->dev, &dev_attr_inject_section);
1166 	if (rc < 0)
1167 		return rc;
1168 	rc = device_create_file(&mci->dev, &dev_attr_inject_type);
1169 	if (rc < 0)
1170 		return rc;
1171 	rc = device_create_file(&mci->dev, &dev_attr_inject_eccmask);
1172 	if (rc < 0)
1173 		return rc;
1174 	rc = device_create_file(&mci->dev, &dev_attr_inject_enable);
1175 	if (rc < 0)
1176 		return rc;
1177 
1178 	pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL);
1179 	if (!pvt->addrmatch_dev)
1180 		return rc;
1181 
1182 	pvt->addrmatch_dev->type = &addrmatch_type;
1183 	pvt->addrmatch_dev->bus = mci->dev.bus;
1184 	device_initialize(pvt->addrmatch_dev);
1185 	pvt->addrmatch_dev->parent = &mci->dev;
1186 	dev_set_name(pvt->addrmatch_dev, "inject_addrmatch");
1187 	dev_set_drvdata(pvt->addrmatch_dev, mci);
1188 
1189 	edac_dbg(1, "creating %s\n", dev_name(pvt->addrmatch_dev));
1190 
1191 	rc = device_add(pvt->addrmatch_dev);
1192 	if (rc < 0)
1193 		return rc;
1194 
1195 	if (!pvt->is_registered) {
1196 		pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev),
1197 					      GFP_KERNEL);
1198 		if (!pvt->chancounts_dev) {
1199 			put_device(pvt->addrmatch_dev);
1200 			device_del(pvt->addrmatch_dev);
1201 			return rc;
1202 		}
1203 
1204 		pvt->chancounts_dev->type = &all_channel_counts_type;
1205 		pvt->chancounts_dev->bus = mci->dev.bus;
1206 		device_initialize(pvt->chancounts_dev);
1207 		pvt->chancounts_dev->parent = &mci->dev;
1208 		dev_set_name(pvt->chancounts_dev, "all_channel_counts");
1209 		dev_set_drvdata(pvt->chancounts_dev, mci);
1210 
1211 		edac_dbg(1, "creating %s\n", dev_name(pvt->chancounts_dev));
1212 
1213 		rc = device_add(pvt->chancounts_dev);
1214 		if (rc < 0)
1215 			return rc;
1216 	}
1217 	return 0;
1218 }
1219 
1220 static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci)
1221 {
1222 	struct i7core_pvt *pvt = mci->pvt_info;
1223 
1224 	edac_dbg(1, "\n");
1225 
1226 	device_remove_file(&mci->dev, &dev_attr_inject_section);
1227 	device_remove_file(&mci->dev, &dev_attr_inject_type);
1228 	device_remove_file(&mci->dev, &dev_attr_inject_eccmask);
1229 	device_remove_file(&mci->dev, &dev_attr_inject_enable);
1230 
1231 	if (!pvt->is_registered) {
1232 		put_device(pvt->chancounts_dev);
1233 		device_del(pvt->chancounts_dev);
1234 	}
1235 	put_device(pvt->addrmatch_dev);
1236 	device_del(pvt->addrmatch_dev);
1237 }
1238 
1239 /****************************************************************************
1240 	Device initialization routines: put/get, init/exit
1241  ****************************************************************************/
1242 
1243 /*
1244  *	i7core_put_all_devices	'put' all the devices that we have
1245  *				reserved via 'get'
1246  */
1247 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1248 {
1249 	int i;
1250 
1251 	edac_dbg(0, "\n");
1252 	for (i = 0; i < i7core_dev->n_devs; i++) {
1253 		struct pci_dev *pdev = i7core_dev->pdev[i];
1254 		if (!pdev)
1255 			continue;
1256 		edac_dbg(0, "Removing dev %02x:%02x.%d\n",
1257 			 pdev->bus->number,
1258 			 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1259 		pci_dev_put(pdev);
1260 	}
1261 }
1262 
1263 static void i7core_put_all_devices(void)
1264 {
1265 	struct i7core_dev *i7core_dev, *tmp;
1266 
1267 	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1268 		i7core_put_devices(i7core_dev);
1269 		free_i7core_dev(i7core_dev);
1270 	}
1271 }
1272 
1273 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1274 {
1275 	struct pci_dev *pdev = NULL;
1276 	int i;
1277 
1278 	/*
1279 	 * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
1280 	 * aren't announced by acpi. So, we need to use a legacy scan probing
1281 	 * to detect them
1282 	 */
1283 	while (table && table->descr) {
1284 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1285 		if (unlikely(!pdev)) {
1286 			for (i = 0; i < MAX_SOCKET_BUSES; i++)
1287 				pcibios_scan_specific_bus(255-i);
1288 		}
1289 		pci_dev_put(pdev);
1290 		table++;
1291 	}
1292 }
1293 
1294 static unsigned i7core_pci_lastbus(void)
1295 {
1296 	int last_bus = 0, bus;
1297 	struct pci_bus *b = NULL;
1298 
1299 	while ((b = pci_find_next_bus(b)) != NULL) {
1300 		bus = b->number;
1301 		edac_dbg(0, "Found bus %d\n", bus);
1302 		if (bus > last_bus)
1303 			last_bus = bus;
1304 	}
1305 
1306 	edac_dbg(0, "Last bus %d\n", last_bus);
1307 
1308 	return last_bus;
1309 }
1310 
/*
 *	i7core_get_onedevice	Find the next PCI device matching entry
 *			@devno of @table, starting the search after *@prev,
 *			and record it in the i7core_dev of its socket.
 *
 *	@prev:		in: device to continue the search from (NULL to start
 *			from the beginning); out: the device found, or NULL
 *			when a continued search found nothing more
 *	@table:		PCI ID table being processed
 *	@devno:		index of the descriptor inside @table->descr
 *	@last_bus:	highest PCI bus number; the socket number is computed
 *			as last_bus - bus of the device
 *
 *	Returns 0 on success or when nothing (more) had to be found, -ENODEV
 *	when a required device is missing, duplicated, at an unexpected
 *	slot/function or cannot be enabled, and -ENOMEM when the per-socket
 *	structure cannot be allocated.
 */
static int i7core_get_onedevice(struct pci_dev **prev,
				const struct pci_id_table *table,
				const unsigned devno,
				const unsigned last_bus)
{
	struct i7core_dev *i7core_dev;
	const struct pci_id_descr *dev_descr = &table->descr[devno];

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
	 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure
	 */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

	/* Same fallback for the Lynnfield non-core device */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
				      *prev);

	if (!pdev) {
		/*
		 * A continued search that finds nothing is not an error:
		 * clear *prev so that the caller stops iterating.
		 */
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		/* Missing first device of a table: fail without a message */
		if (devno == 0)
			return -ENODEV;

		i7core_printk(KERN_INFO,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	socket = last_bus - bus;

	/* Look up the per-socket structure, allocating it on first use */
	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = alloc_i7core_dev(socket, table);
		if (!i7core_dev) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	/*
	 * From here on the device is recorded in i7core_dev; the error
	 * returns below leave it there without dropping the reference.
	 */
	i7core_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	edac_dbg(0, "Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		 socket, bus, dev_descr->dev,
		 dev_descr->func,
		 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	/*
	 * As stated on drivers/pci/search.c, the reference count for
	 * @from is always decremented if it is not %NULL. So, as we need
	 * to get all devices up to null, we need to do a get for the device
	 */
	pci_dev_get(pdev);

	*prev = pdev;

	return 0;
}
1429 
1430 static int i7core_get_all_devices(void)
1431 {
1432 	int i, rc, last_bus;
1433 	struct pci_dev *pdev = NULL;
1434 	const struct pci_id_table *table = pci_dev_table;
1435 
1436 	last_bus = i7core_pci_lastbus();
1437 
1438 	while (table && table->descr) {
1439 		for (i = 0; i < table->n_devs; i++) {
1440 			pdev = NULL;
1441 			do {
1442 				rc = i7core_get_onedevice(&pdev, table, i,
1443 							  last_bus);
1444 				if (rc < 0) {
1445 					if (i == 0) {
1446 						i = table->n_devs;
1447 						break;
1448 					}
1449 					i7core_put_all_devices();
1450 					return -ENODEV;
1451 				}
1452 			} while (pdev);
1453 		}
1454 		table++;
1455 	}
1456 
1457 	return 0;
1458 }
1459 
1460 static int mci_bind_devs(struct mem_ctl_info *mci,
1461 			 struct i7core_dev *i7core_dev)
1462 {
1463 	struct i7core_pvt *pvt = mci->pvt_info;
1464 	struct pci_dev *pdev;
1465 	int i, func, slot;
1466 	char *family;
1467 
1468 	pvt->is_registered = false;
1469 	pvt->enable_scrub  = false;
1470 	for (i = 0; i < i7core_dev->n_devs; i++) {
1471 		pdev = i7core_dev->pdev[i];
1472 		if (!pdev)
1473 			continue;
1474 
1475 		func = PCI_FUNC(pdev->devfn);
1476 		slot = PCI_SLOT(pdev->devfn);
1477 		if (slot == 3) {
1478 			if (unlikely(func > MAX_MCR_FUNC))
1479 				goto error;
1480 			pvt->pci_mcr[func] = pdev;
1481 		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1482 			if (unlikely(func > MAX_CHAN_FUNC))
1483 				goto error;
1484 			pvt->pci_ch[slot - 4][func] = pdev;
1485 		} else if (!slot && !func) {
1486 			pvt->pci_noncore = pdev;
1487 
1488 			/* Detect the processor family */
1489 			switch (pdev->device) {
1490 			case PCI_DEVICE_ID_INTEL_I7_NONCORE:
1491 				family = "Xeon 35xx/ i7core";
1492 				pvt->enable_scrub = false;
1493 				break;
1494 			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
1495 				family = "i7-800/i5-700";
1496 				pvt->enable_scrub = false;
1497 				break;
1498 			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
1499 				family = "Xeon 34xx";
1500 				pvt->enable_scrub = false;
1501 				break;
1502 			case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
1503 				family = "Xeon 55xx";
1504 				pvt->enable_scrub = true;
1505 				break;
1506 			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
1507 				family = "Xeon 56xx / i7-900";
1508 				pvt->enable_scrub = true;
1509 				break;
1510 			default:
1511 				family = "unknown";
1512 				pvt->enable_scrub = false;
1513 			}
1514 			edac_dbg(0, "Detected a processor type %s\n", family);
1515 		} else
1516 			goto error;
1517 
1518 		edac_dbg(0, "Associated fn %d.%d, dev = %p, socket %d\n",
1519 			 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1520 			 pdev, i7core_dev->socket);
1521 
1522 		if (PCI_SLOT(pdev->devfn) == 3 &&
1523 			PCI_FUNC(pdev->devfn) == 2)
1524 			pvt->is_registered = true;
1525 	}
1526 
1527 	return 0;
1528 
1529 error:
1530 	i7core_printk(KERN_ERR, "Device %d, function %d "
1531 		      "is out of the expected range\n",
1532 		      slot, func);
1533 	return -EINVAL;
1534 }
1535 
1536 /****************************************************************************
1537 			Error check routines
1538  ****************************************************************************/
1539 
1540 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1541 					 const int chan,
1542 					 const int new0,
1543 					 const int new1,
1544 					 const int new2)
1545 {
1546 	struct i7core_pvt *pvt = mci->pvt_info;
1547 	int add0 = 0, add1 = 0, add2 = 0;
1548 	/* Updates CE counters if it is not the first time here */
1549 	if (pvt->ce_count_available) {
1550 		/* Updates CE counters */
1551 
1552 		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1553 		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1554 		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1555 
1556 		if (add2 < 0)
1557 			add2 += 0x7fff;
1558 		pvt->rdimm_ce_count[chan][2] += add2;
1559 
1560 		if (add1 < 0)
1561 			add1 += 0x7fff;
1562 		pvt->rdimm_ce_count[chan][1] += add1;
1563 
1564 		if (add0 < 0)
1565 			add0 += 0x7fff;
1566 		pvt->rdimm_ce_count[chan][0] += add0;
1567 	} else
1568 		pvt->ce_count_available = 1;
1569 
1570 	/* Store the new values */
1571 	pvt->rdimm_last_ce_count[chan][2] = new2;
1572 	pvt->rdimm_last_ce_count[chan][1] = new1;
1573 	pvt->rdimm_last_ce_count[chan][0] = new0;
1574 
1575 	/*updated the edac core */
1576 	if (add0 != 0)
1577 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add0,
1578 				     0, 0, 0,
1579 				     chan, 0, -1, "error", "");
1580 	if (add1 != 0)
1581 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add1,
1582 				     0, 0, 0,
1583 				     chan, 1, -1, "error", "");
1584 	if (add2 != 0)
1585 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add2,
1586 				     0, 0, 0,
1587 				     chan, 2, -1, "error", "");
1588 }
1589 
1590 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1591 {
1592 	struct i7core_pvt *pvt = mci->pvt_info;
1593 	u32 rcv[3][2];
1594 	int i, new0, new1, new2;
1595 
1596 	/*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
1597 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1598 								&rcv[0][0]);
1599 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1600 								&rcv[0][1]);
1601 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1602 								&rcv[1][0]);
1603 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1604 								&rcv[1][1]);
1605 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1606 								&rcv[2][0]);
1607 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1608 								&rcv[2][1]);
1609 	for (i = 0 ; i < 3; i++) {
1610 		edac_dbg(3, "MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1611 			 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1612 		/*if the channel has 3 dimms*/
1613 		if (pvt->channel[i].dimms > 2) {
1614 			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1615 			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1616 			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1617 		} else {
1618 			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1619 					DIMM_BOT_COR_ERR(rcv[i][0]);
1620 			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1621 					DIMM_BOT_COR_ERR(rcv[i][1]);
1622 			new2 = 0;
1623 		}
1624 
1625 		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1626 	}
1627 }
1628 
1629 /* This function is based on the device 3 function 4 registers as described on:
1630  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1631  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1632  * also available at:
1633  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1634  */
1635 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1636 {
1637 	struct i7core_pvt *pvt = mci->pvt_info;
1638 	u32 rcv1, rcv0;
1639 	int new0, new1, new2;
1640 
1641 	if (!pvt->pci_mcr[4]) {
1642 		edac_dbg(0, "MCR registers not found\n");
1643 		return;
1644 	}
1645 
1646 	/* Corrected test errors */
1647 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1648 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1649 
1650 	/* Store the new values */
1651 	new2 = DIMM2_COR_ERR(rcv1);
1652 	new1 = DIMM1_COR_ERR(rcv0);
1653 	new0 = DIMM0_COR_ERR(rcv0);
1654 
1655 	/* Updates CE counters if it is not the first time here */
1656 	if (pvt->ce_count_available) {
1657 		/* Updates CE counters */
1658 		int add0, add1, add2;
1659 
1660 		add2 = new2 - pvt->udimm_last_ce_count[2];
1661 		add1 = new1 - pvt->udimm_last_ce_count[1];
1662 		add0 = new0 - pvt->udimm_last_ce_count[0];
1663 
1664 		if (add2 < 0)
1665 			add2 += 0x7fff;
1666 		pvt->udimm_ce_count[2] += add2;
1667 
1668 		if (add1 < 0)
1669 			add1 += 0x7fff;
1670 		pvt->udimm_ce_count[1] += add1;
1671 
1672 		if (add0 < 0)
1673 			add0 += 0x7fff;
1674 		pvt->udimm_ce_count[0] += add0;
1675 
1676 		if (add0 | add1 | add2)
1677 			i7core_printk(KERN_ERR, "New Corrected error(s): "
1678 				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1679 				      add0, add1, add2);
1680 	} else
1681 		pvt->ce_count_available = 1;
1682 
1683 	/* Store the new values */
1684 	pvt->udimm_last_ce_count[2] = new2;
1685 	pvt->udimm_last_ce_count[1] = new1;
1686 	pvt->udimm_last_ce_count[0] = new0;
1687 }
1688 
1689 /*
1690  * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1691  * Architectures Software Developer’s Manual Volume 3B.
1692  * Nehalem are defined as family 0x06, model 0x1a
1693  *
1694  * The MCA registers used here are the following ones:
1695  *     struct mce field	MCA Register
1696  *     m->status	MSR_IA32_MC8_STATUS
1697  *     m->addr		MSR_IA32_MC8_ADDR
1698  *     m->misc		MSR_IA32_MC8_MISC
1699  * In the case of Nehalem, the error information is masked at .status and .misc
1700  * fields
1701  */
1702 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1703 				    const struct mce *m)
1704 {
1705 	struct i7core_pvt *pvt = mci->pvt_info;
1706 	char *type, *optype, *err;
1707 	enum hw_event_mc_err_type tp_event;
1708 	unsigned long error = m->status & 0x1ff0000l;
1709 	bool uncorrected_error = m->mcgstatus & 1ll << 61;
1710 	bool ripv = m->mcgstatus & 1;
1711 	u32 optypenum = (m->status >> 4) & 0x07;
1712 	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1713 	u32 dimm = (m->misc >> 16) & 0x3;
1714 	u32 channel = (m->misc >> 18) & 0x3;
1715 	u32 syndrome = m->misc >> 32;
1716 	u32 errnum = find_first_bit(&error, 32);
1717 
1718 	if (uncorrected_error) {
1719 		if (ripv) {
1720 			type = "FATAL";
1721 			tp_event = HW_EVENT_ERR_FATAL;
1722 		} else {
1723 			type = "NON_FATAL";
1724 			tp_event = HW_EVENT_ERR_UNCORRECTED;
1725 		}
1726 	} else {
1727 		type = "CORRECTED";
1728 		tp_event = HW_EVENT_ERR_CORRECTED;
1729 	}
1730 
1731 	switch (optypenum) {
1732 	case 0:
1733 		optype = "generic undef request";
1734 		break;
1735 	case 1:
1736 		optype = "read error";
1737 		break;
1738 	case 2:
1739 		optype = "write error";
1740 		break;
1741 	case 3:
1742 		optype = "addr/cmd error";
1743 		break;
1744 	case 4:
1745 		optype = "scrubbing error";
1746 		break;
1747 	default:
1748 		optype = "reserved";
1749 		break;
1750 	}
1751 
1752 	switch (errnum) {
1753 	case 16:
1754 		err = "read ECC error";
1755 		break;
1756 	case 17:
1757 		err = "RAS ECC error";
1758 		break;
1759 	case 18:
1760 		err = "write parity error";
1761 		break;
1762 	case 19:
1763 		err = "redundacy loss";
1764 		break;
1765 	case 20:
1766 		err = "reserved";
1767 		break;
1768 	case 21:
1769 		err = "memory range error";
1770 		break;
1771 	case 22:
1772 		err = "RTID out of range";
1773 		break;
1774 	case 23:
1775 		err = "address parity error";
1776 		break;
1777 	case 24:
1778 		err = "byte enable parity error";
1779 		break;
1780 	default:
1781 		err = "unknown";
1782 	}
1783 
1784 	/*
1785 	 * Call the helper to output message
1786 	 * FIXME: what to do if core_err_cnt > 1? Currently, it generates
1787 	 * only one event
1788 	 */
1789 	if (uncorrected_error || !pvt->is_registered)
1790 		edac_mc_handle_error(tp_event, mci, core_err_cnt,
1791 				     m->addr >> PAGE_SHIFT,
1792 				     m->addr & ~PAGE_MASK,
1793 				     syndrome,
1794 				     channel, dimm, -1,
1795 				     err, optype);
1796 }
1797 
1798 /*
1799  *	i7core_check_error	Retrieve and process errors reported by the
1800  *				hardware. Called by the Core module.
1801  */
1802 static void i7core_check_error(struct mem_ctl_info *mci)
1803 {
1804 	struct i7core_pvt *pvt = mci->pvt_info;
1805 	int i;
1806 	unsigned count = 0;
1807 	struct mce *m;
1808 
1809 	/*
1810 	 * MCE first step: Copy all mce errors into a temporary buffer
1811 	 * We use a double buffering here, to reduce the risk of
1812 	 * losing an error.
1813 	 */
1814 	smp_rmb();
1815 	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1816 		% MCE_LOG_LEN;
1817 	if (!count)
1818 		goto check_ce_error;
1819 
1820 	m = pvt->mce_outentry;
1821 	if (pvt->mce_in + count > MCE_LOG_LEN) {
1822 		unsigned l = MCE_LOG_LEN - pvt->mce_in;
1823 
1824 		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1825 		smp_wmb();
1826 		pvt->mce_in = 0;
1827 		count -= l;
1828 		m += l;
1829 	}
1830 	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1831 	smp_wmb();
1832 	pvt->mce_in += count;
1833 
1834 	smp_rmb();
1835 	if (pvt->mce_overrun) {
1836 		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1837 			      pvt->mce_overrun);
1838 		smp_wmb();
1839 		pvt->mce_overrun = 0;
1840 	}
1841 
1842 	/*
1843 	 * MCE second step: parse errors and display
1844 	 */
1845 	for (i = 0; i < count; i++)
1846 		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1847 
1848 	/*
1849 	 * Now, let's increment CE error counts
1850 	 */
1851 check_ce_error:
1852 	if (!pvt->is_registered)
1853 		i7core_udimm_check_mc_ecc_err(mci);
1854 	else
1855 		i7core_rdimm_check_mc_ecc_err(mci);
1856 }
1857 
/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				return. The error itself should be handled later
 *				by i7core_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 *
 * Notifier callback (see i7_mce_dec); @data points to the struct mce.
 * Returns NOTIFY_BAD when no i7core_dev exists for the socket of the event,
 * NOTIFY_DONE when the event is not handled here (or the ring is full) and
 * NOTIFY_STOP once the event has been queued.
 */
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
				  void *data)
{
	struct mce *mce = (struct mce *)data;
	struct i7core_dev *i7_dev;
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;

	i7_dev = get_i7core_dev(mce->socketid);
	if (!i7_dev)
		return NOTIFY_BAD;

	mci = i7_dev->mci;
	pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return NOTIFY_DONE;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return NOTIFY_DONE;

	/* Ring full: count the lost event instead of overwriting an entry */
	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return NOTIFY_DONE;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	/* Advance the producer index only after the record has been copied */
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the errors were handled */
	return NOTIFY_STOP;
}
1911 
/* Notifier block that routes events to i7core_mce_check_error() */
static struct notifier_block i7_mce_dec = {
	.notifier_call	= i7core_mce_check_error,
};
1915 
/*
 * Packed overlay for a DMI entry of type DMI_ENTRY_MEM_DEVICE, as used by
 * decode_dclk(). The field order and the packed attribute define the byte
 * offsets decode_dclk() compares against the entry length, so the layout
 * must not be changed.
 */
struct memdev_dmi_entry {
	u8 type;
	u8 length;	/* entry length; tells which fields are present */
	u16 handle;
	u16 phys_mem_array_handle;
	u16 mem_err_info_handle;
	u16 total_width;
	u16 data_width;
	u16 size;	/* 0 is treated as "no DIMM present" */
	u8 form;
	u8 device_set;
	u8 device_locator;
	u8 bank_locator;
	u8 memory_type;
	u16 type_detail;
	u16 speed;	/* fallback speed when the configured one is absent */
	u8 manufacturer;
	u8 serial_number;
	u8 asset_tag;
	u8 part_number;
	u8 attributes;
	u32 extended_size;
	u16 conf_mem_clk_speed;	/* preferred speed source */
} __attribute__((__packed__));
1940 
1941 
1942 /*
1943  * Decode the DRAM Clock Frequency, be paranoid, make sure that all
1944  * memory devices show the same speed, and if they don't then consider
1945  * all speeds to be invalid.
1946  */
1947 static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1948 {
1949 	int *dclk_freq = _dclk_freq;
1950 	u16 dmi_mem_clk_speed;
1951 
1952 	if (*dclk_freq == -1)
1953 		return;
1954 
1955 	if (dh->type == DMI_ENTRY_MEM_DEVICE) {
1956 		struct memdev_dmi_entry *memdev_dmi_entry =
1957 			(struct memdev_dmi_entry *)dh;
1958 		unsigned long conf_mem_clk_speed_offset =
1959 			(unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
1960 			(unsigned long)&memdev_dmi_entry->type;
1961 		unsigned long speed_offset =
1962 			(unsigned long)&memdev_dmi_entry->speed -
1963 			(unsigned long)&memdev_dmi_entry->type;
1964 
1965 		/* Check that a DIMM is present */
1966 		if (memdev_dmi_entry->size == 0)
1967 			return;
1968 
1969 		/*
1970 		 * Pick the configured speed if it's available, otherwise
1971 		 * pick the DIMM speed, or we don't have a speed.
1972 		 */
1973 		if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
1974 			dmi_mem_clk_speed =
1975 				memdev_dmi_entry->conf_mem_clk_speed;
1976 		} else if (memdev_dmi_entry->length > speed_offset) {
1977 			dmi_mem_clk_speed = memdev_dmi_entry->speed;
1978 		} else {
1979 			*dclk_freq = -1;
1980 			return;
1981 		}
1982 
1983 		if (*dclk_freq == 0) {
1984 			/* First pass, speed was 0 */
1985 			if (dmi_mem_clk_speed > 0) {
1986 				/* Set speed if a valid speed is read */
1987 				*dclk_freq = dmi_mem_clk_speed;
1988 			} else {
1989 				/* Otherwise we don't have a valid speed */
1990 				*dclk_freq = -1;
1991 			}
1992 		} else if (*dclk_freq > 0 &&
1993 			   *dclk_freq != dmi_mem_clk_speed) {
1994 			/*
1995 			 * If we have a speed, check that all DIMMS are the same
1996 			 * speed, otherwise set the speed as invalid.
1997 			 */
1998 			*dclk_freq = -1;
1999 		}
2000 	}
2001 }
2002 
2003 /*
2004  * The default DCLK frequency is used as a fallback if we
2005  * fail to find anything reliable in the DMI. The value
2006  * is taken straight from the datasheet.
2007  */
2008 #define DEFAULT_DCLK_FREQ 800
2009 
2010 static int get_dclk_freq(void)
2011 {
2012 	int dclk_freq = 0;
2013 
2014 	dmi_walk(decode_dclk, (void *)&dclk_freq);
2015 
2016 	if (dclk_freq < 1)
2017 		return DEFAULT_DCLK_FREQ;
2018 
2019 	return dclk_freq;
2020 }
2021 
2022 /*
2023  * set_sdram_scrub_rate		This routine sets byte/sec bandwidth scrub rate
2024  *				to hardware according to SCRUBINTERVAL formula
2025  *				found in datasheet.
2026  */
2027 static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
2028 {
2029 	struct i7core_pvt *pvt = mci->pvt_info;
2030 	struct pci_dev *pdev;
2031 	u32 dw_scrub;
2032 	u32 dw_ssr;
2033 
2034 	/* Get data from the MC register, function 2 */
2035 	pdev = pvt->pci_mcr[2];
2036 	if (!pdev)
2037 		return -ENODEV;
2038 
2039 	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
2040 
2041 	if (new_bw == 0) {
2042 		/* Prepare to disable petrol scrub */
2043 		dw_scrub &= ~STARTSCRUB;
2044 		/* Stop the patrol scrub engine */
2045 		write_and_test(pdev, MC_SCRUB_CONTROL,
2046 			       dw_scrub & ~SCRUBINTERVAL_MASK);
2047 
2048 		/* Get current status of scrub rate and set bit to disable */
2049 		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2050 		dw_ssr &= ~SSR_MODE_MASK;
2051 		dw_ssr |= SSR_MODE_DISABLE;
2052 	} else {
2053 		const int cache_line_size = 64;
2054 		const u32 freq_dclk_mhz = pvt->dclk_freq;
2055 		unsigned long long scrub_interval;
2056 		/*
2057 		 * Translate the desired scrub rate to a register value and
2058 		 * program the corresponding register value.
2059 		 */
2060 		scrub_interval = (unsigned long long)freq_dclk_mhz *
2061 			cache_line_size * 1000000;
2062 		do_div(scrub_interval, new_bw);
2063 
2064 		if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
2065 			return -EINVAL;
2066 
2067 		dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
2068 
2069 		/* Start the patrol scrub engine */
2070 		pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
2071 				       STARTSCRUB | dw_scrub);
2072 
2073 		/* Get current status of scrub rate and set bit to enable */
2074 		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2075 		dw_ssr &= ~SSR_MODE_MASK;
2076 		dw_ssr |= SSR_MODE_ENABLE;
2077 	}
2078 	/* Disable or enable scrubbing */
2079 	pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
2080 
2081 	return new_bw;
2082 }
2083 
2084 /*
2085  * get_sdram_scrub_rate		This routine convert current scrub rate value
2086  *				into byte/sec bandwidth according to
2087  *				SCRUBINTERVAL formula found in datasheet.
2088  */
2089 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2090 {
2091 	struct i7core_pvt *pvt = mci->pvt_info;
2092 	struct pci_dev *pdev;
2093 	const u32 cache_line_size = 64;
2094 	const u32 freq_dclk_mhz = pvt->dclk_freq;
2095 	unsigned long long scrub_rate;
2096 	u32 scrubval;
2097 
2098 	/* Get data from the MC register, function 2 */
2099 	pdev = pvt->pci_mcr[2];
2100 	if (!pdev)
2101 		return -ENODEV;
2102 
2103 	/* Get current scrub control data */
2104 	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2105 
2106 	/* Mask highest 8-bits to 0 */
2107 	scrubval &=  SCRUBINTERVAL_MASK;
2108 	if (!scrubval)
2109 		return 0;
2110 
2111 	/* Calculate scrub rate value into byte/sec bandwidth */
2112 	scrub_rate =  (unsigned long long)freq_dclk_mhz *
2113 		1000000 * cache_line_size;
2114 	do_div(scrub_rate, scrubval);
2115 	return (int)scrub_rate;
2116 }
2117 
2118 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2119 {
2120 	struct i7core_pvt *pvt = mci->pvt_info;
2121 	u32 pci_lock;
2122 
2123 	/* Unlock writes to pci registers */
2124 	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2125 	pci_lock &= ~0x3;
2126 	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2127 			       pci_lock | MC_CFG_UNLOCK);
2128 
2129 	mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2130 	mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2131 }
2132 
2133 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2134 {
2135 	struct i7core_pvt *pvt = mci->pvt_info;
2136 	u32 pci_lock;
2137 
2138 	/* Lock writes to pci registers */
2139 	pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2140 	pci_lock &= ~0x3;
2141 	pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2142 			       pci_lock | MC_CFG_LOCK);
2143 }
2144 
2145 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2146 {
2147 	pvt->i7core_pci = edac_pci_create_generic_ctl(
2148 						&pvt->i7core_dev->pdev[0]->dev,
2149 						EDAC_MOD_STR);
2150 	if (unlikely(!pvt->i7core_pci))
2151 		i7core_printk(KERN_WARNING,
2152 			      "Unable to setup PCI error report via EDAC\n");
2153 }
2154 
2155 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2156 {
2157 	if (likely(pvt->i7core_pci))
2158 		edac_pci_release_generic_ctl(pvt->i7core_pci);
2159 	else
2160 		i7core_printk(KERN_ERR,
2161 				"Couldn't find mem_ctl_info for socket %d\n",
2162 				pvt->i7core_dev->socket);
2163 	pvt->i7core_pci = NULL;
2164 }
2165 
2166 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2167 {
2168 	struct mem_ctl_info *mci = i7core_dev->mci;
2169 	struct i7core_pvt *pvt;
2170 
2171 	if (unlikely(!mci || !mci->pvt_info)) {
2172 		edac_dbg(0, "MC: dev = %p\n", &i7core_dev->pdev[0]->dev);
2173 
2174 		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2175 		return;
2176 	}
2177 
2178 	pvt = mci->pvt_info;
2179 
2180 	edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2181 
2182 	/* Disable scrubrate setting */
2183 	if (pvt->enable_scrub)
2184 		disable_sdram_scrub_setting(mci);
2185 
2186 	/* Disable EDAC polling */
2187 	i7core_pci_ctl_release(pvt);
2188 
2189 	/* Remove MC sysfs nodes */
2190 	i7core_delete_sysfs_devices(mci);
2191 	edac_mc_del_mc(mci->pdev);
2192 
2193 	edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
2194 	kfree(mci->ctl_name);
2195 	edac_mc_free(mci);
2196 	i7core_dev->mci = NULL;
2197 }
2198 
2199 static int i7core_register_mci(struct i7core_dev *i7core_dev)
2200 {
2201 	struct mem_ctl_info *mci;
2202 	struct i7core_pvt *pvt;
2203 	int rc;
2204 	struct edac_mc_layer layers[2];
2205 
2206 	/* allocate a new MC control structure */
2207 
2208 	layers[0].type = EDAC_MC_LAYER_CHANNEL;
2209 	layers[0].size = NUM_CHANS;
2210 	layers[0].is_virt_csrow = false;
2211 	layers[1].type = EDAC_MC_LAYER_SLOT;
2212 	layers[1].size = MAX_DIMMS;
2213 	layers[1].is_virt_csrow = true;
2214 	mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
2215 			    sizeof(*pvt));
2216 	if (unlikely(!mci))
2217 		return -ENOMEM;
2218 
2219 	edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2220 
2221 	pvt = mci->pvt_info;
2222 	memset(pvt, 0, sizeof(*pvt));
2223 
2224 	/* Associates i7core_dev and mci for future usage */
2225 	pvt->i7core_dev = i7core_dev;
2226 	i7core_dev->mci = mci;
2227 
2228 	/*
2229 	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2230 	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2231 	 * memory channels
2232 	 */
2233 	mci->mtype_cap = MEM_FLAG_DDR3;
2234 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
2235 	mci->edac_cap = EDAC_FLAG_NONE;
2236 	mci->mod_name = "i7core_edac.c";
2237 	mci->mod_ver = I7CORE_REVISION;
2238 	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
2239 				  i7core_dev->socket);
2240 	mci->dev_name = pci_name(i7core_dev->pdev[0]);
2241 	mci->ctl_page_to_phys = NULL;
2242 
2243 	/* Store pci devices at mci for faster access */
2244 	rc = mci_bind_devs(mci, i7core_dev);
2245 	if (unlikely(rc < 0))
2246 		goto fail0;
2247 
2248 
2249 	/* Get dimm basic config */
2250 	get_dimm_config(mci);
2251 	/* record ptr to the generic device */
2252 	mci->pdev = &i7core_dev->pdev[0]->dev;
2253 	/* Set the function pointer to an actual operation function */
2254 	mci->edac_check = i7core_check_error;
2255 
2256 	/* Enable scrubrate setting */
2257 	if (pvt->enable_scrub)
2258 		enable_sdram_scrub_setting(mci);
2259 
2260 	/* add this new MC control structure to EDAC's list of MCs */
2261 	if (unlikely(edac_mc_add_mc(mci))) {
2262 		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
2263 		/* FIXME: perhaps some code should go here that disables error
2264 		 * reporting if we just enabled it
2265 		 */
2266 
2267 		rc = -EINVAL;
2268 		goto fail0;
2269 	}
2270 	if (i7core_create_sysfs_devices(mci)) {
2271 		edac_dbg(0, "MC: failed to create sysfs nodes\n");
2272 		edac_mc_del_mc(mci->pdev);
2273 		rc = -EINVAL;
2274 		goto fail0;
2275 	}
2276 
2277 	/* Default error mask is any memory */
2278 	pvt->inject.channel = 0;
2279 	pvt->inject.dimm = -1;
2280 	pvt->inject.rank = -1;
2281 	pvt->inject.bank = -1;
2282 	pvt->inject.page = -1;
2283 	pvt->inject.col = -1;
2284 
2285 	/* allocating generic PCI control info */
2286 	i7core_pci_ctl_create(pvt);
2287 
2288 	/* DCLK for scrub rate setting */
2289 	pvt->dclk_freq = get_dclk_freq();
2290 
2291 	return 0;
2292 
2293 fail0:
2294 	kfree(mci->ctl_name);
2295 	edac_mc_free(mci);
2296 	i7core_dev->mci = NULL;
2297 	return rc;
2298 }
2299 
2300 /*
2301  *	i7core_probe	Probe for ONE instance of device to see if it is
2302  *			present.
2303  *	return:
2304  *		0 for FOUND a device
2305  *		< 0 for error code
2306  */
2307 
2308 static int i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2309 {
2310 	int rc, count = 0;
2311 	struct i7core_dev *i7core_dev;
2312 
2313 	/* get the pci devices we want to reserve for our use */
2314 	mutex_lock(&i7core_edac_lock);
2315 
2316 	/*
2317 	 * All memory controllers are allocated at the first pass.
2318 	 */
2319 	if (unlikely(probed >= 1)) {
2320 		mutex_unlock(&i7core_edac_lock);
2321 		return -ENODEV;
2322 	}
2323 	probed++;
2324 
2325 	rc = i7core_get_all_devices();
2326 	if (unlikely(rc < 0))
2327 		goto fail0;
2328 
2329 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2330 		count++;
2331 		rc = i7core_register_mci(i7core_dev);
2332 		if (unlikely(rc < 0))
2333 			goto fail1;
2334 	}
2335 
2336 	/*
2337 	 * Nehalem-EX uses a different memory controller. However, as the
2338 	 * memory controller is not visible on some Nehalem/Nehalem-EP, we
2339 	 * need to indirectly probe via a X58 PCI device. The same devices
2340 	 * are found on (some) Nehalem-EX. So, on those machines, the
2341 	 * probe routine needs to return -ENODEV, as the actual Memory
2342 	 * Controller registers won't be detected.
2343 	 */
2344 	if (!count) {
2345 		rc = -ENODEV;
2346 		goto fail1;
2347 	}
2348 
2349 	i7core_printk(KERN_INFO,
2350 		      "Driver loaded, %d memory controller(s) found.\n",
2351 		      count);
2352 
2353 	mutex_unlock(&i7core_edac_lock);
2354 	return 0;
2355 
2356 fail1:
2357 	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2358 		i7core_unregister_mci(i7core_dev);
2359 
2360 	i7core_put_all_devices();
2361 fail0:
2362 	mutex_unlock(&i7core_edac_lock);
2363 	return rc;
2364 }
2365 
2366 /*
2367  *	i7core_remove	destructor for one instance of device
2368  *
2369  */
2370 static void i7core_remove(struct pci_dev *pdev)
2371 {
2372 	struct i7core_dev *i7core_dev;
2373 
2374 	edac_dbg(0, "\n");
2375 
2376 	/*
2377 	 * we have a trouble here: pdev value for removal will be wrong, since
2378 	 * it will point to the X58 register used to detect that the machine
2379 	 * is a Nehalem or upper design. However, due to the way several PCI
2380 	 * devices are grouped together to provide MC functionality, we need
2381 	 * to use a different method for releasing the devices
2382 	 */
2383 
2384 	mutex_lock(&i7core_edac_lock);
2385 
2386 	if (unlikely(!probed)) {
2387 		mutex_unlock(&i7core_edac_lock);
2388 		return;
2389 	}
2390 
2391 	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2392 		i7core_unregister_mci(i7core_dev);
2393 
2394 	/* Release PCI resources */
2395 	i7core_put_all_devices();
2396 
2397 	probed--;
2398 
2399 	mutex_unlock(&i7core_edac_lock);
2400 }
2401 
2402 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2403 
2404 /*
2405  *	i7core_driver	pci_driver structure for this module
2406  *
2407  */
2408 static struct pci_driver i7core_driver = {
2409 	.name     = "i7core_edac",
2410 	.probe    = i7core_probe,
2411 	.remove   = i7core_remove,
2412 	.id_table = i7core_pci_tbl,
2413 };
2414 
2415 /*
2416  *	i7core_init		Module entry function
2417  *			Try to initialize this module for its devices
2418  */
2419 static int __init i7core_init(void)
2420 {
2421 	int pci_rc;
2422 
2423 	edac_dbg(2, "\n");
2424 
2425 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
2426 	opstate_init();
2427 
2428 	if (use_pci_fixup)
2429 		i7core_xeon_pci_fixup(pci_dev_table);
2430 
2431 	pci_rc = pci_register_driver(&i7core_driver);
2432 
2433 	if (pci_rc >= 0) {
2434 		mce_register_decode_chain(&i7_mce_dec);
2435 		return 0;
2436 	}
2437 
2438 	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2439 		      pci_rc);
2440 
2441 	return pci_rc;
2442 }
2443 
2444 /*
2445  *	i7core_exit()	Module exit function
2446  *			Unregister the driver
2447  */
2448 static void __exit i7core_exit(void)
2449 {
2450 	edac_dbg(2, "\n");
2451 	pci_unregister_driver(&i7core_driver);
2452 	mce_unregister_decode_chain(&i7_mce_dec);
2453 }
2454 
2455 module_init(i7core_init);
2456 module_exit(i7core_exit);
2457 
2458 MODULE_LICENSE("GPL");
2459 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2460 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2461 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2462 		   I7CORE_REVISION);
2463 
2464 module_param(edac_op_state, int, 0444);
2465 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2466