xref: /openbmc/linux/drivers/edac/edac_mc.c (revision 95e9fd10)
1 /*
2  * edac_mc kernel module
3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4  * This file may be distributed under the terms of the
5  * GNU General Public License.
6  *
7  * Written by Thayne Harbaugh
8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
9  *	http://www.anime.net/~goemon/linux-ecc/
10  *
11  * Modified by Dave Peterson and Doug Thompson
12  *
13  */
14 
15 #include <linux/module.h>
16 #include <linux/proc_fs.h>
17 #include <linux/kernel.h>
18 #include <linux/types.h>
19 #include <linux/smp.h>
20 #include <linux/init.h>
21 #include <linux/sysctl.h>
22 #include <linux/highmem.h>
23 #include <linux/timer.h>
24 #include <linux/slab.h>
25 #include <linux/jiffies.h>
26 #include <linux/spinlock.h>
27 #include <linux/list.h>
28 #include <linux/ctype.h>
29 #include <linux/edac.h>
30 #include <linux/bitops.h>
31 #include <asm/uaccess.h>
32 #include <asm/page.h>
33 #include <asm/edac.h>
34 #include "edac_core.h"
35 #include "edac_module.h"
36 
37 #define CREATE_TRACE_POINTS
38 #define TRACE_INCLUDE_PATH ../../include/ras
39 #include <ras/ras_event.h>
40 
41 /* lock to memory controller's control array */
42 static DEFINE_MUTEX(mem_ctls_mutex);
43 static LIST_HEAD(mc_devices);
44 
45 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
46 			         unsigned len)
47 {
48 	struct mem_ctl_info *mci = dimm->mci;
49 	int i, n, count = 0;
50 	char *p = buf;
51 
52 	for (i = 0; i < mci->n_layers; i++) {
53 		n = snprintf(p, len, "%s %d ",
54 			      edac_layer_name[mci->layers[i].type],
55 			      dimm->location[i]);
56 		p += n;
57 		len -= n;
58 		count += n;
59 		if (!len)
60 			break;
61 	}
62 
63 	return count;
64 }
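
/*
 * For example, with a csrow layer and a channel layer, a DIMM at
 * location {1, 0} produces the string "csrow 1 channel 0 " (note the
 * trailing space left by the last iteration).
 */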
65 
66 #ifdef CONFIG_EDAC_DEBUG
67 
68 static void edac_mc_dump_channel(struct rank_info *chan)
69 {
70 	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
71 	edac_dbg(4, "    channel = %p\n", chan);
72 	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
73 	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
74 }
75 
76 static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
77 {
78 	char location[80];
79 
80 	edac_dimm_info_location(dimm, location, sizeof(location));
81 
82 	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
83 		 dimm->mci->mem_is_per_rank ? "rank" : "dimm",
84 		 number, location, dimm->csrow, dimm->cschannel);
85 	edac_dbg(4, "  dimm = %p\n", dimm);
86 	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
87 	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
88 	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
90 }
91 
92 static void edac_mc_dump_csrow(struct csrow_info *csrow)
93 {
94 	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
95 	edac_dbg(4, "  csrow = %p\n", csrow);
96 	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
97 	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
98 	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
99 	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
100 	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
101 	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
102 }
103 
104 static void edac_mc_dump_mci(struct mem_ctl_info *mci)
105 {
106 	edac_dbg(3, "\tmci = %p\n", mci);
107 	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
108 	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
109 	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
110 	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
111 	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
112 		 mci->nr_csrows, mci->csrows);
113 	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
114 		 mci->tot_dimms, mci->dimms);
115 	edac_dbg(3, "\tdev = %p\n", mci->pdev);
116 	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
117 		 mci->mod_name, mci->ctl_name);
118 	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
119 }
120 
121 #endif				/* CONFIG_EDAC_DEBUG */
122 
123 /*
124  * keep those in sync with the enum mem_type
125  */
126 const char *edac_mem_types[] = {
127 	"Empty csrow",
128 	"Reserved csrow type",
129 	"Unknown csrow type",
130 	"Fast page mode RAM",
131 	"Extended data out RAM",
132 	"Burst Extended data out RAM",
133 	"Single data rate SDRAM",
134 	"Registered single data rate SDRAM",
135 	"Double data rate SDRAM",
136 	"Registered Double data rate SDRAM",
137 	"Rambus DRAM",
138 	"Unbuffered DDR2 RAM",
139 	"Fully buffered DDR2",
140 	"Registered DDR2 RAM",
141 	"Rambus XDR",
142 	"Unbuffered DDR3 RAM",
143 	"Registered DDR3 RAM",
144 };
145 EXPORT_SYMBOL_GPL(edac_mem_types);
146 
147 /**
148  * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
149  * @p:		pointer to a pointer with the memory offset to be used. At
150  *		return, this will be incremented to point to the next offset
151  * @size:	Size of the data structure to be reserved
152  * @n_elems:	Number of elements that should be reserved
153  *
154  * If 'size' is a constant, the compiler will optimize this whole function
155  * down to either a no-op or the addition of a constant to the value of '*p'.
156  *
157  * The 'p' pointer is absolutely needed to keep advancing further in
158  * memory to the proper offsets when allocating the struct along with
159  * its embedded structs, as edac_device_alloc_ctl_info() does,
160  * for example.
161  *
162  * At return, the pointer 'p' will be incremented to be used on a next call
163  * to this function.
164  */
165 void *edac_align_ptr(void **p, unsigned size, int n_elems)
166 {
167 	unsigned align, r;
168 	void *ptr = *p;
169 
170 	*p += size * n_elems;
171 
172 	/*
173 	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
174 	 * 'size'.  Adjust 'p' so that its alignment is at least as
175 	 * stringent as what the compiler would provide for X and return
176 	 * the aligned result.
177 	 * Here we assume that the alignment of a "long long" is the most
178 	 * stringent alignment that the compiler will ever provide by default.
179 	 * As far as I know, this is a reasonable assumption.
180 	 */
181 	if (size > sizeof(long))
182 		align = sizeof(long long);
183 	else if (size > sizeof(int))
184 		align = sizeof(long);
185 	else if (size > sizeof(short))
186 		align = sizeof(int);
187 	else if (size > sizeof(char))
188 		align = sizeof(short);
189 	else
190 		return (char *)ptr;
191 
192 	r = (unsigned long)ptr % align;
193 
194 	if (r == 0)
195 		return (char *)ptr;
196 
197 	*p += align - r;
198 
199 	return (void *)(((unsigned long)ptr) + align - r);
200 }
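
/*
 * Sketch of the single-shot allocation pattern this helper supports
 * (hypothetical structs, not actual EDAC code): offsets are computed
 * against a NULL base, one kzalloc() covers the total size, and the
 * offsets are then rebased onto the real allocation:
 *
 *	void *ptr = NULL, *base;
 *	struct foo *foo;
 *	u32 *counters;
 *	size_t size;
 *
 *	foo      = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *	counters = edac_align_ptr(&ptr, sizeof(u32), n);
 *	size     = (unsigned long)counters + n * sizeof(u32);
 *
 *	base = kzalloc(size, GFP_KERNEL);
 *	if (!base)
 *		return NULL;
 *	foo      = (struct foo *)((char *)base + (unsigned long)foo);
 *	counters = (u32 *)((char *)base + (unsigned long)counters);
 *
 * edac_mc_alloc() below does exactly this for the mci, layer, counter
 * and private-data areas.
 */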
201 
202 /**
203  * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
204  * @mc_num:		Memory controller number
205  * @n_layers:		Number of MC hierarchy layers
206  * @layers:		Describes each layer as seen by the Memory Controller
207  * @sz_pvt:		Size of private storage needed
208  *
209  *
210  * Everything is kmalloc'ed as one big chunk - more efficient.
211  * Only can be used if all structures have the same lifetime - otherwise
212  * It can only be used if all structures have the same lifetime - otherwise
213  *
214  * Use edac_mc_free() to free mc structures allocated by this function.
215  *
216  * NOTE: drivers handle multi-rank memories in different ways: in some
217  * drivers, one multi-rank memory stick is mapped as one entry, while, in
218  * others, a single multi-rank memory stick would be mapped into several
219  * entries. Currently, this function will allocate multiple struct dimm_info
220  * in such scenarios, as grouping the multiple ranks would require driver changes.
221  *
222  * Returns:
223  *	On failure: NULL
224  *	On success: struct mem_ctl_info pointer
225  */
226 struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
227 				   unsigned n_layers,
228 				   struct edac_mc_layer *layers,
229 				   unsigned sz_pvt)
230 {
231 	struct mem_ctl_info *mci;
232 	struct edac_mc_layer *layer;
233 	struct csrow_info *csr;
234 	struct rank_info *chan;
235 	struct dimm_info *dimm;
236 	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
237 	unsigned pos[EDAC_MAX_LAYERS];
238 	unsigned size, tot_dimms = 1, count = 1;
239 	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
240 	void *pvt, *p, *ptr = NULL;
241 	int i, j, row, chn, n, len, off;
242 	bool per_rank = false;
243 
244 	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
245 	/*
246 	 * Calculate the total amount of dimms and csrows/cschannels while
247 	 * in the old API emulation mode
248 	 */
249 	for (i = 0; i < n_layers; i++) {
250 		tot_dimms *= layers[i].size;
251 		if (layers[i].is_virt_csrow)
252 			tot_csrows *= layers[i].size;
253 		else
254 			tot_channels *= layers[i].size;
255 
256 		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
257 			per_rank = true;
258 	}
259 
260 	/* Figure out the offsets of the various items from the start of an mc
261 	 * structure.  We want the alignment of each item to be at least as
262 	 * stringent as what the compiler would provide if we could simply
263 	 * hardcode everything into a single struct.
264 	 */
265 	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
266 	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
267 	for (i = 0; i < n_layers; i++) {
268 		count *= layers[i].size;
269 		edac_dbg(4, "errcount layer %d size %d\n", i, count);
270 		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
271 		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
272 		tot_errcount += 2 * count;
273 	}
274 
275 	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
276 	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
277 	size = ((unsigned long)pvt) + sz_pvt;
278 
279 	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
280 		 size,
281 		 tot_dimms,
282 		 per_rank ? "ranks" : "dimms",
283 		 tot_csrows * tot_channels);
284 
285 	mci = kzalloc(size, GFP_KERNEL);
286 	if (mci == NULL)
287 		return NULL;
288 
289 	/* Adjust pointers so they point within the memory we just allocated
290 	 * rather than an imaginary chunk of memory located at address 0.
291 	 */
292 	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
293 	for (i = 0; i < n_layers; i++) {
294 		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
295 		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
296 	}
297 	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
298 
299 	/* setup index and various internal pointers */
300 	mci->mc_idx = mc_num;
301 	mci->tot_dimms = tot_dimms;
302 	mci->pvt_info = pvt;
303 	mci->n_layers = n_layers;
304 	mci->layers = layer;
305 	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
306 	mci->nr_csrows = tot_csrows;
307 	mci->num_cschannel = tot_channels;
308 	mci->mem_is_per_rank = per_rank;
309 
310 	/*
311 	 * Allocate and fill the csrow/channels structs
312 	 */
313 	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
314 	if (!mci->csrows)
315 		goto error;
316 	for (row = 0; row < tot_csrows; row++) {
317 		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
318 		if (!csr)
319 			goto error;
320 		mci->csrows[row] = csr;
321 		csr->csrow_idx = row;
322 		csr->mci = mci;
323 		csr->nr_channels = tot_channels;
324 		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
325 					GFP_KERNEL);
326 		if (!csr->channels)
327 			goto error;
328 
329 		for (chn = 0; chn < tot_channels; chn++) {
330 			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
331 			if (!chan)
332 				goto error;
333 			csr->channels[chn] = chan;
334 			chan->chan_idx = chn;
335 			chan->csrow = csr;
336 		}
337 	}
338 
339 	/*
340 	 * Allocate and fill the dimm structs
341 	 */
342 	mci->dimms  = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
343 	if (!mci->dimms)
344 		goto error;
345 
346 	memset(&pos, 0, sizeof(pos));
347 	row = 0;
348 	chn = 0;
349 	for (i = 0; i < tot_dimms; i++) {
350 		chan = mci->csrows[row]->channels[chn];
351 		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
352 		if (off < 0 || off >= tot_dimms) {
353 			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
354 			goto error;
355 		}
356 
357 		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
358 		if (!dimm)
359 			goto error;
360 		mci->dimms[off] = dimm;
361 		dimm->mci = mci;
362 
363 		/*
364 		 * Copy DIMM location and initialize it.
365 		 */
366 		len = sizeof(dimm->label);
367 		p = dimm->label;
368 		n = snprintf(p, len, "mc#%u", mc_num);
369 		p += n;
370 		len -= n;
371 		for (j = 0; j < n_layers; j++) {
372 			n = snprintf(p, len, "%s#%u",
373 				     edac_layer_name[layers[j].type],
374 				     pos[j]);
375 			p += n;
376 			len -= n;
377 			dimm->location[j] = pos[j];
378 
379 			if (len <= 0)
380 				break;
381 		}
382 
383 		/* Link it to the csrows old API data */
384 		chan->dimm = dimm;
385 		dimm->csrow = row;
386 		dimm->cschannel = chn;
387 
388 		/* Increment csrow location */
389 		row++;
390 		if (row == tot_csrows) {
391 			row = 0;
392 			chn++;
393 		}
394 
395 		/* Increment dimm location */
396 		for (j = n_layers - 1; j >= 0; j--) {
397 			pos[j]++;
398 			if (pos[j] < layers[j].size)
399 				break;
400 			pos[j] = 0;
401 		}
402 	}
403 
404 	mci->op_state = OP_ALLOC;
405 
406 	/* At this point, the root kobj is valid. In order to 'free' the
407 	 * object, the function
408 	 *      edac_mc_unregister_sysfs_main_kobj() must be called,
409 	 * which will perform the kobj unregistration; the actual free
410 	 * will then occur during the kobject release callback.
411 	 */
412 
413 	return mci;
414 
415 error:
416 	if (mci->dimms) {
417 		for (i = 0; i < tot_dimms; i++)
418 			kfree(mci->dimms[i]);
419 		kfree(mci->dimms);
420 	}
421 	if (mci->csrows) {
422 		for (row = 0; row < tot_csrows; row++) {
423 			csr = mci->csrows[row];
424 			if (csr && csr->channels) {
425 				for (chn = 0; chn < tot_channels; chn++)
426 					kfree(csr->channels[chn]);
427 				kfree(csr->channels);
428 			}
429 			kfree(csr);
430 		}
431 		kfree(mci->csrows);
432 	}
433 	kfree(mci);
434 
435 	return NULL;
436 }
437 EXPORT_SYMBOL_GPL(edac_mc_alloc);
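
/*
 * Typical use from a driver probe routine (sketch only; nr_csrows,
 * nr_channels and struct my_pvt are hypothetical driver-side names):
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = nr_csrows;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = nr_channels;
 *	layers[1].is_virt_csrow = false;
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 */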
438 
439 /**
440  * edac_mc_free
441  *	'Free' a previously allocated 'mci' structure
442  * @mci: pointer to a struct mem_ctl_info structure
443  */
444 void edac_mc_free(struct mem_ctl_info *mci)
445 {
446 	edac_dbg(1, "\n");
447 
448 	/* the mci instance is freed here, when the sysfs object is dropped */
449 	edac_unregister_sysfs(mci);
450 }
451 EXPORT_SYMBOL_GPL(edac_mc_free);
452 
453 
454 /**
455  * find_mci_by_dev
456  *
457  *	scan list of controllers looking for the one that manages
458  *	the 'dev' device
459  * @dev: pointer to a struct device related with the MCI
460  */
461 struct mem_ctl_info *find_mci_by_dev(struct device *dev)
462 {
463 	struct mem_ctl_info *mci;
464 	struct list_head *item;
465 
466 	edac_dbg(3, "\n");
467 
468 	list_for_each(item, &mc_devices) {
469 		mci = list_entry(item, struct mem_ctl_info, link);
470 
471 		if (mci->pdev == dev)
472 			return mci;
473 	}
474 
475 	return NULL;
476 }
477 EXPORT_SYMBOL_GPL(find_mci_by_dev);
478 
479 /*
480  * handler for EDAC to check if NMI type handler has asserted interrupt
481  */
482 static int edac_mc_assert_error_check_and_clear(void)
483 {
484 	int old_state;
485 
486 	if (edac_op_state == EDAC_OPSTATE_POLL)
487 		return 1;
488 
489 	old_state = edac_err_assert;
490 	edac_err_assert = 0;
491 
492 	return old_state;
493 }
494 
495 /*
496  * edac_mc_workq_function
497  *	performs the operation scheduled by a workq request
498  */
499 static void edac_mc_workq_function(struct work_struct *work_req)
500 {
501 	struct delayed_work *d_work = to_delayed_work(work_req);
502 	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
503 
504 	mutex_lock(&mem_ctls_mutex);
505 
506 	/* if this control struct has moved to the offline state, we are done */
507 	if (mci->op_state == OP_OFFLINE) {
508 		mutex_unlock(&mem_ctls_mutex);
509 		return;
510 	}
511 
512 	/* Only poll controllers that are running polled and have a check */
513 	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
514 		mci->edac_check(mci);
515 
516 	mutex_unlock(&mem_ctls_mutex);
517 
518 	/* Reschedule */
519 	queue_delayed_work(edac_workqueue, &mci->work,
520 			msecs_to_jiffies(edac_mc_get_poll_msec()));
521 }
522 
523 /*
524  * edac_mc_workq_setup
525  *	initialize a workq item for this mci
526  *	passing in the new delay period in msec
527  *
528  *	locking model:
529  *
530  *		called with the mem_ctls_mutex held
531  */
532 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
533 {
534 	edac_dbg(0, "\n");
535 
536 	/* if this instance is not in the POLL state, then simply return */
537 	if (mci->op_state != OP_RUNNING_POLL)
538 		return;
539 
540 	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
541 	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
542 }
543 
544 /*
545  * edac_mc_workq_teardown
546  *	stop the workq processing on this mci
547  *
548  *	locking model:
549  *
550  *		called WITHOUT lock held
551  */
552 static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
553 {
554 	int status;
555 
556 	if (mci->op_state != OP_RUNNING_POLL)
557 		return;
558 
559 	status = cancel_delayed_work(&mci->work);
560 	if (status == 0) {
561 		edac_dbg(0, "not canceled, flush the queue\n");
562 
563 		/* workq instance might be running, wait for it */
564 		flush_workqueue(edac_workqueue);
565 	}
566 }
567 
568 /*
569  * edac_mc_reset_delay_period(unsigned long value)
570  * edac_mc_reset_delay_period(int value)
571  *	user space has updated our poll period value, need to
572  *	reset our workq delays
573  */
574 void edac_mc_reset_delay_period(int value)
575 {
576 	struct mem_ctl_info *mci;
577 	struct list_head *item;
578 
579 	mutex_lock(&mem_ctls_mutex);
580 
581 	/* scan the list and turn off all workq timers, doing so under lock
582 	 */
583 	list_for_each(item, &mc_devices) {
584 		mci = list_entry(item, struct mem_ctl_info, link);
585 
586 		if (mci->op_state == OP_RUNNING_POLL)
587 			cancel_delayed_work(&mci->work);
588 	}
589 
590 	mutex_unlock(&mem_ctls_mutex);
591 
592 
593 	/* re-walk the list, and reset the poll delay */
594 	mutex_lock(&mem_ctls_mutex);
595 
596 	list_for_each(item, &mc_devices) {
597 		mci = list_entry(item, struct mem_ctl_info, link);
598 
599 		edac_mc_workq_setup(mci, (unsigned long) value);
600 	}
601 
602 	mutex_unlock(&mem_ctls_mutex);
603 }
604 
605 
606 
607 /* Return 0 on success, 1 on failure.
608  * Before calling this function, caller must
609  * assign a unique value to mci->mc_idx.
610  *
611  *	locking model:
612  *
613  *		called with the mem_ctls_mutex lock held
614  */
615 static int add_mc_to_global_list(struct mem_ctl_info *mci)
616 {
617 	struct list_head *item, *insert_before;
618 	struct mem_ctl_info *p;
619 
620 	insert_before = &mc_devices;
621 
622 	p = find_mci_by_dev(mci->pdev);
623 	if (unlikely(p != NULL))
624 		goto fail0;
625 
626 	list_for_each(item, &mc_devices) {
627 		p = list_entry(item, struct mem_ctl_info, link);
628 
629 		if (p->mc_idx >= mci->mc_idx) {
630 			if (unlikely(p->mc_idx == mci->mc_idx))
631 				goto fail1;
632 
633 			insert_before = item;
634 			break;
635 		}
636 	}
637 
638 	list_add_tail_rcu(&mci->link, insert_before);
639 	atomic_inc(&edac_handlers);
640 	return 0;
641 
642 fail0:
643 	edac_printk(KERN_WARNING, EDAC_MC,
644 		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
645 		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
646 	return 1;
647 
648 fail1:
649 	edac_printk(KERN_WARNING, EDAC_MC,
650 		"bug in low-level driver: attempt to assign\n"
651 		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
652 	return 1;
653 }
654 
655 static void del_mc_from_global_list(struct mem_ctl_info *mci)
656 {
657 	atomic_dec(&edac_handlers);
658 	list_del_rcu(&mci->link);
659 
660 	/* these are for safe removal of devices from global list while
661 	 * NMI handlers may be traversing list
662 	 */
663 	synchronize_rcu();
664 	INIT_LIST_HEAD(&mci->link);
665 }
666 
667 /**
668  * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
669  *
670  * If found, return a pointer to the structure.
671  * Else return NULL.
672  *
673  * Caller must hold mem_ctls_mutex.
674  */
675 struct mem_ctl_info *edac_mc_find(int idx)
676 {
677 	struct list_head *item;
678 	struct mem_ctl_info *mci;
679 
680 	list_for_each(item, &mc_devices) {
681 		mci = list_entry(item, struct mem_ctl_info, link);
682 
683 		if (mci->mc_idx >= idx) {
684 			if (mci->mc_idx == idx)
685 				return mci;
686 
687 			break;
688 		}
689 	}
690 
691 	return NULL;
692 }
693 EXPORT_SYMBOL(edac_mc_find);
694 
695 /**
696  * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
697  *                 create sysfs entries associated with mci structure
698  * @mci: pointer to the mci structure to be added to the list
699  *
700  * Return:
701  *	0	Success
702  *	!0	Failure
703  */
704 
705 /* FIXME - should a warning be printed if no error detection? correction? */
706 int edac_mc_add_mc(struct mem_ctl_info *mci)
707 {
708 	edac_dbg(0, "\n");
709 
710 #ifdef CONFIG_EDAC_DEBUG
711 	if (edac_debug_level >= 3)
712 		edac_mc_dump_mci(mci);
713 
714 	if (edac_debug_level >= 4) {
715 		int i;
716 
717 		for (i = 0; i < mci->nr_csrows; i++) {
718 			struct csrow_info *csrow = mci->csrows[i];
719 			u32 nr_pages = 0;
720 			int j;
721 
722 			for (j = 0; j < csrow->nr_channels; j++)
723 				nr_pages += csrow->channels[j]->dimm->nr_pages;
724 			if (!nr_pages)
725 				continue;
726 			edac_mc_dump_csrow(csrow);
727 			for (j = 0; j < csrow->nr_channels; j++)
728 				if (csrow->channels[j]->dimm->nr_pages)
729 					edac_mc_dump_channel(csrow->channels[j]);
730 		}
731 		for (i = 0; i < mci->tot_dimms; i++)
732 			if (mci->dimms[i]->nr_pages)
733 				edac_mc_dump_dimm(mci->dimms[i], i);
734 	}
735 #endif
736 	mutex_lock(&mem_ctls_mutex);
737 
738 	if (add_mc_to_global_list(mci))
739 		goto fail0;
740 
741 	/* set load time so that error rate can be tracked */
742 	mci->start_time = jiffies;
743 
744 	if (edac_create_sysfs_mci_device(mci)) {
745 		edac_mc_printk(mci, KERN_WARNING,
746 			"failed to create sysfs device\n");
747 		goto fail1;
748 	}
749 
750 	/* If there IS a check routine, then we are running POLLED */
751 	if (mci->edac_check != NULL) {
752 		/* This instance is NOW RUNNING */
753 		mci->op_state = OP_RUNNING_POLL;
754 
755 		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
756 	} else {
757 		mci->op_state = OP_RUNNING_INTERRUPT;
758 	}
759 
760 	/* Report action taken */
761 	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
762 		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
763 
764 	mutex_unlock(&mem_ctls_mutex);
765 	return 0;
766 
767 fail1:
768 	del_mc_from_global_list(mci);
769 
770 fail0:
771 	mutex_unlock(&mem_ctls_mutex);
772 	return 1;
773 }
774 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
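
/*
 * A driver typically registers the controller right after filling in the
 * mci fields; edac_check may be left NULL for interrupt-driven drivers.
 * Sketch only -- the names assigned below are hypothetical:
 *
 *	mci->pdev = &pdev->dev;
 *	mci->mod_name = "my_edac_driver";
 *	mci->ctl_name = "my_memory_controller";
 *	mci->edac_check = my_check;
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */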
775 
776 /**
777  * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
778  *                 remove mci structure from global list
779  * @dev: Pointer to the 'struct device' representing the mci structure to remove.
780  *
781  * Return pointer to removed mci structure, or NULL if device not found.
782  */
783 struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
784 {
785 	struct mem_ctl_info *mci;
786 
787 	edac_dbg(0, "\n");
788 
789 	mutex_lock(&mem_ctls_mutex);
790 
791 	/* find the requested mci struct in the global list */
792 	mci = find_mci_by_dev(dev);
793 	if (mci == NULL) {
794 		mutex_unlock(&mem_ctls_mutex);
795 		return NULL;
796 	}
797 
798 	del_mc_from_global_list(mci);
799 	mutex_unlock(&mem_ctls_mutex);
800 
801 	/* flush workq processes */
802 	edac_mc_workq_teardown(mci);
803 
804 	/* marking MCI offline */
805 	mci->op_state = OP_OFFLINE;
806 
807 	/* remove from sysfs */
808 	edac_remove_sysfs_mci_device(mci);
809 
810 	edac_printk(KERN_INFO, EDAC_MC,
811 		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
812 		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
813 
814 	return mci;
815 }
816 EXPORT_SYMBOL_GPL(edac_mc_del_mc);
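
/*
 * The matching teardown in a driver remove routine is simply (sketch):
 *
 *	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *	if (mci)
 *		edac_mc_free(mci);
 */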
817 
818 static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
819 				u32 size)
820 {
821 	struct page *pg;
822 	void *virt_addr;
823 	unsigned long flags = 0;
824 
825 	edac_dbg(3, "\n");
826 
827 	/* ECC error page was not in our memory. Ignore it. */
828 	if (!pfn_valid(page))
829 		return;
830 
831 	/* Find the actual page structure then map it and fix */
832 	pg = pfn_to_page(page);
833 
834 	if (PageHighMem(pg))
835 		local_irq_save(flags);
836 
837 	virt_addr = kmap_atomic(pg);
838 
839 	/* Perform architecture specific atomic scrub operation */
840 	atomic_scrub(virt_addr + offset, size);
841 
842 	/* Unmap and complete */
843 	kunmap_atomic(virt_addr);
844 
845 	if (PageHighMem(pg))
846 		local_irq_restore(flags);
847 }
848 
849 /* FIXME - should return -1 */
850 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
851 {
852 	struct csrow_info **csrows = mci->csrows;
853 	int row, i, j, n;
854 
855 	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
856 	row = -1;
857 
858 	for (i = 0; i < mci->nr_csrows; i++) {
859 		struct csrow_info *csrow = csrows[i];
860 		n = 0;
861 		for (j = 0; j < csrow->nr_channels; j++) {
862 			struct dimm_info *dimm = csrow->channels[j]->dimm;
863 			n += dimm->nr_pages;
864 		}
865 		if (n == 0)
866 			continue;
867 
868 		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
869 			 mci->mc_idx,
870 			 csrow->first_page, page, csrow->last_page,
871 			 csrow->page_mask);
872 
873 		if ((page >= csrow->first_page) &&
874 		    (page <= csrow->last_page) &&
875 		    ((page & csrow->page_mask) ==
876 		     (csrow->first_page & csrow->page_mask))) {
877 			row = i;
878 			break;
879 		}
880 	}
881 
882 	if (row == -1)
883 		edac_mc_printk(mci, KERN_ERR,
884 			"could not look up page error address %lx\n",
885 			(unsigned long)page);
886 
887 	return row;
888 }
889 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
890 
891 const char *edac_layer_name[] = {
892 	[EDAC_MC_LAYER_BRANCH] = "branch",
893 	[EDAC_MC_LAYER_CHANNEL] = "channel",
894 	[EDAC_MC_LAYER_SLOT] = "slot",
895 	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
896 };
897 EXPORT_SYMBOL_GPL(edac_layer_name);
898 
899 static void edac_inc_ce_error(struct mem_ctl_info *mci,
900 			      bool enable_per_layer_report,
901 			      const int pos[EDAC_MAX_LAYERS],
902 			      const u16 count)
903 {
904 	int i, index = 0;
905 
906 	mci->ce_mc += count;
907 
908 	if (!enable_per_layer_report) {
909 		mci->ce_noinfo_count += count;
910 		return;
911 	}
912 
913 	for (i = 0; i < mci->n_layers; i++) {
914 		if (pos[i] < 0)
915 			break;
916 		index += pos[i];
917 		mci->ce_per_layer[i][index] += count;
918 
919 		if (i < mci->n_layers - 1)
920 			index *= mci->layers[i + 1].size;
921 	}
922 }
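
/*
 * Worked example of the index arithmetic above: with two layers of sizes
 * {4, 2}, a position {3, 1} bumps ce_per_layer[0][3] and then
 * ce_per_layer[1][3 * 2 + 1] == ce_per_layer[1][7].
 */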
923 
924 static void edac_inc_ue_error(struct mem_ctl_info *mci,
925 				    bool enable_per_layer_report,
926 				    const int pos[EDAC_MAX_LAYERS],
927 				    const u16 count)
928 {
929 	int i, index = 0;
930 
931 	mci->ue_mc += count;
932 
933 	if (!enable_per_layer_report) {
934 		mci->ue_noinfo_count += count;
935 		return;
936 	}
937 
938 	for (i = 0; i < mci->n_layers; i++) {
939 		if (pos[i] < 0)
940 			break;
941 		index += pos[i];
942 		mci->ue_per_layer[i][index] += count;
943 
944 		if (i < mci->n_layers - 1)
945 			index *= mci->layers[i + 1].size;
946 	}
947 }
948 
949 static void edac_ce_error(struct mem_ctl_info *mci,
950 			  const u16 error_count,
951 			  const int pos[EDAC_MAX_LAYERS],
952 			  const char *msg,
953 			  const char *location,
954 			  const char *label,
955 			  const char *detail,
956 			  const char *other_detail,
957 			  const bool enable_per_layer_report,
958 			  const unsigned long page_frame_number,
959 			  const unsigned long offset_in_page,
960 			  long grain)
961 {
962 	unsigned long remapped_page;
963 
964 	if (edac_mc_get_log_ce()) {
965 		if (other_detail && *other_detail)
966 			edac_mc_printk(mci, KERN_WARNING,
967 				       "%d CE %s on %s (%s %s - %s)\n",
968 				       error_count,
969 				       msg, label, location,
970 				       detail, other_detail);
971 		else
972 			edac_mc_printk(mci, KERN_WARNING,
973 				       "%d CE %s on %s (%s %s)\n",
974 				       error_count,
975 				       msg, label, location,
976 				       detail);
977 	}
978 	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
979 
980 	if (mci->scrub_mode & SCRUB_SW_SRC) {
981 		/*
982 			* Some memory controllers (called MCs below) can remap
983 			* memory so that it is still available at a different
984 			* address when PCI devices map into memory.
985 			* MCs that can't do this lose the memory where PCI
986 			* devices are mapped. This mapping is MC-dependent
987 			* and so we call back into the MC driver for it to
988 			* map the MC page to a physical (CPU) page which can
989 			* then be mapped to a virtual page - which can then
990 			* be scrubbed.
991 			*/
992 		remapped_page = mci->ctl_page_to_phys ?
993 			mci->ctl_page_to_phys(mci, page_frame_number) :
994 			page_frame_number;
995 
996 		edac_mc_scrub_block(remapped_page,
997 					offset_in_page, grain);
998 	}
999 }
1000 
1001 static void edac_ue_error(struct mem_ctl_info *mci,
1002 			  const u16 error_count,
1003 			  const int pos[EDAC_MAX_LAYERS],
1004 			  const char *msg,
1005 			  const char *location,
1006 			  const char *label,
1007 			  const char *detail,
1008 			  const char *other_detail,
1009 			  const bool enable_per_layer_report)
1010 {
1011 	if (edac_mc_get_log_ue()) {
1012 		if (other_detail && *other_detail)
1013 			edac_mc_printk(mci, KERN_WARNING,
1014 				       "%d UE %s on %s (%s %s - %s)\n",
1015 				       error_count,
1016 			               msg, label, location, detail,
1017 				       other_detail);
1018 		else
1019 			edac_mc_printk(mci, KERN_WARNING,
1020 				       "%d UE %s on %s (%s %s)\n",
1021 				       error_count,
1022 			               msg, label, location, detail);
1023 	}
1024 
1025 	if (edac_mc_get_panic_on_ue()) {
1026 		if (other_detail && *other_detail)
1027 			panic("UE %s on %s (%s%s - %s)\n",
1028 			      msg, label, location, detail, other_detail);
1029 		else
1030 			panic("UE %s on %s (%s%s)\n",
1031 			      msg, label, location, detail);
1032 	}
1033 
1034 	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
1035 }
1036 
1037 #define OTHER_LABEL " or "
1038 
1039 /**
1040  * edac_mc_handle_error - reports a memory event to userspace
1041  *
1042  * @type:		severity of the error (CE/UE/Fatal)
1043  * @mci:		a struct mem_ctl_info pointer
1044  * @error_count:	Number of errors of the same type
1045  * @page_frame_number:	mem page where the error occurred
1046  * @offset_in_page:	offset of the error inside the page
1047  * @syndrome:		ECC syndrome
1048  * @top_layer:		Memory layer[0] position
1049  * @mid_layer:		Memory layer[1] position
1050  * @low_layer:		Memory layer[2] position
1051  * @msg:		Message meaningful to the end users that
1052  *			explains the event
1053  * @other_detail:	Technical details about the event that
1054  *			may help hardware manufacturers and
1055  *			EDAC developers to analyse the event
1056  */
1057 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1058 			  struct mem_ctl_info *mci,
1059 			  const u16 error_count,
1060 			  const unsigned long page_frame_number,
1061 			  const unsigned long offset_in_page,
1062 			  const unsigned long syndrome,
1063 			  const int top_layer,
1064 			  const int mid_layer,
1065 			  const int low_layer,
1066 			  const char *msg,
1067 			  const char *other_detail)
1068 {
1069 	/* FIXME: too much for stack: move it to some pre-allocated area */
1070 	char detail[80], location[80];
1071 	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
1072 	char *p;
1073 	int row = -1, chan = -1;
1074 	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1075 	int i;
1076 	long grain;
1077 	bool enable_per_layer_report = false;
1078 	u8 grain_bits;
1079 
1080 	edac_dbg(3, "MC%d\n", mci->mc_idx);
1081 
1082 	/*
1083 	 * Check if the event report is consistent and if the memory
1084 	 * location is known. If it is known, enable_per_layer_report will be
1085 	 * true, the DIMM(s) label info will be filled and the per-layer
1086 	 * error counters will be incremented.
1087 	 */
1088 	for (i = 0; i < mci->n_layers; i++) {
1089 		if (pos[i] >= (int)mci->layers[i].size) {
1090 			if (type == HW_EVENT_ERR_CORRECTED)
1091 				p = "CE";
1092 			else
1093 				p = "UE";
1094 
1095 			edac_mc_printk(mci, KERN_ERR,
1096 				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1097 				       edac_layer_name[mci->layers[i].type],
1098 				       pos[i], mci->layers[i].size);
1099 			/*
1100 			 * Instead of just returning, let's use what's
1101 			 * known about the error. The increment routines and
1102 			 * the DIMM filter logic will do the right thing by
1103 			 * pointing to the likely damaged DIMMs.
1104 			 */
1105 			pos[i] = -1;
1106 		}
1107 		if (pos[i] >= 0)
1108 			enable_per_layer_report = true;
1109 	}
1110 
1111 	/*
1112 	 * Get the dimm label/grain that applies to the match criteria.
1113 	 * As the error algorithm may not be able to point to just one memory
1114 	 * stick, the logic here will get all possible labels that could
1115  * potentially be affected by the error.
1116 	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1117 	 * to have only the MC channel and the MC dimm (also called "branch")
1118 	 * but the channel is not known, as the memory is arranged in pairs,
1119  * where each memory stick belongs to a separate channel within the same
1120 	 * branch.
1121 	 */
1122 	grain = 0;
1123 	p = label;
1124 	*p = '\0';
1125 	for (i = 0; i < mci->tot_dimms; i++) {
1126 		struct dimm_info *dimm = mci->dimms[i];
1127 
1128 		if (top_layer >= 0 && top_layer != dimm->location[0])
1129 			continue;
1130 		if (mid_layer >= 0 && mid_layer != dimm->location[1])
1131 			continue;
1132 		if (low_layer >= 0 && low_layer != dimm->location[2])
1133 			continue;
1134 
1135 		/* get the max grain, over the error match range */
1136 		if (dimm->grain > grain)
1137 			grain = dimm->grain;
1138 
1139 		/*
1140 		 * If the error is memory-controller wide, there's no need to
1141 		 * seek for the affected DIMMs because the whole
1142 		 * channel/memory controller/...  may be affected.
1143 		 * Also, don't show errors for empty DIMM slots.
1144 		 */
1145 		if (enable_per_layer_report && dimm->nr_pages) {
1146 			if (p != label) {
1147 				strcpy(p, OTHER_LABEL);
1148 				p += strlen(OTHER_LABEL);
1149 			}
1150 			strcpy(p, dimm->label);
1151 			p += strlen(p);
1152 			*p = '\0';
1153 
1154 			/*
1155 			 * get csrow/channel of the DIMM, in order to allow
1156 			 * incrementing the compat API counters
1157 			 */
1158 			edac_dbg(4, "%s csrows map: (%d,%d)\n",
1159 				 mci->mem_is_per_rank ? "rank" : "dimm",
1160 				 dimm->csrow, dimm->cschannel);
1161 			if (row == -1)
1162 				row = dimm->csrow;
1163 			else if (row >= 0 && row != dimm->csrow)
1164 				row = -2;
1165 
1166 			if (chan == -1)
1167 				chan = dimm->cschannel;
1168 			else if (chan >= 0 && chan != dimm->cschannel)
1169 				chan = -2;
1170 		}
1171 	}
1172 
1173 	if (!enable_per_layer_report) {
1174 		strcpy(label, "any memory");
1175 	} else {
1176 		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1177 		if (p == label)
1178 			strcpy(label, "unknown memory");
1179 		if (type == HW_EVENT_ERR_CORRECTED) {
1180 			if (row >= 0) {
1181 				mci->csrows[row]->ce_count += error_count;
1182 				if (chan >= 0)
1183 					mci->csrows[row]->channels[chan]->ce_count += error_count;
1184 			}
1185 		} else
1186 			if (row >= 0)
1187 				mci->csrows[row]->ue_count += error_count;
1188 	}
1189 
1190 	/* Fill the RAM location data */
1191 	p = location;
1192 	for (i = 0; i < mci->n_layers; i++) {
1193 		if (pos[i] < 0)
1194 			continue;
1195 
1196 		p += sprintf(p, "%s:%d ",
1197 			     edac_layer_name[mci->layers[i].type],
1198 			     pos[i]);
1199 	}
1200 	if (p > location)
1201 		*(p - 1) = '\0';
1202 
1203 	/* Report the error via the trace interface */
1204 
1205 	grain_bits = fls_long(grain) + 1;
1206 	trace_mc_event(type, msg, label, error_count,
1207 		       mci->mc_idx, top_layer, mid_layer, low_layer,
1208 		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
1209 		       grain_bits, syndrome, other_detail);
1210 
1211 	/* Memory type dependent details about the error */
1212 	if (type == HW_EVENT_ERR_CORRECTED) {
1213 		snprintf(detail, sizeof(detail),
1214 			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1215 			page_frame_number, offset_in_page,
1216 			grain, syndrome);
1217 		edac_ce_error(mci, error_count, pos, msg, location, label,
1218 			      detail, other_detail, enable_per_layer_report,
1219 			      page_frame_number, offset_in_page, grain);
1220 	} else {
1221 		snprintf(detail, sizeof(detail),
1222 			"page:0x%lx offset:0x%lx grain:%ld",
1223 			page_frame_number, offset_in_page, grain);
1224 
1225 		edac_ue_error(mci, error_count, pos, msg, location, label,
1226 			      detail, other_detail, enable_per_layer_report);
1227 	}
1228 }
1229 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
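
/*
 * Example report from a hypothetical driver that decoded one corrected
 * error down to a csrow/channel pair (all values are illustrative):
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 *			     pfn, offset, syndrome,
 *			     csrow, channel, -1,
 *			     "read error", "");
 */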
1230