xref: /openbmc/linux/drivers/edac/edac_mc.c (revision a09d2831)
1 /*
2  * edac_mc kernel module
3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4  * This file may be distributed under the terms of the
5  * GNU General Public License.
6  *
7  * Written by Thayne Harbaugh
8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
9  *	http://www.anime.net/~goemon/linux-ecc/
10  *
11  * Modified by Dave Peterson and Doug Thompson
12  *
13  */
14 
15 #include <linux/module.h>
16 #include <linux/proc_fs.h>
17 #include <linux/kernel.h>
18 #include <linux/types.h>
19 #include <linux/smp.h>
20 #include <linux/init.h>
21 #include <linux/sysctl.h>
22 #include <linux/highmem.h>
23 #include <linux/timer.h>
24 #include <linux/slab.h>
25 #include <linux/jiffies.h>
26 #include <linux/spinlock.h>
27 #include <linux/list.h>
28 #include <linux/sysdev.h>
29 #include <linux/ctype.h>
30 #include <linux/edac.h>
31 #include <asm/uaccess.h>
32 #include <asm/page.h>
33 #include <asm/edac.h>
34 #include "edac_core.h"
35 #include "edac_module.h"
36 
37 /* lock to memory controller's control array */
38 static DEFINE_MUTEX(mem_ctls_mutex);
39 static LIST_HEAD(mc_devices);
40 
41 #ifdef CONFIG_EDAC_DEBUG
42 
43 static void edac_mc_dump_channel(struct channel_info *chan)
44 {
45 	debugf4("\tchannel = %p\n", chan);
46 	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
47 	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
48 	debugf4("\tchannel->label = '%s'\n", chan->label);
49 	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
50 }
51 
52 static void edac_mc_dump_csrow(struct csrow_info *csrow)
53 {
54 	debugf4("\tcsrow = %p\n", csrow);
55 	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
56 	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
57 	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
58 	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
59 	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
60 	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
61 	debugf4("\tcsrow->channels = %p\n", csrow->channels);
62 	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
63 }
64 
65 static void edac_mc_dump_mci(struct mem_ctl_info *mci)
66 {
67 	debugf3("\tmci = %p\n", mci);
68 	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
69 	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
70 	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
71 	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
72 	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
73 		mci->nr_csrows, mci->csrows);
74 	debugf3("\tdev = %p\n", mci->dev);
75 	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
76 	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
77 }
78 
/*
 * Human-readable names for the memory types.  The table is positional, so
 * the order of the entries must be kept in sync with enum mem_type.
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
102 
103 #endif				/* CONFIG_EDAC_DEBUG */
104 
/*
 * edac_align_ptr - round an address up to the alignment suitable for an item
 * @ptr:  possibly unaligned address at which an item X is to be placed
 * @size: sizeof(X)
 *
 * The alignment is picked from @size: items larger than a long get the
 * alignment of a "long long", items larger than an int get that of a long,
 * and so on down to single bytes, which need no alignment at all.  This
 * assumes that "long long" has the most stringent alignment the compiler
 * will ever provide by default.
 *
 * Returns @ptr itself when it is already suitably aligned, otherwise the
 * next higher address that is.
 *
 * If 'size' is a constant, the compiler can reduce the alignment selection
 * to a constant.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned long addr = (unsigned long)ptr;
	unsigned align, r;

	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return ptr;

	/*
	 * The padding depends on how far the *address* is from the next
	 * alignment boundary; the size of the item only selects which
	 * boundary to use.
	 */
	r = addr % align;

	if (r == 0)
		return ptr;

	return (void *)(addr + align - r);
}
138 
139 /**
140  * edac_mc_alloc: Allocate a struct mem_ctl_info structure
141  * @size_pvt:	size of private storage needed
142  * @nr_csrows:	Number of CWROWS needed for this MC
143  * @nr_chans:	Number of channels for the MC
144  *
145  * Everything is kmalloc'ed as one big chunk - more efficient.
146  * Only can be used if all structures have the same lifetime - otherwise
147  * you have to allocate and initialize your own structures.
148  *
149  * Use edac_mc_free() to free mc structures allocated by this function.
150  *
151  * Returns:
152  *	NULL allocation failed
153  *	struct mem_ctl_info pointer
154  */
155 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
156 				unsigned nr_chans, int edac_index)
157 {
158 	struct mem_ctl_info *mci;
159 	struct csrow_info *csi, *csrow;
160 	struct channel_info *chi, *chp, *chan;
161 	void *pvt;
162 	unsigned size;
163 	int row, chn;
164 	int err;
165 
166 	/* Figure out the offsets of the various items from the start of an mc
167 	 * structure.  We want the alignment of each item to be at least as
168 	 * stringent as what the compiler would provide if we could simply
169 	 * hardcode everything into a single struct.
170 	 */
171 	mci = (struct mem_ctl_info *)0;
172 	csi = edac_align_ptr(&mci[1], sizeof(*csi));
173 	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
174 	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
175 	size = ((unsigned long)pvt) + sz_pvt;
176 
177 	mci = kzalloc(size, GFP_KERNEL);
178 	if (mci == NULL)
179 		return NULL;
180 
181 	/* Adjust pointers so they point within the memory we just allocated
182 	 * rather than an imaginary chunk of memory located at address 0.
183 	 */
184 	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
185 	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
186 	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
187 
188 	/* setup index and various internal pointers */
189 	mci->mc_idx = edac_index;
190 	mci->csrows = csi;
191 	mci->pvt_info = pvt;
192 	mci->nr_csrows = nr_csrows;
193 
194 	for (row = 0; row < nr_csrows; row++) {
195 		csrow = &csi[row];
196 		csrow->csrow_idx = row;
197 		csrow->mci = mci;
198 		csrow->nr_channels = nr_chans;
199 		chp = &chi[row * nr_chans];
200 		csrow->channels = chp;
201 
202 		for (chn = 0; chn < nr_chans; chn++) {
203 			chan = &chp[chn];
204 			chan->chan_idx = chn;
205 			chan->csrow = csrow;
206 		}
207 	}
208 
209 	mci->op_state = OP_ALLOC;
210 
211 	/*
212 	 * Initialize the 'root' kobj for the edac_mc controller
213 	 */
214 	err = edac_mc_register_sysfs_main_kobj(mci);
215 	if (err) {
216 		kfree(mci);
217 		return NULL;
218 	}
219 
220 	/* at this point, the root kobj is valid, and in order to
221 	 * 'free' the object, then the function:
222 	 *      edac_mc_unregister_sysfs_main_kobj() must be called
223 	 * which will perform kobj unregistration and the actual free
224 	 * will occur during the kobject callback operation
225 	 */
226 	return mci;
227 }
228 EXPORT_SYMBOL_GPL(edac_mc_alloc);
229 
/**
 * edac_mc_free: 'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * Nothing is released directly here; @mci is simply handed to
 * edac_mc_unregister_sysfs_main_kobj().
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_mc_unregister_sysfs_main_kobj(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
240 
241 
242 /*
243  * find_mci_by_dev
244  *
245  *	scan list of controllers looking for the one that manages
246  *	the 'dev' device
247  */
248 static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
249 {
250 	struct mem_ctl_info *mci;
251 	struct list_head *item;
252 
253 	debugf3("%s()\n", __func__);
254 
255 	list_for_each(item, &mc_devices) {
256 		mci = list_entry(item, struct mem_ctl_info, link);
257 
258 		if (mci->dev == dev)
259 			return mci;
260 	}
261 
262 	return NULL;
263 }
264 
265 /*
266  * handler for EDAC to check if NMI type handler has asserted interrupt
267  */
268 static int edac_mc_assert_error_check_and_clear(void)
269 {
270 	int old_state;
271 
272 	if (edac_op_state == EDAC_OPSTATE_POLL)
273 		return 1;
274 
275 	old_state = edac_err_assert;
276 	edac_err_assert = 0;
277 
278 	return old_state;
279 }
280 
281 /*
282  * edac_mc_workq_function
283  *	performs the operation scheduled by a workq request
284  */
285 static void edac_mc_workq_function(struct work_struct *work_req)
286 {
287 	struct delayed_work *d_work = to_delayed_work(work_req);
288 	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
289 
290 	mutex_lock(&mem_ctls_mutex);
291 
292 	/* if this control struct has movd to offline state, we are done */
293 	if (mci->op_state == OP_OFFLINE) {
294 		mutex_unlock(&mem_ctls_mutex);
295 		return;
296 	}
297 
298 	/* Only poll controllers that are running polled and have a check */
299 	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
300 		mci->edac_check(mci);
301 
302 	mutex_unlock(&mem_ctls_mutex);
303 
304 	/* Reschedule */
305 	queue_delayed_work(edac_workqueue, &mci->work,
306 			msecs_to_jiffies(edac_mc_get_poll_msec()));
307 }
308 
309 /*
310  * edac_mc_workq_setup
311  *	initialize a workq item for this mci
312  *	passing in the new delay period in msec
313  *
314  *	locking model:
315  *
316  *		called with the mem_ctls_mutex held
317  */
318 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
319 {
320 	debugf0("%s()\n", __func__);
321 
322 	/* if this instance is not in the POLL state, then simply return */
323 	if (mci->op_state != OP_RUNNING_POLL)
324 		return;
325 
326 	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
327 	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
328 }
329 
330 /*
331  * edac_mc_workq_teardown
332  *	stop the workq processing on this mci
333  *
334  *	locking model:
335  *
336  *		called WITHOUT lock held
337  */
338 static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
339 {
340 	int status;
341 
342 	status = cancel_delayed_work(&mci->work);
343 	if (status == 0) {
344 		debugf0("%s() not canceled, flush the queue\n",
345 			__func__);
346 
347 		/* workq instance might be running, wait for it */
348 		flush_workqueue(edac_workqueue);
349 	}
350 }
351 
352 /*
353  * edac_mc_reset_delay_period(unsigned long value)
354  *
355  *	user space has updated our poll period value, need to
356  *	reset our workq delays
357  */
358 void edac_mc_reset_delay_period(int value)
359 {
360 	struct mem_ctl_info *mci;
361 	struct list_head *item;
362 
363 	mutex_lock(&mem_ctls_mutex);
364 
365 	/* scan the list and turn off all workq timers, doing so under lock
366 	 */
367 	list_for_each(item, &mc_devices) {
368 		mci = list_entry(item, struct mem_ctl_info, link);
369 
370 		if (mci->op_state == OP_RUNNING_POLL)
371 			cancel_delayed_work(&mci->work);
372 	}
373 
374 	mutex_unlock(&mem_ctls_mutex);
375 
376 
377 	/* re-walk the list, and reset the poll delay */
378 	mutex_lock(&mem_ctls_mutex);
379 
380 	list_for_each(item, &mc_devices) {
381 		mci = list_entry(item, struct mem_ctl_info, link);
382 
383 		edac_mc_workq_setup(mci, (unsigned long) value);
384 	}
385 
386 	mutex_unlock(&mem_ctls_mutex);
387 }
388 
389 
390 
391 /* Return 0 on success, 1 on failure.
392  * Before calling this function, caller must
393  * assign a unique value to mci->mc_idx.
394  *
395  *	locking model:
396  *
397  *		called with the mem_ctls_mutex lock held
398  */
399 static int add_mc_to_global_list(struct mem_ctl_info *mci)
400 {
401 	struct list_head *item, *insert_before;
402 	struct mem_ctl_info *p;
403 
404 	insert_before = &mc_devices;
405 
406 	p = find_mci_by_dev(mci->dev);
407 	if (unlikely(p != NULL))
408 		goto fail0;
409 
410 	list_for_each(item, &mc_devices) {
411 		p = list_entry(item, struct mem_ctl_info, link);
412 
413 		if (p->mc_idx >= mci->mc_idx) {
414 			if (unlikely(p->mc_idx == mci->mc_idx))
415 				goto fail1;
416 
417 			insert_before = item;
418 			break;
419 		}
420 	}
421 
422 	list_add_tail_rcu(&mci->link, insert_before);
423 	atomic_inc(&edac_handlers);
424 	return 0;
425 
426 fail0:
427 	edac_printk(KERN_WARNING, EDAC_MC,
428 		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
429 		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
430 	return 1;
431 
432 fail1:
433 	edac_printk(KERN_WARNING, EDAC_MC,
434 		"bug in low-level driver: attempt to assign\n"
435 		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
436 	return 1;
437 }
438 
439 static void complete_mc_list_del(struct rcu_head *head)
440 {
441 	struct mem_ctl_info *mci;
442 
443 	mci = container_of(head, struct mem_ctl_info, rcu);
444 	INIT_LIST_HEAD(&mci->link);
445 }
446 
447 static void del_mc_from_global_list(struct mem_ctl_info *mci)
448 {
449 	atomic_dec(&edac_handlers);
450 	list_del_rcu(&mci->link);
451 	call_rcu(&mci->rcu, complete_mc_list_del);
452 	rcu_barrier();
453 }
454 
455 /**
456  * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
457  *
458  * If found, return a pointer to the structure.
459  * Else return NULL.
460  *
461  * Caller must hold mem_ctls_mutex.
462  */
463 struct mem_ctl_info *edac_mc_find(int idx)
464 {
465 	struct list_head *item;
466 	struct mem_ctl_info *mci;
467 
468 	list_for_each(item, &mc_devices) {
469 		mci = list_entry(item, struct mem_ctl_info, link);
470 
471 		if (mci->mc_idx >= idx) {
472 			if (mci->mc_idx == idx)
473 				return mci;
474 
475 			break;
476 		}
477 	}
478 
479 	return NULL;
480 }
481 EXPORT_SYMBOL(edac_mc_find);
482 
483 /**
484  * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
485  *                 create sysfs entries associated with mci structure
486  * @mci: pointer to the mci structure to be added to the list
487  * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
488  *
489  * Return:
490  *	0	Success
491  *	!0	Failure
492  */
493 
494 /* FIXME - should a warning be printed if no error detection? correction? */
495 int edac_mc_add_mc(struct mem_ctl_info *mci)
496 {
497 	debugf0("%s()\n", __func__);
498 
499 #ifdef CONFIG_EDAC_DEBUG
500 	if (edac_debug_level >= 3)
501 		edac_mc_dump_mci(mci);
502 
503 	if (edac_debug_level >= 4) {
504 		int i;
505 
506 		for (i = 0; i < mci->nr_csrows; i++) {
507 			int j;
508 
509 			edac_mc_dump_csrow(&mci->csrows[i]);
510 			for (j = 0; j < mci->csrows[i].nr_channels; j++)
511 				edac_mc_dump_channel(&mci->csrows[i].
512 						channels[j]);
513 		}
514 	}
515 #endif
516 	mutex_lock(&mem_ctls_mutex);
517 
518 	if (add_mc_to_global_list(mci))
519 		goto fail0;
520 
521 	/* set load time so that error rate can be tracked */
522 	mci->start_time = jiffies;
523 
524 	if (edac_create_sysfs_mci_device(mci)) {
525 		edac_mc_printk(mci, KERN_WARNING,
526 			"failed to create sysfs device\n");
527 		goto fail1;
528 	}
529 
530 	/* If there IS a check routine, then we are running POLLED */
531 	if (mci->edac_check != NULL) {
532 		/* This instance is NOW RUNNING */
533 		mci->op_state = OP_RUNNING_POLL;
534 
535 		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
536 	} else {
537 		mci->op_state = OP_RUNNING_INTERRUPT;
538 	}
539 
540 	/* Report action taken */
541 	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
542 		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
543 
544 	mutex_unlock(&mem_ctls_mutex);
545 	return 0;
546 
547 fail1:
548 	del_mc_from_global_list(mci);
549 
550 fail0:
551 	mutex_unlock(&mem_ctls_mutex);
552 	return 1;
553 }
554 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
555 
556 /**
557  * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
558  *                 remove mci structure from global list
559  * @pdev: Pointer to 'struct device' representing mci structure to remove.
560  *
561  * Return pointer to removed mci structure, or NULL if device not found.
562  */
563 struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
564 {
565 	struct mem_ctl_info *mci;
566 
567 	debugf0("%s()\n", __func__);
568 
569 	mutex_lock(&mem_ctls_mutex);
570 
571 	/* find the requested mci struct in the global list */
572 	mci = find_mci_by_dev(dev);
573 	if (mci == NULL) {
574 		mutex_unlock(&mem_ctls_mutex);
575 		return NULL;
576 	}
577 
578 	/* marking MCI offline */
579 	mci->op_state = OP_OFFLINE;
580 
581 	del_mc_from_global_list(mci);
582 	mutex_unlock(&mem_ctls_mutex);
583 
584 	/* flush workq processes and remove sysfs */
585 	edac_mc_workq_teardown(mci);
586 	edac_remove_sysfs_mci_device(mci);
587 
588 	edac_printk(KERN_INFO, EDAC_MC,
589 		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
590 		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
591 
592 	return mci;
593 }
594 EXPORT_SYMBOL_GPL(edac_mc_del_mc);
595 
596 static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
597 				u32 size)
598 {
599 	struct page *pg;
600 	void *virt_addr;
601 	unsigned long flags = 0;
602 
603 	debugf3("%s()\n", __func__);
604 
605 	/* ECC error page was not in our memory. Ignore it. */
606 	if (!pfn_valid(page))
607 		return;
608 
609 	/* Find the actual page structure then map it and fix */
610 	pg = pfn_to_page(page);
611 
612 	if (PageHighMem(pg))
613 		local_irq_save(flags);
614 
615 	virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
616 
617 	/* Perform architecture specific atomic scrub operation */
618 	atomic_scrub(virt_addr + offset, size);
619 
620 	/* Unmap and complete */
621 	kunmap_atomic(virt_addr, KM_BOUNCE_READ);
622 
623 	if (PageHighMem(pg))
624 		local_irq_restore(flags);
625 }
626 
627 /* FIXME - should return -1 */
628 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
629 {
630 	struct csrow_info *csrows = mci->csrows;
631 	int row, i;
632 
633 	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
634 	row = -1;
635 
636 	for (i = 0; i < mci->nr_csrows; i++) {
637 		struct csrow_info *csrow = &csrows[i];
638 
639 		if (csrow->nr_pages == 0)
640 			continue;
641 
642 		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
643 			"mask(0x%lx)\n", mci->mc_idx, __func__,
644 			csrow->first_page, page, csrow->last_page,
645 			csrow->page_mask);
646 
647 		if ((page >= csrow->first_page) &&
648 		    (page <= csrow->last_page) &&
649 		    ((page & csrow->page_mask) ==
650 		     (csrow->first_page & csrow->page_mask))) {
651 			row = i;
652 			break;
653 		}
654 	}
655 
656 	if (row == -1)
657 		edac_mc_printk(mci, KERN_ERR,
658 			"could not look up page error address %lx\n",
659 			(unsigned long)page);
660 
661 	return row;
662 }
663 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
664 
665 /* FIXME - setable log (warning/emerg) levels */
666 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
667 void edac_mc_handle_ce(struct mem_ctl_info *mci,
668 		unsigned long page_frame_number,
669 		unsigned long offset_in_page, unsigned long syndrome,
670 		int row, int channel, const char *msg)
671 {
672 	unsigned long remapped_page;
673 
674 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
675 
676 	/* FIXME - maybe make panic on INTERNAL ERROR an option */
677 	if (row >= mci->nr_csrows || row < 0) {
678 		/* something is wrong */
679 		edac_mc_printk(mci, KERN_ERR,
680 			"INTERNAL ERROR: row out of range "
681 			"(%d >= %d)\n", row, mci->nr_csrows);
682 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
683 		return;
684 	}
685 
686 	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
687 		/* something is wrong */
688 		edac_mc_printk(mci, KERN_ERR,
689 			"INTERNAL ERROR: channel out of range "
690 			"(%d >= %d)\n", channel,
691 			mci->csrows[row].nr_channels);
692 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
693 		return;
694 	}
695 
696 	if (edac_mc_get_log_ce())
697 		/* FIXME - put in DIMM location */
698 		edac_mc_printk(mci, KERN_WARNING,
699 			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
700 			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
701 			page_frame_number, offset_in_page,
702 			mci->csrows[row].grain, syndrome, row, channel,
703 			mci->csrows[row].channels[channel].label, msg);
704 
705 	mci->ce_count++;
706 	mci->csrows[row].ce_count++;
707 	mci->csrows[row].channels[channel].ce_count++;
708 
709 	if (mci->scrub_mode & SCRUB_SW_SRC) {
710 		/*
711 		 * Some MC's can remap memory so that it is still available
712 		 * at a different address when PCI devices map into memory.
713 		 * MC's that can't do this lose the memory where PCI devices
714 		 * are mapped.  This mapping is MC dependant and so we call
715 		 * back into the MC driver for it to map the MC page to
716 		 * a physical (CPU) page which can then be mapped to a virtual
717 		 * page - which can then be scrubbed.
718 		 */
719 		remapped_page = mci->ctl_page_to_phys ?
720 			mci->ctl_page_to_phys(mci, page_frame_number) :
721 			page_frame_number;
722 
723 		edac_mc_scrub_block(remapped_page, offset_in_page,
724 				mci->csrows[row].grain);
725 	}
726 }
727 EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
728 
729 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
730 {
731 	if (edac_mc_get_log_ce())
732 		edac_mc_printk(mci, KERN_WARNING,
733 			"CE - no information available: %s\n", msg);
734 
735 	mci->ce_noinfo_count++;
736 	mci->ce_count++;
737 }
738 EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
739 
740 void edac_mc_handle_ue(struct mem_ctl_info *mci,
741 		unsigned long page_frame_number,
742 		unsigned long offset_in_page, int row, const char *msg)
743 {
744 	int len = EDAC_MC_LABEL_LEN * 4;
745 	char labels[len + 1];
746 	char *pos = labels;
747 	int chan;
748 	int chars;
749 
750 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
751 
752 	/* FIXME - maybe make panic on INTERNAL ERROR an option */
753 	if (row >= mci->nr_csrows || row < 0) {
754 		/* something is wrong */
755 		edac_mc_printk(mci, KERN_ERR,
756 			"INTERNAL ERROR: row out of range "
757 			"(%d >= %d)\n", row, mci->nr_csrows);
758 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
759 		return;
760 	}
761 
762 	chars = snprintf(pos, len + 1, "%s",
763 			 mci->csrows[row].channels[0].label);
764 	len -= chars;
765 	pos += chars;
766 
767 	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
768 		chan++) {
769 		chars = snprintf(pos, len + 1, ":%s",
770 				 mci->csrows[row].channels[chan].label);
771 		len -= chars;
772 		pos += chars;
773 	}
774 
775 	if (edac_mc_get_log_ue())
776 		edac_mc_printk(mci, KERN_EMERG,
777 			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
778 			"labels \"%s\": %s\n", page_frame_number,
779 			offset_in_page, mci->csrows[row].grain, row,
780 			labels, msg);
781 
782 	if (edac_mc_get_panic_on_ue())
783 		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
784 			"row %d, labels \"%s\": %s\n", mci->mc_idx,
785 			page_frame_number, offset_in_page,
786 			mci->csrows[row].grain, row, labels, msg);
787 
788 	mci->ue_count++;
789 	mci->csrows[row].ue_count++;
790 }
791 EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
792 
793 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
794 {
795 	if (edac_mc_get_panic_on_ue())
796 		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
797 
798 	if (edac_mc_get_log_ue())
799 		edac_mc_printk(mci, KERN_WARNING,
800 			"UE - no information available: %s\n", msg);
801 	mci->ue_noinfo_count++;
802 	mci->ue_count++;
803 }
804 EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
805 
806 /*************************************************************
807  * On Fully Buffered DIMM modules, this help function is
808  * called to process UE events
809  */
810 void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
811 			unsigned int csrow,
812 			unsigned int channela,
813 			unsigned int channelb, char *msg)
814 {
815 	int len = EDAC_MC_LABEL_LEN * 4;
816 	char labels[len + 1];
817 	char *pos = labels;
818 	int chars;
819 
820 	if (csrow >= mci->nr_csrows) {
821 		/* something is wrong */
822 		edac_mc_printk(mci, KERN_ERR,
823 			"INTERNAL ERROR: row out of range (%d >= %d)\n",
824 			csrow, mci->nr_csrows);
825 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
826 		return;
827 	}
828 
829 	if (channela >= mci->csrows[csrow].nr_channels) {
830 		/* something is wrong */
831 		edac_mc_printk(mci, KERN_ERR,
832 			"INTERNAL ERROR: channel-a out of range "
833 			"(%d >= %d)\n",
834 			channela, mci->csrows[csrow].nr_channels);
835 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
836 		return;
837 	}
838 
839 	if (channelb >= mci->csrows[csrow].nr_channels) {
840 		/* something is wrong */
841 		edac_mc_printk(mci, KERN_ERR,
842 			"INTERNAL ERROR: channel-b out of range "
843 			"(%d >= %d)\n",
844 			channelb, mci->csrows[csrow].nr_channels);
845 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
846 		return;
847 	}
848 
849 	mci->ue_count++;
850 	mci->csrows[csrow].ue_count++;
851 
852 	/* Generate the DIMM labels from the specified channels */
853 	chars = snprintf(pos, len + 1, "%s",
854 			 mci->csrows[csrow].channels[channela].label);
855 	len -= chars;
856 	pos += chars;
857 	chars = snprintf(pos, len + 1, "-%s",
858 			 mci->csrows[csrow].channels[channelb].label);
859 
860 	if (edac_mc_get_log_ue())
861 		edac_mc_printk(mci, KERN_EMERG,
862 			"UE row %d, channel-a= %d channel-b= %d "
863 			"labels \"%s\": %s\n", csrow, channela, channelb,
864 			labels, msg);
865 
866 	if (edac_mc_get_panic_on_ue())
867 		panic("UE row %d, channel-a= %d channel-b= %d "
868 			"labels \"%s\": %s\n", csrow, channela,
869 			channelb, labels, msg);
870 }
871 EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
872 
873 /*************************************************************
874  * On Fully Buffered DIMM modules, this help function is
875  * called to process CE events
876  */
877 void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
878 			unsigned int csrow, unsigned int channel, char *msg)
879 {
880 
881 	/* Ensure boundary values */
882 	if (csrow >= mci->nr_csrows) {
883 		/* something is wrong */
884 		edac_mc_printk(mci, KERN_ERR,
885 			"INTERNAL ERROR: row out of range (%d >= %d)\n",
886 			csrow, mci->nr_csrows);
887 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
888 		return;
889 	}
890 	if (channel >= mci->csrows[csrow].nr_channels) {
891 		/* something is wrong */
892 		edac_mc_printk(mci, KERN_ERR,
893 			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
894 			channel, mci->csrows[csrow].nr_channels);
895 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
896 		return;
897 	}
898 
899 	if (edac_mc_get_log_ce())
900 		/* FIXME - put in DIMM location */
901 		edac_mc_printk(mci, KERN_WARNING,
902 			"CE row %d, channel %d, label \"%s\": %s\n",
903 			csrow, channel,
904 			mci->csrows[csrow].channels[channel].label, msg);
905 
906 	mci->ce_count++;
907 	mci->csrows[csrow].ce_count++;
908 	mci->csrows[csrow].channels[channel].ce_count++;
909 }
910 EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
911