xref: /openbmc/linux/drivers/edac/edac_mc.c (revision a89aa749ece9c6fee7932163472d2ee0efd6ddd3)
1 /*
2  * edac_mc kernel module
3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4  * This file may be distributed under the terms of the
5  * GNU General Public License.
6  *
7  * Written by Thayne Harbaugh
8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
9  *	http://www.anime.net/~goemon/linux-ecc/
10  *
11  * Modified by Dave Peterson and Doug Thompson
12  *
13  */
14 
15 #include <linux/module.h>
16 #include <linux/proc_fs.h>
17 #include <linux/kernel.h>
18 #include <linux/types.h>
19 #include <linux/smp.h>
20 #include <linux/init.h>
21 #include <linux/sysctl.h>
22 #include <linux/highmem.h>
23 #include <linux/timer.h>
24 #include <linux/slab.h>
25 #include <linux/jiffies.h>
26 #include <linux/spinlock.h>
27 #include <linux/list.h>
28 #include <linux/ctype.h>
29 #include <linux/edac.h>
30 #include <linux/bitops.h>
31 #include <linux/uaccess.h>
32 #include <asm/page.h>
33 #include "edac_mc.h"
34 #include "edac_module.h"
35 #include <ras/ras_event.h>
36 
37 #ifdef CONFIG_EDAC_ATOMIC_SCRUB
38 #include <asm/edac.h>
39 #else
40 #define edac_atomic_scrub(va, size) do { } while (0)
41 #endif
42 
/*
 * Global EDAC operating state; starts out as EDAC_OPSTATE_INVAL.
 * edac_mc_workq_function() only invokes a controller's edac_check()
 * callback while this equals EDAC_OPSTATE_POLL.
 */
int edac_op_state = EDAC_OPSTATE_INVAL;
EXPORT_SYMBOL_GPL(edac_op_state);

/* Current reporting mode; exposed through the "edac_report" parameter. */
static int edac_report = EDAC_REPORTING_ENABLED;

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
/* Registered controllers; add_mc_to_global_list() keeps it ordered by mc_idx. */
static LIST_HEAD(mc_devices);

/*
 * Used to lock EDAC MC to just one module, avoiding two drivers e. g.
 *	apei/ghes and i7core_edac to be used at the same time.
 */
static const char *edac_mc_owner;
57 
58 static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e)
59 {
60 	return container_of(e, struct mem_ctl_info, error_desc);
61 }
62 
63 int edac_get_report_status(void)
64 {
65 	return edac_report;
66 }
67 EXPORT_SYMBOL_GPL(edac_get_report_status);
68 
69 void edac_set_report_status(int new)
70 {
71 	if (new == EDAC_REPORTING_ENABLED ||
72 	    new == EDAC_REPORTING_DISABLED ||
73 	    new == EDAC_REPORTING_FORCE)
74 		edac_report = new;
75 }
76 EXPORT_SYMBOL_GPL(edac_set_report_status);
77 
78 static int edac_report_set(const char *str, const struct kernel_param *kp)
79 {
80 	if (!str)
81 		return -EINVAL;
82 
83 	if (!strncmp(str, "on", 2))
84 		edac_report = EDAC_REPORTING_ENABLED;
85 	else if (!strncmp(str, "off", 3))
86 		edac_report = EDAC_REPORTING_DISABLED;
87 	else if (!strncmp(str, "force", 5))
88 		edac_report = EDAC_REPORTING_FORCE;
89 
90 	return 0;
91 }
92 
93 static int edac_report_get(char *buffer, const struct kernel_param *kp)
94 {
95 	int ret = 0;
96 
97 	switch (edac_report) {
98 	case EDAC_REPORTING_ENABLED:
99 		ret = sprintf(buffer, "on");
100 		break;
101 	case EDAC_REPORTING_DISABLED:
102 		ret = sprintf(buffer, "off");
103 		break;
104 	case EDAC_REPORTING_FORCE:
105 		ret = sprintf(buffer, "force");
106 		break;
107 	default:
108 		ret = -EINVAL;
109 		break;
110 	}
111 
112 	return ret;
113 }
114 
/* Custom set/get handlers backing the "edac_report" module parameter. */
static const struct kernel_param_ops edac_report_ops = {
	.set = edac_report_set,
	.get = edac_report_get,
};

/* Register edac_report with mode 0644, routed through edac_report_ops. */
module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644);
121 
122 unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf,
123 				     unsigned int len)
124 {
125 	struct mem_ctl_info *mci = dimm->mci;
126 	int i, n, count = 0;
127 	char *p = buf;
128 
129 	for (i = 0; i < mci->n_layers; i++) {
130 		n = snprintf(p, len, "%s %d ",
131 			      edac_layer_name[mci->layers[i].type],
132 			      dimm->location[i]);
133 		p += n;
134 		len -= n;
135 		count += n;
136 		if (!len)
137 			break;
138 	}
139 
140 	return count;
141 }
142 
143 #ifdef CONFIG_EDAC_DEBUG
144 
/* Debug helper: print the fields of one channel at debug level 4. */
static void edac_mc_dump_channel(struct rank_info *chan)
{
	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "    channel = %p\n", chan);
	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}
152 
153 static void edac_mc_dump_dimm(struct dimm_info *dimm)
154 {
155 	char location[80];
156 
157 	if (!dimm->nr_pages)
158 		return;
159 
160 	edac_dimm_info_location(dimm, location, sizeof(location));
161 
162 	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
163 		 dimm->mci->csbased ? "rank" : "dimm",
164 		 dimm->idx, location, dimm->csrow, dimm->cschannel);
165 	edac_dbg(4, "  dimm = %p\n", dimm);
166 	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
167 	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
168 	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
169 	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
170 }
171 
/* Debug helper: print the fields of one csrow at debug level 4. */
static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}
183 
/*
 * Debug helper: print the top-level fields of a memory controller.
 * Most lines are emitted at debug level 3; edac_check is emitted at 4.
 */
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	/* NOTE: the value printed under the "nr_dimms" label is mci->tot_dimms. */
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}
200 
201 #endif				/* CONFIG_EDAC_DEBUG */
202 
/* Printable names of the memory types, indexed by the MEM_* enumerators. */
const char * const edac_mem_types[] = {
	[MEM_EMPTY]	= "Empty",
	[MEM_RESERVED]	= "Reserved",
	[MEM_UNKNOWN]	= "Unknown",
	[MEM_FPM]	= "FPM",
	[MEM_EDO]	= "EDO",
	[MEM_BEDO]	= "BEDO",
	[MEM_SDR]	= "Unbuffered-SDR",
	[MEM_RDR]	= "Registered-SDR",
	[MEM_DDR]	= "Unbuffered-DDR",
	[MEM_RDDR]	= "Registered-DDR",
	[MEM_RMBS]	= "RMBS",
	[MEM_DDR2]	= "Unbuffered-DDR2",
	[MEM_FB_DDR2]	= "FullyBuffered-DDR2",
	[MEM_RDDR2]	= "Registered-DDR2",
	[MEM_XDR]	= "XDR",
	[MEM_DDR3]	= "Unbuffered-DDR3",
	[MEM_RDDR3]	= "Registered-DDR3",
	[MEM_LRDDR3]	= "Load-Reduced-DDR3-RAM",
	[MEM_DDR4]	= "Unbuffered-DDR4",
	[MEM_RDDR4]	= "Registered-DDR4",
	[MEM_LRDDR4]	= "Load-Reduced-DDR4-RAM",
	[MEM_NVDIMM]	= "Non-volatile-RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
228 
/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed so that successive calls keep advancing through
 * the offsets of a struct and the structs embedded after it, all carved out
 * of one allocation.
 *
 * Return: the value '*p' had on entry, rounded up to the alignment chosen
 * for 'size'. '*p' is advanced past the reserved elements and by the same
 * amount of padding, ready for the next call.
 */
void *edac_align_ptr(void **p, unsigned int size, int n_elems)
{
	unsigned long start = (unsigned long)*p;
	unsigned int align, r;

	/* Reserve room for the elements themselves. */
	*p = (void *)(start + size * n_elems);

	/*
	 * 'start' can possibly be an unaligned offset for an item X such
	 * that sizeof(X) is 'size'.  Pick an alignment at least as
	 * stringent as what the compiler would provide for X.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (void *)start;

	/*
	 * The remainder must be taken from the offset being handed out,
	 * not from the address of the caller's cursor variable 'p'.
	 */
	r = start % align;
	if (r == 0)
		return (void *)start;

	/* Push both the returned offset and the next offset by the padding. */
	*p = (void *)((unsigned long)*p + (align - r));

	return (void *)(start + (align - r));
}
283 
284 static void _edac_mc_free(struct mem_ctl_info *mci)
285 {
286 	put_device(&mci->dev);
287 }
288 
289 static void mci_release(struct device *dev)
290 {
291 	struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
292 	struct csrow_info *csr;
293 	int i, chn, row;
294 
295 	if (mci->dimms) {
296 		for (i = 0; i < mci->tot_dimms; i++)
297 			kfree(mci->dimms[i]);
298 		kfree(mci->dimms);
299 	}
300 
301 	if (mci->csrows) {
302 		for (row = 0; row < mci->nr_csrows; row++) {
303 			csr = mci->csrows[row];
304 			if (!csr)
305 				continue;
306 
307 			if (csr->channels) {
308 				for (chn = 0; chn < mci->num_cschannel; chn++)
309 					kfree(csr->channels[chn]);
310 				kfree(csr->channels);
311 			}
312 			kfree(csr);
313 		}
314 		kfree(mci->csrows);
315 	}
316 	kfree(mci);
317 }
318 
319 static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
320 {
321 	unsigned int tot_channels = mci->num_cschannel;
322 	unsigned int tot_csrows = mci->nr_csrows;
323 	unsigned int row, chn;
324 
325 	/*
326 	 * Alocate and fill the csrow/channels structs
327 	 */
328 	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
329 	if (!mci->csrows)
330 		return -ENOMEM;
331 
332 	for (row = 0; row < tot_csrows; row++) {
333 		struct csrow_info *csr;
334 
335 		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
336 		if (!csr)
337 			return -ENOMEM;
338 
339 		mci->csrows[row] = csr;
340 		csr->csrow_idx = row;
341 		csr->mci = mci;
342 		csr->nr_channels = tot_channels;
343 		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
344 					GFP_KERNEL);
345 		if (!csr->channels)
346 			return -ENOMEM;
347 
348 		for (chn = 0; chn < tot_channels; chn++) {
349 			struct rank_info *chan;
350 
351 			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
352 			if (!chan)
353 				return -ENOMEM;
354 
355 			csr->channels[chn] = chan;
356 			chan->chan_idx = chn;
357 			chan->csrow = csr;
358 		}
359 	}
360 
361 	return 0;
362 }
363 
364 static int edac_mc_alloc_dimms(struct mem_ctl_info *mci)
365 {
366 	unsigned int pos[EDAC_MAX_LAYERS];
367 	unsigned int row, chn, idx;
368 	int layer;
369 	void *p;
370 
371 	/*
372 	 * Allocate and fill the dimm structs
373 	 */
374 	mci->dimms  = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
375 	if (!mci->dimms)
376 		return -ENOMEM;
377 
378 	memset(&pos, 0, sizeof(pos));
379 	row = 0;
380 	chn = 0;
381 	for (idx = 0; idx < mci->tot_dimms; idx++) {
382 		struct dimm_info *dimm;
383 		struct rank_info *chan;
384 		int n, len;
385 
386 		chan = mci->csrows[row]->channels[chn];
387 
388 		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
389 		if (!dimm)
390 			return -ENOMEM;
391 		mci->dimms[idx] = dimm;
392 		dimm->mci = mci;
393 		dimm->idx = idx;
394 
395 		/*
396 		 * Copy DIMM location and initialize it.
397 		 */
398 		len = sizeof(dimm->label);
399 		p = dimm->label;
400 		n = snprintf(p, len, "mc#%u", mci->mc_idx);
401 		p += n;
402 		len -= n;
403 		for (layer = 0; layer < mci->n_layers; layer++) {
404 			n = snprintf(p, len, "%s#%u",
405 				     edac_layer_name[mci->layers[layer].type],
406 				     pos[layer]);
407 			p += n;
408 			len -= n;
409 			dimm->location[layer] = pos[layer];
410 
411 			if (len <= 0)
412 				break;
413 		}
414 
415 		/* Link it to the csrows old API data */
416 		chan->dimm = dimm;
417 		dimm->csrow = row;
418 		dimm->cschannel = chn;
419 
420 		/* Increment csrow location */
421 		if (mci->layers[0].is_virt_csrow) {
422 			chn++;
423 			if (chn == mci->num_cschannel) {
424 				chn = 0;
425 				row++;
426 			}
427 		} else {
428 			row++;
429 			if (row == mci->nr_csrows) {
430 				row = 0;
431 				chn++;
432 			}
433 		}
434 
435 		/* Increment dimm location */
436 		for (layer = mci->n_layers - 1; layer >= 0; layer--) {
437 			pos[layer]++;
438 			if (pos[layer] < mci->layers[layer].size)
439 				break;
440 			pos[layer] = 0;
441 		}
442 	}
443 
444 	return 0;
445 }
446 
447 struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
448 				   unsigned int n_layers,
449 				   struct edac_mc_layer *layers,
450 				   unsigned int sz_pvt)
451 {
452 	struct mem_ctl_info *mci;
453 	struct edac_mc_layer *layer;
454 	unsigned int idx, size, tot_dimms = 1;
455 	unsigned int tot_csrows = 1, tot_channels = 1;
456 	void *pvt, *ptr = NULL;
457 	bool per_rank = false;
458 
459 	if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
460 		return NULL;
461 
462 	/*
463 	 * Calculate the total amount of dimms and csrows/cschannels while
464 	 * in the old API emulation mode
465 	 */
466 	for (idx = 0; idx < n_layers; idx++) {
467 		tot_dimms *= layers[idx].size;
468 
469 		if (layers[idx].is_virt_csrow)
470 			tot_csrows *= layers[idx].size;
471 		else
472 			tot_channels *= layers[idx].size;
473 
474 		if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT)
475 			per_rank = true;
476 	}
477 
478 	/* Figure out the offsets of the various items from the start of an mc
479 	 * structure.  We want the alignment of each item to be at least as
480 	 * stringent as what the compiler would provide if we could simply
481 	 * hardcode everything into a single struct.
482 	 */
483 	mci	= edac_align_ptr(&ptr, sizeof(*mci), 1);
484 	layer	= edac_align_ptr(&ptr, sizeof(*layer), n_layers);
485 	pvt	= edac_align_ptr(&ptr, sz_pvt, 1);
486 	size	= ((unsigned long)pvt) + sz_pvt;
487 
488 	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
489 		 size,
490 		 tot_dimms,
491 		 per_rank ? "ranks" : "dimms",
492 		 tot_csrows * tot_channels);
493 
494 	mci = kzalloc(size, GFP_KERNEL);
495 	if (mci == NULL)
496 		return NULL;
497 
498 	mci->dev.release = mci_release;
499 	device_initialize(&mci->dev);
500 
501 	/* Adjust pointers so they point within the memory we just allocated
502 	 * rather than an imaginary chunk of memory located at address 0.
503 	 */
504 	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
505 	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
506 
507 	/* setup index and various internal pointers */
508 	mci->mc_idx = mc_num;
509 	mci->tot_dimms = tot_dimms;
510 	mci->pvt_info = pvt;
511 	mci->n_layers = n_layers;
512 	mci->layers = layer;
513 	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
514 	mci->nr_csrows = tot_csrows;
515 	mci->num_cschannel = tot_channels;
516 	mci->csbased = per_rank;
517 
518 	if (edac_mc_alloc_csrows(mci))
519 		goto error;
520 
521 	if (edac_mc_alloc_dimms(mci))
522 		goto error;
523 
524 	mci->op_state = OP_ALLOC;
525 
526 	return mci;
527 
528 error:
529 	_edac_mc_free(mci);
530 
531 	return NULL;
532 }
533 EXPORT_SYMBOL_GPL(edac_mc_alloc);
534 
535 void edac_mc_free(struct mem_ctl_info *mci)
536 {
537 	edac_dbg(1, "\n");
538 
539 	_edac_mc_free(mci);
540 }
541 EXPORT_SYMBOL_GPL(edac_mc_free);
542 
543 bool edac_has_mcs(void)
544 {
545 	bool ret;
546 
547 	mutex_lock(&mem_ctls_mutex);
548 
549 	ret = list_empty(&mc_devices);
550 
551 	mutex_unlock(&mem_ctls_mutex);
552 
553 	return !ret;
554 }
555 EXPORT_SYMBOL_GPL(edac_has_mcs);
556 
557 /* Caller must hold mem_ctls_mutex */
558 static struct mem_ctl_info *__find_mci_by_dev(struct device *dev)
559 {
560 	struct mem_ctl_info *mci;
561 	struct list_head *item;
562 
563 	edac_dbg(3, "\n");
564 
565 	list_for_each(item, &mc_devices) {
566 		mci = list_entry(item, struct mem_ctl_info, link);
567 
568 		if (mci->pdev == dev)
569 			return mci;
570 	}
571 
572 	return NULL;
573 }
574 
575 /**
576  * find_mci_by_dev
577  *
578  *	scan list of controllers looking for the one that manages
579  *	the 'dev' device
580  * @dev: pointer to a struct device related with the MCI
581  */
582 struct mem_ctl_info *find_mci_by_dev(struct device *dev)
583 {
584 	struct mem_ctl_info *ret;
585 
586 	mutex_lock(&mem_ctls_mutex);
587 	ret = __find_mci_by_dev(dev);
588 	mutex_unlock(&mem_ctls_mutex);
589 
590 	return ret;
591 }
592 EXPORT_SYMBOL_GPL(find_mci_by_dev);
593 
594 /*
595  * edac_mc_workq_function
596  *	performs the operation scheduled by a workq request
597  */
598 static void edac_mc_workq_function(struct work_struct *work_req)
599 {
600 	struct delayed_work *d_work = to_delayed_work(work_req);
601 	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
602 
603 	mutex_lock(&mem_ctls_mutex);
604 
605 	if (mci->op_state != OP_RUNNING_POLL) {
606 		mutex_unlock(&mem_ctls_mutex);
607 		return;
608 	}
609 
610 	if (edac_op_state == EDAC_OPSTATE_POLL)
611 		mci->edac_check(mci);
612 
613 	mutex_unlock(&mem_ctls_mutex);
614 
615 	/* Queue ourselves again. */
616 	edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));
617 }
618 
619 /*
620  * edac_mc_reset_delay_period(unsigned long value)
621  *
622  *	user space has updated our poll period value, need to
623  *	reset our workq delays
624  */
625 void edac_mc_reset_delay_period(unsigned long value)
626 {
627 	struct mem_ctl_info *mci;
628 	struct list_head *item;
629 
630 	mutex_lock(&mem_ctls_mutex);
631 
632 	list_for_each(item, &mc_devices) {
633 		mci = list_entry(item, struct mem_ctl_info, link);
634 
635 		if (mci->op_state == OP_RUNNING_POLL)
636 			edac_mod_work(&mci->work, value);
637 	}
638 	mutex_unlock(&mem_ctls_mutex);
639 }
640 
641 
642 
643 /* Return 0 on success, 1 on failure.
644  * Before calling this function, caller must
645  * assign a unique value to mci->mc_idx.
646  *
647  *	locking model:
648  *
649  *		called with the mem_ctls_mutex lock held
650  */
651 static int add_mc_to_global_list(struct mem_ctl_info *mci)
652 {
653 	struct list_head *item, *insert_before;
654 	struct mem_ctl_info *p;
655 
656 	insert_before = &mc_devices;
657 
658 	p = __find_mci_by_dev(mci->pdev);
659 	if (unlikely(p != NULL))
660 		goto fail0;
661 
662 	list_for_each(item, &mc_devices) {
663 		p = list_entry(item, struct mem_ctl_info, link);
664 
665 		if (p->mc_idx >= mci->mc_idx) {
666 			if (unlikely(p->mc_idx == mci->mc_idx))
667 				goto fail1;
668 
669 			insert_before = item;
670 			break;
671 		}
672 	}
673 
674 	list_add_tail_rcu(&mci->link, insert_before);
675 	return 0;
676 
677 fail0:
678 	edac_printk(KERN_WARNING, EDAC_MC,
679 		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
680 		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
681 	return 1;
682 
683 fail1:
684 	edac_printk(KERN_WARNING, EDAC_MC,
685 		"bug in low-level driver: attempt to assign\n"
686 		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
687 	return 1;
688 }
689 
690 static int del_mc_from_global_list(struct mem_ctl_info *mci)
691 {
692 	list_del_rcu(&mci->link);
693 
694 	/* these are for safe removal of devices from global list while
695 	 * NMI handlers may be traversing list
696 	 */
697 	synchronize_rcu();
698 	INIT_LIST_HEAD(&mci->link);
699 
700 	return list_empty(&mc_devices);
701 }
702 
703 struct mem_ctl_info *edac_mc_find(int idx)
704 {
705 	struct mem_ctl_info *mci;
706 	struct list_head *item;
707 
708 	mutex_lock(&mem_ctls_mutex);
709 
710 	list_for_each(item, &mc_devices) {
711 		mci = list_entry(item, struct mem_ctl_info, link);
712 		if (mci->mc_idx == idx)
713 			goto unlock;
714 	}
715 
716 	mci = NULL;
717 unlock:
718 	mutex_unlock(&mem_ctls_mutex);
719 	return mci;
720 }
721 EXPORT_SYMBOL(edac_mc_find);
722 
723 const char *edac_get_owner(void)
724 {
725 	return edac_mc_owner;
726 }
727 EXPORT_SYMBOL_GPL(edac_get_owner);
728 
/*
 * Register @mci: add it to the global controller list, create its sysfs
 * device with the extra attribute @groups, and start polling if the
 * driver supplied an edac_check() callback.
 *
 * Returns 0 on success, -EPERM if a different module already owns EDAC
 * MC, and -EINVAL if the controller could not be added to the list or
 * its sysfs device could not be created.
 *
 * FIXME - should a warning be printed if no error detection? correction?
 */
int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
			       const struct attribute_group **groups)
{
	int ret = -EINVAL;
	edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		struct dimm_info *dimm;
		int i;

		/* Dump only csrows that have at least one populated DIMM. */
		for (i = 0; i < mci->nr_csrows; i++) {
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
			int j;

			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
		}

		mci_for_each_dimm(mci, dimm)
			edac_mc_dump_dimm(dimm);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	/* Ownership is checked by comparing the mod_name pointers. */
	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
		ret = -EPERM;
		goto fail0;
	}

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	mci->bus = edac_get_sysfs_subsys();

	if (edac_create_sysfs_mci_device(mci, groups)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* A driver-provided edac_check() selects polled operation. */
	if (mci->edac_check) {
		mci->op_state = OP_RUNNING_POLL;

		INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
		edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));

	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO,
		"Giving out device to module %s controller %s: DEV %s (%s)\n",
		mci->mod_name, mci->ctl_name, mci->dev_name,
		edac_op_state_to_string(mci->op_state));

	edac_mc_owner = mci->mod_name;

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	/* Undo the list insertion done above. */
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc_with_groups);
813 
814 struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
815 {
816 	struct mem_ctl_info *mci;
817 
818 	edac_dbg(0, "\n");
819 
820 	mutex_lock(&mem_ctls_mutex);
821 
822 	/* find the requested mci struct in the global list */
823 	mci = __find_mci_by_dev(dev);
824 	if (mci == NULL) {
825 		mutex_unlock(&mem_ctls_mutex);
826 		return NULL;
827 	}
828 
829 	/* mark MCI offline: */
830 	mci->op_state = OP_OFFLINE;
831 
832 	if (del_mc_from_global_list(mci))
833 		edac_mc_owner = NULL;
834 
835 	mutex_unlock(&mem_ctls_mutex);
836 
837 	if (mci->edac_check)
838 		edac_stop_work(&mci->work);
839 
840 	/* remove from sysfs */
841 	edac_remove_sysfs_mci_device(mci);
842 
843 	edac_printk(KERN_INFO, EDAC_MC,
844 		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
845 		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
846 
847 	return mci;
848 }
849 EXPORT_SYMBOL_GPL(edac_mc_del_mc);
850 
851 static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
852 				u32 size)
853 {
854 	struct page *pg;
855 	void *virt_addr;
856 	unsigned long flags = 0;
857 
858 	edac_dbg(3, "\n");
859 
860 	/* ECC error page was not in our memory. Ignore it. */
861 	if (!pfn_valid(page))
862 		return;
863 
864 	/* Find the actual page structure then map it and fix */
865 	pg = pfn_to_page(page);
866 
867 	if (PageHighMem(pg))
868 		local_irq_save(flags);
869 
870 	virt_addr = kmap_atomic(pg);
871 
872 	/* Perform architecture specific atomic scrub operation */
873 	edac_atomic_scrub(virt_addr + offset, size);
874 
875 	/* Unmap and complete */
876 	kunmap_atomic(virt_addr);
877 
878 	if (PageHighMem(pg))
879 		local_irq_restore(flags);
880 }
881 
882 /* FIXME - should return -1 */
883 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
884 {
885 	struct csrow_info **csrows = mci->csrows;
886 	int row, i, j, n;
887 
888 	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
889 	row = -1;
890 
891 	for (i = 0; i < mci->nr_csrows; i++) {
892 		struct csrow_info *csrow = csrows[i];
893 		n = 0;
894 		for (j = 0; j < csrow->nr_channels; j++) {
895 			struct dimm_info *dimm = csrow->channels[j]->dimm;
896 			n += dimm->nr_pages;
897 		}
898 		if (n == 0)
899 			continue;
900 
901 		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
902 			 mci->mc_idx,
903 			 csrow->first_page, page, csrow->last_page,
904 			 csrow->page_mask);
905 
906 		if ((page >= csrow->first_page) &&
907 		    (page <= csrow->last_page) &&
908 		    ((page & csrow->page_mask) ==
909 		     (csrow->first_page & csrow->page_mask))) {
910 			row = i;
911 			break;
912 		}
913 	}
914 
915 	if (row == -1)
916 		edac_mc_printk(mci, KERN_ERR,
917 			"could not look up page error address %lx\n",
918 			(unsigned long)page);
919 
920 	return row;
921 }
922 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
923 
/* Printable names of the layer types, indexed by EDAC_MC_LAYER_*. */
const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
	[EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);
932 
933 static void edac_inc_ce_error(struct edac_raw_error_desc *e)
934 {
935 	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
936 	struct mem_ctl_info *mci = error_desc_to_mci(e);
937 	struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
938 
939 	mci->ce_mc += e->error_count;
940 
941 	if (dimm)
942 		dimm->ce_count += e->error_count;
943 	else
944 		mci->ce_noinfo_count += e->error_count;
945 }
946 
947 static void edac_inc_ue_error(struct edac_raw_error_desc *e)
948 {
949 	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
950 	struct mem_ctl_info *mci = error_desc_to_mci(e);
951 	struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
952 
953 	mci->ue_mc += e->error_count;
954 
955 	if (dimm)
956 		dimm->ue_count += e->error_count;
957 	else
958 		mci->ue_noinfo_count += e->error_count;
959 }
960 
961 static void edac_ce_error(struct edac_raw_error_desc *e)
962 {
963 	struct mem_ctl_info *mci = error_desc_to_mci(e);
964 	unsigned long remapped_page;
965 
966 	if (edac_mc_get_log_ce()) {
967 		edac_mc_printk(mci, KERN_WARNING,
968 			"%d CE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx%s%s)\n",
969 			e->error_count, e->msg,
970 			*e->msg ? " " : "",
971 			e->label, e->location, e->page_frame_number, e->offset_in_page,
972 			e->grain, e->syndrome,
973 			*e->other_detail ? " - " : "",
974 			e->other_detail);
975 	}
976 
977 	edac_inc_ce_error(e);
978 
979 	if (mci->scrub_mode == SCRUB_SW_SRC) {
980 		/*
981 			* Some memory controllers (called MCs below) can remap
982 			* memory so that it is still available at a different
983 			* address when PCI devices map into memory.
984 			* MC's that can't do this, lose the memory where PCI
985 			* devices are mapped. This mapping is MC-dependent
986 			* and so we call back into the MC driver for it to
987 			* map the MC page to a physical (CPU) page which can
988 			* then be mapped to a virtual page - which can then
989 			* be scrubbed.
990 			*/
991 		remapped_page = mci->ctl_page_to_phys ?
992 			mci->ctl_page_to_phys(mci, e->page_frame_number) :
993 			e->page_frame_number;
994 
995 		edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain);
996 	}
997 }
998 
999 static void edac_ue_error(struct edac_raw_error_desc *e)
1000 {
1001 	struct mem_ctl_info *mci = error_desc_to_mci(e);
1002 
1003 	if (edac_mc_get_log_ue()) {
1004 		edac_mc_printk(mci, KERN_WARNING,
1005 			"%d UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
1006 			e->error_count, e->msg,
1007 			*e->msg ? " " : "",
1008 			e->label, e->location, e->page_frame_number, e->offset_in_page,
1009 			e->grain,
1010 			*e->other_detail ? " - " : "",
1011 			e->other_detail);
1012 	}
1013 
1014 	if (edac_mc_get_panic_on_ue()) {
1015 		panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
1016 			e->msg,
1017 			*e->msg ? " " : "",
1018 			e->label, e->location, e->page_frame_number, e->offset_in_page,
1019 			e->grain,
1020 			*e->other_detail ? " - " : "",
1021 			e->other_detail);
1022 	}
1023 
1024 	edac_inc_ue_error(e);
1025 }
1026 
1027 static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
1028 {
1029 	struct mem_ctl_info *mci = error_desc_to_mci(e);
1030 	enum hw_event_mc_err_type type = e->type;
1031 	u16 count = e->error_count;
1032 
1033 	if (row < 0)
1034 		return;
1035 
1036 	edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1037 
1038 	if (type == HW_EVENT_ERR_CORRECTED) {
1039 		mci->csrows[row]->ce_count += count;
1040 		if (chan >= 0)
1041 			mci->csrows[row]->channels[chan]->ce_count += count;
1042 	} else {
1043 		mci->csrows[row]->ue_count += count;
1044 	}
1045 }
1046 
1047 void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
1048 {
1049 	struct mem_ctl_info *mci = error_desc_to_mci(e);
1050 	u8 grain_bits;
1051 
1052 	/* Sanity-check driver-supplied grain value. */
1053 	if (WARN_ON_ONCE(!e->grain))
1054 		e->grain = 1;
1055 
1056 	grain_bits = fls_long(e->grain - 1);
1057 
1058 	/* Report the error via the trace interface */
1059 	if (IS_ENABLED(CONFIG_RAS))
1060 		trace_mc_event(e->type, e->msg, e->label, e->error_count,
1061 			       mci->mc_idx, e->top_layer, e->mid_layer,
1062 			       e->low_layer,
1063 			       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
1064 			       grain_bits, e->syndrome, e->other_detail);
1065 
1066 	if (e->type == HW_EVENT_ERR_CORRECTED)
1067 		edac_ce_error(e);
1068 	else
1069 		edac_ue_error(e);
1070 }
1071 EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
1072 
/*
 * Fill mci->error_desc from the arguments, work out which DIMM label(s)
 * and which csrow/channel the error maps to, update the csrow-based
 * counters and pass the descriptor to edac_raw_mc_handle_error().
 *
 * @type:		error type; HW_EVENT_ERR_CORRECTED selects the
 *			corrected-error path further down
 * @mci:		memory controller reporting the error
 * @error_count:	number of errors being reported
 * @page_frame_number:	page frame of the error
 * @offset_in_page:	offset of the error inside that page
 * @syndrome:		error syndrome
 * @top_layer:		position in layer 0, or negative if unknown
 * @mid_layer:		position in layer 1, or negative if unknown
 * @low_layer:		position in layer 2, or negative if unknown
 * @msg:		message text; NULL is treated as ""
 * @other_detail:	extra detail text; NULL is treated as ""
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const u16 error_count,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
			  const char *msg,
			  const char *other_detail)
{
	struct dimm_info *dimm;
	char *p;
	/* -1: nothing matched yet; -2: matches disagree; >= 0: unique value */
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
	int i, n_labels = 0;
	struct edac_raw_error_desc *e = &mci->error_desc;
	bool any_memory = true;

	edac_dbg(3, "MC%d\n", mci->mc_idx);

	/* Fills the error report buffer */
	memset(e, 0, sizeof (*e));
	e->error_count = error_count;
	e->type = type;
	e->top_layer = top_layer;
	e->mid_layer = mid_layer;
	e->low_layer = low_layer;
	e->page_frame_number = page_frame_number;
	e->offset_in_page = offset_in_page;
	e->syndrome = syndrome;
	/* need valid strings here for both: */
	e->msg = msg ?: "";
	e->other_detail = other_detail ?: "";

	/*
	 * Check if the event report is consistent and if the memory location is
	 * known. If it is, the DIMM(s) label info will be filled and the DIMM's
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		/* At least one layer is known: not a controller-wide error. */
		if (pos[i] >= 0)
			any_memory = false;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	p = e->label;
	*p = '\0';

	mci_for_each_dimm(mci, dimm) {
		/*
		 * NOTE(review): the filter uses the raw layer arguments, not
		 * the sanitized pos[] values computed above.
		 */
		if (top_layer >= 0 && top_layer != dimm->location[0])
			continue;
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
			continue;
		if (low_layer >= 0 && low_layer != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > e->grain)
			e->grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole channel/memory
		 * controller/... may be affected. Also, don't show errors for
		 * empty DIMM slots.
		 */
		if (!dimm->nr_pages)
			continue;

		n_labels++;
		if (n_labels > EDAC_MAX_LABELS) {
			/* Too many candidates: discard the labels collected so far. */
			p = e->label;
			*p = '\0';
		} else {
			/* Separate labels with OTHER_LABEL after the first one. */
			if (p != e->label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
		}

		/*
		 * get csrow/channel of the DIMM, in order to allow
		 * incrementing the compat API counters
		 */
		edac_dbg(4, "%s csrows map: (%d,%d)\n",
			mci->csbased ? "rank" : "dimm",
			dimm->csrow, dimm->cschannel);
		if (row == -1)
			row = dimm->csrow;
		else if (row >= 0 && row != dimm->csrow)
			row = -2;

		if (chan == -1)
			chan = dimm->cschannel;
		else if (chan >= 0 && chan != dimm->cschannel)
			chan = -2;
	}

	if (any_memory)
		strcpy(e->label, "any memory");
	else if (!*e->label)
		strcpy(e->label, "unknown memory");

	edac_inc_csrow(e, row, chan);

	/* Fill the RAM location data */
	p = e->location;

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}
	/* Replace the trailing space with the terminator. */
	if (p > e->location)
		*(p - 1) = '\0';

	edac_raw_mc_handle_error(e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
1223