1 /* 2 * edac_mc kernel module 3 * (C) 2005, 2006 Linux Networx (http://lnxi.com) 4 * This file may be distributed under the terms of the 5 * GNU General Public License. 6 * 7 * Written by Thayne Harbaugh 8 * Based on work by Dan Hollis <goemon at anime dot net> and others. 9 * http://www.anime.net/~goemon/linux-ecc/ 10 * 11 * Modified by Dave Peterson and Doug Thompson 12 * 13 */ 14 15 #include <linux/module.h> 16 #include <linux/proc_fs.h> 17 #include <linux/kernel.h> 18 #include <linux/types.h> 19 #include <linux/smp.h> 20 #include <linux/init.h> 21 #include <linux/sysctl.h> 22 #include <linux/highmem.h> 23 #include <linux/timer.h> 24 #include <linux/slab.h> 25 #include <linux/jiffies.h> 26 #include <linux/spinlock.h> 27 #include <linux/list.h> 28 #include <linux/sysdev.h> 29 #include <linux/ctype.h> 30 #include <linux/edac.h> 31 #include <asm/uaccess.h> 32 #include <asm/page.h> 33 #include <asm/edac.h> 34 #include "edac_core.h" 35 #include "edac_module.h" 36 37 /* lock to memory controller's control array */ 38 static DEFINE_MUTEX(mem_ctls_mutex); 39 static LIST_HEAD(mc_devices); 40 41 #ifdef CONFIG_EDAC_DEBUG 42 43 static void edac_mc_dump_channel(struct channel_info *chan) 44 { 45 debugf4("\tchannel = %p\n", chan); 46 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); 47 debugf4("\tchannel->ce_count = %d\n", chan->ce_count); 48 debugf4("\tchannel->label = '%s'\n", chan->label); 49 debugf4("\tchannel->csrow = %p\n\n", chan->csrow); 50 } 51 52 static void edac_mc_dump_csrow(struct csrow_info *csrow) 53 { 54 debugf4("\tcsrow = %p\n", csrow); 55 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); 56 debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page); 57 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page); 58 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); 59 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages); 60 debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels); 61 debugf4("\tcsrow->channels = %p\n", csrow->channels); 62 debugf4("\tcsrow->mci = %p\n\n", csrow->mci); 63 } 64 65 static void edac_mc_dump_mci(struct mem_ctl_info *mci) 66 { 67 debugf3("\tmci = %p\n", mci); 68 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap); 69 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); 70 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap); 71 debugf4("\tmci->edac_check = %p\n", mci->edac_check); 72 debugf3("\tmci->nr_csrows = %d, csrows = %p\n", 73 mci->nr_csrows, mci->csrows); 74 debugf3("\tdev = %p\n", mci->dev); 75 debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name); 76 debugf3("\tpvt_info = %p\n\n", mci->pvt_info); 77 } 78 79 /* 80 * keep those in sync with the enum mem_type 81 */ 82 const char *edac_mem_types[] = { 83 "Empty csrow", 84 "Reserved csrow type", 85 "Unknown csrow type", 86 "Fast page mode RAM", 87 "Extended data out RAM", 88 "Burst Extended data out RAM", 89 "Single data rate SDRAM", 90 "Registered single data rate SDRAM", 91 "Double data rate SDRAM", 92 "Registered Double data rate SDRAM", 93 "Rambus DRAM", 94 "Unbuffered DDR2 RAM", 95 "Fully buffered DDR2", 96 "Registered DDR2 RAM", 97 "Rambus XDR", 98 "Unbuffered DDR3 RAM", 99 "Registered DDR3 RAM", 100 }; 101 EXPORT_SYMBOL_GPL(edac_mem_types); 102 103 #endif /* CONFIG_EDAC_DEBUG */ 104 105 /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'. 106 * Adjust 'ptr' so that its alignment is at least as stringent as what the 107 * compiler would provide for X and return the aligned result. 108 * 109 * If 'size' is a constant, the compiler will optimize this whole function 110 * down to either a no-op or the addition of a constant to the value of 'ptr'. 111 */ 112 void *edac_align_ptr(void *ptr, unsigned size) 113 { 114 unsigned align, r; 115 116 /* Here we assume that the alignment of a "long long" is the most 117 * stringent alignment that the compiler will ever provide by default. 118 * As far as I know, this is a reasonable assumption. 119 */ 120 if (size > sizeof(long)) 121 align = sizeof(long long); 122 else if (size > sizeof(int)) 123 align = sizeof(long); 124 else if (size > sizeof(short)) 125 align = sizeof(int); 126 else if (size > sizeof(char)) 127 align = sizeof(short); 128 else 129 return (char *)ptr; 130 131 r = size % align; 132 133 if (r == 0) 134 return (char *)ptr; 135 136 return (void *)(((unsigned long)ptr) + align - r); 137 } 138 139 /** 140 * edac_mc_alloc: Allocate a struct mem_ctl_info structure 141 * @size_pvt: size of private storage needed 142 * @nr_csrows: Number of CWROWS needed for this MC 143 * @nr_chans: Number of channels for the MC 144 * 145 * Everything is kmalloc'ed as one big chunk - more efficient. 146 * Only can be used if all structures have the same lifetime - otherwise 147 * you have to allocate and initialize your own structures. 148 * 149 * Use edac_mc_free() to free mc structures allocated by this function. 150 * 151 * Returns: 152 * NULL allocation failed 153 * struct mem_ctl_info pointer 154 */ 155 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, 156 unsigned nr_chans, int edac_index) 157 { 158 struct mem_ctl_info *mci; 159 struct csrow_info *csi, *csrow; 160 struct channel_info *chi, *chp, *chan; 161 void *pvt; 162 unsigned size; 163 int row, chn; 164 int err; 165 166 /* Figure out the offsets of the various items from the start of an mc 167 * structure. We want the alignment of each item to be at least as 168 * stringent as what the compiler would provide if we could simply 169 * hardcode everything into a single struct. 170 */ 171 mci = (struct mem_ctl_info *)0; 172 csi = edac_align_ptr(&mci[1], sizeof(*csi)); 173 chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi)); 174 pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt); 175 size = ((unsigned long)pvt) + sz_pvt; 176 177 mci = kzalloc(size, GFP_KERNEL); 178 if (mci == NULL) 179 return NULL; 180 181 /* Adjust pointers so they point within the memory we just allocated 182 * rather than an imaginary chunk of memory located at address 0. 183 */ 184 csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi)); 185 chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi)); 186 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; 187 188 /* setup index and various internal pointers */ 189 mci->mc_idx = edac_index; 190 mci->csrows = csi; 191 mci->pvt_info = pvt; 192 mci->nr_csrows = nr_csrows; 193 194 for (row = 0; row < nr_csrows; row++) { 195 csrow = &csi[row]; 196 csrow->csrow_idx = row; 197 csrow->mci = mci; 198 csrow->nr_channels = nr_chans; 199 chp = &chi[row * nr_chans]; 200 csrow->channels = chp; 201 202 for (chn = 0; chn < nr_chans; chn++) { 203 chan = &chp[chn]; 204 chan->chan_idx = chn; 205 chan->csrow = csrow; 206 } 207 } 208 209 mci->op_state = OP_ALLOC; 210 211 /* 212 * Initialize the 'root' kobj for the edac_mc controller 213 */ 214 err = edac_mc_register_sysfs_main_kobj(mci); 215 if (err) { 216 kfree(mci); 217 return NULL; 218 } 219 220 /* at this point, the root kobj is valid, and in order to 221 * 'free' the object, then the function: 222 * edac_mc_unregister_sysfs_main_kobj() must be called 223 * which will perform kobj unregistration and the actual free 224 * will occur during the kobject callback operation 225 */ 226 return mci; 227 } 228 EXPORT_SYMBOL_GPL(edac_mc_alloc); 229 230 /** 231 * edac_mc_free 232 * 'Free' a previously allocated 'mci' structure 233 * @mci: pointer to a struct mem_ctl_info structure 234 */ 235 void edac_mc_free(struct mem_ctl_info *mci) 236 { 237 edac_mc_unregister_sysfs_main_kobj(mci); 238 } 239 EXPORT_SYMBOL_GPL(edac_mc_free); 240 241 242 /* 243 * find_mci_by_dev 244 * 245 * scan list of controllers looking for the one that manages 246 * the 'dev' device 247 */ 248 static struct mem_ctl_info *find_mci_by_dev(struct device *dev) 249 { 250 struct mem_ctl_info *mci; 251 struct list_head *item; 252 253 debugf3("%s()\n", __func__); 254 255 list_for_each(item, &mc_devices) { 256 mci = list_entry(item, struct mem_ctl_info, link); 257 258 if (mci->dev == dev) 259 return mci; 260 } 261 262 return NULL; 263 } 264 265 /* 266 * handler for EDAC to check if NMI type handler has asserted interrupt 267 */ 268 static int edac_mc_assert_error_check_and_clear(void) 269 { 270 int old_state; 271 272 if (edac_op_state == EDAC_OPSTATE_POLL) 273 return 1; 274 275 old_state = edac_err_assert; 276 edac_err_assert = 0; 277 278 return old_state; 279 } 280 281 /* 282 * edac_mc_workq_function 283 * performs the operation scheduled by a workq request 284 */ 285 static void edac_mc_workq_function(struct work_struct *work_req) 286 { 287 struct delayed_work *d_work = to_delayed_work(work_req); 288 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); 289 290 mutex_lock(&mem_ctls_mutex); 291 292 /* if this control struct has movd to offline state, we are done */ 293 if (mci->op_state == OP_OFFLINE) { 294 mutex_unlock(&mem_ctls_mutex); 295 return; 296 } 297 298 /* Only poll controllers that are running polled and have a check */ 299 if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) 300 mci->edac_check(mci); 301 302 mutex_unlock(&mem_ctls_mutex); 303 304 /* Reschedule */ 305 queue_delayed_work(edac_workqueue, &mci->work, 306 msecs_to_jiffies(edac_mc_get_poll_msec())); 307 } 308 309 /* 310 * edac_mc_workq_setup 311 * initialize a workq item for this mci 312 * passing in the new delay period in msec 313 * 314 * locking model: 315 * 316 * called with the mem_ctls_mutex held 317 */ 318 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) 319 { 320 debugf0("%s()\n", __func__); 321 322 /* if this instance is not in the POLL state, then simply return */ 323 if (mci->op_state != OP_RUNNING_POLL) 324 return; 325 326 INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); 327 queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); 328 } 329 330 /* 331 * edac_mc_workq_teardown 332 * stop the workq processing on this mci 333 * 334 * locking model: 335 * 336 * called WITHOUT lock held 337 */ 338 static void edac_mc_workq_teardown(struct mem_ctl_info *mci) 339 { 340 int status; 341 342 status = cancel_delayed_work(&mci->work); 343 if (status == 0) { 344 debugf0("%s() not canceled, flush the queue\n", 345 __func__); 346 347 /* workq instance might be running, wait for it */ 348 flush_workqueue(edac_workqueue); 349 } 350 } 351 352 /* 353 * edac_mc_reset_delay_period(unsigned long value) 354 * 355 * user space has updated our poll period value, need to 356 * reset our workq delays 357 */ 358 void edac_mc_reset_delay_period(int value) 359 { 360 struct mem_ctl_info *mci; 361 struct list_head *item; 362 363 mutex_lock(&mem_ctls_mutex); 364 365 /* scan the list and turn off all workq timers, doing so under lock 366 */ 367 list_for_each(item, &mc_devices) { 368 mci = list_entry(item, struct mem_ctl_info, link); 369 370 if (mci->op_state == OP_RUNNING_POLL) 371 cancel_delayed_work(&mci->work); 372 } 373 374 mutex_unlock(&mem_ctls_mutex); 375 376 377 /* re-walk the list, and reset the poll delay */ 378 mutex_lock(&mem_ctls_mutex); 379 380 list_for_each(item, &mc_devices) { 381 mci = list_entry(item, struct mem_ctl_info, link); 382 383 edac_mc_workq_setup(mci, (unsigned long) value); 384 } 385 386 mutex_unlock(&mem_ctls_mutex); 387 } 388 389 390 391 /* Return 0 on success, 1 on failure. 392 * Before calling this function, caller must 393 * assign a unique value to mci->mc_idx. 394 * 395 * locking model: 396 * 397 * called with the mem_ctls_mutex lock held 398 */ 399 static int add_mc_to_global_list(struct mem_ctl_info *mci) 400 { 401 struct list_head *item, *insert_before; 402 struct mem_ctl_info *p; 403 404 insert_before = &mc_devices; 405 406 p = find_mci_by_dev(mci->dev); 407 if (unlikely(p != NULL)) 408 goto fail0; 409 410 list_for_each(item, &mc_devices) { 411 p = list_entry(item, struct mem_ctl_info, link); 412 413 if (p->mc_idx >= mci->mc_idx) { 414 if (unlikely(p->mc_idx == mci->mc_idx)) 415 goto fail1; 416 417 insert_before = item; 418 break; 419 } 420 } 421 422 list_add_tail_rcu(&mci->link, insert_before); 423 atomic_inc(&edac_handlers); 424 return 0; 425 426 fail0: 427 edac_printk(KERN_WARNING, EDAC_MC, 428 "%s (%s) %s %s already assigned %d\n", dev_name(p->dev), 429 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); 430 return 1; 431 432 fail1: 433 edac_printk(KERN_WARNING, EDAC_MC, 434 "bug in low-level driver: attempt to assign\n" 435 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); 436 return 1; 437 } 438 439 static void complete_mc_list_del(struct rcu_head *head) 440 { 441 struct mem_ctl_info *mci; 442 443 mci = container_of(head, struct mem_ctl_info, rcu); 444 INIT_LIST_HEAD(&mci->link); 445 } 446 447 static void del_mc_from_global_list(struct mem_ctl_info *mci) 448 { 449 atomic_dec(&edac_handlers); 450 list_del_rcu(&mci->link); 451 call_rcu(&mci->rcu, complete_mc_list_del); 452 rcu_barrier(); 453 } 454 455 /** 456 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'. 457 * 458 * If found, return a pointer to the structure. 459 * Else return NULL. 460 * 461 * Caller must hold mem_ctls_mutex. 462 */ 463 struct mem_ctl_info *edac_mc_find(int idx) 464 { 465 struct list_head *item; 466 struct mem_ctl_info *mci; 467 468 list_for_each(item, &mc_devices) { 469 mci = list_entry(item, struct mem_ctl_info, link); 470 471 if (mci->mc_idx >= idx) { 472 if (mci->mc_idx == idx) 473 return mci; 474 475 break; 476 } 477 } 478 479 return NULL; 480 } 481 EXPORT_SYMBOL(edac_mc_find); 482 483 /** 484 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and 485 * create sysfs entries associated with mci structure 486 * @mci: pointer to the mci structure to be added to the list 487 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure. 488 * 489 * Return: 490 * 0 Success 491 * !0 Failure 492 */ 493 494 /* FIXME - should a warning be printed if no error detection? correction? */ 495 int edac_mc_add_mc(struct mem_ctl_info *mci) 496 { 497 debugf0("%s()\n", __func__); 498 499 #ifdef CONFIG_EDAC_DEBUG 500 if (edac_debug_level >= 3) 501 edac_mc_dump_mci(mci); 502 503 if (edac_debug_level >= 4) { 504 int i; 505 506 for (i = 0; i < mci->nr_csrows; i++) { 507 int j; 508 509 edac_mc_dump_csrow(&mci->csrows[i]); 510 for (j = 0; j < mci->csrows[i].nr_channels; j++) 511 edac_mc_dump_channel(&mci->csrows[i]. 512 channels[j]); 513 } 514 } 515 #endif 516 mutex_lock(&mem_ctls_mutex); 517 518 if (add_mc_to_global_list(mci)) 519 goto fail0; 520 521 /* set load time so that error rate can be tracked */ 522 mci->start_time = jiffies; 523 524 if (edac_create_sysfs_mci_device(mci)) { 525 edac_mc_printk(mci, KERN_WARNING, 526 "failed to create sysfs device\n"); 527 goto fail1; 528 } 529 530 /* If there IS a check routine, then we are running POLLED */ 531 if (mci->edac_check != NULL) { 532 /* This instance is NOW RUNNING */ 533 mci->op_state = OP_RUNNING_POLL; 534 535 edac_mc_workq_setup(mci, edac_mc_get_poll_msec()); 536 } else { 537 mci->op_state = OP_RUNNING_INTERRUPT; 538 } 539 540 /* Report action taken */ 541 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" 542 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); 543 544 mutex_unlock(&mem_ctls_mutex); 545 return 0; 546 547 fail1: 548 del_mc_from_global_list(mci); 549 550 fail0: 551 mutex_unlock(&mem_ctls_mutex); 552 return 1; 553 } 554 EXPORT_SYMBOL_GPL(edac_mc_add_mc); 555 556 /** 557 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and 558 * remove mci structure from global list 559 * @pdev: Pointer to 'struct device' representing mci structure to remove. 560 * 561 * Return pointer to removed mci structure, or NULL if device not found. 562 */ 563 struct mem_ctl_info *edac_mc_del_mc(struct device *dev) 564 { 565 struct mem_ctl_info *mci; 566 567 debugf0("%s()\n", __func__); 568 569 mutex_lock(&mem_ctls_mutex); 570 571 /* find the requested mci struct in the global list */ 572 mci = find_mci_by_dev(dev); 573 if (mci == NULL) { 574 mutex_unlock(&mem_ctls_mutex); 575 return NULL; 576 } 577 578 /* marking MCI offline */ 579 mci->op_state = OP_OFFLINE; 580 581 del_mc_from_global_list(mci); 582 mutex_unlock(&mem_ctls_mutex); 583 584 /* flush workq processes and remove sysfs */ 585 edac_mc_workq_teardown(mci); 586 edac_remove_sysfs_mci_device(mci); 587 588 edac_printk(KERN_INFO, EDAC_MC, 589 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, 590 mci->mod_name, mci->ctl_name, edac_dev_name(mci)); 591 592 return mci; 593 } 594 EXPORT_SYMBOL_GPL(edac_mc_del_mc); 595 596 static void edac_mc_scrub_block(unsigned long page, unsigned long offset, 597 u32 size) 598 { 599 struct page *pg; 600 void *virt_addr; 601 unsigned long flags = 0; 602 603 debugf3("%s()\n", __func__); 604 605 /* ECC error page was not in our memory. Ignore it. */ 606 if (!pfn_valid(page)) 607 return; 608 609 /* Find the actual page structure then map it and fix */ 610 pg = pfn_to_page(page); 611 612 if (PageHighMem(pg)) 613 local_irq_save(flags); 614 615 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ); 616 617 /* Perform architecture specific atomic scrub operation */ 618 atomic_scrub(virt_addr + offset, size); 619 620 /* Unmap and complete */ 621 kunmap_atomic(virt_addr, KM_BOUNCE_READ); 622 623 if (PageHighMem(pg)) 624 local_irq_restore(flags); 625 } 626 627 /* FIXME - should return -1 */ 628 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) 629 { 630 struct csrow_info *csrows = mci->csrows; 631 int row, i; 632 633 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page); 634 row = -1; 635 636 for (i = 0; i < mci->nr_csrows; i++) { 637 struct csrow_info *csrow = &csrows[i]; 638 639 if (csrow->nr_pages == 0) 640 continue; 641 642 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) " 643 "mask(0x%lx)\n", mci->mc_idx, __func__, 644 csrow->first_page, page, csrow->last_page, 645 csrow->page_mask); 646 647 if ((page >= csrow->first_page) && 648 (page <= csrow->last_page) && 649 ((page & csrow->page_mask) == 650 (csrow->first_page & csrow->page_mask))) { 651 row = i; 652 break; 653 } 654 } 655 656 if (row == -1) 657 edac_mc_printk(mci, KERN_ERR, 658 "could not look up page error address %lx\n", 659 (unsigned long)page); 660 661 return row; 662 } 663 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page); 664 665 /* FIXME - setable log (warning/emerg) levels */ 666 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */ 667 void edac_mc_handle_ce(struct mem_ctl_info *mci, 668 unsigned long page_frame_number, 669 unsigned long offset_in_page, unsigned long syndrome, 670 int row, int channel, const char *msg) 671 { 672 unsigned long remapped_page; 673 674 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 675 676 /* FIXME - maybe make panic on INTERNAL ERROR an option */ 677 if (row >= mci->nr_csrows || row < 0) { 678 /* something is wrong */ 679 edac_mc_printk(mci, KERN_ERR, 680 "INTERNAL ERROR: row out of range " 681 "(%d >= %d)\n", row, mci->nr_csrows); 682 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 683 return; 684 } 685 686 if (channel >= mci->csrows[row].nr_channels || channel < 0) { 687 /* something is wrong */ 688 edac_mc_printk(mci, KERN_ERR, 689 "INTERNAL ERROR: channel out of range " 690 "(%d >= %d)\n", channel, 691 mci->csrows[row].nr_channels); 692 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 693 return; 694 } 695 696 if (edac_mc_get_log_ce()) 697 /* FIXME - put in DIMM location */ 698 edac_mc_printk(mci, KERN_WARNING, 699 "CE page 0x%lx, offset 0x%lx, grain %d, syndrome " 700 "0x%lx, row %d, channel %d, label \"%s\": %s\n", 701 page_frame_number, offset_in_page, 702 mci->csrows[row].grain, syndrome, row, channel, 703 mci->csrows[row].channels[channel].label, msg); 704 705 mci->ce_count++; 706 mci->csrows[row].ce_count++; 707 mci->csrows[row].channels[channel].ce_count++; 708 709 if (mci->scrub_mode & SCRUB_SW_SRC) { 710 /* 711 * Some MC's can remap memory so that it is still available 712 * at a different address when PCI devices map into memory. 713 * MC's that can't do this lose the memory where PCI devices 714 * are mapped. This mapping is MC dependant and so we call 715 * back into the MC driver for it to map the MC page to 716 * a physical (CPU) page which can then be mapped to a virtual 717 * page - which can then be scrubbed. 718 */ 719 remapped_page = mci->ctl_page_to_phys ? 720 mci->ctl_page_to_phys(mci, page_frame_number) : 721 page_frame_number; 722 723 edac_mc_scrub_block(remapped_page, offset_in_page, 724 mci->csrows[row].grain); 725 } 726 } 727 EXPORT_SYMBOL_GPL(edac_mc_handle_ce); 728 729 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) 730 { 731 if (edac_mc_get_log_ce()) 732 edac_mc_printk(mci, KERN_WARNING, 733 "CE - no information available: %s\n", msg); 734 735 mci->ce_noinfo_count++; 736 mci->ce_count++; 737 } 738 EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info); 739 740 void edac_mc_handle_ue(struct mem_ctl_info *mci, 741 unsigned long page_frame_number, 742 unsigned long offset_in_page, int row, const char *msg) 743 { 744 int len = EDAC_MC_LABEL_LEN * 4; 745 char labels[len + 1]; 746 char *pos = labels; 747 int chan; 748 int chars; 749 750 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 751 752 /* FIXME - maybe make panic on INTERNAL ERROR an option */ 753 if (row >= mci->nr_csrows || row < 0) { 754 /* something is wrong */ 755 edac_mc_printk(mci, KERN_ERR, 756 "INTERNAL ERROR: row out of range " 757 "(%d >= %d)\n", row, mci->nr_csrows); 758 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 759 return; 760 } 761 762 chars = snprintf(pos, len + 1, "%s", 763 mci->csrows[row].channels[0].label); 764 len -= chars; 765 pos += chars; 766 767 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0); 768 chan++) { 769 chars = snprintf(pos, len + 1, ":%s", 770 mci->csrows[row].channels[chan].label); 771 len -= chars; 772 pos += chars; 773 } 774 775 if (edac_mc_get_log_ue()) 776 edac_mc_printk(mci, KERN_EMERG, 777 "UE page 0x%lx, offset 0x%lx, grain %d, row %d, " 778 "labels \"%s\": %s\n", page_frame_number, 779 offset_in_page, mci->csrows[row].grain, row, 780 labels, msg); 781 782 if (edac_mc_get_panic_on_ue()) 783 panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, " 784 "row %d, labels \"%s\": %s\n", mci->mc_idx, 785 page_frame_number, offset_in_page, 786 mci->csrows[row].grain, row, labels, msg); 787 788 mci->ue_count++; 789 mci->csrows[row].ue_count++; 790 } 791 EXPORT_SYMBOL_GPL(edac_mc_handle_ue); 792 793 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) 794 { 795 if (edac_mc_get_panic_on_ue()) 796 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx); 797 798 if (edac_mc_get_log_ue()) 799 edac_mc_printk(mci, KERN_WARNING, 800 "UE - no information available: %s\n", msg); 801 mci->ue_noinfo_count++; 802 mci->ue_count++; 803 } 804 EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); 805 806 /************************************************************* 807 * On Fully Buffered DIMM modules, this help function is 808 * called to process UE events 809 */ 810 void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, 811 unsigned int csrow, 812 unsigned int channela, 813 unsigned int channelb, char *msg) 814 { 815 int len = EDAC_MC_LABEL_LEN * 4; 816 char labels[len + 1]; 817 char *pos = labels; 818 int chars; 819 820 if (csrow >= mci->nr_csrows) { 821 /* something is wrong */ 822 edac_mc_printk(mci, KERN_ERR, 823 "INTERNAL ERROR: row out of range (%d >= %d)\n", 824 csrow, mci->nr_csrows); 825 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 826 return; 827 } 828 829 if (channela >= mci->csrows[csrow].nr_channels) { 830 /* something is wrong */ 831 edac_mc_printk(mci, KERN_ERR, 832 "INTERNAL ERROR: channel-a out of range " 833 "(%d >= %d)\n", 834 channela, mci->csrows[csrow].nr_channels); 835 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 836 return; 837 } 838 839 if (channelb >= mci->csrows[csrow].nr_channels) { 840 /* something is wrong */ 841 edac_mc_printk(mci, KERN_ERR, 842 "INTERNAL ERROR: channel-b out of range " 843 "(%d >= %d)\n", 844 channelb, mci->csrows[csrow].nr_channels); 845 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 846 return; 847 } 848 849 mci->ue_count++; 850 mci->csrows[csrow].ue_count++; 851 852 /* Generate the DIMM labels from the specified channels */ 853 chars = snprintf(pos, len + 1, "%s", 854 mci->csrows[csrow].channels[channela].label); 855 len -= chars; 856 pos += chars; 857 chars = snprintf(pos, len + 1, "-%s", 858 mci->csrows[csrow].channels[channelb].label); 859 860 if (edac_mc_get_log_ue()) 861 edac_mc_printk(mci, KERN_EMERG, 862 "UE row %d, channel-a= %d channel-b= %d " 863 "labels \"%s\": %s\n", csrow, channela, channelb, 864 labels, msg); 865 866 if (edac_mc_get_panic_on_ue()) 867 panic("UE row %d, channel-a= %d channel-b= %d " 868 "labels \"%s\": %s\n", csrow, channela, 869 channelb, labels, msg); 870 } 871 EXPORT_SYMBOL(edac_mc_handle_fbd_ue); 872 873 /************************************************************* 874 * On Fully Buffered DIMM modules, this help function is 875 * called to process CE events 876 */ 877 void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, 878 unsigned int csrow, unsigned int channel, char *msg) 879 { 880 881 /* Ensure boundary values */ 882 if (csrow >= mci->nr_csrows) { 883 /* something is wrong */ 884 edac_mc_printk(mci, KERN_ERR, 885 "INTERNAL ERROR: row out of range (%d >= %d)\n", 886 csrow, mci->nr_csrows); 887 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 888 return; 889 } 890 if (channel >= mci->csrows[csrow].nr_channels) { 891 /* something is wrong */ 892 edac_mc_printk(mci, KERN_ERR, 893 "INTERNAL ERROR: channel out of range (%d >= %d)\n", 894 channel, mci->csrows[csrow].nr_channels); 895 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 896 return; 897 } 898 899 if (edac_mc_get_log_ce()) 900 /* FIXME - put in DIMM location */ 901 edac_mc_printk(mci, KERN_WARNING, 902 "CE row %d, channel %d, label \"%s\": %s\n", 903 csrow, channel, 904 mci->csrows[csrow].channels[channel].label, msg); 905 906 mci->ce_count++; 907 mci->csrows[csrow].ce_count++; 908 mci->csrows[csrow].channels[channel].ce_count++; 909 } 910 EXPORT_SYMBOL(edac_mc_handle_fbd_ce); 911