1 2 /* 3 * edac_device.c 4 * (C) 2007 www.douglaskthompson.com 5 * 6 * This file may be distributed under the terms of the 7 * GNU General Public License. 8 * 9 * Written by Doug Thompson <norsk5@xmission.com> 10 * 11 * edac_device API implementation 12 * 19 Jan 2007 13 */ 14 15 #include <asm/page.h> 16 #include <linux/uaccess.h> 17 #include <linux/ctype.h> 18 #include <linux/highmem.h> 19 #include <linux/init.h> 20 #include <linux/jiffies.h> 21 #include <linux/module.h> 22 #include <linux/slab.h> 23 #include <linux/smp.h> 24 #include <linux/spinlock.h> 25 #include <linux/sysctl.h> 26 #include <linux/timer.h> 27 28 #include "edac_device.h" 29 #include "edac_module.h" 30 31 /* lock for the list: 'edac_device_list', manipulation of this list 32 * is protected by the 'device_ctls_mutex' lock 33 */ 34 static DEFINE_MUTEX(device_ctls_mutex); 35 static LIST_HEAD(edac_device_list); 36 37 #ifdef CONFIG_EDAC_DEBUG 38 static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) 39 { 40 edac_dbg(3, "\tedac_dev = %p dev_idx=%d\n", 41 edac_dev, edac_dev->dev_idx); 42 edac_dbg(4, "\tedac_dev->edac_check = %p\n", edac_dev->edac_check); 43 edac_dbg(3, "\tdev = %p\n", edac_dev->dev); 44 edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n", 45 edac_dev->mod_name, edac_dev->ctl_name); 46 edac_dbg(3, "\tpvt_info = %p\n\n", edac_dev->pvt_info); 47 } 48 #endif /* CONFIG_EDAC_DEBUG */ 49 50 struct edac_device_ctl_info *edac_device_alloc_ctl_info( 51 unsigned sz_private, 52 char *edac_device_name, unsigned nr_instances, 53 char *edac_block_name, unsigned nr_blocks, 54 unsigned offset_value, /* zero, 1, or other based offset */ 55 struct edac_dev_sysfs_block_attribute *attrib_spec, unsigned nr_attrib, 56 int device_index) 57 { 58 struct edac_device_ctl_info *dev_ctl; 59 struct edac_device_instance *dev_inst, *inst; 60 struct edac_device_block *dev_blk, *blk_p, *blk; 61 struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib; 62 unsigned total_size; 63 unsigned count; 64 unsigned instance, block, attr; 65 void *pvt, *p; 66 int err; 67 68 edac_dbg(4, "instances=%d blocks=%d\n", nr_instances, nr_blocks); 69 70 /* Calculate the size of memory we need to allocate AND 71 * determine the offsets of the various item arrays 72 * (instance,block,attrib) from the start of an allocated structure. 73 * We want the alignment of each item (instance,block,attrib) 74 * to be at least as stringent as what the compiler would 75 * provide if we could simply hardcode everything into a single struct. 76 */ 77 p = NULL; 78 dev_ctl = edac_align_ptr(&p, sizeof(*dev_ctl), 1); 79 80 /* Calc the 'end' offset past end of ONE ctl_info structure 81 * which will become the start of the 'instance' array 82 */ 83 dev_inst = edac_align_ptr(&p, sizeof(*dev_inst), nr_instances); 84 85 /* Calc the 'end' offset past the instance array within the ctl_info 86 * which will become the start of the block array 87 */ 88 count = nr_instances * nr_blocks; 89 dev_blk = edac_align_ptr(&p, sizeof(*dev_blk), count); 90 91 /* Calc the 'end' offset past the dev_blk array 92 * which will become the start of the attrib array, if any. 93 */ 94 /* calc how many nr_attrib we need */ 95 if (nr_attrib > 0) 96 count *= nr_attrib; 97 dev_attrib = edac_align_ptr(&p, sizeof(*dev_attrib), count); 98 99 /* Calc the 'end' offset past the attributes array */ 100 pvt = edac_align_ptr(&p, sz_private, 1); 101 102 /* 'pvt' now points to where the private data area is. 103 * At this point 'pvt' (like dev_inst,dev_blk and dev_attrib) 104 * is baselined at ZERO 105 */ 106 total_size = ((unsigned long)pvt) + sz_private; 107 108 /* Allocate the amount of memory for the set of control structures */ 109 dev_ctl = kzalloc(total_size, GFP_KERNEL); 110 if (dev_ctl == NULL) 111 return NULL; 112 113 /* Adjust pointers so they point within the actual memory we 114 * just allocated rather than an imaginary chunk of memory 115 * located at address 0. 116 * 'dev_ctl' points to REAL memory, while the others are 117 * ZERO based and thus need to be adjusted to point within 118 * the allocated memory. 119 */ 120 dev_inst = (struct edac_device_instance *) 121 (((char *)dev_ctl) + ((unsigned long)dev_inst)); 122 dev_blk = (struct edac_device_block *) 123 (((char *)dev_ctl) + ((unsigned long)dev_blk)); 124 dev_attrib = (struct edac_dev_sysfs_block_attribute *) 125 (((char *)dev_ctl) + ((unsigned long)dev_attrib)); 126 pvt = sz_private ? (((char *)dev_ctl) + ((unsigned long)pvt)) : NULL; 127 128 /* Begin storing the information into the control info structure */ 129 dev_ctl->dev_idx = device_index; 130 dev_ctl->nr_instances = nr_instances; 131 dev_ctl->instances = dev_inst; 132 dev_ctl->pvt_info = pvt; 133 134 /* Default logging of CEs and UEs */ 135 dev_ctl->log_ce = 1; 136 dev_ctl->log_ue = 1; 137 138 /* Name of this edac device */ 139 snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name); 140 141 edac_dbg(4, "edac_dev=%p next after end=%p\n", 142 dev_ctl, pvt + sz_private); 143 144 /* Initialize every Instance */ 145 for (instance = 0; instance < nr_instances; instance++) { 146 inst = &dev_inst[instance]; 147 inst->ctl = dev_ctl; 148 inst->nr_blocks = nr_blocks; 149 blk_p = &dev_blk[instance * nr_blocks]; 150 inst->blocks = blk_p; 151 152 /* name of this instance */ 153 snprintf(inst->name, sizeof(inst->name), 154 "%s%u", edac_device_name, instance); 155 156 /* Initialize every block in each instance */ 157 for (block = 0; block < nr_blocks; block++) { 158 blk = &blk_p[block]; 159 blk->instance = inst; 160 snprintf(blk->name, sizeof(blk->name), 161 "%s%d", edac_block_name, block+offset_value); 162 163 edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n", 164 instance, inst, block, blk, blk->name); 165 166 /* if there are NO attributes OR no attribute pointer 167 * then continue on to next block iteration 168 */ 169 if ((nr_attrib == 0) || (attrib_spec == NULL)) 170 continue; 171 172 /* setup the attribute array for this block */ 173 blk->nr_attribs = nr_attrib; 174 attrib_p = &dev_attrib[block*nr_instances*nr_attrib]; 175 blk->block_attributes = attrib_p; 176 177 edac_dbg(4, "THIS BLOCK_ATTRIB=%p\n", 178 blk->block_attributes); 179 180 /* Initialize every user specified attribute in this 181 * block with the data the caller passed in 182 * Each block gets its own copy of pointers, 183 * and its unique 'value' 184 */ 185 for (attr = 0; attr < nr_attrib; attr++) { 186 attrib = &attrib_p[attr]; 187 188 /* populate the unique per attrib 189 * with the code pointers and info 190 */ 191 attrib->attr = attrib_spec[attr].attr; 192 attrib->show = attrib_spec[attr].show; 193 attrib->store = attrib_spec[attr].store; 194 195 attrib->block = blk; /* up link */ 196 197 edac_dbg(4, "alloc-attrib=%p attrib_name='%s' attrib-spec=%p spec-name=%s\n", 198 attrib, attrib->attr.name, 199 &attrib_spec[attr], 200 attrib_spec[attr].attr.name 201 ); 202 } 203 } 204 } 205 206 /* Mark this instance as merely ALLOCATED */ 207 dev_ctl->op_state = OP_ALLOC; 208 209 /* 210 * Initialize the 'root' kobj for the edac_device controller 211 */ 212 err = edac_device_register_sysfs_main_kobj(dev_ctl); 213 if (err) { 214 kfree(dev_ctl); 215 return NULL; 216 } 217 218 /* at this point, the root kobj is valid, and in order to 219 * 'free' the object, then the function: 220 * edac_device_unregister_sysfs_main_kobj() must be called 221 * which will perform kobj unregistration and the actual free 222 * will occur during the kobject callback operation 223 */ 224 225 return dev_ctl; 226 } 227 EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info); 228 229 void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info) 230 { 231 edac_device_unregister_sysfs_main_kobj(ctl_info); 232 } 233 EXPORT_SYMBOL_GPL(edac_device_free_ctl_info); 234 235 /* 236 * find_edac_device_by_dev 237 * scans the edac_device list for a specific 'struct device *' 238 * 239 * lock to be held prior to call: device_ctls_mutex 240 * 241 * Return: 242 * pointer to control structure managing 'dev' 243 * NULL if not found on list 244 */ 245 static struct edac_device_ctl_info *find_edac_device_by_dev(struct device *dev) 246 { 247 struct edac_device_ctl_info *edac_dev; 248 struct list_head *item; 249 250 edac_dbg(0, "\n"); 251 252 list_for_each(item, &edac_device_list) { 253 edac_dev = list_entry(item, struct edac_device_ctl_info, link); 254 255 if (edac_dev->dev == dev) 256 return edac_dev; 257 } 258 259 return NULL; 260 } 261 262 /* 263 * add_edac_dev_to_global_list 264 * Before calling this function, caller must 265 * assign a unique value to edac_dev->dev_idx. 266 * 267 * lock to be held prior to call: device_ctls_mutex 268 * 269 * Return: 270 * 0 on success 271 * 1 on failure. 272 */ 273 static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev) 274 { 275 struct list_head *item, *insert_before; 276 struct edac_device_ctl_info *rover; 277 278 insert_before = &edac_device_list; 279 280 /* Determine if already on the list */ 281 rover = find_edac_device_by_dev(edac_dev->dev); 282 if (unlikely(rover != NULL)) 283 goto fail0; 284 285 /* Insert in ascending order by 'dev_idx', so find position */ 286 list_for_each(item, &edac_device_list) { 287 rover = list_entry(item, struct edac_device_ctl_info, link); 288 289 if (rover->dev_idx >= edac_dev->dev_idx) { 290 if (unlikely(rover->dev_idx == edac_dev->dev_idx)) 291 goto fail1; 292 293 insert_before = item; 294 break; 295 } 296 } 297 298 list_add_tail_rcu(&edac_dev->link, insert_before); 299 return 0; 300 301 fail0: 302 edac_printk(KERN_WARNING, EDAC_MC, 303 "%s (%s) %s %s already assigned %d\n", 304 dev_name(rover->dev), edac_dev_name(rover), 305 rover->mod_name, rover->ctl_name, rover->dev_idx); 306 return 1; 307 308 fail1: 309 edac_printk(KERN_WARNING, EDAC_MC, 310 "bug in low-level driver: attempt to assign\n" 311 " duplicate dev_idx %d in %s()\n", rover->dev_idx, 312 __func__); 313 return 1; 314 } 315 316 /* 317 * del_edac_device_from_global_list 318 */ 319 static void del_edac_device_from_global_list(struct edac_device_ctl_info 320 *edac_device) 321 { 322 list_del_rcu(&edac_device->link); 323 324 /* these are for safe removal of devices from global list while 325 * NMI handlers may be traversing list 326 */ 327 synchronize_rcu(); 328 INIT_LIST_HEAD(&edac_device->link); 329 } 330 331 /* 332 * edac_device_workq_function 333 * performs the operation scheduled by a workq request 334 * 335 * this workq is embedded within an edac_device_ctl_info 336 * structure, that needs to be polled for possible error events. 337 * 338 * This operation is to acquire the list mutex lock 339 * (thus preventing insertation or deletion) 340 * and then call the device's poll function IFF this device is 341 * running polled and there is a poll function defined. 342 */ 343 static void edac_device_workq_function(struct work_struct *work_req) 344 { 345 struct delayed_work *d_work = to_delayed_work(work_req); 346 struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work); 347 348 mutex_lock(&device_ctls_mutex); 349 350 /* If we are being removed, bail out immediately */ 351 if (edac_dev->op_state == OP_OFFLINE) { 352 mutex_unlock(&device_ctls_mutex); 353 return; 354 } 355 356 /* Only poll controllers that are running polled and have a check */ 357 if ((edac_dev->op_state == OP_RUNNING_POLL) && 358 (edac_dev->edac_check != NULL)) { 359 edac_dev->edac_check(edac_dev); 360 } 361 362 mutex_unlock(&device_ctls_mutex); 363 364 /* Reschedule the workq for the next time period to start again 365 * if the number of msec is for 1 sec, then adjust to the next 366 * whole one second to save timers firing all over the period 367 * between integral seconds 368 */ 369 if (edac_dev->poll_msec == 1000) 370 edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); 371 else 372 edac_queue_work(&edac_dev->work, edac_dev->delay); 373 } 374 375 /* 376 * edac_device_workq_setup 377 * initialize a workq item for this edac_device instance 378 * passing in the new delay period in msec 379 */ 380 static void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, 381 unsigned msec) 382 { 383 edac_dbg(0, "\n"); 384 385 /* take the arg 'msec' and set it into the control structure 386 * to used in the time period calculation 387 * then calc the number of jiffies that represents 388 */ 389 edac_dev->poll_msec = msec; 390 edac_dev->delay = msecs_to_jiffies(msec); 391 392 INIT_DELAYED_WORK(&edac_dev->work, edac_device_workq_function); 393 394 /* optimize here for the 1 second case, which will be normal value, to 395 * fire ON the 1 second time event. This helps reduce all sorts of 396 * timers firing on sub-second basis, while they are happy 397 * to fire together on the 1 second exactly 398 */ 399 if (edac_dev->poll_msec == 1000) 400 edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); 401 else 402 edac_queue_work(&edac_dev->work, edac_dev->delay); 403 } 404 405 /* 406 * edac_device_workq_teardown 407 * stop the workq processing on this edac_dev 408 */ 409 static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) 410 { 411 if (!edac_dev->edac_check) 412 return; 413 414 edac_dev->op_state = OP_OFFLINE; 415 416 edac_stop_work(&edac_dev->work); 417 } 418 419 /* 420 * edac_device_reset_delay_period 421 * 422 * need to stop any outstanding workq queued up at this time 423 * because we will be resetting the sleep time. 424 * Then restart the workq on the new delay 425 */ 426 void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, 427 unsigned long value) 428 { 429 unsigned long jiffs = msecs_to_jiffies(value); 430 431 if (value == 1000) 432 jiffs = round_jiffies_relative(value); 433 434 edac_dev->poll_msec = value; 435 edac_dev->delay = jiffs; 436 437 edac_mod_work(&edac_dev->work, jiffs); 438 } 439 440 int edac_device_alloc_index(void) 441 { 442 static atomic_t device_indexes = ATOMIC_INIT(0); 443 444 return atomic_inc_return(&device_indexes) - 1; 445 } 446 EXPORT_SYMBOL_GPL(edac_device_alloc_index); 447 448 int edac_device_add_device(struct edac_device_ctl_info *edac_dev) 449 { 450 edac_dbg(0, "\n"); 451 452 #ifdef CONFIG_EDAC_DEBUG 453 if (edac_debug_level >= 3) 454 edac_device_dump_device(edac_dev); 455 #endif 456 mutex_lock(&device_ctls_mutex); 457 458 if (add_edac_dev_to_global_list(edac_dev)) 459 goto fail0; 460 461 /* set load time so that error rate can be tracked */ 462 edac_dev->start_time = jiffies; 463 464 /* create this instance's sysfs entries */ 465 if (edac_device_create_sysfs(edac_dev)) { 466 edac_device_printk(edac_dev, KERN_WARNING, 467 "failed to create sysfs device\n"); 468 goto fail1; 469 } 470 471 /* If there IS a check routine, then we are running POLLED */ 472 if (edac_dev->edac_check != NULL) { 473 /* This instance is NOW RUNNING */ 474 edac_dev->op_state = OP_RUNNING_POLL; 475 476 /* 477 * enable workq processing on this instance, 478 * default = 1000 msec 479 */ 480 edac_device_workq_setup(edac_dev, 1000); 481 } else { 482 edac_dev->op_state = OP_RUNNING_INTERRUPT; 483 } 484 485 /* Report action taken */ 486 edac_device_printk(edac_dev, KERN_INFO, 487 "Giving out device to module %s controller %s: DEV %s (%s)\n", 488 edac_dev->mod_name, edac_dev->ctl_name, edac_dev->dev_name, 489 edac_op_state_to_string(edac_dev->op_state)); 490 491 mutex_unlock(&device_ctls_mutex); 492 return 0; 493 494 fail1: 495 /* Some error, so remove the entry from the lsit */ 496 del_edac_device_from_global_list(edac_dev); 497 498 fail0: 499 mutex_unlock(&device_ctls_mutex); 500 return 1; 501 } 502 EXPORT_SYMBOL_GPL(edac_device_add_device); 503 504 struct edac_device_ctl_info *edac_device_del_device(struct device *dev) 505 { 506 struct edac_device_ctl_info *edac_dev; 507 508 edac_dbg(0, "\n"); 509 510 mutex_lock(&device_ctls_mutex); 511 512 /* Find the structure on the list, if not there, then leave */ 513 edac_dev = find_edac_device_by_dev(dev); 514 if (edac_dev == NULL) { 515 mutex_unlock(&device_ctls_mutex); 516 return NULL; 517 } 518 519 /* mark this instance as OFFLINE */ 520 edac_dev->op_state = OP_OFFLINE; 521 522 /* deregister from global list */ 523 del_edac_device_from_global_list(edac_dev); 524 525 mutex_unlock(&device_ctls_mutex); 526 527 /* clear workq processing on this instance */ 528 edac_device_workq_teardown(edac_dev); 529 530 /* Tear down the sysfs entries for this instance */ 531 edac_device_remove_sysfs(edac_dev); 532 533 edac_printk(KERN_INFO, EDAC_MC, 534 "Removed device %d for %s %s: DEV %s\n", 535 edac_dev->dev_idx, 536 edac_dev->mod_name, edac_dev->ctl_name, edac_dev_name(edac_dev)); 537 538 return edac_dev; 539 } 540 EXPORT_SYMBOL_GPL(edac_device_del_device); 541 542 static inline int edac_device_get_log_ce(struct edac_device_ctl_info *edac_dev) 543 { 544 return edac_dev->log_ce; 545 } 546 547 static inline int edac_device_get_log_ue(struct edac_device_ctl_info *edac_dev) 548 { 549 return edac_dev->log_ue; 550 } 551 552 static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info 553 *edac_dev) 554 { 555 return edac_dev->panic_on_ue; 556 } 557 558 void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev, 559 unsigned int count, int inst_nr, int block_nr, 560 const char *msg) 561 { 562 struct edac_device_instance *instance; 563 struct edac_device_block *block = NULL; 564 565 if (!count) 566 return; 567 568 if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { 569 edac_device_printk(edac_dev, KERN_ERR, 570 "INTERNAL ERROR: 'instance' out of range " 571 "(%d >= %d)\n", inst_nr, 572 edac_dev->nr_instances); 573 return; 574 } 575 576 instance = edac_dev->instances + inst_nr; 577 578 if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) { 579 edac_device_printk(edac_dev, KERN_ERR, 580 "INTERNAL ERROR: instance %d 'block' " 581 "out of range (%d >= %d)\n", 582 inst_nr, block_nr, 583 instance->nr_blocks); 584 return; 585 } 586 587 if (instance->nr_blocks > 0) { 588 block = instance->blocks + block_nr; 589 block->counters.ce_count += count; 590 } 591 592 /* Propagate the count up the 'totals' tree */ 593 instance->counters.ce_count += count; 594 edac_dev->counters.ce_count += count; 595 596 if (edac_device_get_log_ce(edac_dev)) 597 edac_device_printk(edac_dev, KERN_WARNING, 598 "CE: %s instance: %s block: %s count: %d '%s'\n", 599 edac_dev->ctl_name, instance->name, 600 block ? block->name : "N/A", count, msg); 601 } 602 EXPORT_SYMBOL_GPL(edac_device_handle_ce_count); 603 604 void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev, 605 unsigned int count, int inst_nr, int block_nr, 606 const char *msg) 607 { 608 struct edac_device_instance *instance; 609 struct edac_device_block *block = NULL; 610 611 if (!count) 612 return; 613 614 if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { 615 edac_device_printk(edac_dev, KERN_ERR, 616 "INTERNAL ERROR: 'instance' out of range " 617 "(%d >= %d)\n", inst_nr, 618 edac_dev->nr_instances); 619 return; 620 } 621 622 instance = edac_dev->instances + inst_nr; 623 624 if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) { 625 edac_device_printk(edac_dev, KERN_ERR, 626 "INTERNAL ERROR: instance %d 'block' " 627 "out of range (%d >= %d)\n", 628 inst_nr, block_nr, 629 instance->nr_blocks); 630 return; 631 } 632 633 if (instance->nr_blocks > 0) { 634 block = instance->blocks + block_nr; 635 block->counters.ue_count += count; 636 } 637 638 /* Propagate the count up the 'totals' tree */ 639 instance->counters.ue_count += count; 640 edac_dev->counters.ue_count += count; 641 642 if (edac_device_get_log_ue(edac_dev)) 643 edac_device_printk(edac_dev, KERN_EMERG, 644 "UE: %s instance: %s block: %s count: %d '%s'\n", 645 edac_dev->ctl_name, instance->name, 646 block ? block->name : "N/A", count, msg); 647 648 if (edac_device_get_panic_on_ue(edac_dev)) 649 panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n", 650 edac_dev->ctl_name, instance->name, 651 block ? block->name : "N/A", count, msg); 652 } 653 EXPORT_SYMBOL_GPL(edac_device_handle_ue_count); 654