1 /* 2 * APEI Generic Hardware Error Source support 3 * 4 * Generic Hardware Error Source provides a way to report platform 5 * hardware errors (such as that from chipset). It works in so called 6 * "Firmware First" mode, that is, hardware errors are reported to 7 * firmware firstly, then reported to Linux by firmware. This way, 8 * some non-standard hardware error registers or non-standard hardware 9 * link can be checked by firmware to produce more hardware error 10 * information for Linux. 11 * 12 * For more information about Generic Hardware Error Source, please 13 * refer to ACPI Specification version 4.0, section 17.3.2.6 14 * 15 * Copyright 2010,2011 Intel Corp. 16 * Author: Huang Ying <ying.huang@intel.com> 17 * 18 * This program is free software; you can redistribute it and/or 19 * modify it under the terms of the GNU General Public License version 20 * 2 as published by the Free Software Foundation; 21 * 22 * This program is distributed in the hope that it will be useful, 23 * but WITHOUT ANY WARRANTY; without even the implied warranty of 24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 25 * GNU General Public License for more details. 26 * 27 * You should have received a copy of the GNU General Public License 28 * along with this program; if not, write to the Free Software 29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 30 */ 31 32 #include <linux/kernel.h> 33 #include <linux/module.h> 34 #include <linux/init.h> 35 #include <linux/acpi.h> 36 #include <linux/acpi_io.h> 37 #include <linux/io.h> 38 #include <linux/interrupt.h> 39 #include <linux/timer.h> 40 #include <linux/cper.h> 41 #include <linux/kdebug.h> 42 #include <linux/platform_device.h> 43 #include <linux/mutex.h> 44 #include <linux/ratelimit.h> 45 #include <linux/vmalloc.h> 46 #include <linux/irq_work.h> 47 #include <linux/llist.h> 48 #include <linux/genalloc.h> 49 #include <linux/pci.h> 50 #include <linux/aer.h> 51 #include <acpi/apei.h> 52 #include <acpi/hed.h> 53 #include <asm/mce.h> 54 #include <asm/tlbflush.h> 55 #include <asm/nmi.h> 56 57 #include "apei-internal.h" 58 59 #define GHES_PFX "GHES: " 60 61 #define GHES_ESTATUS_MAX_SIZE 65536 62 #define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536 63 64 #define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 65 66 /* This is just an estimation for memory pool allocation */ 67 #define GHES_ESTATUS_CACHE_AVG_SIZE 512 68 69 #define GHES_ESTATUS_CACHES_SIZE 4 70 71 #define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL 72 /* Prevent too many caches are allocated because of RCU */ 73 #define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) 74 75 #define GHES_ESTATUS_CACHE_LEN(estatus_len) \ 76 (sizeof(struct ghes_estatus_cache) + (estatus_len)) 77 #define GHES_ESTATUS_FROM_CACHE(estatus_cache) \ 78 ((struct acpi_hest_generic_status *) \ 79 ((struct ghes_estatus_cache *)(estatus_cache) + 1)) 80 81 #define GHES_ESTATUS_NODE_LEN(estatus_len) \ 82 (sizeof(struct ghes_estatus_node) + (estatus_len)) 83 #define GHES_ESTATUS_FROM_NODE(estatus_node) \ 84 ((struct acpi_hest_generic_status *) \ 85 ((struct ghes_estatus_node *)(estatus_node) + 1)) 86 87 /* 88 * One struct ghes is created for each generic hardware error source. 89 * It provides the context for APEI hardware error timer/IRQ/SCI/NMI 90 * handler. 91 * 92 * estatus: memory buffer for error status block, allocated during 93 * HEST parsing. 94 */ 95 #define GHES_TO_CLEAR 0x0001 96 #define GHES_EXITING 0x0002 97 98 struct ghes { 99 struct acpi_hest_generic *generic; 100 struct acpi_hest_generic_status *estatus; 101 u64 buffer_paddr; 102 unsigned long flags; 103 union { 104 struct list_head list; 105 struct timer_list timer; 106 unsigned int irq; 107 }; 108 }; 109 110 struct ghes_estatus_node { 111 struct llist_node llnode; 112 struct acpi_hest_generic *generic; 113 }; 114 115 struct ghes_estatus_cache { 116 u32 estatus_len; 117 atomic_t count; 118 struct acpi_hest_generic *generic; 119 unsigned long long time_in; 120 struct rcu_head rcu; 121 }; 122 123 bool ghes_disable; 124 module_param_named(disable, ghes_disable, bool, 0); 125 126 static int ghes_panic_timeout __read_mostly = 30; 127 128 /* 129 * All error sources notified with SCI shares one notifier function, 130 * so they need to be linked and checked one by one. This is applied 131 * to NMI too. 132 * 133 * RCU is used for these lists, so ghes_list_mutex is only used for 134 * list changing, not for traversing. 135 */ 136 static LIST_HEAD(ghes_sci); 137 static LIST_HEAD(ghes_nmi); 138 static DEFINE_MUTEX(ghes_list_mutex); 139 140 /* 141 * NMI may be triggered on any CPU, so ghes_nmi_lock is used for 142 * mutual exclusion. 143 */ 144 static DEFINE_RAW_SPINLOCK(ghes_nmi_lock); 145 146 /* 147 * Because the memory area used to transfer hardware error information 148 * from BIOS to Linux can be determined only in NMI, IRQ or timer 149 * handler, but general ioremap can not be used in atomic context, so 150 * a special version of atomic ioremap is implemented for that. 151 */ 152 153 /* 154 * Two virtual pages are used, one for NMI context, the other for 155 * IRQ/PROCESS context 156 */ 157 #define GHES_IOREMAP_PAGES 2 158 #define GHES_IOREMAP_NMI_PAGE(base) (base) 159 #define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE) 160 161 /* virtual memory area for atomic ioremap */ 162 static struct vm_struct *ghes_ioremap_area; 163 /* 164 * These 2 spinlock is used to prevent atomic ioremap virtual memory 165 * area from being mapped simultaneously. 166 */ 167 static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); 168 static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); 169 170 /* 171 * printk is not safe in NMI context. So in NMI handler, we allocate 172 * required memory from lock-less memory allocator 173 * (ghes_estatus_pool), save estatus into it, put them into lock-less 174 * list (ghes_estatus_llist), then delay printk into IRQ context via 175 * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record 176 * required pool size by all NMI error source. 177 */ 178 static struct gen_pool *ghes_estatus_pool; 179 static unsigned long ghes_estatus_pool_size_request; 180 static struct llist_head ghes_estatus_llist; 181 static struct irq_work ghes_proc_irq_work; 182 183 struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; 184 static atomic_t ghes_estatus_cache_alloced; 185 186 static int ghes_ioremap_init(void) 187 { 188 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, 189 VM_IOREMAP, VMALLOC_START, VMALLOC_END); 190 if (!ghes_ioremap_area) { 191 pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n"); 192 return -ENOMEM; 193 } 194 195 return 0; 196 } 197 198 static void ghes_ioremap_exit(void) 199 { 200 free_vm_area(ghes_ioremap_area); 201 } 202 203 static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) 204 { 205 unsigned long vaddr; 206 207 vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr); 208 ioremap_page_range(vaddr, vaddr + PAGE_SIZE, 209 pfn << PAGE_SHIFT, PAGE_KERNEL); 210 211 return (void __iomem *)vaddr; 212 } 213 214 static void __iomem *ghes_ioremap_pfn_irq(u64 pfn) 215 { 216 unsigned long vaddr; 217 218 vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr); 219 ioremap_page_range(vaddr, vaddr + PAGE_SIZE, 220 pfn << PAGE_SHIFT, PAGE_KERNEL); 221 222 return (void __iomem *)vaddr; 223 } 224 225 static void ghes_iounmap_nmi(void __iomem *vaddr_ptr) 226 { 227 unsigned long vaddr = (unsigned long __force)vaddr_ptr; 228 void *base = ghes_ioremap_area->addr; 229 230 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); 231 unmap_kernel_range_noflush(vaddr, PAGE_SIZE); 232 __flush_tlb_one(vaddr); 233 } 234 235 static void ghes_iounmap_irq(void __iomem *vaddr_ptr) 236 { 237 unsigned long vaddr = (unsigned long __force)vaddr_ptr; 238 void *base = ghes_ioremap_area->addr; 239 240 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); 241 unmap_kernel_range_noflush(vaddr, PAGE_SIZE); 242 __flush_tlb_one(vaddr); 243 } 244 245 static int ghes_estatus_pool_init(void) 246 { 247 ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); 248 if (!ghes_estatus_pool) 249 return -ENOMEM; 250 return 0; 251 } 252 253 static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, 254 struct gen_pool_chunk *chunk, 255 void *data) 256 { 257 free_page(chunk->start_addr); 258 } 259 260 static void ghes_estatus_pool_exit(void) 261 { 262 gen_pool_for_each_chunk(ghes_estatus_pool, 263 ghes_estatus_pool_free_chunk_page, NULL); 264 gen_pool_destroy(ghes_estatus_pool); 265 } 266 267 static int ghes_estatus_pool_expand(unsigned long len) 268 { 269 unsigned long i, pages, size, addr; 270 int ret; 271 272 ghes_estatus_pool_size_request += PAGE_ALIGN(len); 273 size = gen_pool_size(ghes_estatus_pool); 274 if (size >= ghes_estatus_pool_size_request) 275 return 0; 276 pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; 277 for (i = 0; i < pages; i++) { 278 addr = __get_free_page(GFP_KERNEL); 279 if (!addr) 280 return -ENOMEM; 281 ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); 282 if (ret) 283 return ret; 284 } 285 286 return 0; 287 } 288 289 static void ghes_estatus_pool_shrink(unsigned long len) 290 { 291 ghes_estatus_pool_size_request -= PAGE_ALIGN(len); 292 } 293 294 static struct ghes *ghes_new(struct acpi_hest_generic *generic) 295 { 296 struct ghes *ghes; 297 unsigned int error_block_length; 298 int rc; 299 300 ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); 301 if (!ghes) 302 return ERR_PTR(-ENOMEM); 303 ghes->generic = generic; 304 rc = acpi_os_map_generic_address(&generic->error_status_address); 305 if (rc) 306 goto err_free; 307 error_block_length = generic->error_block_length; 308 if (error_block_length > GHES_ESTATUS_MAX_SIZE) { 309 pr_warning(FW_WARN GHES_PFX 310 "Error status block length is too long: %u for " 311 "generic hardware error source: %d.\n", 312 error_block_length, generic->header.source_id); 313 error_block_length = GHES_ESTATUS_MAX_SIZE; 314 } 315 ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); 316 if (!ghes->estatus) { 317 rc = -ENOMEM; 318 goto err_unmap; 319 } 320 321 return ghes; 322 323 err_unmap: 324 acpi_os_unmap_generic_address(&generic->error_status_address); 325 err_free: 326 kfree(ghes); 327 return ERR_PTR(rc); 328 } 329 330 static void ghes_fini(struct ghes *ghes) 331 { 332 kfree(ghes->estatus); 333 acpi_os_unmap_generic_address(&ghes->generic->error_status_address); 334 } 335 336 enum { 337 GHES_SEV_NO = 0x0, 338 GHES_SEV_CORRECTED = 0x1, 339 GHES_SEV_RECOVERABLE = 0x2, 340 GHES_SEV_PANIC = 0x3, 341 }; 342 343 static inline int ghes_severity(int severity) 344 { 345 switch (severity) { 346 case CPER_SEV_INFORMATIONAL: 347 return GHES_SEV_NO; 348 case CPER_SEV_CORRECTED: 349 return GHES_SEV_CORRECTED; 350 case CPER_SEV_RECOVERABLE: 351 return GHES_SEV_RECOVERABLE; 352 case CPER_SEV_FATAL: 353 return GHES_SEV_PANIC; 354 default: 355 /* Unknown, go panic */ 356 return GHES_SEV_PANIC; 357 } 358 } 359 360 static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, 361 int from_phys) 362 { 363 void __iomem *vaddr; 364 unsigned long flags = 0; 365 int in_nmi = in_nmi(); 366 u64 offset; 367 u32 trunk; 368 369 while (len > 0) { 370 offset = paddr - (paddr & PAGE_MASK); 371 if (in_nmi) { 372 raw_spin_lock(&ghes_ioremap_lock_nmi); 373 vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT); 374 } else { 375 spin_lock_irqsave(&ghes_ioremap_lock_irq, flags); 376 vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT); 377 } 378 trunk = PAGE_SIZE - offset; 379 trunk = min(trunk, len); 380 if (from_phys) 381 memcpy_fromio(buffer, vaddr + offset, trunk); 382 else 383 memcpy_toio(vaddr + offset, buffer, trunk); 384 len -= trunk; 385 paddr += trunk; 386 buffer += trunk; 387 if (in_nmi) { 388 ghes_iounmap_nmi(vaddr); 389 raw_spin_unlock(&ghes_ioremap_lock_nmi); 390 } else { 391 ghes_iounmap_irq(vaddr); 392 spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags); 393 } 394 } 395 } 396 397 static int ghes_read_estatus(struct ghes *ghes, int silent) 398 { 399 struct acpi_hest_generic *g = ghes->generic; 400 u64 buf_paddr; 401 u32 len; 402 int rc; 403 404 rc = apei_read(&buf_paddr, &g->error_status_address); 405 if (rc) { 406 if (!silent && printk_ratelimit()) 407 pr_warning(FW_WARN GHES_PFX 408 "Failed to read error status block address for hardware error source: %d.\n", 409 g->header.source_id); 410 return -EIO; 411 } 412 if (!buf_paddr) 413 return -ENOENT; 414 415 ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, 416 sizeof(*ghes->estatus), 1); 417 if (!ghes->estatus->block_status) 418 return -ENOENT; 419 420 ghes->buffer_paddr = buf_paddr; 421 ghes->flags |= GHES_TO_CLEAR; 422 423 rc = -EIO; 424 len = apei_estatus_len(ghes->estatus); 425 if (len < sizeof(*ghes->estatus)) 426 goto err_read_block; 427 if (len > ghes->generic->error_block_length) 428 goto err_read_block; 429 if (apei_estatus_check_header(ghes->estatus)) 430 goto err_read_block; 431 ghes_copy_tofrom_phys(ghes->estatus + 1, 432 buf_paddr + sizeof(*ghes->estatus), 433 len - sizeof(*ghes->estatus), 1); 434 if (apei_estatus_check(ghes->estatus)) 435 goto err_read_block; 436 rc = 0; 437 438 err_read_block: 439 if (rc && !silent && printk_ratelimit()) 440 pr_warning(FW_WARN GHES_PFX 441 "Failed to read error status block!\n"); 442 return rc; 443 } 444 445 static void ghes_clear_estatus(struct ghes *ghes) 446 { 447 ghes->estatus->block_status = 0; 448 if (!(ghes->flags & GHES_TO_CLEAR)) 449 return; 450 ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, 451 sizeof(ghes->estatus->block_status), 0); 452 ghes->flags &= ~GHES_TO_CLEAR; 453 } 454 455 static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) 456 { 457 int sev, sec_sev; 458 struct acpi_hest_generic_data *gdata; 459 460 sev = ghes_severity(estatus->error_severity); 461 apei_estatus_for_each_section(estatus, gdata) { 462 sec_sev = ghes_severity(gdata->error_severity); 463 if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, 464 CPER_SEC_PLATFORM_MEM)) { 465 struct cper_sec_mem_err *mem_err; 466 mem_err = (struct cper_sec_mem_err *)(gdata+1); 467 #ifdef CONFIG_X86_MCE 468 apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, 469 mem_err); 470 #endif 471 #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE 472 if (sev == GHES_SEV_RECOVERABLE && 473 sec_sev == GHES_SEV_RECOVERABLE && 474 mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { 475 unsigned long pfn; 476 pfn = mem_err->physical_addr >> PAGE_SHIFT; 477 memory_failure_queue(pfn, 0, 0); 478 } 479 #endif 480 } 481 #ifdef CONFIG_ACPI_APEI_PCIEAER 482 else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, 483 CPER_SEC_PCIE)) { 484 struct cper_sec_pcie *pcie_err; 485 pcie_err = (struct cper_sec_pcie *)(gdata+1); 486 if (sev == GHES_SEV_RECOVERABLE && 487 sec_sev == GHES_SEV_RECOVERABLE && 488 pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && 489 pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { 490 unsigned int devfn; 491 int aer_severity; 492 devfn = PCI_DEVFN(pcie_err->device_id.device, 493 pcie_err->device_id.function); 494 aer_severity = cper_severity_to_aer(sev); 495 aer_recover_queue(pcie_err->device_id.segment, 496 pcie_err->device_id.bus, 497 devfn, aer_severity); 498 } 499 500 } 501 #endif 502 } 503 } 504 505 static void __ghes_print_estatus(const char *pfx, 506 const struct acpi_hest_generic *generic, 507 const struct acpi_hest_generic_status *estatus) 508 { 509 static atomic_t seqno; 510 unsigned int curr_seqno; 511 char pfx_seq[64]; 512 513 if (pfx == NULL) { 514 if (ghes_severity(estatus->error_severity) <= 515 GHES_SEV_CORRECTED) 516 pfx = KERN_WARNING; 517 else 518 pfx = KERN_ERR; 519 } 520 curr_seqno = atomic_inc_return(&seqno); 521 snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno); 522 printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", 523 pfx_seq, generic->header.source_id); 524 apei_estatus_print(pfx_seq, estatus); 525 } 526 527 static int ghes_print_estatus(const char *pfx, 528 const struct acpi_hest_generic *generic, 529 const struct acpi_hest_generic_status *estatus) 530 { 531 /* Not more than 2 messages every 5 seconds */ 532 static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); 533 static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); 534 struct ratelimit_state *ratelimit; 535 536 if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) 537 ratelimit = &ratelimit_corrected; 538 else 539 ratelimit = &ratelimit_uncorrected; 540 if (__ratelimit(ratelimit)) { 541 __ghes_print_estatus(pfx, generic, estatus); 542 return 1; 543 } 544 return 0; 545 } 546 547 /* 548 * GHES error status reporting throttle, to report more kinds of 549 * errors, instead of just most frequently occurred errors. 550 */ 551 static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) 552 { 553 u32 len; 554 int i, cached = 0; 555 unsigned long long now; 556 struct ghes_estatus_cache *cache; 557 struct acpi_hest_generic_status *cache_estatus; 558 559 len = apei_estatus_len(estatus); 560 rcu_read_lock(); 561 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { 562 cache = rcu_dereference(ghes_estatus_caches[i]); 563 if (cache == NULL) 564 continue; 565 if (len != cache->estatus_len) 566 continue; 567 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); 568 if (memcmp(estatus, cache_estatus, len)) 569 continue; 570 atomic_inc(&cache->count); 571 now = sched_clock(); 572 if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) 573 cached = 1; 574 break; 575 } 576 rcu_read_unlock(); 577 return cached; 578 } 579 580 static struct ghes_estatus_cache *ghes_estatus_cache_alloc( 581 struct acpi_hest_generic *generic, 582 struct acpi_hest_generic_status *estatus) 583 { 584 int alloced; 585 u32 len, cache_len; 586 struct ghes_estatus_cache *cache; 587 struct acpi_hest_generic_status *cache_estatus; 588 589 alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); 590 if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { 591 atomic_dec(&ghes_estatus_cache_alloced); 592 return NULL; 593 } 594 len = apei_estatus_len(estatus); 595 cache_len = GHES_ESTATUS_CACHE_LEN(len); 596 cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); 597 if (!cache) { 598 atomic_dec(&ghes_estatus_cache_alloced); 599 return NULL; 600 } 601 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); 602 memcpy(cache_estatus, estatus, len); 603 cache->estatus_len = len; 604 atomic_set(&cache->count, 0); 605 cache->generic = generic; 606 cache->time_in = sched_clock(); 607 return cache; 608 } 609 610 static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) 611 { 612 u32 len; 613 614 len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); 615 len = GHES_ESTATUS_CACHE_LEN(len); 616 gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); 617 atomic_dec(&ghes_estatus_cache_alloced); 618 } 619 620 static void ghes_estatus_cache_rcu_free(struct rcu_head *head) 621 { 622 struct ghes_estatus_cache *cache; 623 624 cache = container_of(head, struct ghes_estatus_cache, rcu); 625 ghes_estatus_cache_free(cache); 626 } 627 628 static void ghes_estatus_cache_add( 629 struct acpi_hest_generic *generic, 630 struct acpi_hest_generic_status *estatus) 631 { 632 int i, slot = -1, count; 633 unsigned long long now, duration, period, max_period = 0; 634 struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; 635 636 new_cache = ghes_estatus_cache_alloc(generic, estatus); 637 if (new_cache == NULL) 638 return; 639 rcu_read_lock(); 640 now = sched_clock(); 641 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { 642 cache = rcu_dereference(ghes_estatus_caches[i]); 643 if (cache == NULL) { 644 slot = i; 645 slot_cache = NULL; 646 break; 647 } 648 duration = now - cache->time_in; 649 if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { 650 slot = i; 651 slot_cache = cache; 652 break; 653 } 654 count = atomic_read(&cache->count); 655 period = duration; 656 do_div(period, (count + 1)); 657 if (period > max_period) { 658 max_period = period; 659 slot = i; 660 slot_cache = cache; 661 } 662 } 663 /* new_cache must be put into array after its contents are written */ 664 smp_wmb(); 665 if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, 666 slot_cache, new_cache) == slot_cache) { 667 if (slot_cache) 668 call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); 669 } else 670 ghes_estatus_cache_free(new_cache); 671 rcu_read_unlock(); 672 } 673 674 static int ghes_proc(struct ghes *ghes) 675 { 676 int rc; 677 678 rc = ghes_read_estatus(ghes, 0); 679 if (rc) 680 goto out; 681 if (!ghes_estatus_cached(ghes->estatus)) { 682 if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) 683 ghes_estatus_cache_add(ghes->generic, ghes->estatus); 684 } 685 ghes_do_proc(ghes->estatus); 686 out: 687 ghes_clear_estatus(ghes); 688 return 0; 689 } 690 691 static void ghes_add_timer(struct ghes *ghes) 692 { 693 struct acpi_hest_generic *g = ghes->generic; 694 unsigned long expire; 695 696 if (!g->notify.poll_interval) { 697 pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", 698 g->header.source_id); 699 return; 700 } 701 expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); 702 ghes->timer.expires = round_jiffies_relative(expire); 703 add_timer(&ghes->timer); 704 } 705 706 static void ghes_poll_func(unsigned long data) 707 { 708 struct ghes *ghes = (void *)data; 709 710 ghes_proc(ghes); 711 if (!(ghes->flags & GHES_EXITING)) 712 ghes_add_timer(ghes); 713 } 714 715 static irqreturn_t ghes_irq_func(int irq, void *data) 716 { 717 struct ghes *ghes = data; 718 int rc; 719 720 rc = ghes_proc(ghes); 721 if (rc) 722 return IRQ_NONE; 723 724 return IRQ_HANDLED; 725 } 726 727 static int ghes_notify_sci(struct notifier_block *this, 728 unsigned long event, void *data) 729 { 730 struct ghes *ghes; 731 int ret = NOTIFY_DONE; 732 733 rcu_read_lock(); 734 list_for_each_entry_rcu(ghes, &ghes_sci, list) { 735 if (!ghes_proc(ghes)) 736 ret = NOTIFY_OK; 737 } 738 rcu_read_unlock(); 739 740 return ret; 741 } 742 743 static struct llist_node *llist_nodes_reverse(struct llist_node *llnode) 744 { 745 struct llist_node *next, *tail = NULL; 746 747 while (llnode) { 748 next = llnode->next; 749 llnode->next = tail; 750 tail = llnode; 751 llnode = next; 752 } 753 754 return tail; 755 } 756 757 static void ghes_proc_in_irq(struct irq_work *irq_work) 758 { 759 struct llist_node *llnode, *next; 760 struct ghes_estatus_node *estatus_node; 761 struct acpi_hest_generic *generic; 762 struct acpi_hest_generic_status *estatus; 763 u32 len, node_len; 764 765 llnode = llist_del_all(&ghes_estatus_llist); 766 /* 767 * Because the time order of estatus in list is reversed, 768 * revert it back to proper order. 769 */ 770 llnode = llist_nodes_reverse(llnode); 771 while (llnode) { 772 next = llnode->next; 773 estatus_node = llist_entry(llnode, struct ghes_estatus_node, 774 llnode); 775 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 776 len = apei_estatus_len(estatus); 777 node_len = GHES_ESTATUS_NODE_LEN(len); 778 ghes_do_proc(estatus); 779 if (!ghes_estatus_cached(estatus)) { 780 generic = estatus_node->generic; 781 if (ghes_print_estatus(NULL, generic, estatus)) 782 ghes_estatus_cache_add(generic, estatus); 783 } 784 gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, 785 node_len); 786 llnode = next; 787 } 788 } 789 790 static void ghes_print_queued_estatus(void) 791 { 792 struct llist_node *llnode; 793 struct ghes_estatus_node *estatus_node; 794 struct acpi_hest_generic *generic; 795 struct acpi_hest_generic_status *estatus; 796 u32 len, node_len; 797 798 llnode = llist_del_all(&ghes_estatus_llist); 799 /* 800 * Because the time order of estatus in list is reversed, 801 * revert it back to proper order. 802 */ 803 llnode = llist_nodes_reverse(llnode); 804 while (llnode) { 805 estatus_node = llist_entry(llnode, struct ghes_estatus_node, 806 llnode); 807 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 808 len = apei_estatus_len(estatus); 809 node_len = GHES_ESTATUS_NODE_LEN(len); 810 generic = estatus_node->generic; 811 ghes_print_estatus(NULL, generic, estatus); 812 llnode = llnode->next; 813 } 814 } 815 816 static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) 817 { 818 struct ghes *ghes, *ghes_global = NULL; 819 int sev, sev_global = -1; 820 int ret = NMI_DONE; 821 822 raw_spin_lock(&ghes_nmi_lock); 823 list_for_each_entry_rcu(ghes, &ghes_nmi, list) { 824 if (ghes_read_estatus(ghes, 1)) { 825 ghes_clear_estatus(ghes); 826 continue; 827 } 828 sev = ghes_severity(ghes->estatus->error_severity); 829 if (sev > sev_global) { 830 sev_global = sev; 831 ghes_global = ghes; 832 } 833 ret = NMI_HANDLED; 834 } 835 836 if (ret == NMI_DONE) 837 goto out; 838 839 if (sev_global >= GHES_SEV_PANIC) { 840 oops_begin(); 841 ghes_print_queued_estatus(); 842 __ghes_print_estatus(KERN_EMERG, ghes_global->generic, 843 ghes_global->estatus); 844 /* reboot to log the error! */ 845 if (panic_timeout == 0) 846 panic_timeout = ghes_panic_timeout; 847 panic("Fatal hardware error!"); 848 } 849 850 list_for_each_entry_rcu(ghes, &ghes_nmi, list) { 851 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG 852 u32 len, node_len; 853 struct ghes_estatus_node *estatus_node; 854 struct acpi_hest_generic_status *estatus; 855 #endif 856 if (!(ghes->flags & GHES_TO_CLEAR)) 857 continue; 858 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG 859 if (ghes_estatus_cached(ghes->estatus)) 860 goto next; 861 /* Save estatus for further processing in IRQ context */ 862 len = apei_estatus_len(ghes->estatus); 863 node_len = GHES_ESTATUS_NODE_LEN(len); 864 estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, 865 node_len); 866 if (estatus_node) { 867 estatus_node->generic = ghes->generic; 868 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 869 memcpy(estatus, ghes->estatus, len); 870 llist_add(&estatus_node->llnode, &ghes_estatus_llist); 871 } 872 next: 873 #endif 874 ghes_clear_estatus(ghes); 875 } 876 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG 877 irq_work_queue(&ghes_proc_irq_work); 878 #endif 879 880 out: 881 raw_spin_unlock(&ghes_nmi_lock); 882 return ret; 883 } 884 885 static struct notifier_block ghes_notifier_sci = { 886 .notifier_call = ghes_notify_sci, 887 }; 888 889 static unsigned long ghes_esource_prealloc_size( 890 const struct acpi_hest_generic *generic) 891 { 892 unsigned long block_length, prealloc_records, prealloc_size; 893 894 block_length = min_t(unsigned long, generic->error_block_length, 895 GHES_ESTATUS_MAX_SIZE); 896 prealloc_records = max_t(unsigned long, 897 generic->records_to_preallocate, 1); 898 prealloc_size = min_t(unsigned long, block_length * prealloc_records, 899 GHES_ESOURCE_PREALLOC_MAX_SIZE); 900 901 return prealloc_size; 902 } 903 904 static int __devinit ghes_probe(struct platform_device *ghes_dev) 905 { 906 struct acpi_hest_generic *generic; 907 struct ghes *ghes = NULL; 908 unsigned long len; 909 int rc = -EINVAL; 910 911 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; 912 if (!generic->enabled) 913 return -ENODEV; 914 915 switch (generic->notify.type) { 916 case ACPI_HEST_NOTIFY_POLLED: 917 case ACPI_HEST_NOTIFY_EXTERNAL: 918 case ACPI_HEST_NOTIFY_SCI: 919 case ACPI_HEST_NOTIFY_NMI: 920 break; 921 case ACPI_HEST_NOTIFY_LOCAL: 922 pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", 923 generic->header.source_id); 924 goto err; 925 default: 926 pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", 927 generic->notify.type, generic->header.source_id); 928 goto err; 929 } 930 931 rc = -EIO; 932 if (generic->error_block_length < 933 sizeof(struct acpi_hest_generic_status)) { 934 pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", 935 generic->error_block_length, 936 generic->header.source_id); 937 goto err; 938 } 939 ghes = ghes_new(generic); 940 if (IS_ERR(ghes)) { 941 rc = PTR_ERR(ghes); 942 ghes = NULL; 943 goto err; 944 } 945 switch (generic->notify.type) { 946 case ACPI_HEST_NOTIFY_POLLED: 947 ghes->timer.function = ghes_poll_func; 948 ghes->timer.data = (unsigned long)ghes; 949 init_timer_deferrable(&ghes->timer); 950 ghes_add_timer(ghes); 951 break; 952 case ACPI_HEST_NOTIFY_EXTERNAL: 953 /* External interrupt vector is GSI */ 954 if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) { 955 pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", 956 generic->header.source_id); 957 goto err; 958 } 959 if (request_irq(ghes->irq, ghes_irq_func, 960 0, "GHES IRQ", ghes)) { 961 pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", 962 generic->header.source_id); 963 goto err; 964 } 965 break; 966 case ACPI_HEST_NOTIFY_SCI: 967 mutex_lock(&ghes_list_mutex); 968 if (list_empty(&ghes_sci)) 969 register_acpi_hed_notifier(&ghes_notifier_sci); 970 list_add_rcu(&ghes->list, &ghes_sci); 971 mutex_unlock(&ghes_list_mutex); 972 break; 973 case ACPI_HEST_NOTIFY_NMI: 974 len = ghes_esource_prealloc_size(generic); 975 ghes_estatus_pool_expand(len); 976 mutex_lock(&ghes_list_mutex); 977 if (list_empty(&ghes_nmi)) 978 register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, 979 "ghes"); 980 list_add_rcu(&ghes->list, &ghes_nmi); 981 mutex_unlock(&ghes_list_mutex); 982 break; 983 default: 984 BUG(); 985 } 986 platform_set_drvdata(ghes_dev, ghes); 987 988 return 0; 989 err: 990 if (ghes) { 991 ghes_fini(ghes); 992 kfree(ghes); 993 } 994 return rc; 995 } 996 997 static int __devexit ghes_remove(struct platform_device *ghes_dev) 998 { 999 struct ghes *ghes; 1000 struct acpi_hest_generic *generic; 1001 unsigned long len; 1002 1003 ghes = platform_get_drvdata(ghes_dev); 1004 generic = ghes->generic; 1005 1006 ghes->flags |= GHES_EXITING; 1007 switch (generic->notify.type) { 1008 case ACPI_HEST_NOTIFY_POLLED: 1009 del_timer_sync(&ghes->timer); 1010 break; 1011 case ACPI_HEST_NOTIFY_EXTERNAL: 1012 free_irq(ghes->irq, ghes); 1013 break; 1014 case ACPI_HEST_NOTIFY_SCI: 1015 mutex_lock(&ghes_list_mutex); 1016 list_del_rcu(&ghes->list); 1017 if (list_empty(&ghes_sci)) 1018 unregister_acpi_hed_notifier(&ghes_notifier_sci); 1019 mutex_unlock(&ghes_list_mutex); 1020 break; 1021 case ACPI_HEST_NOTIFY_NMI: 1022 mutex_lock(&ghes_list_mutex); 1023 list_del_rcu(&ghes->list); 1024 if (list_empty(&ghes_nmi)) 1025 unregister_nmi_handler(NMI_LOCAL, "ghes"); 1026 mutex_unlock(&ghes_list_mutex); 1027 /* 1028 * To synchronize with NMI handler, ghes can only be 1029 * freed after NMI handler finishes. 1030 */ 1031 synchronize_rcu(); 1032 len = ghes_esource_prealloc_size(generic); 1033 ghes_estatus_pool_shrink(len); 1034 break; 1035 default: 1036 BUG(); 1037 break; 1038 } 1039 1040 ghes_fini(ghes); 1041 kfree(ghes); 1042 1043 platform_set_drvdata(ghes_dev, NULL); 1044 1045 return 0; 1046 } 1047 1048 static struct platform_driver ghes_platform_driver = { 1049 .driver = { 1050 .name = "GHES", 1051 .owner = THIS_MODULE, 1052 }, 1053 .probe = ghes_probe, 1054 .remove = ghes_remove, 1055 }; 1056 1057 static int __init ghes_init(void) 1058 { 1059 int rc; 1060 1061 if (acpi_disabled) 1062 return -ENODEV; 1063 1064 if (hest_disable) { 1065 pr_info(GHES_PFX "HEST is not enabled!\n"); 1066 return -EINVAL; 1067 } 1068 1069 if (ghes_disable) { 1070 pr_info(GHES_PFX "GHES is not enabled!\n"); 1071 return -EINVAL; 1072 } 1073 1074 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); 1075 1076 rc = ghes_ioremap_init(); 1077 if (rc) 1078 goto err; 1079 1080 rc = ghes_estatus_pool_init(); 1081 if (rc) 1082 goto err_ioremap_exit; 1083 1084 rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * 1085 GHES_ESTATUS_CACHE_ALLOCED_MAX); 1086 if (rc) 1087 goto err_pool_exit; 1088 1089 rc = platform_driver_register(&ghes_platform_driver); 1090 if (rc) 1091 goto err_pool_exit; 1092 1093 rc = apei_osc_setup(); 1094 if (rc == 0 && osc_sb_apei_support_acked) 1095 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); 1096 else if (rc == 0 && !osc_sb_apei_support_acked) 1097 pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); 1098 else if (rc && osc_sb_apei_support_acked) 1099 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); 1100 else 1101 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); 1102 1103 return 0; 1104 err_pool_exit: 1105 ghes_estatus_pool_exit(); 1106 err_ioremap_exit: 1107 ghes_ioremap_exit(); 1108 err: 1109 return rc; 1110 } 1111 1112 static void __exit ghes_exit(void) 1113 { 1114 platform_driver_unregister(&ghes_platform_driver); 1115 ghes_estatus_pool_exit(); 1116 ghes_ioremap_exit(); 1117 } 1118 1119 module_init(ghes_init); 1120 module_exit(ghes_exit); 1121 1122 MODULE_AUTHOR("Huang Ying"); 1123 MODULE_DESCRIPTION("APEI Generic Hardware Error Source support"); 1124 MODULE_LICENSE("GPL"); 1125 MODULE_ALIAS("platform:GHES"); 1126