/*
 * APEI Generic Hardware Error Source support
 *
 * Generic Hardware Error Source provides a way to report platform
 * hardware errors (such as those from the chipset). It works in the
 * so-called "Firmware First" mode: hardware errors are reported to
 * firmware first, then reported to Linux by the firmware. This way,
 * some non-standard hardware error registers or non-standard hardware
 * links can be checked by the firmware to produce more hardware error
 * information for Linux.
 *
 * For more information about Generic Hardware Error Source, please
 * refer to ACPI Specification version 4.0, section 17.3.2.6
 *
 * Copyright 2010,2011 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation;
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/kdebug.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/nmi.h>
#include <linux/sched/clock.h>

#include <acpi/ghes.h>
#include <acpi/apei.h>
#include <asm/tlbflush.h>

#include "apei-internal.h"

#define GHES_PFX	"GHES: "

#define GHES_ESTATUS_MAX_SIZE		65536
#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536

#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE	512

#define GHES_ESTATUS_CACHES_SIZE	4

#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
/* Prevent too many caches from being allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)

#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))

/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);
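/*
 * Since this code is built-in, the parameter above is set from the
 * kernel command line as "ghes.disable=1" (module name prefix plus
 * parameter name).
 */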
/*
 * All error sources notified with SCI share one notifier function, so
 * they need to be linked and checked one by one. The same applies to
 * NMI.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
static LIST_HEAD(ghes_sci);
static DEFINE_MUTEX(ghes_list_mutex);

/*
 * Because the memory area used to transfer the hardware error
 * information from the BIOS to Linux can be determined only in the
 * NMI, IRQ or timer handler, and the general ioremap cannot be used in
 * atomic context, a special version of atomic ioremap is implemented
 * for that.
 */

/*
 * Two virtual pages are used, one for IRQ/PROCESS context, the other
 * for NMI context (optionally).
 */
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
#define GHES_IOREMAP_PAGES		2
#else
#define GHES_IOREMAP_PAGES		1
#endif
#define GHES_IOREMAP_IRQ_PAGE(base)	(base)
#define GHES_IOREMAP_NMI_PAGE(base)	((base) + PAGE_SIZE)

/* virtual memory area for atomic ioremap */
static struct vm_struct *ghes_ioremap_area;
/*
 * These two spinlocks are used to prevent the atomic ioremap virtual
 * memory areas from being mapped simultaneously.
 */
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);

static struct gen_pool *ghes_estatus_pool;
static unsigned long ghes_estatus_pool_size_request;

static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;

static int ghes_ioremap_init(void)
{
	ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
					  VM_IOREMAP, VMALLOC_START, VMALLOC_END);
	if (!ghes_ioremap_area) {
		pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n");
		return -ENOMEM;
	}

	return 0;
}

static void ghes_ioremap_exit(void)
{
	free_vm_area(ghes_ioremap_area);
}

static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
{
	unsigned long vaddr;

	vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
	ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
			   pfn << PAGE_SHIFT, PAGE_KERNEL);

	return (void __iomem *)vaddr;
}

static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
{
	unsigned long vaddr, paddr;
	pgprot_t prot;

	vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);

	paddr = pfn << PAGE_SHIFT;
	prot = arch_apei_get_mem_attribute(paddr);

	ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);

	return (void __iomem *)vaddr;
}

static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
{
	unsigned long vaddr = (unsigned long __force)vaddr_ptr;
	void *base = ghes_ioremap_area->addr;

	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
	arch_apei_flush_tlb_one(vaddr);
}

static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
{
	unsigned long vaddr = (unsigned long __force)vaddr_ptr;
	void *base = ghes_ioremap_area->addr;

	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
	arch_apei_flush_tlb_one(vaddr);
}
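/*
 * The functions below manage a lock-less memory pool (gen_pool) that
 * backs the error status caches and the NMI estatus nodes; a normal
 * allocator cannot be used in those atomic contexts. The pool grows
 * one page at a time in ghes_estatus_pool_expand().
 */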
static int ghes_estatus_pool_init(void)
{
	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
	if (!ghes_estatus_pool)
		return -ENOMEM;
	return 0;
}

static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
					      struct gen_pool_chunk *chunk,
					      void *data)
{
	free_page(chunk->start_addr);
}

static void ghes_estatus_pool_exit(void)
{
	gen_pool_for_each_chunk(ghes_estatus_pool,
				ghes_estatus_pool_free_chunk_page, NULL);
	gen_pool_destroy(ghes_estatus_pool);
}

static int ghes_estatus_pool_expand(unsigned long len)
{
	unsigned long i, pages, size, addr;
	int ret;

	ghes_estatus_pool_size_request += PAGE_ALIGN(len);
	size = gen_pool_size(ghes_estatus_pool);
	if (size >= ghes_estatus_pool_size_request)
		return 0;
	pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
	for (i = 0; i < pages; i++) {
		addr = __get_free_page(GFP_KERNEL);
		if (!addr)
			return -ENOMEM;
		ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
		if (ret)
			return ret;
	}

	return 0;
}

static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
	struct ghes *ghes;
	unsigned int error_block_length;
	int rc;

	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
	if (!ghes)
		return ERR_PTR(-ENOMEM);
	ghes->generic = generic;
	rc = apei_map_generic_address(&generic->error_status_address);
	if (rc)
		goto err_free;
	error_block_length = generic->error_block_length;
	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
		pr_warning(FW_WARN GHES_PFX
			   "Error status block length is too long: %u for "
			   "generic hardware error source: %d.\n",
			   error_block_length, generic->header.source_id);
		error_block_length = GHES_ESTATUS_MAX_SIZE;
	}
	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
	if (!ghes->estatus) {
		rc = -ENOMEM;
		goto err_unmap;
	}

	return ghes;

err_unmap:
	apei_unmap_generic_address(&generic->error_status_address);
err_free:
	kfree(ghes);
	return ERR_PTR(rc);
}

static void ghes_fini(struct ghes *ghes)
{
	kfree(ghes->estatus);
	apei_unmap_generic_address(&ghes->generic->error_status_address);
}

static inline int ghes_severity(int severity)
{
	switch (severity) {
	case CPER_SEV_INFORMATIONAL:
		return GHES_SEV_NO;
	case CPER_SEV_CORRECTED:
		return GHES_SEV_CORRECTED;
	case CPER_SEV_RECOVERABLE:
		return GHES_SEV_RECOVERABLE;
	case CPER_SEV_FATAL:
		return GHES_SEV_PANIC;
	default:
		/* Unknown, go panic */
		return GHES_SEV_PANIC;
	}
}
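/*
 * ghes_copy_tofrom_phys() copies between a kernel buffer and physical
 * memory at most one page at a time, taking the atomic ioremap slot
 * and the spinlock that match the current context (NMI vs. IRQ/process).
 */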
static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
				  int from_phys)
{
	void __iomem *vaddr;
	unsigned long flags = 0;
	int in_nmi = in_nmi();
	u64 offset;
	u32 trunk;

	while (len > 0) {
		offset = paddr - (paddr & PAGE_MASK);
		if (in_nmi) {
			raw_spin_lock(&ghes_ioremap_lock_nmi);
			vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
		} else {
			spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
			vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
		}
		trunk = PAGE_SIZE - offset;
		trunk = min(trunk, len);
		if (from_phys)
			memcpy_fromio(buffer, vaddr + offset, trunk);
		else
			memcpy_toio(vaddr + offset, buffer, trunk);
		len -= trunk;
		paddr += trunk;
		buffer += trunk;
		if (in_nmi) {
			ghes_iounmap_nmi(vaddr);
			raw_spin_unlock(&ghes_ioremap_lock_nmi);
		} else {
			ghes_iounmap_irq(vaddr);
			spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
		}
	}
}

static int ghes_read_estatus(struct ghes *ghes, int silent)
{
	struct acpi_hest_generic *g = ghes->generic;
	u64 buf_paddr;
	u32 len;
	int rc;

	rc = apei_read(&buf_paddr, &g->error_status_address);
	if (rc) {
		if (!silent && printk_ratelimit())
			pr_warning(FW_WARN GHES_PFX
				   "Failed to read error status block address for hardware error source: %d.\n",
				   g->header.source_id);
		return -EIO;
	}
	if (!buf_paddr)
		return -ENOENT;

	ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
			      sizeof(*ghes->estatus), 1);
	if (!ghes->estatus->block_status)
		return -ENOENT;

	ghes->buffer_paddr = buf_paddr;
	ghes->flags |= GHES_TO_CLEAR;

	rc = -EIO;
	len = cper_estatus_len(ghes->estatus);
	if (len < sizeof(*ghes->estatus))
		goto err_read_block;
	if (len > ghes->generic->error_block_length)
		goto err_read_block;
	if (cper_estatus_check_header(ghes->estatus))
		goto err_read_block;
	ghes_copy_tofrom_phys(ghes->estatus + 1,
			      buf_paddr + sizeof(*ghes->estatus),
			      len - sizeof(*ghes->estatus), 1);
	if (cper_estatus_check(ghes->estatus))
		goto err_read_block;
	rc = 0;

err_read_block:
	if (rc && !silent && printk_ratelimit())
		pr_warning(FW_WARN GHES_PFX
			   "Failed to read error status block!\n");
	return rc;
}

static void ghes_clear_estatus(struct ghes *ghes)
{
	ghes->estatus->block_status = 0;
	if (!(ghes->flags & GHES_TO_CLEAR))
		return;
	ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
			      sizeof(ghes->estatus->block_status), 0);
	ghes->flags &= ~GHES_TO_CLEAR;
}

static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
	unsigned long pfn;
	int flags = -1;
	int sec_sev = ghes_severity(gdata->error_severity);
	struct cper_sec_mem_err *mem_err;

	mem_err = (struct cper_sec_mem_err *)(gdata + 1);

	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
		return;

	pfn = mem_err->physical_addr >> PAGE_SHIFT;
	if (!pfn_valid(pfn)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX
				    "Invalid address in generic error data: %#llx\n",
				    mem_err->physical_addr);
		return;
	}

	/* iff following two events can be handled properly by now */
	if (sec_sev == GHES_SEV_CORRECTED &&
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
		flags = MF_SOFT_OFFLINE;
	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
		flags = 0;

	if (flags != -1)
		memory_failure_queue(pfn, 0, flags);
#endif
}
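/*
 * ghes_do_proc() walks every section of an error status block and
 * dispatches by CPER section type UUID: memory errors go to EDAC and
 * the memory failure machinery, PCIe errors to the AER recovery queue.
 */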
static void ghes_do_proc(struct ghes *ghes,
			 const struct acpi_hest_generic_status *estatus)
{
	int sev, sec_sev;
	struct acpi_hest_generic_data *gdata;

	sev = ghes_severity(estatus->error_severity);
	apei_estatus_for_each_section(estatus, gdata) {
		sec_sev = ghes_severity(gdata->error_severity);
		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
				 CPER_SEC_PLATFORM_MEM)) {
			struct cper_sec_mem_err *mem_err;

			mem_err = (struct cper_sec_mem_err *)(gdata+1);
			ghes_edac_report_mem_error(ghes, sev, mem_err);

			arch_apei_report_mem_error(sev, mem_err);
			ghes_handle_memory_failure(gdata, sev);
		}
#ifdef CONFIG_ACPI_APEI_PCIEAER
		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
				      CPER_SEC_PCIE)) {
			struct cper_sec_pcie *pcie_err;

			pcie_err = (struct cper_sec_pcie *)(gdata+1);
			if (sev == GHES_SEV_RECOVERABLE &&
			    sec_sev == GHES_SEV_RECOVERABLE &&
			    pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
			    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
				unsigned int devfn;
				int aer_severity;

				devfn = PCI_DEVFN(pcie_err->device_id.device,
						  pcie_err->device_id.function);
				aer_severity = cper_severity_to_aer(gdata->error_severity);

				/*
				 * If firmware reset the component to contain
				 * the error, we must reinitialize it before
				 * use, so treat it as a fatal AER error.
				 */
				if (gdata->flags & CPER_SEC_RESET)
					aer_severity = AER_FATAL;

				aer_recover_queue(pcie_err->device_id.segment,
						  pcie_err->device_id.bus,
						  devfn, aer_severity,
						  (struct aer_capability_regs *)
						  pcie_err->aer_info);
			}

		}
#endif
	}
}

static void __ghes_print_estatus(const char *pfx,
				 const struct acpi_hest_generic *generic,
				 const struct acpi_hest_generic_status *estatus)
{
	static atomic_t seqno;
	unsigned int curr_seqno;
	char pfx_seq[64];

	if (pfx == NULL) {
		if (ghes_severity(estatus->error_severity) <=
		    GHES_SEV_CORRECTED)
			pfx = KERN_WARNING;
		else
			pfx = KERN_ERR;
	}
	curr_seqno = atomic_inc_return(&seqno);
	snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
	       pfx_seq, generic->header.source_id);
	cper_estatus_print(pfx_seq, estatus);
}

static int ghes_print_estatus(const char *pfx,
			      const struct acpi_hest_generic *generic,
			      const struct acpi_hest_generic_status *estatus)
{
	/* Not more than 2 messages every 5 seconds */
	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
	struct ratelimit_state *ratelimit;

	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
		ratelimit = &ratelimit_corrected;
	else
		ratelimit = &ratelimit_uncorrected;
	if (__ratelimit(ratelimit)) {
		__ghes_print_estatus(pfx, generic, estatus);
		return 1;
	}
	return 0;
}

/*
 * GHES error status reporting throttle, to report more kinds of
 * errors, instead of just the most frequently occurring ones.
 */
static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
{
	u32 len;
	int i, cached = 0;
	unsigned long long now;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	len = cper_estatus_len(estatus);
	rcu_read_lock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL)
			continue;
		if (len != cache->estatus_len)
			continue;
		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
		if (memcmp(estatus, cache_estatus, len))
			continue;
		atomic_inc(&cache->count);
		now = sched_clock();
		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
			cached = 1;
		break;
	}
	rcu_read_unlock();
	return cached;
}

static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int alloced;
	u32 len, cache_len;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	len = cper_estatus_len(estatus);
	cache_len = GHES_ESTATUS_CACHE_LEN(len);
	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
	if (!cache) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
	memcpy(cache_estatus, estatus, len);
	cache->estatus_len = len;
	atomic_set(&cache->count, 0);
	cache->generic = generic;
	cache->time_in = sched_clock();
	return cache;
}

static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
{
	u32 len;

	len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
	len = GHES_ESTATUS_CACHE_LEN(len);
	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
	atomic_dec(&ghes_estatus_cache_alloced);
}

static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
{
	struct ghes_estatus_cache *cache;

	cache = container_of(head, struct ghes_estatus_cache, rcu);
	ghes_estatus_cache_free(cache);
}
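/*
 * ghes_estatus_cache_add() installs a new cache entry with cmpxchg().
 * The victim slot is an empty slot, an expired entry, or failing that
 * the entry with the longest average interval between occurrences, so
 * that rarely seen errors do not monopolize the cache slots.
 */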
static void ghes_estatus_cache_add(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int i, slot = -1, count;
	unsigned long long now, duration, period, max_period = 0;
	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;

	new_cache = ghes_estatus_cache_alloc(generic, estatus);
	if (new_cache == NULL)
		return;
	rcu_read_lock();
	now = sched_clock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL) {
			slot = i;
			slot_cache = NULL;
			break;
		}
		duration = now - cache->time_in;
		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
			slot = i;
			slot_cache = cache;
			break;
		}
		count = atomic_read(&cache->count);
		period = duration;
		do_div(period, (count + 1));
		if (period > max_period) {
			max_period = period;
			slot = i;
			slot_cache = cache;
		}
	}
	/* new_cache must be put into array after its contents are written */
	smp_wmb();
	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
				  slot_cache, new_cache) == slot_cache) {
		if (slot_cache)
			call_rcu(&slot_cache->rcu,
				 ghes_estatus_cache_rcu_free);
	} else
		ghes_estatus_cache_free(new_cache);
	rcu_read_unlock();
}

static int ghes_proc(struct ghes *ghes)
{
	int rc;

	rc = ghes_read_estatus(ghes, 0);
	if (rc)
		goto out;
	if (!ghes_estatus_cached(ghes->estatus)) {
		if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
	}
	ghes_do_proc(ghes, ghes->estatus);
out:
	ghes_clear_estatus(ghes);
	return rc;
}

static void ghes_add_timer(struct ghes *ghes)
{
	struct acpi_hest_generic *g = ghes->generic;
	unsigned long expire;

	if (!g->notify.poll_interval) {
		pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
			   g->header.source_id);
		return;
	}
	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
	ghes->timer.expires = round_jiffies_relative(expire);
	add_timer(&ghes->timer);
}

static void ghes_poll_func(unsigned long data)
{
	struct ghes *ghes = (void *)data;

	ghes_proc(ghes);
	if (!(ghes->flags & GHES_EXITING))
		ghes_add_timer(ghes);
}

static irqreturn_t ghes_irq_func(int irq, void *data)
{
	struct ghes *ghes = data;
	int rc;

	rc = ghes_proc(ghes);
	if (rc)
		return IRQ_NONE;

	return IRQ_HANDLED;
}

static int ghes_notify_sci(struct notifier_block *this,
			   unsigned long event, void *data)
{
	struct ghes *ghes;
	int ret = NOTIFY_DONE;

	rcu_read_lock();
	list_for_each_entry_rcu(ghes, &ghes_sci, list) {
		if (!ghes_proc(ghes))
			ret = NOTIFY_OK;
	}
	rcu_read_unlock();

	return ret;
}

static struct notifier_block ghes_notifier_sci = {
	.notifier_call = ghes_notify_sci,
};

#ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
 * printk is not safe in NMI context. So in the NMI handler, we
 * allocate the required memory from the lock-less memory allocator
 * (ghes_estatus_pool), save the estatus into it, put it into the
 * lock-less list (ghes_estatus_llist), then delay printk into IRQ
 * context via irq_work (ghes_proc_irq_work).
 * ghes_estatus_pool_size_request records the pool size required by all
 * NMI error sources.
 */
static struct llist_head ghes_estatus_llist;
static struct irq_work ghes_proc_irq_work;

/*
 * NMI may be triggered on any CPU, so ghes_in_nmi is used to allow
 * only one concurrent reader.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

static LIST_HEAD(ghes_nmi);

static int ghes_panic_timeout	__read_mostly = 30;

static void ghes_proc_in_irq(struct irq_work *irq_work)
{
	struct llist_node *llnode, *next;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;
	u32 len, node_len;

	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * Because the time order of estatus in the list is reversed,
	 * revert it back to the proper order.
	 */
	llnode = llist_reverse_order(llnode);
	while (llnode) {
		next = llnode->next;
		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
					   llnode);
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		len = cper_estatus_len(estatus);
		node_len = GHES_ESTATUS_NODE_LEN(len);
		ghes_do_proc(estatus_node->ghes, estatus);
		if (!ghes_estatus_cached(estatus)) {
			generic = estatus_node->generic;
			if (ghes_print_estatus(NULL, generic, estatus))
				ghes_estatus_cache_add(generic, estatus);
		}
		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
			      node_len);
		llnode = next;
	}
}

static void ghes_print_queued_estatus(void)
{
	struct llist_node *llnode;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;
	u32 len, node_len;

	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * Because the time order of estatus in the list is reversed,
	 * revert it back to the proper order.
	 */
	llnode = llist_reverse_order(llnode);
	while (llnode) {
		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
					   llnode);
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		len = cper_estatus_len(estatus);
		node_len = GHES_ESTATUS_NODE_LEN(len);
		generic = estatus_node->generic;
		ghes_print_estatus(NULL, generic, estatus);
		llnode = llnode->next;
	}
}

/* Save estatus for further processing in IRQ context */
static void __process_error(struct ghes *ghes)
{
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	u32 len, node_len;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic_status *estatus;

	if (ghes_estatus_cached(ghes->estatus))
		return;

	len = cper_estatus_len(ghes->estatus);
	node_len = GHES_ESTATUS_NODE_LEN(len);

	estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
	if (!estatus_node)
		return;

	estatus_node->ghes = ghes;
	estatus_node->generic = ghes->generic;
	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
	memcpy(estatus, ghes->estatus, len);
	llist_add(&estatus_node->llnode, &ghes_estatus_llist);
#endif
}
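/*
 * __ghes_panic() is the end of the line for a fatal error: any queued
 * estatus records are printed first so they are not lost, then the
 * machine panics, rebooting after ghes_panic_timeout seconds unless
 * panic_timeout was already set on the command line.
 */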
static void __ghes_panic(struct ghes *ghes)
{
	oops_begin();
	ghes_print_queued_estatus();
	__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);

	/* reboot to log the error! */
	if (panic_timeout == 0)
		panic_timeout = ghes_panic_timeout;
	panic("Fatal hardware error!");
}

static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
	struct ghes *ghes;
	int sev, ret = NMI_DONE;

	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
		return ret;

	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
		if (ghes_read_estatus(ghes, 1)) {
			ghes_clear_estatus(ghes);
			continue;
		} else {
			ret = NMI_HANDLED;
		}

		sev = ghes_severity(ghes->estatus->error_severity);
		if (sev >= GHES_SEV_PANIC)
			__ghes_panic(ghes);

		if (!(ghes->flags & GHES_TO_CLEAR))
			continue;

		__process_error(ghes);
		ghes_clear_estatus(ghes);
	}

#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	if (ret == NMI_HANDLED)
		irq_work_queue(&ghes_proc_irq_work);
#endif
	atomic_dec(&ghes_in_nmi);
	return ret;
}

static unsigned long ghes_esource_prealloc_size(
	const struct acpi_hest_generic *generic)
{
	unsigned long block_length, prealloc_records, prealloc_size;

	block_length = min_t(unsigned long, generic->error_block_length,
			     GHES_ESTATUS_MAX_SIZE);
	prealloc_records = max_t(unsigned long,
				 generic->records_to_preallocate, 1);
	prealloc_size = min_t(unsigned long, block_length * prealloc_records,
			      GHES_ESOURCE_PREALLOC_MAX_SIZE);

	return prealloc_size;
}

static void ghes_estatus_pool_shrink(unsigned long len)
{
	ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
}

static void ghes_nmi_add(struct ghes *ghes)
{
	unsigned long len;

	len = ghes_esource_prealloc_size(ghes->generic);
	ghes_estatus_pool_expand(len);
	mutex_lock(&ghes_list_mutex);
	if (list_empty(&ghes_nmi))
		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
	list_add_rcu(&ghes->list, &ghes_nmi);
	mutex_unlock(&ghes_list_mutex);
}

static void ghes_nmi_remove(struct ghes *ghes)
{
	unsigned long len;

	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	if (list_empty(&ghes_nmi))
		unregister_nmi_handler(NMI_LOCAL, "ghes");
	mutex_unlock(&ghes_list_mutex);
	/*
	 * To synchronize with the NMI handler, ghes can only be freed
	 * after the NMI handler finishes.
	 */
	synchronize_rcu();
	len = ghes_esource_prealloc_size(ghes->generic);
	ghes_estatus_pool_shrink(len);
}

static void ghes_nmi_init_cxt(void)
{
	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
}
#else /* CONFIG_HAVE_ACPI_APEI_NMI */
static inline void ghes_nmi_add(struct ghes *ghes)
{
	pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
	       ghes->generic->header.source_id);
	BUG();
}

static inline void ghes_nmi_remove(struct ghes *ghes)
{
	pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
	       ghes->generic->header.source_id);
	BUG();
}

static inline void ghes_nmi_init_cxt(void)
{
}
#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
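/*
 * A platform device is created for each enabled generic error source
 * listed in the HEST; ghes_probe() binds to it and installs the
 * notification method (polling timer, external IRQ, SCI or NMI) that
 * the firmware declared for that source.
 */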
static int ghes_probe(struct platform_device *ghes_dev)
{
	struct acpi_hest_generic *generic;
	struct ghes *ghes = NULL;
	int rc = -EINVAL;

	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
	if (!generic->enabled)
		return -ENODEV;

	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
	case ACPI_HEST_NOTIFY_EXTERNAL:
	case ACPI_HEST_NOTIFY_SCI:
		break;
	case ACPI_HEST_NOTIFY_NMI:
		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
				generic->header.source_id);
			goto err;
		}
		break;
	case ACPI_HEST_NOTIFY_LOCAL:
		pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
			   generic->header.source_id);
		goto err;
	default:
		pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
			   generic->notify.type, generic->header.source_id);
		goto err;
	}

	rc = -EIO;
	if (generic->error_block_length <
	    sizeof(struct acpi_hest_generic_status)) {
		pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
			   generic->error_block_length,
			   generic->header.source_id);
		goto err;
	}
	ghes = ghes_new(generic);
	if (IS_ERR(ghes)) {
		rc = PTR_ERR(ghes);
		ghes = NULL;
		goto err;
	}

	rc = ghes_edac_register(ghes, &ghes_dev->dev);
	if (rc < 0)
		goto err;

	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		ghes->timer.function = ghes_poll_func;
		ghes->timer.data = (unsigned long)ghes;
		init_timer_deferrable(&ghes->timer);
		ghes_add_timer(ghes);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		/* External interrupt vector is GSI */
		rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
		if (rc) {
			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err_edac_unreg;
		}
		rc = request_irq(ghes->irq, ghes_irq_func, 0, "GHES IRQ", ghes);
		if (rc) {
			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err_edac_unreg;
		}
		break;
	case ACPI_HEST_NOTIFY_SCI:
		mutex_lock(&ghes_list_mutex);
		if (list_empty(&ghes_sci))
			register_acpi_hed_notifier(&ghes_notifier_sci);
		list_add_rcu(&ghes->list, &ghes_sci);
		mutex_unlock(&ghes_list_mutex);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		ghes_nmi_add(ghes);
		break;
	default:
		BUG();
	}
	platform_set_drvdata(ghes_dev, ghes);

	return 0;
err_edac_unreg:
	ghes_edac_unregister(ghes);
err:
	if (ghes) {
		ghes_fini(ghes);
		kfree(ghes);
	}
	return rc;
}

static int ghes_remove(struct platform_device *ghes_dev)
{
	struct ghes *ghes;
	struct acpi_hest_generic *generic;

	ghes = platform_get_drvdata(ghes_dev);
	generic = ghes->generic;

	ghes->flags |= GHES_EXITING;
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		del_timer_sync(&ghes->timer);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		free_irq(ghes->irq, ghes);
		break;
	case ACPI_HEST_NOTIFY_SCI:
		mutex_lock(&ghes_list_mutex);
		list_del_rcu(&ghes->list);
		if (list_empty(&ghes_sci))
			unregister_acpi_hed_notifier(&ghes_notifier_sci);
		mutex_unlock(&ghes_list_mutex);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		ghes_nmi_remove(ghes);
		break;
	default:
		BUG();
		break;
	}

	ghes_fini(ghes);

	ghes_edac_unregister(ghes);

	kfree(ghes);

	platform_set_drvdata(ghes_dev, NULL);

	return 0;
}

static struct platform_driver ghes_platform_driver = {
	.driver		= {
		.name	= "GHES",
	},
	.probe		= ghes_probe,
	.remove		= ghes_remove,
};

static int __init ghes_init(void)
{
	int rc;

	if (acpi_disabled)
		return -ENODEV;

	if (hest_disable) {
		pr_info(GHES_PFX "HEST is not enabled!\n");
		return -EINVAL;
	}

	if (ghes_disable) {
		pr_info(GHES_PFX "GHES is not enabled!\n");
		return -EINVAL;
	}

	ghes_nmi_init_cxt();

	rc = ghes_ioremap_init();
	if (rc)
		goto err;

	rc = ghes_estatus_pool_init();
	if (rc)
		goto err_ioremap_exit;

	rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
				      GHES_ESTATUS_CACHE_ALLOCED_MAX);
	if (rc)
		goto err_pool_exit;

	rc = platform_driver_register(&ghes_platform_driver);
	if (rc)
		goto err_pool_exit;

	rc = apei_osc_setup();
	if (rc == 0 && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
	else if (rc == 0 && !osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
	else if (rc && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
	else
		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

	return 0;
err_pool_exit:
	ghes_estatus_pool_exit();
err_ioremap_exit:
	ghes_ioremap_exit();
err:
	return rc;
}
device_initcall(ghes_init);