1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * APEI Generic Hardware Error Source support 4 * 5 * Generic Hardware Error Source provides a way to report platform 6 * hardware errors (such as that from chipset). It works in so called 7 * "Firmware First" mode, that is, hardware errors are reported to 8 * firmware firstly, then reported to Linux by firmware. This way, 9 * some non-standard hardware error registers or non-standard hardware 10 * link can be checked by firmware to produce more hardware error 11 * information for Linux. 12 * 13 * For more information about Generic Hardware Error Source, please 14 * refer to ACPI Specification version 4.0, section 17.3.2.6 15 * 16 * Copyright 2010,2011 Intel Corp. 17 * Author: Huang Ying <ying.huang@intel.com> 18 */ 19 20 #include <linux/arm_sdei.h> 21 #include <linux/kernel.h> 22 #include <linux/moduleparam.h> 23 #include <linux/init.h> 24 #include <linux/acpi.h> 25 #include <linux/io.h> 26 #include <linux/interrupt.h> 27 #include <linux/timer.h> 28 #include <linux/cper.h> 29 #include <linux/platform_device.h> 30 #include <linux/mutex.h> 31 #include <linux/ratelimit.h> 32 #include <linux/vmalloc.h> 33 #include <linux/irq_work.h> 34 #include <linux/llist.h> 35 #include <linux/genalloc.h> 36 #include <linux/pci.h> 37 #include <linux/pfn.h> 38 #include <linux/aer.h> 39 #include <linux/nmi.h> 40 #include <linux/sched/clock.h> 41 #include <linux/uuid.h> 42 #include <linux/ras.h> 43 #include <linux/task_work.h> 44 45 #include <acpi/actbl1.h> 46 #include <acpi/ghes.h> 47 #include <acpi/apei.h> 48 #include <asm/fixmap.h> 49 #include <asm/tlbflush.h> 50 #include <ras/ras_event.h> 51 52 #include "apei-internal.h" 53 54 #define GHES_PFX "GHES: " 55 56 #define GHES_ESTATUS_MAX_SIZE 65536 57 #define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536 58 59 #define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 60 61 /* This is just an estimation for memory pool allocation */ 62 #define GHES_ESTATUS_CACHE_AVG_SIZE 512 63 64 #define GHES_ESTATUS_CACHES_SIZE 4 65 66 #define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL 67 /* Prevent too many caches are allocated because of RCU */ 68 #define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) 69 70 #define GHES_ESTATUS_CACHE_LEN(estatus_len) \ 71 (sizeof(struct ghes_estatus_cache) + (estatus_len)) 72 #define GHES_ESTATUS_FROM_CACHE(estatus_cache) \ 73 ((struct acpi_hest_generic_status *) \ 74 ((struct ghes_estatus_cache *)(estatus_cache) + 1)) 75 76 #define GHES_ESTATUS_NODE_LEN(estatus_len) \ 77 (sizeof(struct ghes_estatus_node) + (estatus_len)) 78 #define GHES_ESTATUS_FROM_NODE(estatus_node) \ 79 ((struct acpi_hest_generic_status *) \ 80 ((struct ghes_estatus_node *)(estatus_node) + 1)) 81 82 /* 83 * NMI-like notifications vary by architecture, before the compiler can prune 84 * unused static functions it needs a value for these enums. 85 */ 86 #ifndef CONFIG_ARM_SDE_INTERFACE 87 #define FIX_APEI_GHES_SDEI_NORMAL __end_of_fixed_addresses 88 #define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses 89 #endif 90 91 static inline bool is_hest_type_generic_v2(struct ghes *ghes) 92 { 93 return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; 94 } 95 96 /* 97 * This driver isn't really modular, however for the time being, 98 * continuing to use module_param is the easiest way to remain 99 * compatible with existing boot arg use cases. 100 */ 101 bool ghes_disable; 102 module_param_named(disable, ghes_disable, bool, 0); 103 104 /* 105 * All error sources notified with HED (Hardware Error Device) share a 106 * single notifier callback, so they need to be linked and checked one 107 * by one. This holds true for NMI too. 108 * 109 * RCU is used for these lists, so ghes_list_mutex is only used for 110 * list changing, not for traversing. 111 */ 112 static LIST_HEAD(ghes_hed); 113 static DEFINE_MUTEX(ghes_list_mutex); 114 115 /* 116 * Because the memory area used to transfer hardware error information 117 * from BIOS to Linux can be determined only in NMI, IRQ or timer 118 * handler, but general ioremap can not be used in atomic context, so 119 * the fixmap is used instead. 120 * 121 * This spinlock is used to prevent the fixmap entry from being used 122 * simultaneously. 123 */ 124 static DEFINE_SPINLOCK(ghes_notify_lock_irq); 125 126 static struct gen_pool *ghes_estatus_pool; 127 static unsigned long ghes_estatus_pool_size_request; 128 129 static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; 130 static atomic_t ghes_estatus_cache_alloced; 131 132 static int ghes_panic_timeout __read_mostly = 30; 133 134 static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx) 135 { 136 phys_addr_t paddr; 137 pgprot_t prot; 138 139 paddr = PFN_PHYS(pfn); 140 prot = arch_apei_get_mem_attribute(paddr); 141 __set_fixmap(fixmap_idx, paddr, prot); 142 143 return (void __iomem *) __fix_to_virt(fixmap_idx); 144 } 145 146 static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx) 147 { 148 int _idx = virt_to_fix((unsigned long)vaddr); 149 150 WARN_ON_ONCE(fixmap_idx != _idx); 151 clear_fixmap(fixmap_idx); 152 } 153 154 int ghes_estatus_pool_init(int num_ghes) 155 { 156 unsigned long addr, len; 157 int rc; 158 159 ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); 160 if (!ghes_estatus_pool) 161 return -ENOMEM; 162 163 len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX; 164 len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE); 165 166 ghes_estatus_pool_size_request = PAGE_ALIGN(len); 167 addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); 168 if (!addr) 169 goto err_pool_alloc; 170 171 rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); 172 if (rc) 173 goto err_pool_add; 174 175 return 0; 176 177 err_pool_add: 178 vfree((void *)addr); 179 180 err_pool_alloc: 181 gen_pool_destroy(ghes_estatus_pool); 182 183 return -ENOMEM; 184 } 185 186 static int map_gen_v2(struct ghes *ghes) 187 { 188 return apei_map_generic_address(&ghes->generic_v2->read_ack_register); 189 } 190 191 static void unmap_gen_v2(struct ghes *ghes) 192 { 193 apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); 194 } 195 196 static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2) 197 { 198 int rc; 199 u64 val = 0; 200 201 rc = apei_read(&val, &gv2->read_ack_register); 202 if (rc) 203 return; 204 205 val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; 206 val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; 207 208 apei_write(val, &gv2->read_ack_register); 209 } 210 211 static struct ghes *ghes_new(struct acpi_hest_generic *generic) 212 { 213 struct ghes *ghes; 214 unsigned int error_block_length; 215 int rc; 216 217 ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); 218 if (!ghes) 219 return ERR_PTR(-ENOMEM); 220 221 ghes->generic = generic; 222 if (is_hest_type_generic_v2(ghes)) { 223 rc = map_gen_v2(ghes); 224 if (rc) 225 goto err_free; 226 } 227 228 rc = apei_map_generic_address(&generic->error_status_address); 229 if (rc) 230 goto err_unmap_read_ack_addr; 231 error_block_length = generic->error_block_length; 232 if (error_block_length > GHES_ESTATUS_MAX_SIZE) { 233 pr_warn(FW_WARN GHES_PFX 234 "Error status block length is too long: %u for " 235 "generic hardware error source: %d.\n", 236 error_block_length, generic->header.source_id); 237 error_block_length = GHES_ESTATUS_MAX_SIZE; 238 } 239 ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); 240 if (!ghes->estatus) { 241 rc = -ENOMEM; 242 goto err_unmap_status_addr; 243 } 244 245 return ghes; 246 247 err_unmap_status_addr: 248 apei_unmap_generic_address(&generic->error_status_address); 249 err_unmap_read_ack_addr: 250 if (is_hest_type_generic_v2(ghes)) 251 unmap_gen_v2(ghes); 252 err_free: 253 kfree(ghes); 254 return ERR_PTR(rc); 255 } 256 257 static void ghes_fini(struct ghes *ghes) 258 { 259 kfree(ghes->estatus); 260 apei_unmap_generic_address(&ghes->generic->error_status_address); 261 if (is_hest_type_generic_v2(ghes)) 262 unmap_gen_v2(ghes); 263 } 264 265 static inline int ghes_severity(int severity) 266 { 267 switch (severity) { 268 case CPER_SEV_INFORMATIONAL: 269 return GHES_SEV_NO; 270 case CPER_SEV_CORRECTED: 271 return GHES_SEV_CORRECTED; 272 case CPER_SEV_RECOVERABLE: 273 return GHES_SEV_RECOVERABLE; 274 case CPER_SEV_FATAL: 275 return GHES_SEV_PANIC; 276 default: 277 /* Unknown, go panic */ 278 return GHES_SEV_PANIC; 279 } 280 } 281 282 static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, 283 int from_phys, 284 enum fixed_addresses fixmap_idx) 285 { 286 void __iomem *vaddr; 287 u64 offset; 288 u32 trunk; 289 290 while (len > 0) { 291 offset = paddr - (paddr & PAGE_MASK); 292 vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx); 293 trunk = PAGE_SIZE - offset; 294 trunk = min(trunk, len); 295 if (from_phys) 296 memcpy_fromio(buffer, vaddr + offset, trunk); 297 else 298 memcpy_toio(vaddr + offset, buffer, trunk); 299 len -= trunk; 300 paddr += trunk; 301 buffer += trunk; 302 ghes_unmap(vaddr, fixmap_idx); 303 } 304 } 305 306 /* Check the top-level record header has an appropriate size. */ 307 static int __ghes_check_estatus(struct ghes *ghes, 308 struct acpi_hest_generic_status *estatus) 309 { 310 u32 len = cper_estatus_len(estatus); 311 312 if (len < sizeof(*estatus)) { 313 pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n"); 314 return -EIO; 315 } 316 317 if (len > ghes->generic->error_block_length) { 318 pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n"); 319 return -EIO; 320 } 321 322 if (cper_estatus_check_header(estatus)) { 323 pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n"); 324 return -EIO; 325 } 326 327 return 0; 328 } 329 330 /* Read the CPER block, returning its address, and header in estatus. */ 331 static int __ghes_peek_estatus(struct ghes *ghes, 332 struct acpi_hest_generic_status *estatus, 333 u64 *buf_paddr, enum fixed_addresses fixmap_idx) 334 { 335 struct acpi_hest_generic *g = ghes->generic; 336 int rc; 337 338 rc = apei_read(buf_paddr, &g->error_status_address); 339 if (rc) { 340 *buf_paddr = 0; 341 pr_warn_ratelimited(FW_WARN GHES_PFX 342 "Failed to read error status block address for hardware error source: %d.\n", 343 g->header.source_id); 344 return -EIO; 345 } 346 if (!*buf_paddr) 347 return -ENOENT; 348 349 ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1, 350 fixmap_idx); 351 if (!estatus->block_status) { 352 *buf_paddr = 0; 353 return -ENOENT; 354 } 355 356 return 0; 357 } 358 359 static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus, 360 u64 buf_paddr, enum fixed_addresses fixmap_idx, 361 size_t buf_len) 362 { 363 ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx); 364 if (cper_estatus_check(estatus)) { 365 pr_warn_ratelimited(FW_WARN GHES_PFX 366 "Failed to read error status block!\n"); 367 return -EIO; 368 } 369 370 return 0; 371 } 372 373 static int ghes_read_estatus(struct ghes *ghes, 374 struct acpi_hest_generic_status *estatus, 375 u64 *buf_paddr, enum fixed_addresses fixmap_idx) 376 { 377 int rc; 378 379 rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx); 380 if (rc) 381 return rc; 382 383 rc = __ghes_check_estatus(ghes, estatus); 384 if (rc) 385 return rc; 386 387 return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx, 388 cper_estatus_len(estatus)); 389 } 390 391 static void ghes_clear_estatus(struct ghes *ghes, 392 struct acpi_hest_generic_status *estatus, 393 u64 buf_paddr, enum fixed_addresses fixmap_idx) 394 { 395 estatus->block_status = 0; 396 397 if (!buf_paddr) 398 return; 399 400 ghes_copy_tofrom_phys(estatus, buf_paddr, 401 sizeof(estatus->block_status), 0, 402 fixmap_idx); 403 404 /* 405 * GHESv2 type HEST entries introduce support for error acknowledgment, 406 * so only acknowledge the error if this support is present. 407 */ 408 if (is_hest_type_generic_v2(ghes)) 409 ghes_ack_error(ghes->generic_v2); 410 } 411 412 /* 413 * Called as task_work before returning to user-space. 414 * Ensure any queued work has been done before we return to the context that 415 * triggered the notification. 416 */ 417 static void ghes_kick_task_work(struct callback_head *head) 418 { 419 struct acpi_hest_generic_status *estatus; 420 struct ghes_estatus_node *estatus_node; 421 u32 node_len; 422 423 estatus_node = container_of(head, struct ghes_estatus_node, task_work); 424 if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) 425 memory_failure_queue_kick(estatus_node->task_work_cpu); 426 427 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 428 node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus)); 429 gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); 430 } 431 432 static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, 433 int sev) 434 { 435 unsigned long pfn; 436 int flags = -1; 437 int sec_sev = ghes_severity(gdata->error_severity); 438 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); 439 440 if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) 441 return false; 442 443 if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) 444 return false; 445 446 pfn = mem_err->physical_addr >> PAGE_SHIFT; 447 if (!pfn_valid(pfn)) { 448 pr_warn_ratelimited(FW_WARN GHES_PFX 449 "Invalid address in generic error data: %#llx\n", 450 mem_err->physical_addr); 451 return false; 452 } 453 454 /* iff following two events can be handled properly by now */ 455 if (sec_sev == GHES_SEV_CORRECTED && 456 (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) 457 flags = MF_SOFT_OFFLINE; 458 if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) 459 flags = 0; 460 461 if (flags != -1) { 462 memory_failure_queue(pfn, flags); 463 return true; 464 } 465 466 return false; 467 } 468 469 /* 470 * PCIe AER errors need to be sent to the AER driver for reporting and 471 * recovery. The GHES severities map to the following AER severities and 472 * require the following handling: 473 * 474 * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE 475 * These need to be reported by the AER driver but no recovery is 476 * necessary. 477 * GHES_SEV_RECOVERABLE -> AER_NONFATAL 478 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL 479 * These both need to be reported and recovered from by the AER driver. 480 * GHES_SEV_PANIC does not make it to this handling since the kernel must 481 * panic. 482 */ 483 static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) 484 { 485 #ifdef CONFIG_ACPI_APEI_PCIEAER 486 struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); 487 488 if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && 489 pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { 490 unsigned int devfn; 491 int aer_severity; 492 493 devfn = PCI_DEVFN(pcie_err->device_id.device, 494 pcie_err->device_id.function); 495 aer_severity = cper_severity_to_aer(gdata->error_severity); 496 497 /* 498 * If firmware reset the component to contain 499 * the error, we must reinitialize it before 500 * use, so treat it as a fatal AER error. 501 */ 502 if (gdata->flags & CPER_SEC_RESET) 503 aer_severity = AER_FATAL; 504 505 aer_recover_queue(pcie_err->device_id.segment, 506 pcie_err->device_id.bus, 507 devfn, aer_severity, 508 (struct aer_capability_regs *) 509 pcie_err->aer_info); 510 } 511 #endif 512 } 513 514 static bool ghes_do_proc(struct ghes *ghes, 515 const struct acpi_hest_generic_status *estatus) 516 { 517 int sev, sec_sev; 518 struct acpi_hest_generic_data *gdata; 519 guid_t *sec_type; 520 const guid_t *fru_id = &guid_null; 521 char *fru_text = ""; 522 bool queued = false; 523 524 sev = ghes_severity(estatus->error_severity); 525 apei_estatus_for_each_section(estatus, gdata) { 526 sec_type = (guid_t *)gdata->section_type; 527 sec_sev = ghes_severity(gdata->error_severity); 528 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) 529 fru_id = (guid_t *)gdata->fru_id; 530 531 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) 532 fru_text = gdata->fru_text; 533 534 if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { 535 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); 536 537 ghes_edac_report_mem_error(sev, mem_err); 538 539 arch_apei_report_mem_error(sev, mem_err); 540 queued = ghes_handle_memory_failure(gdata, sev); 541 } 542 else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { 543 ghes_handle_aer(gdata); 544 } 545 else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { 546 struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); 547 548 log_arm_hw_error(err); 549 } else { 550 void *err = acpi_hest_get_payload(gdata); 551 552 log_non_standard_event(sec_type, fru_id, fru_text, 553 sec_sev, err, 554 gdata->error_data_length); 555 } 556 } 557 558 return queued; 559 } 560 561 static void __ghes_print_estatus(const char *pfx, 562 const struct acpi_hest_generic *generic, 563 const struct acpi_hest_generic_status *estatus) 564 { 565 static atomic_t seqno; 566 unsigned int curr_seqno; 567 char pfx_seq[64]; 568 569 if (pfx == NULL) { 570 if (ghes_severity(estatus->error_severity) <= 571 GHES_SEV_CORRECTED) 572 pfx = KERN_WARNING; 573 else 574 pfx = KERN_ERR; 575 } 576 curr_seqno = atomic_inc_return(&seqno); 577 snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno); 578 printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", 579 pfx_seq, generic->header.source_id); 580 cper_estatus_print(pfx_seq, estatus); 581 } 582 583 static int ghes_print_estatus(const char *pfx, 584 const struct acpi_hest_generic *generic, 585 const struct acpi_hest_generic_status *estatus) 586 { 587 /* Not more than 2 messages every 5 seconds */ 588 static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); 589 static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); 590 struct ratelimit_state *ratelimit; 591 592 if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) 593 ratelimit = &ratelimit_corrected; 594 else 595 ratelimit = &ratelimit_uncorrected; 596 if (__ratelimit(ratelimit)) { 597 __ghes_print_estatus(pfx, generic, estatus); 598 return 1; 599 } 600 return 0; 601 } 602 603 /* 604 * GHES error status reporting throttle, to report more kinds of 605 * errors, instead of just most frequently occurred errors. 606 */ 607 static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) 608 { 609 u32 len; 610 int i, cached = 0; 611 unsigned long long now; 612 struct ghes_estatus_cache *cache; 613 struct acpi_hest_generic_status *cache_estatus; 614 615 len = cper_estatus_len(estatus); 616 rcu_read_lock(); 617 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { 618 cache = rcu_dereference(ghes_estatus_caches[i]); 619 if (cache == NULL) 620 continue; 621 if (len != cache->estatus_len) 622 continue; 623 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); 624 if (memcmp(estatus, cache_estatus, len)) 625 continue; 626 atomic_inc(&cache->count); 627 now = sched_clock(); 628 if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) 629 cached = 1; 630 break; 631 } 632 rcu_read_unlock(); 633 return cached; 634 } 635 636 static struct ghes_estatus_cache *ghes_estatus_cache_alloc( 637 struct acpi_hest_generic *generic, 638 struct acpi_hest_generic_status *estatus) 639 { 640 int alloced; 641 u32 len, cache_len; 642 struct ghes_estatus_cache *cache; 643 struct acpi_hest_generic_status *cache_estatus; 644 645 alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); 646 if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { 647 atomic_dec(&ghes_estatus_cache_alloced); 648 return NULL; 649 } 650 len = cper_estatus_len(estatus); 651 cache_len = GHES_ESTATUS_CACHE_LEN(len); 652 cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); 653 if (!cache) { 654 atomic_dec(&ghes_estatus_cache_alloced); 655 return NULL; 656 } 657 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); 658 memcpy(cache_estatus, estatus, len); 659 cache->estatus_len = len; 660 atomic_set(&cache->count, 0); 661 cache->generic = generic; 662 cache->time_in = sched_clock(); 663 return cache; 664 } 665 666 static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) 667 { 668 u32 len; 669 670 len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); 671 len = GHES_ESTATUS_CACHE_LEN(len); 672 gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); 673 atomic_dec(&ghes_estatus_cache_alloced); 674 } 675 676 static void ghes_estatus_cache_rcu_free(struct rcu_head *head) 677 { 678 struct ghes_estatus_cache *cache; 679 680 cache = container_of(head, struct ghes_estatus_cache, rcu); 681 ghes_estatus_cache_free(cache); 682 } 683 684 static void ghes_estatus_cache_add( 685 struct acpi_hest_generic *generic, 686 struct acpi_hest_generic_status *estatus) 687 { 688 int i, slot = -1, count; 689 unsigned long long now, duration, period, max_period = 0; 690 struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; 691 692 new_cache = ghes_estatus_cache_alloc(generic, estatus); 693 if (new_cache == NULL) 694 return; 695 rcu_read_lock(); 696 now = sched_clock(); 697 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { 698 cache = rcu_dereference(ghes_estatus_caches[i]); 699 if (cache == NULL) { 700 slot = i; 701 slot_cache = NULL; 702 break; 703 } 704 duration = now - cache->time_in; 705 if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { 706 slot = i; 707 slot_cache = cache; 708 break; 709 } 710 count = atomic_read(&cache->count); 711 period = duration; 712 do_div(period, (count + 1)); 713 if (period > max_period) { 714 max_period = period; 715 slot = i; 716 slot_cache = cache; 717 } 718 } 719 /* new_cache must be put into array after its contents are written */ 720 smp_wmb(); 721 if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, 722 slot_cache, new_cache) == slot_cache) { 723 if (slot_cache) 724 call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); 725 } else 726 ghes_estatus_cache_free(new_cache); 727 rcu_read_unlock(); 728 } 729 730 static void __ghes_panic(struct ghes *ghes, 731 struct acpi_hest_generic_status *estatus, 732 u64 buf_paddr, enum fixed_addresses fixmap_idx) 733 { 734 __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); 735 736 ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); 737 738 /* reboot to log the error! */ 739 if (!panic_timeout) 740 panic_timeout = ghes_panic_timeout; 741 panic("Fatal hardware error!"); 742 } 743 744 static int ghes_proc(struct ghes *ghes) 745 { 746 struct acpi_hest_generic_status *estatus = ghes->estatus; 747 u64 buf_paddr; 748 int rc; 749 750 rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ); 751 if (rc) 752 goto out; 753 754 if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC) 755 __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); 756 757 if (!ghes_estatus_cached(estatus)) { 758 if (ghes_print_estatus(NULL, ghes->generic, estatus)) 759 ghes_estatus_cache_add(ghes->generic, estatus); 760 } 761 ghes_do_proc(ghes, estatus); 762 763 out: 764 ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); 765 766 return rc; 767 } 768 769 static void ghes_add_timer(struct ghes *ghes) 770 { 771 struct acpi_hest_generic *g = ghes->generic; 772 unsigned long expire; 773 774 if (!g->notify.poll_interval) { 775 pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", 776 g->header.source_id); 777 return; 778 } 779 expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); 780 ghes->timer.expires = round_jiffies_relative(expire); 781 add_timer(&ghes->timer); 782 } 783 784 static void ghes_poll_func(struct timer_list *t) 785 { 786 struct ghes *ghes = from_timer(ghes, t, timer); 787 unsigned long flags; 788 789 spin_lock_irqsave(&ghes_notify_lock_irq, flags); 790 ghes_proc(ghes); 791 spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); 792 if (!(ghes->flags & GHES_EXITING)) 793 ghes_add_timer(ghes); 794 } 795 796 static irqreturn_t ghes_irq_func(int irq, void *data) 797 { 798 struct ghes *ghes = data; 799 unsigned long flags; 800 int rc; 801 802 spin_lock_irqsave(&ghes_notify_lock_irq, flags); 803 rc = ghes_proc(ghes); 804 spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); 805 if (rc) 806 return IRQ_NONE; 807 808 return IRQ_HANDLED; 809 } 810 811 static int ghes_notify_hed(struct notifier_block *this, unsigned long event, 812 void *data) 813 { 814 struct ghes *ghes; 815 unsigned long flags; 816 int ret = NOTIFY_DONE; 817 818 spin_lock_irqsave(&ghes_notify_lock_irq, flags); 819 rcu_read_lock(); 820 list_for_each_entry_rcu(ghes, &ghes_hed, list) { 821 if (!ghes_proc(ghes)) 822 ret = NOTIFY_OK; 823 } 824 rcu_read_unlock(); 825 spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); 826 827 return ret; 828 } 829 830 static struct notifier_block ghes_notifier_hed = { 831 .notifier_call = ghes_notify_hed, 832 }; 833 834 /* 835 * Handlers for CPER records may not be NMI safe. For example, 836 * memory_failure_queue() takes spinlocks and calls schedule_work_on(). 837 * In any NMI-like handler, memory from ghes_estatus_pool is used to save 838 * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes 839 * ghes_proc_in_irq() to run in IRQ context where each estatus in 840 * ghes_estatus_llist is processed. 841 * 842 * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache 843 * to suppress frequent messages. 844 */ 845 static struct llist_head ghes_estatus_llist; 846 static struct irq_work ghes_proc_irq_work; 847 848 static void ghes_proc_in_irq(struct irq_work *irq_work) 849 { 850 struct llist_node *llnode, *next; 851 struct ghes_estatus_node *estatus_node; 852 struct acpi_hest_generic *generic; 853 struct acpi_hest_generic_status *estatus; 854 bool task_work_pending; 855 u32 len, node_len; 856 int ret; 857 858 llnode = llist_del_all(&ghes_estatus_llist); 859 /* 860 * Because the time order of estatus in list is reversed, 861 * revert it back to proper order. 862 */ 863 llnode = llist_reverse_order(llnode); 864 while (llnode) { 865 next = llnode->next; 866 estatus_node = llist_entry(llnode, struct ghes_estatus_node, 867 llnode); 868 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 869 len = cper_estatus_len(estatus); 870 node_len = GHES_ESTATUS_NODE_LEN(len); 871 task_work_pending = ghes_do_proc(estatus_node->ghes, estatus); 872 if (!ghes_estatus_cached(estatus)) { 873 generic = estatus_node->generic; 874 if (ghes_print_estatus(NULL, generic, estatus)) 875 ghes_estatus_cache_add(generic, estatus); 876 } 877 878 if (task_work_pending && current->mm != &init_mm) { 879 estatus_node->task_work.func = ghes_kick_task_work; 880 estatus_node->task_work_cpu = smp_processor_id(); 881 ret = task_work_add(current, &estatus_node->task_work, 882 true); 883 if (ret) 884 estatus_node->task_work.func = NULL; 885 } 886 887 if (!estatus_node->task_work.func) 888 gen_pool_free(ghes_estatus_pool, 889 (unsigned long)estatus_node, node_len); 890 891 llnode = next; 892 } 893 } 894 895 static void ghes_print_queued_estatus(void) 896 { 897 struct llist_node *llnode; 898 struct ghes_estatus_node *estatus_node; 899 struct acpi_hest_generic *generic; 900 struct acpi_hest_generic_status *estatus; 901 902 llnode = llist_del_all(&ghes_estatus_llist); 903 /* 904 * Because the time order of estatus in list is reversed, 905 * revert it back to proper order. 906 */ 907 llnode = llist_reverse_order(llnode); 908 while (llnode) { 909 estatus_node = llist_entry(llnode, struct ghes_estatus_node, 910 llnode); 911 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 912 generic = estatus_node->generic; 913 ghes_print_estatus(NULL, generic, estatus); 914 llnode = llnode->next; 915 } 916 } 917 918 static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, 919 enum fixed_addresses fixmap_idx) 920 { 921 struct acpi_hest_generic_status *estatus, tmp_header; 922 struct ghes_estatus_node *estatus_node; 923 u32 len, node_len; 924 u64 buf_paddr; 925 int sev, rc; 926 927 if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) 928 return -EOPNOTSUPP; 929 930 rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx); 931 if (rc) { 932 ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); 933 return rc; 934 } 935 936 rc = __ghes_check_estatus(ghes, &tmp_header); 937 if (rc) { 938 ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); 939 return rc; 940 } 941 942 len = cper_estatus_len(&tmp_header); 943 node_len = GHES_ESTATUS_NODE_LEN(len); 944 estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); 945 if (!estatus_node) 946 return -ENOMEM; 947 948 estatus_node->ghes = ghes; 949 estatus_node->generic = ghes->generic; 950 estatus_node->task_work.func = NULL; 951 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 952 953 if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { 954 ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); 955 rc = -ENOENT; 956 goto no_work; 957 } 958 959 sev = ghes_severity(estatus->error_severity); 960 if (sev >= GHES_SEV_PANIC) { 961 ghes_print_queued_estatus(); 962 __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); 963 } 964 965 ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); 966 967 /* This error has been reported before, don't process it again. */ 968 if (ghes_estatus_cached(estatus)) 969 goto no_work; 970 971 llist_add(&estatus_node->llnode, &ghes_estatus_llist); 972 973 return rc; 974 975 no_work: 976 gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, 977 node_len); 978 979 return rc; 980 } 981 982 static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list, 983 enum fixed_addresses fixmap_idx) 984 { 985 int ret = -ENOENT; 986 struct ghes *ghes; 987 988 rcu_read_lock(); 989 list_for_each_entry_rcu(ghes, rcu_list, list) { 990 if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) 991 ret = 0; 992 } 993 rcu_read_unlock(); 994 995 if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret) 996 irq_work_queue(&ghes_proc_irq_work); 997 998 return ret; 999 } 1000 1001 #ifdef CONFIG_ACPI_APEI_SEA 1002 static LIST_HEAD(ghes_sea); 1003 1004 /* 1005 * Return 0 only if one of the SEA error sources successfully reported an error 1006 * record sent from the firmware. 1007 */ 1008 int ghes_notify_sea(void) 1009 { 1010 static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea); 1011 int rv; 1012 1013 raw_spin_lock(&ghes_notify_lock_sea); 1014 rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA); 1015 raw_spin_unlock(&ghes_notify_lock_sea); 1016 1017 return rv; 1018 } 1019 1020 static void ghes_sea_add(struct ghes *ghes) 1021 { 1022 mutex_lock(&ghes_list_mutex); 1023 list_add_rcu(&ghes->list, &ghes_sea); 1024 mutex_unlock(&ghes_list_mutex); 1025 } 1026 1027 static void ghes_sea_remove(struct ghes *ghes) 1028 { 1029 mutex_lock(&ghes_list_mutex); 1030 list_del_rcu(&ghes->list); 1031 mutex_unlock(&ghes_list_mutex); 1032 synchronize_rcu(); 1033 } 1034 #else /* CONFIG_ACPI_APEI_SEA */ 1035 static inline void ghes_sea_add(struct ghes *ghes) { } 1036 static inline void ghes_sea_remove(struct ghes *ghes) { } 1037 #endif /* CONFIG_ACPI_APEI_SEA */ 1038 1039 #ifdef CONFIG_HAVE_ACPI_APEI_NMI 1040 /* 1041 * NMI may be triggered on any CPU, so ghes_in_nmi is used for 1042 * having only one concurrent reader. 1043 */ 1044 static atomic_t ghes_in_nmi = ATOMIC_INIT(0); 1045 1046 static LIST_HEAD(ghes_nmi); 1047 1048 static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) 1049 { 1050 static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi); 1051 int ret = NMI_DONE; 1052 1053 if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) 1054 return ret; 1055 1056 raw_spin_lock(&ghes_notify_lock_nmi); 1057 if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI)) 1058 ret = NMI_HANDLED; 1059 raw_spin_unlock(&ghes_notify_lock_nmi); 1060 1061 atomic_dec(&ghes_in_nmi); 1062 return ret; 1063 } 1064 1065 static void ghes_nmi_add(struct ghes *ghes) 1066 { 1067 mutex_lock(&ghes_list_mutex); 1068 if (list_empty(&ghes_nmi)) 1069 register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); 1070 list_add_rcu(&ghes->list, &ghes_nmi); 1071 mutex_unlock(&ghes_list_mutex); 1072 } 1073 1074 static void ghes_nmi_remove(struct ghes *ghes) 1075 { 1076 mutex_lock(&ghes_list_mutex); 1077 list_del_rcu(&ghes->list); 1078 if (list_empty(&ghes_nmi)) 1079 unregister_nmi_handler(NMI_LOCAL, "ghes"); 1080 mutex_unlock(&ghes_list_mutex); 1081 /* 1082 * To synchronize with NMI handler, ghes can only be 1083 * freed after NMI handler finishes. 1084 */ 1085 synchronize_rcu(); 1086 } 1087 #else /* CONFIG_HAVE_ACPI_APEI_NMI */ 1088 static inline void ghes_nmi_add(struct ghes *ghes) { } 1089 static inline void ghes_nmi_remove(struct ghes *ghes) { } 1090 #endif /* CONFIG_HAVE_ACPI_APEI_NMI */ 1091 1092 static void ghes_nmi_init_cxt(void) 1093 { 1094 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); 1095 } 1096 1097 static int __ghes_sdei_callback(struct ghes *ghes, 1098 enum fixed_addresses fixmap_idx) 1099 { 1100 if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) { 1101 irq_work_queue(&ghes_proc_irq_work); 1102 1103 return 0; 1104 } 1105 1106 return -ENOENT; 1107 } 1108 1109 static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs, 1110 void *arg) 1111 { 1112 static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal); 1113 struct ghes *ghes = arg; 1114 int err; 1115 1116 raw_spin_lock(&ghes_notify_lock_sdei_normal); 1117 err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL); 1118 raw_spin_unlock(&ghes_notify_lock_sdei_normal); 1119 1120 return err; 1121 } 1122 1123 static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs, 1124 void *arg) 1125 { 1126 static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical); 1127 struct ghes *ghes = arg; 1128 int err; 1129 1130 raw_spin_lock(&ghes_notify_lock_sdei_critical); 1131 err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL); 1132 raw_spin_unlock(&ghes_notify_lock_sdei_critical); 1133 1134 return err; 1135 } 1136 1137 static int apei_sdei_register_ghes(struct ghes *ghes) 1138 { 1139 if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) 1140 return -EOPNOTSUPP; 1141 1142 return sdei_register_ghes(ghes, ghes_sdei_normal_callback, 1143 ghes_sdei_critical_callback); 1144 } 1145 1146 static int apei_sdei_unregister_ghes(struct ghes *ghes) 1147 { 1148 if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) 1149 return -EOPNOTSUPP; 1150 1151 return sdei_unregister_ghes(ghes); 1152 } 1153 1154 static int ghes_probe(struct platform_device *ghes_dev) 1155 { 1156 struct acpi_hest_generic *generic; 1157 struct ghes *ghes = NULL; 1158 unsigned long flags; 1159 1160 int rc = -EINVAL; 1161 1162 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; 1163 if (!generic->enabled) 1164 return -ENODEV; 1165 1166 switch (generic->notify.type) { 1167 case ACPI_HEST_NOTIFY_POLLED: 1168 case ACPI_HEST_NOTIFY_EXTERNAL: 1169 case ACPI_HEST_NOTIFY_SCI: 1170 case ACPI_HEST_NOTIFY_GSIV: 1171 case ACPI_HEST_NOTIFY_GPIO: 1172 break; 1173 1174 case ACPI_HEST_NOTIFY_SEA: 1175 if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { 1176 pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", 1177 generic->header.source_id); 1178 rc = -ENOTSUPP; 1179 goto err; 1180 } 1181 break; 1182 case ACPI_HEST_NOTIFY_NMI: 1183 if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { 1184 pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", 1185 generic->header.source_id); 1186 goto err; 1187 } 1188 break; 1189 case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: 1190 if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) { 1191 pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n", 1192 generic->header.source_id); 1193 goto err; 1194 } 1195 break; 1196 case ACPI_HEST_NOTIFY_LOCAL: 1197 pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", 1198 generic->header.source_id); 1199 goto err; 1200 default: 1201 pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", 1202 generic->notify.type, generic->header.source_id); 1203 goto err; 1204 } 1205 1206 rc = -EIO; 1207 if (generic->error_block_length < 1208 sizeof(struct acpi_hest_generic_status)) { 1209 pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", 1210 generic->error_block_length, generic->header.source_id); 1211 goto err; 1212 } 1213 ghes = ghes_new(generic); 1214 if (IS_ERR(ghes)) { 1215 rc = PTR_ERR(ghes); 1216 ghes = NULL; 1217 goto err; 1218 } 1219 1220 switch (generic->notify.type) { 1221 case ACPI_HEST_NOTIFY_POLLED: 1222 timer_setup(&ghes->timer, ghes_poll_func, 0); 1223 ghes_add_timer(ghes); 1224 break; 1225 case ACPI_HEST_NOTIFY_EXTERNAL: 1226 /* External interrupt vector is GSI */ 1227 rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq); 1228 if (rc) { 1229 pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", 1230 generic->header.source_id); 1231 goto err; 1232 } 1233 rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED, 1234 "GHES IRQ", ghes); 1235 if (rc) { 1236 pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", 1237 generic->header.source_id); 1238 goto err; 1239 } 1240 break; 1241 1242 case ACPI_HEST_NOTIFY_SCI: 1243 case ACPI_HEST_NOTIFY_GSIV: 1244 case ACPI_HEST_NOTIFY_GPIO: 1245 mutex_lock(&ghes_list_mutex); 1246 if (list_empty(&ghes_hed)) 1247 register_acpi_hed_notifier(&ghes_notifier_hed); 1248 list_add_rcu(&ghes->list, &ghes_hed); 1249 mutex_unlock(&ghes_list_mutex); 1250 break; 1251 1252 case ACPI_HEST_NOTIFY_SEA: 1253 ghes_sea_add(ghes); 1254 break; 1255 case ACPI_HEST_NOTIFY_NMI: 1256 ghes_nmi_add(ghes); 1257 break; 1258 case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: 1259 rc = apei_sdei_register_ghes(ghes); 1260 if (rc) 1261 goto err; 1262 break; 1263 default: 1264 BUG(); 1265 } 1266 1267 platform_set_drvdata(ghes_dev, ghes); 1268 1269 ghes_edac_register(ghes, &ghes_dev->dev); 1270 1271 /* Handle any pending errors right away */ 1272 spin_lock_irqsave(&ghes_notify_lock_irq, flags); 1273 ghes_proc(ghes); 1274 spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); 1275 1276 return 0; 1277 1278 err: 1279 if (ghes) { 1280 ghes_fini(ghes); 1281 kfree(ghes); 1282 } 1283 return rc; 1284 } 1285 1286 static int ghes_remove(struct platform_device *ghes_dev) 1287 { 1288 int rc; 1289 struct ghes *ghes; 1290 struct acpi_hest_generic *generic; 1291 1292 ghes = platform_get_drvdata(ghes_dev); 1293 generic = ghes->generic; 1294 1295 ghes->flags |= GHES_EXITING; 1296 switch (generic->notify.type) { 1297 case ACPI_HEST_NOTIFY_POLLED: 1298 del_timer_sync(&ghes->timer); 1299 break; 1300 case ACPI_HEST_NOTIFY_EXTERNAL: 1301 free_irq(ghes->irq, ghes); 1302 break; 1303 1304 case ACPI_HEST_NOTIFY_SCI: 1305 case ACPI_HEST_NOTIFY_GSIV: 1306 case ACPI_HEST_NOTIFY_GPIO: 1307 mutex_lock(&ghes_list_mutex); 1308 list_del_rcu(&ghes->list); 1309 if (list_empty(&ghes_hed)) 1310 unregister_acpi_hed_notifier(&ghes_notifier_hed); 1311 mutex_unlock(&ghes_list_mutex); 1312 synchronize_rcu(); 1313 break; 1314 1315 case ACPI_HEST_NOTIFY_SEA: 1316 ghes_sea_remove(ghes); 1317 break; 1318 case ACPI_HEST_NOTIFY_NMI: 1319 ghes_nmi_remove(ghes); 1320 break; 1321 case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: 1322 rc = apei_sdei_unregister_ghes(ghes); 1323 if (rc) 1324 return rc; 1325 break; 1326 default: 1327 BUG(); 1328 break; 1329 } 1330 1331 ghes_fini(ghes); 1332 1333 ghes_edac_unregister(ghes); 1334 1335 kfree(ghes); 1336 1337 platform_set_drvdata(ghes_dev, NULL); 1338 1339 return 0; 1340 } 1341 1342 static struct platform_driver ghes_platform_driver = { 1343 .driver = { 1344 .name = "GHES", 1345 }, 1346 .probe = ghes_probe, 1347 .remove = ghes_remove, 1348 }; 1349 1350 static int __init ghes_init(void) 1351 { 1352 int rc; 1353 1354 if (acpi_disabled) 1355 return -ENODEV; 1356 1357 switch (hest_disable) { 1358 case HEST_NOT_FOUND: 1359 return -ENODEV; 1360 case HEST_DISABLED: 1361 pr_info(GHES_PFX "HEST is not enabled!\n"); 1362 return -EINVAL; 1363 default: 1364 break; 1365 } 1366 1367 if (ghes_disable) { 1368 pr_info(GHES_PFX "GHES is not enabled!\n"); 1369 return -EINVAL; 1370 } 1371 1372 ghes_nmi_init_cxt(); 1373 1374 rc = platform_driver_register(&ghes_platform_driver); 1375 if (rc) 1376 goto err; 1377 1378 rc = apei_osc_setup(); 1379 if (rc == 0 && osc_sb_apei_support_acked) 1380 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); 1381 else if (rc == 0 && !osc_sb_apei_support_acked) 1382 pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); 1383 else if (rc && osc_sb_apei_support_acked) 1384 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); 1385 else 1386 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); 1387 1388 return 0; 1389 err: 1390 return rc; 1391 } 1392 device_initcall(ghes_init); 1393