// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
 * dump with assistance from firmware. This approach does not use kexec;
 * instead, firmware assists in booting the kdump kernel while preserving
 * memory contents. Most of the code implementation has been adapted from
 * the phyp assisted dump implementation written by Linas Vepstas and
 * Manish Ahuja.
 *
 * Copyright 2011 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "fadump: " fmt

#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/cma.h>
#include <linux/hugetlb.h>
#include <linux/debugfs.h>
#include <linux/of.h>
#include <linux/of_fdt.h>

#include <asm/page.h>
#include <asm/fadump.h>
#include <asm/fadump-internal.h>
#include <asm/setup.h>
#include <asm/interrupt.h>

/*
 * The CPU that acquired the lock to trigger the fadump crash should
 * wait for other CPUs to enter.
 *
 * The timeout is in milliseconds.
 */
#define CRASH_TIMEOUT		500

static struct fw_dump fw_dump;

static void __init fadump_reserve_crash_area(u64 base);

#ifndef CONFIG_PRESERVE_FA_DUMP

static struct kobject *fadump_kobj;

static atomic_t cpus_in_fadump;
static DEFINE_MUTEX(fadump_mutex);

static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };

#define RESERVED_RNGS_SZ	16384 /* 16K - 128 entries */
#define RESERVED_RNGS_CNT	(RESERVED_RNGS_SZ / \
				 sizeof(struct fadump_memory_range))
static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
static struct fadump_mrange_info
reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true };

static void __init early_init_dt_scan_reserved_ranges(unsigned long node);

#ifdef CONFIG_CMA
static struct cma *fadump_cma;

/*
 * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
 *
 * This function initializes a CMA area from fadump reserved memory.
 * The total size of fadump reserved memory covers the boot memory size
 * + cpu data size + hpte size and metadata.
 * Initialize only the area equivalent to boot memory size for CMA use.
 * The remaining portion of fadump reserved memory will not be given to
 * CMA and pages for that portion stay reserved. Boot memory size is
 * aligned per CMA requirement to satisfy the cma_init_reserved_mem() call.
 * Even if the CMA init fails, the memory reservation is still with us
 * and fadump can still proceed.
 */
void __init fadump_cma_init(void)
{
	unsigned long long base, size;
	int rc;

	if (!fw_dump.fadump_supported || !fw_dump.fadump_enabled ||
	    fw_dump.dump_active)
		return;
	/*
	 * Do not use CMA if user has provided fadump=nocma kernel parameter.
	 */
	if (fw_dump.nocma || !fw_dump.boot_memory_size)
		return;

	base = fw_dump.reserve_dump_area_start;
	size = fw_dump.boot_memory_size;

	rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
	if (rc) {
		pr_err("Failed to init cma area for firmware-assisted dump, %d\n", rc);
		/*
		 * Though the CMA init has failed, we still have the memory
		 * reservation with us. The reserved memory will be blocked
		 * from production system usage. Hence, return without error
		 * so that fadump can continue.
		 */
		return;
	}

	/*
	 * If CMA activation fails, keep the pages reserved, instead of
	 * exposing them to buddy allocator. Same as 'fadump=nocma' case.
	 */
	cma_reserve_pages_on_error(fadump_cma);

	/*
	 * We have now successfully initialized the cma area for fadump.
	 */
	pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx bytes of memory reserved for firmware-assisted dump\n",
		cma_get_size(fadump_cma),
		(unsigned long)cma_get_base(fadump_cma) >> 20,
		fw_dump.reserve_dump_area_size);
}
#endif /* CONFIG_CMA */

/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
				      int depth, void *data)
{
	if (depth == 0) {
		early_init_dt_scan_reserved_ranges(node);
		return 0;
	}

	if (depth != 1)
		return 0;

	if (strcmp(uname, "rtas") == 0) {
		rtas_fadump_dt_scan(&fw_dump, node);
		return 1;
	}

	if (strcmp(uname, "ibm,opal") == 0) {
		opal_fadump_dt_scan(&fw_dump, node);
		return 1;
	}

	return 0;
}

/*
 * If fadump is registered, check if the memory provided
 * falls within boot memory area and reserved memory area.
 */
int is_fadump_memory_area(u64 addr, unsigned long size)
{
	u64 d_start, d_end;

	if (!fw_dump.dump_registered)
		return 0;

	if (!size)
		return 0;

	d_start = fw_dump.reserve_dump_area_start;
	d_end = d_start + fw_dump.reserve_dump_area_size;
	if (((addr + size) > d_start) && (addr <= d_end))
		return 1;

	return (addr <= fw_dump.boot_mem_top);
}

int should_fadump_crash(void)
{
	if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
		return 0;
	return 1;
}

int is_fadump_active(void)
{
	return fw_dump.dump_active;
}

/*
 * Returns true, if there are no holes in the memory area between d_start
 * and d_end, false otherwise.
 */
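/*
 * For example, with memblock ranges [0, 1GB) and [2GB, 4GB), the area
 * [512MB, 3GB) spans the hole at [1GB, 2GB) and is not contiguous.
 */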
static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
	phys_addr_t reg_start, reg_end;
	bool ret = false;
	u64 i, start, end;

	for_each_mem_range(i, &reg_start, &reg_end) {
		start = max_t(u64, d_start, reg_start);
		end = min_t(u64, d_end, reg_end);
		if (d_start < end) {
			/* Memory hole from d_start to start */
			if (start > d_start)
				break;

			if (end == d_end) {
				ret = true;
				break;
			}

			d_start = end + 1;
		}
	}

	return ret;
}

/*
 * Returns true, if there are no holes in boot memory area,
 * false otherwise.
 */
bool is_fadump_boot_mem_contiguous(void)
{
	unsigned long d_start, d_end;
	bool ret = false;
	int i;

	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		d_start = fw_dump.boot_mem_addr[i];
		d_end = d_start + fw_dump.boot_mem_sz[i];

		ret = is_fadump_mem_area_contiguous(d_start, d_end);
		if (!ret)
			break;
	}

	return ret;
}

/*
 * Returns true, if there are no holes in reserved memory area,
 * false otherwise.
 */
bool is_fadump_reserved_mem_contiguous(void)
{
	u64 d_start, d_end;

	d_start = fw_dump.reserve_dump_area_start;
	d_end = d_start + fw_dump.reserve_dump_area_size;
	return is_fadump_mem_area_contiguous(d_start, d_end);
}

/* Print firmware assisted dump configurations for debugging purposes. */
static void __init fadump_show_config(void)
{
	int i;

	pr_debug("Support for firmware-assisted dump (fadump): %s\n",
		 (fw_dump.fadump_supported ? "present" : "no support"));

	if (!fw_dump.fadump_supported)
		return;

	pr_debug("Fadump enabled    : %s\n",
		 (fw_dump.fadump_enabled ? "yes" : "no"));
	pr_debug("Dump Active       : %s\n",
		 (fw_dump.dump_active ? "yes" : "no"));
	pr_debug("Dump section sizes:\n");
	pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
	pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
	pr_debug("    Boot memory size   : %lx\n", fw_dump.boot_memory_size);
	pr_debug("    Boot memory top    : %llx\n", fw_dump.boot_mem_top);
	pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		pr_debug("[%03d] base = %llx, size = %llx\n", i,
			 fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
	}
}

/**
 * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
 *
 * Function to find the largest memory size we need to reserve during early
 * boot process. This will be the size of the memory that is required for a
 * kernel to boot successfully.
 *
 * This function has been taken from phyp-assisted dump feature implementation.
 *
 * Returns the larger of 256MB or 5% of system RAM, rounded down to multiples
 * of 256MB.
 *
 * TODO: Come up with a better approach to find out a more accurate memory
 * size that is required for a kernel to boot successfully.
 *
 */
static __init u64 fadump_calculate_reserve_size(void)
{
	u64 base, size, bootmem_min;
	int ret;

	if (fw_dump.reserve_bootvar)
		pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");

	/*
	 * Check if the size is specified through crashkernel= cmdline
	 * option. If yes, then use that but ignore base as fadump reserves
	 * memory at a predefined offset.
	 */
	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&size, &base);
	if (ret == 0 && size > 0) {
		unsigned long max_size;

		if (fw_dump.reserve_bootvar)
			pr_info("Using 'crashkernel=' parameter for memory reservation.\n");

		fw_dump.reserve_bootvar = (unsigned long)size;

		/*
		 * Adjust if the boot memory size specified is above
		 * the upper limit.
		 */
		max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
		if (fw_dump.reserve_bootvar > max_size) {
			fw_dump.reserve_bootvar = max_size;
			pr_info("Adjusted boot memory size to %luMB\n",
				(fw_dump.reserve_bootvar >> 20));
		}

		return fw_dump.reserve_bootvar;
	} else if (fw_dump.reserve_bootvar) {
		/*
		 * 'fadump_reserve_mem=' is being used to reserve memory
		 * for firmware-assisted dump.
		 */
		return fw_dump.reserve_bootvar;
	}

	/* divide by 20 to get 5% of value */
	size = memblock_phys_mem_size() / 20;

	/* round it down to multiples of 256MB */
	size = size & ~0x0FFFFFFFUL;
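
	/*
	 * For example, on a 64GB system, 5% is ~3.2GB (0xCCCCCCCC), which
	 * rounds down to 3GB (0xC0000000).
	 */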

	/* Truncate to memory_limit. We don't want to over-reserve memory. */
	if (memory_limit && size > memory_limit)
		size = memory_limit;

	bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
	return (size > bootmem_min ? size : bootmem_min);
}

/*
 * Calculate the total memory size required to be reserved for
 * firmware-assisted dump registration.
 */
static unsigned long __init get_fadump_area_size(void)
{
	unsigned long size = 0;

	size += fw_dump.cpu_state_data_size;
	size += fw_dump.hpte_region_size;
	/*
	 * Account for pagesize alignment of boot memory area destination
	 * address. This facilitates mmap reading of the first kernel's memory.
	 */
	size = PAGE_ALIGN(size);
	size += fw_dump.boot_memory_size;
	size += sizeof(struct fadump_crash_info_header);
	size += sizeof(struct elfhdr); /* ELF core header.*/
	size += sizeof(struct elf_phdr); /* place holder for cpu notes */
	/* Program headers for crash memory regions. */
	size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);

	size = PAGE_ALIGN(size);

	/* This is to hold kernel metadata on platforms that support it */
	size += (fw_dump.ops->fadump_get_metadata_size ?
		 fw_dump.ops->fadump_get_metadata_size() : 0);
	return size;
}

static int __init add_boot_mem_region(unsigned long rstart,
				      unsigned long rsize)
{
	int i = fw_dump.boot_mem_regs_cnt++;

	if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
		fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
		return 0;
	}

	pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
		 i, rstart, (rstart + rsize));
	fw_dump.boot_mem_addr[i] = rstart;
	fw_dump.boot_mem_sz[i] = rsize;
	return 1;
}

/*
 * Firmware usually has a hard limit on the data it can copy per region.
 * Honour that by splitting a memory range into multiple regions; e.g., a
 * 1GB range is recorded as four regions when max_copy_size is 256MB.
 */
static int __init add_boot_mem_regions(unsigned long mstart,
				       unsigned long msize)
{
	unsigned long rstart, rsize, max_size;
	int ret = 1;

	rstart = mstart;
	max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
	while (msize) {
		if (msize > max_size)
			rsize = max_size;
		else
			rsize = msize;

		ret = add_boot_mem_region(rstart, rsize);
		if (!ret)
			break;

		msize -= rsize;
		rstart += rsize;
	}

	return ret;
}

static int __init fadump_get_boot_mem_regions(void)
{
	unsigned long size, cur_size, hole_size, last_end;
	unsigned long mem_size = fw_dump.boot_memory_size;
	phys_addr_t reg_start, reg_end;
	int ret = 1;
	u64 i;

	fw_dump.boot_mem_regs_cnt = 0;

	last_end = 0;
	hole_size = 0;
	cur_size = 0;
	for_each_mem_range(i, &reg_start, &reg_end) {
		size = reg_end - reg_start;
		hole_size += (reg_start - last_end);

		if ((cur_size + size) >= mem_size) {
			size = (mem_size - cur_size);
			ret = add_boot_mem_regions(reg_start, size);
			break;
		}

		mem_size -= size;
		cur_size += size;
		ret = add_boot_mem_regions(reg_start, size);
		if (!ret)
			break;

		last_end = reg_end;
	}
	fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);

	return ret;
}

/*
 * Returns true, if the given range overlaps with the reserved memory ranges
 * starting at idx, false otherwise. Also, updates idx to the index of the
 * reserved memory range that overlaps with the given memory range.
 */
static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)
{
	bool ret = false;
	int i;

	for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
		u64 rbase = reserved_mrange_info.mem_ranges[i].base;
		u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;

		if (end <= rbase)
			break;

		if ((end > rbase) && (base < rend)) {
			*idx = i;
			ret = true;
			break;
		}
	}

	return ret;
}

/*
 * Locate a suitable memory area to reserve memory for FADump. While at it,
 * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
 */
static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
{
	struct fadump_memory_range *mrngs;
	phys_addr_t mstart, mend;
	int idx = 0;
	u64 i, ret = 0;

	mrngs = reserved_mrange_info.mem_ranges;
	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
				&mstart, &mend, NULL) {
		pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
			 i, mstart, mend, base);

		if (mstart > base)
			base = PAGE_ALIGN(mstart);

		while ((mend > base) && ((mend - base) >= size)) {
			if (!overlaps_reserved_ranges(base, base + size, &idx)) {
				ret = base;
				goto out;
			}

			base = mrngs[idx].base + mrngs[idx].size;
			base = PAGE_ALIGN(base);
		}
	}

out:
	return ret;
}

int __init fadump_reserve_mem(void)
{
	u64 base, size, mem_boundary, bootmem_min;
	int ret = 1;

	if (!fw_dump.fadump_enabled)
		return 0;

	if (!fw_dump.fadump_supported) {
		pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
		goto error_out;
	}

	/*
	 * Initialize boot memory size.
	 * If dump is active then we have already calculated the size during
	 * the first kernel's boot.
	 */
	if (!fw_dump.dump_active) {
		fw_dump.boot_memory_size =
			PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
		if (!fw_dump.nocma) {
			fw_dump.boot_memory_size =
				ALIGN(fw_dump.boot_memory_size,
				      CMA_MIN_ALIGNMENT_BYTES);
		}
#endif

		bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
		if (fw_dump.boot_memory_size < bootmem_min) {
			pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
			       fw_dump.boot_memory_size, bootmem_min);
			goto error_out;
		}

		if (!fadump_get_boot_mem_regions()) {
			pr_err("Too many holes in boot memory area to enable fadump\n");
			goto error_out;
		}
	}

	/*
	 * Calculate the memory boundary.
	 * If memory_limit is less than actual memory boundary then reserve
	 * the memory for fadump beyond the memory_limit and adjust the
	 * memory_limit accordingly, so that the running kernel can run with
	 * specified memory_limit.
	 */
	if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
		size = get_fadump_area_size();
		if ((memory_limit + size) < memblock_end_of_DRAM())
			memory_limit += size;
		else
			memory_limit = memblock_end_of_DRAM();
		printk(KERN_INFO "Adjusted memory_limit for firmware-assisted dump, now %#016llx\n",
		       memory_limit);
	}
	if (memory_limit)
		mem_boundary = memory_limit;
	else
		mem_boundary = memblock_end_of_DRAM();

	base = fw_dump.boot_mem_top;
	size = get_fadump_area_size();
	fw_dump.reserve_dump_area_size = size;
	if (fw_dump.dump_active) {
		pr_info("Firmware-assisted dump is active.\n");

#ifdef CONFIG_HUGETLB_PAGE
		/*
		 * FADump capture kernel doesn't care much about hugepages.
		 * In fact, handling hugepages in capture kernel is asking for
		 * trouble. So, disable HugeTLB support when fadump is active.
		 */
		hugetlb_disabled = true;
#endif
		/*
		 * If last boot has crashed then reserve all the memory
		 * above boot memory size so that we don't touch it until
		 * dump is written to disk by userspace tool. This memory
		 * can be released for general use by invalidating fadump.
		 */
		fadump_reserve_crash_area(base);

		pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
		pr_debug("Reserve dump area start address: 0x%lx\n",
			 fw_dump.reserve_dump_area_start);
	} else {
		/*
		 * Reserve memory at an offset closer to bottom of the RAM to
		 * minimize the impact of memory hot-remove operation.
		 */
		base = fadump_locate_reserve_mem(base, size);

		if (!base || (base + size > mem_boundary)) {
			pr_err("Failed to find memory chunk for reservation!\n");
			goto error_out;
		}
		fw_dump.reserve_dump_area_start = base;

		/*
		 * Calculate the kernel metadata address and register it with
		 * f/w if the platform supports.
		 */
		if (fw_dump.ops->fadump_setup_metadata &&
		    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
			goto error_out;

		if (memblock_reserve(base, size)) {
			pr_err("Failed to reserve memory!\n");
			goto error_out;
		}

		pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
			(size >> 20), base, (memblock_phys_mem_size() >> 20));
	}

	return ret;
error_out:
	fw_dump.fadump_enabled = 0;
	fw_dump.reserve_dump_area_size = 0;
	return 0;
}

/* Look for fadump= cmdline option. */
static int __init early_fadump_param(char *p)
{
	if (!p)
		return 1;

	if (strncmp(p, "on", 2) == 0)
		fw_dump.fadump_enabled = 1;
	else if (strncmp(p, "off", 3) == 0)
		fw_dump.fadump_enabled = 0;
	else if (strncmp(p, "nocma", 5) == 0) {
		fw_dump.fadump_enabled = 1;
		fw_dump.nocma = 1;
	}

	return 0;
}
early_param("fadump", early_fadump_param);
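
/*
 * Illustrative command lines (the fadump= values follow the parser above):
 *   fadump=on crashkernel=1024M  - enable fadump; the 1GB boot memory size
 *                                  is picked up in fadump_calculate_reserve_size()
 *   fadump=nocma                 - enable fadump without backing the
 *                                  reservation with CMA
 */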

/*
 * Look for fadump_reserve_mem= cmdline option.
 * TODO: Remove references to the 'fadump_reserve_mem=' parameter once
 * the 'crashkernel=' parameter becomes the norm.
 */
static int __init early_fadump_reserve_mem(char *p)
{
	if (p)
		fw_dump.reserve_bootvar = memparse(p, &p);
	return 0;
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);

void crash_fadump(struct pt_regs *regs, const char *str)
{
	unsigned int msecs;
	struct fadump_crash_info_header *fdh = NULL;
	int old_cpu, this_cpu;
	/* Do not include first CPU */
	unsigned int ncpus = num_online_cpus() - 1;

	if (!should_fadump_crash())
		return;

	/*
	 * old_cpu == -1 means this is the first CPU which has come here,
	 * go ahead and trigger fadump.
	 *
	 * old_cpu != -1 means some other CPU is already on its way
	 * to trigger fadump, just keep looping here.
	 */
	this_cpu = smp_processor_id();
	old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);

	if (old_cpu != -1) {
		atomic_inc(&cpus_in_fadump);

		/*
		 * We can't loop here indefinitely. Wait as long as fadump
		 * is in force. If we race with fadump un-registration this
		 * loop will break and then we go down the normal panic path
		 * and reboot. If fadump is in force the first crashing
		 * cpu will definitely trigger fadump.
		 */
		while (fw_dump.dump_registered)
			cpu_relax();
		return;
	}

	fdh = __va(fw_dump.fadumphdr_addr);
	fdh->crashing_cpu = crashing_cpu;
	crash_save_vmcoreinfo();

	if (regs)
		fdh->regs = *regs;
	else
		ppc_save_regs(&fdh->regs);

	fdh->cpu_mask = *cpu_online_mask;

	/*
	 * If we came in via system reset, wait a while for the secondary
	 * CPUs to enter.
	 */
	if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) {
		msecs = CRASH_TIMEOUT;
		while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
			mdelay(1);
	}

	fw_dump.ops->fadump_trigger(fdh, str);
}

u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
	struct elf_prstatus prstatus;

	memset(&prstatus, 0, sizeof(prstatus));
	/*
	 * FIXME: How do I get the PID? Do I really need it?
	 * prstatus.pr_pid = ????
	 */
	elf_core_copy_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	return buf;
}

void __init fadump_update_elfcore_header(char *bufp)
{
	struct elf_phdr *phdr;

	bufp += sizeof(struct elfhdr);

	/* First note is a place holder for cpu notes info. */
	phdr = (struct elf_phdr *)bufp;

	if (phdr->p_type == PT_NOTE) {
		phdr->p_paddr	= __pa(fw_dump.cpu_notes_buf_vaddr);
		phdr->p_offset	= phdr->p_paddr;
		phdr->p_filesz	= fw_dump.cpu_notes_buf_size;
		phdr->p_memsz	= fw_dump.cpu_notes_buf_size;
	}
}

/* Allocate a zeroed buffer and mark its pages reserved. */
static void *__init fadump_alloc_buffer(unsigned long size)
{
	unsigned long count, i;
	struct page *page;
	void *vaddr;

	vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
	if (!vaddr)
		return NULL;

	count = PAGE_ALIGN(size) / PAGE_SIZE;
	page = virt_to_page(vaddr);
	for (i = 0; i < count; i++)
		mark_page_reserved(page + i);
	return vaddr;
}

static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
	free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
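
/*
 * The CPU notes buffer holds one note_buf_t per CPU. Its physical address
 * is published via the placeholder PT_NOTE (see fadump_update_elfcore_header()
 * above), so the capture kernel can locate the crashed CPUs' register state.
 */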

s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)
{
	/* Allocate buffer to hold cpu crash notes. */
	fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
	fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
	fw_dump.cpu_notes_buf_vaddr =
		(unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
	if (!fw_dump.cpu_notes_buf_vaddr) {
		pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
		       fw_dump.cpu_notes_buf_size);
		return -ENOMEM;
	}

	pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
		 fw_dump.cpu_notes_buf_size,
		 fw_dump.cpu_notes_buf_vaddr);
	return 0;
}

void fadump_free_cpu_notes_buf(void)
{
	if (!fw_dump.cpu_notes_buf_vaddr)
		return;

	fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
			   fw_dump.cpu_notes_buf_size);
	fw_dump.cpu_notes_buf_vaddr = 0;
	fw_dump.cpu_notes_buf_size = 0;
}

static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
	if (mrange_info->is_static) {
		mrange_info->mem_range_cnt = 0;
		return;
	}

	kfree(mrange_info->mem_ranges);
	memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0,
	       (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ));
}

/*
 * Allocate or reallocate mem_ranges array in incremental units
 * of PAGE_SIZE.
 */
static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
{
	struct fadump_memory_range *new_array;
	u64 new_size;

	new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
	pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
		 new_size, mrange_info->name);

	new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
	if (new_array == NULL) {
		pr_err("Insufficient memory for setting up %s memory ranges\n",
		       mrange_info->name);
		fadump_free_mem_ranges(mrange_info);
		return -ENOMEM;
	}

	mrange_info->mem_ranges = new_array;
	mrange_info->mem_ranges_sz = new_size;
	mrange_info->max_mem_ranges = (new_size /
				       sizeof(struct fadump_memory_range));
	return 0;
}
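
/*
 * Add [base, end) to the given range list, folding it into the previous
 * entry when the two are adjacent; e.g., [4GB, 5GB) followed by [5GB, 6GB)
 * becomes a single [4GB, 6GB) entry. Boot memory is never folded as it
 * needs its own PT_LOAD segment(s).
 */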
static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
				       u64 base, u64 end)
{
	struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
	bool is_adjacent = false;
	u64 start, size;

	if (base == end)
		return 0;

	/*
	 * Fold adjacent memory ranges to bring down the memory ranges/
	 * PT_LOAD segments count.
	 */
	if (mrange_info->mem_range_cnt) {
		start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
		size  = mem_ranges[mrange_info->mem_range_cnt - 1].size;

		/*
		 * Boot memory area needs separate PT_LOAD segment(s) as it
		 * is moved to a different location at the time of crash.
		 * So, fold only if the region is not boot memory area.
		 */
		if ((start + size) == base && start >= fw_dump.boot_mem_top)
			is_adjacent = true;
	}
	if (!is_adjacent) {
		/* resize the array on reaching the limit */
		if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
			int ret;

			if (mrange_info->is_static) {
				pr_err("Reached array size limit for %s memory ranges\n",
				       mrange_info->name);
				return -ENOSPC;
			}

			ret = fadump_alloc_mem_ranges(mrange_info);
			if (ret)
				return ret;

			/* Update to the new resized array */
			mem_ranges = mrange_info->mem_ranges;
		}

		start = base;
		mem_ranges[mrange_info->mem_range_cnt].base = start;
		mrange_info->mem_range_cnt++;
	}

	mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
	pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
		 mrange_info->name, (mrange_info->mem_range_cnt - 1),
		 start, end - 1, (end - start));
	return 0;
}

static int fadump_exclude_reserved_area(u64 start, u64 end)
{
	u64 ra_start, ra_end;
	int ret = 0;

	ra_start = fw_dump.reserve_dump_area_start;
	ra_end = ra_start + fw_dump.reserve_dump_area_size;

	if ((ra_start < end) && (ra_end > start)) {
		if ((start < ra_start) && (end > ra_end)) {
			ret = fadump_add_mem_range(&crash_mrange_info,
						   start, ra_start);
			if (ret)
				return ret;

			ret = fadump_add_mem_range(&crash_mrange_info,
						   ra_end, end);
		} else if (start < ra_start) {
			ret = fadump_add_mem_range(&crash_mrange_info,
						   start, ra_start);
		} else if (ra_end < end) {
			ret = fadump_add_mem_range(&crash_mrange_info,
						   ra_end, end);
		}
	} else
		ret = fadump_add_mem_range(&crash_mrange_info, start, end);

	return ret;
}

static int fadump_init_elfcore_header(char *bufp)
{
	struct elfhdr *elf;

	elf = (struct elfhdr *) bufp;
	bufp += sizeof(struct elfhdr);
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;

	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
		elf->e_flags = 2;
	else if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
		elf->e_flags = 1;
	else
		elf->e_flags = 0;

	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = 0;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;

	return 0;
}
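
/*
 * Resulting elfcore layout, as assembled by fadump_create_elfcore_headers()
 * below:
 *
 *   elfhdr | PT_NOTE (cpu notes placeholder) | PT_NOTE (vmcoreinfo) |
 *   PT_LOAD ... (one per crash memory range)
 */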

/*
 * Traverse through memblock structure and setup crash memory ranges. These
 * ranges will be used to create PT_LOAD program headers in elfcore header.
 */
static int fadump_setup_crash_memory_ranges(void)
{
	u64 i, start, end;
	int ret;

	pr_debug("Setup crash memory ranges.\n");
	crash_mrange_info.mem_range_cnt = 0;

	/*
	 * Boot memory region(s) registered with firmware are moved to
	 * a different location at the time of crash. Create separate program
	 * header(s) for this memory chunk(s) with the correct offset.
	 */
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		start = fw_dump.boot_mem_addr[i];
		end = start + fw_dump.boot_mem_sz[i];
		ret = fadump_add_mem_range(&crash_mrange_info, start, end);
		if (ret)
			return ret;
	}

	for_each_mem_range(i, &start, &end) {
		/*
		 * skip the memory chunk that is already added
		 * (0 through boot_mem_top).
		 */
		if (start < fw_dump.boot_mem_top) {
			if (end > fw_dump.boot_mem_top)
				start = fw_dump.boot_mem_top;
			else
				continue;
		}

		/* add this range excluding the reserved dump area. */
		ret = fadump_exclude_reserved_area(start, end);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * If the given physical address falls within the boot memory region then
 * return the relocated address that points to the dump region reserved
 * for saving initial boot memory contents.
 */
static inline unsigned long fadump_relocate(unsigned long paddr)
{
	unsigned long raddr, rstart, rend, rlast, hole_size;
	int i;

	hole_size = 0;
	rlast = 0;
	raddr = paddr;
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		rstart = fw_dump.boot_mem_addr[i];
		rend = rstart + fw_dump.boot_mem_sz[i];
		hole_size += (rstart - rlast);

		if (paddr >= rstart && paddr < rend) {
			raddr += fw_dump.boot_mem_dest_addr - hole_size;
			break;
		}

		rlast = rend;
	}

	pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
	return raddr;
}

static int fadump_create_elfcore_headers(char *bufp)
{
	unsigned long long raddr, offset;
	struct elf_phdr *phdr;
	struct elfhdr *elf;
	int i, j;

	fadump_init_elfcore_header(bufp);
	elf = (struct elfhdr *)bufp;
	bufp += sizeof(struct elfhdr);

	/*
	 * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
	 * will be populated during second kernel boot after crash. Hence
	 * this PT_NOTE will always be the first elf note.
	 *
	 * NOTE: Any new ELF note addition should be placed after this note.
	 */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type = PT_NOTE;
	phdr->p_flags = 0;
	phdr->p_vaddr = 0;
	phdr->p_align = 0;

	phdr->p_offset = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = 0;
	phdr->p_memsz = 0;

	(elf->e_phnum)++;

	/* setup ELF PT_NOTE for vmcoreinfo */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type = PT_NOTE;
	phdr->p_flags = 0;
	phdr->p_vaddr = 0;
	phdr->p_align = 0;

	phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note());
	phdr->p_offset = phdr->p_paddr;
	phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE;

	/* Increment number of program headers. */
	(elf->e_phnum)++;

	/* setup PT_LOAD sections. */
	j = 0;
	offset = 0;
	raddr = fw_dump.boot_mem_addr[0];
	for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
		u64 mbase, msize;

		mbase = crash_mrange_info.mem_ranges[i].base;
		msize = crash_mrange_info.mem_ranges[i].size;
		if (!msize)
			continue;

		phdr = (struct elf_phdr *)bufp;
		bufp += sizeof(struct elf_phdr);
		phdr->p_type	= PT_LOAD;
		phdr->p_flags	= PF_R|PF_W|PF_X;
		phdr->p_offset	= mbase;

		if (mbase == raddr) {
			/*
			 * The entire real memory region will be moved by
			 * firmware to the specified destination_address.
			 * Hence set the correct offset.
			 */
			phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
			if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
				offset += fw_dump.boot_mem_sz[j];
				raddr = fw_dump.boot_mem_addr[++j];
			}
		}

		phdr->p_paddr = mbase;
		phdr->p_vaddr = (unsigned long)__va(mbase);
		phdr->p_filesz = msize;
		phdr->p_memsz = msize;
		phdr->p_align = 0;

		/* Increment number of program headers. */
		(elf->e_phnum)++;
	}
	return 0;
}

static unsigned long init_fadump_header(unsigned long addr)
{
	struct fadump_crash_info_header *fdh;

	if (!addr)
		return 0;

	fdh = __va(addr);
	addr += sizeof(struct fadump_crash_info_header);

	memset(fdh, 0, sizeof(struct fadump_crash_info_header));
	fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
	fdh->elfcorehdr_addr = addr;
	/* We will set the crashing cpu id in crash_fadump() during crash. */
	fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
	/*
	 * When the LPAR is terminated by PHYP, ensure all possible CPUs'
	 * register data is processed while exporting the vmcore.
	 */
	fdh->cpu_mask = *cpu_possible_mask;

	return addr;
}

static int register_fadump(void)
{
	unsigned long addr;
	void *vaddr;
	int ret;

	/*
	 * If no memory is reserved then we cannot register for firmware-
	 * assisted dump.
	 */
	if (!fw_dump.reserve_dump_area_size)
		return -ENODEV;

	ret = fadump_setup_crash_memory_ranges();
	if (ret)
		return ret;

	addr = fw_dump.fadumphdr_addr;

	/* Initialize fadump crash info header. */
	addr = init_fadump_header(addr);
	vaddr = __va(addr);

	pr_debug("Creating ELF core headers at %#016lx\n", addr);
	fadump_create_elfcore_headers(vaddr);

	/* Register the future kernel dump with firmware. */
	pr_debug("Registering for firmware-assisted kernel dump...\n");
	return fw_dump.ops->fadump_register(&fw_dump);
}

void fadump_cleanup(void)
{
	if (!fw_dump.fadump_supported)
		return;

	/* Invalidate the registration only if dump is active. */
	if (fw_dump.dump_active) {
		pr_debug("Invalidating firmware-assisted dump registration\n");
		fw_dump.ops->fadump_invalidate(&fw_dump);
	} else if (fw_dump.dump_registered) {
		/* Un-register Firmware-assisted dump if it was registered. */
		fw_dump.ops->fadump_unregister(&fw_dump);
		fadump_free_mem_ranges(&crash_mrange_info);
	}

	if (fw_dump.ops->fadump_cleanup)
		fw_dump.ops->fadump_cleanup(&fw_dump);
}

static void fadump_free_reserved_memory(unsigned long start_pfn,
					unsigned long end_pfn)
{
	unsigned long pfn;
	unsigned long time_limit = jiffies + HZ;

	pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
		PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		free_reserved_page(pfn_to_page(pfn));

		if (time_after(jiffies, time_limit)) {
			cond_resched();
			time_limit = jiffies + HZ;
		}
	}
}

/*
 * Skip memory holes and free memory that was actually reserved.
 */
static void fadump_release_reserved_area(u64 start, u64 end)
{
	unsigned long reg_spfn, reg_epfn;
	u64 tstart, tend, spfn, epfn;
	int i;

	spfn = PHYS_PFN(start);
	epfn = PHYS_PFN(end);

	for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
		tstart = max_t(u64, spfn, reg_spfn);
		tend   = min_t(u64, epfn, reg_epfn);

		if (tstart < tend) {
			fadump_free_reserved_memory(tstart, tend);

			if (tend == epfn)
				break;

			spfn = tend;
		}
	}
}

/*
 * Sort the mem ranges in-place and merge adjacent ranges to minimize the
 * memory ranges count; e.g., [10MB, 20MB) followed by [20MB, 30MB) is
 * merged into a single [10MB, 30MB) range.
 */
static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
{
	struct fadump_memory_range *mem_ranges;
	u64 base, size;
	int i, j, idx;

	if (!mrange_info->mem_range_cnt)
		return;

	/* Sort the memory ranges */
	mem_ranges = mrange_info->mem_ranges;
	for (i = 0; i < mrange_info->mem_range_cnt; i++) {
		idx = i;
		for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
			if (mem_ranges[idx].base > mem_ranges[j].base)
				idx = j;
		}
		if (idx != i)
			swap(mem_ranges[idx], mem_ranges[i]);
	}

	/* Merge adjacent reserved ranges */
	idx = 0;
	for (i = 1; i < mrange_info->mem_range_cnt; i++) {
		base = mem_ranges[i-1].base;
		size = mem_ranges[i-1].size;
		if (mem_ranges[i].base == (base + size))
			mem_ranges[idx].size += mem_ranges[i].size;
		else {
			idx++;
			if (i == idx)
				continue;

			mem_ranges[idx] = mem_ranges[i];
		}
	}
	mrange_info->mem_range_cnt = idx + 1;
}

/*
 * Scan reserved-ranges to consider them while reserving/releasing
 * memory for FADump.
 */
static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
{
	const __be32 *prop;
	int len, ret = -1;
	unsigned long i;

	/* reserved-ranges already scanned */
	if (reserved_mrange_info.mem_range_cnt != 0)
		return;

	prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
	if (!prop)
		return;

	/*
	 * Each reserved range is an (address, size) pair of 2 cells each,
	 * totalling 4 cells per range; e.g.,
	 * reserved-ranges = <0x0 0x10000000 0x0 0x00100000> describes a
	 * 1MB range at 256MB.
	 */
	for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
		u64 base, size;

		base = of_read_number(prop + (i * 4) + 0, 2);
		size = of_read_number(prop + (i * 4) + 2, 2);

		if (size) {
			ret = fadump_add_mem_range(&reserved_mrange_info,
						   base, base + size);
			if (ret < 0) {
				pr_warn("some reserved ranges are ignored!\n");
				break;
			}
		}
	}

	/* Compact reserved ranges */
	sort_and_merge_mem_ranges(&reserved_mrange_info);
}

/*
 * Release the memory that was reserved during early boot to preserve the
 * crashed kernel's memory contents except reserved dump area (permanent
 * reservation) and reserved ranges used by F/W. The released memory will
 * be available for general use.
 */
static void fadump_release_memory(u64 begin, u64 end)
{
	u64 ra_start, ra_end, tstart;
	int i, ret;

	ra_start = fw_dump.reserve_dump_area_start;
	ra_end = ra_start + fw_dump.reserve_dump_area_size;

	/*
	 * If reserved ranges array limit is hit, overwrite the last reserved
	 * memory range with reserved dump area to ensure it is excluded from
	 * the memory being released (reused for next FADump registration).
	 */
	if (reserved_mrange_info.mem_range_cnt ==
	    reserved_mrange_info.max_mem_ranges)
		reserved_mrange_info.mem_range_cnt--;

	ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
	if (ret != 0)
		return;

	/* Get the reserved ranges list in order first. */
	sort_and_merge_mem_ranges(&reserved_mrange_info);

	/* Exclude reserved ranges and release remaining memory */
	tstart = begin;
	for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
		ra_start = reserved_mrange_info.mem_ranges[i].base;
		ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;

		if (tstart >= ra_end)
			continue;

		if (tstart < ra_start)
			fadump_release_reserved_area(tstart, ra_start);
		tstart = ra_end;
	}

	if (tstart < end)
		fadump_release_reserved_area(tstart, end);
}

static void fadump_invalidate_release_mem(void)
{
	mutex_lock(&fadump_mutex);
	if (!fw_dump.dump_active) {
		mutex_unlock(&fadump_mutex);
		return;
	}

	fadump_cleanup();
	mutex_unlock(&fadump_mutex);

	fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
	fadump_free_cpu_notes_buf();

	/*
	 * Setup kernel metadata and initialize the kernel dump
	 * memory structure for FADump re-registration.
	 */
	if (fw_dump.ops->fadump_setup_metadata &&
	    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
		pr_warn("Failed to setup kernel metadata!\n");
	fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
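
/*
 * sysfs store handlers below. Illustrative usage, with paths as created in
 * fadump_init_files():
 *   echo 1 > /sys/kernel/fadump/registered    # register for a dump
 *   echo 0 > /sys/kernel/fadump/registered    # unregister
 *   echo 1 > /sys/kernel/fadump/release_mem   # release dump memory
 */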

static ssize_t release_mem_store(struct kobject *kobj,
				 struct kobj_attribute *attr,
				 const char *buf, size_t count)
{
	int input = -1;

	if (!fw_dump.dump_active)
		return -EPERM;

	if (kstrtoint(buf, 0, &input))
		return -EINVAL;

	if (input == 1) {
		/*
		 * Take away '/proc/vmcore'. We are releasing the dump
		 * memory, hence it will not be valid anymore.
		 */
#ifdef CONFIG_PROC_VMCORE
		vmcore_cleanup();
#endif
		fadump_invalidate_release_mem();

	} else
		return -EINVAL;
	return count;
}

/* Release the reserved memory and disable the FADump */
static void __init unregister_fadump(void)
{
	fadump_cleanup();
	fadump_release_memory(fw_dump.reserve_dump_area_start,
			      fw_dump.reserve_dump_area_size);
	fw_dump.fadump_enabled = 0;
	kobject_put(fadump_kobj);
}

static ssize_t enabled_show(struct kobject *kobj,
			    struct kobj_attribute *attr,
			    char *buf)
{
	return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}

static ssize_t mem_reserved_show(struct kobject *kobj,
				 struct kobj_attribute *attr,
				 char *buf)
{
	return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size);
}

static ssize_t registered_show(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       char *buf)
{
	return sprintf(buf, "%d\n", fw_dump.dump_registered);
}

static ssize_t registered_store(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t count)
{
	int ret = 0;
	int input = -1;

	if (!fw_dump.fadump_enabled || fw_dump.dump_active)
		return -EPERM;

	if (kstrtoint(buf, 0, &input))
		return -EINVAL;

	mutex_lock(&fadump_mutex);

	switch (input) {
	case 0:
		if (fw_dump.dump_registered == 0)
			goto unlock_out;

		/* Un-register Firmware-assisted dump */
		pr_debug("Un-register firmware-assisted dump\n");
		fw_dump.ops->fadump_unregister(&fw_dump);
		break;
	case 1:
		if (fw_dump.dump_registered == 1) {
			/* Un-register Firmware-assisted dump */
			fw_dump.ops->fadump_unregister(&fw_dump);
		}
		/* Register Firmware-assisted dump */
		ret = register_fadump();
		break;
	default:
		ret = -EINVAL;
		break;
	}

unlock_out:
	mutex_unlock(&fadump_mutex);
	return ret < 0 ? ret : count;
}
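
/*
 * Backs the debugfs file created in fadump_init_files(); typically visible
 * as <debugfs>/powerpc/fadump_region.
 */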
static int fadump_region_show(struct seq_file *m, void *private)
{
	if (!fw_dump.fadump_enabled)
		return 0;

	mutex_lock(&fadump_mutex);
	fw_dump.ops->fadump_region_show(&fw_dump, m);
	mutex_unlock(&fadump_mutex);
	return 0;
}

static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
static struct kobj_attribute register_attr = __ATTR_RW(registered);
static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);

static struct attribute *fadump_attrs[] = {
	&enable_attr.attr,
	&register_attr.attr,
	&mem_reserved_attr.attr,
	NULL,
};

ATTRIBUTE_GROUPS(fadump);

DEFINE_SHOW_ATTRIBUTE(fadump_region);

static void __init fadump_init_files(void)
{
	int rc = 0;

	fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
	if (!fadump_kobj) {
		pr_err("failed to create fadump kobject\n");
		return;
	}

	debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
			    &fadump_region_fops);

	if (fw_dump.dump_active) {
		rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
		if (rc)
			pr_err("unable to create release_mem sysfs file (%d)\n",
			       rc);
	}

	rc = sysfs_create_groups(fadump_kobj, fadump_groups);
	if (rc) {
		pr_err("sysfs group creation failed (%d), unregistering FADump\n",
		       rc);
		unregister_fadump();
		return;
	}

	/*
	 * The FADump sysfs files are moved from kernel_kobj to fadump_kobj;
	 * create symlinks at the old location to maintain backward
	 * compatibility.
	 *
	 *   - fadump_enabled -> fadump/enabled
	 *   - fadump_registered -> fadump/registered
	 *   - fadump_release_mem -> fadump/release_mem
	 */
	rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
						  "enabled", "fadump_enabled");
	if (rc) {
		pr_err("unable to create fadump_enabled symlink (%d)\n", rc);
		return;
	}

	rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
						  "registered",
						  "fadump_registered");
	if (rc) {
		pr_err("unable to create fadump_registered symlink (%d)\n", rc);
		sysfs_remove_link(kernel_kobj, "fadump_enabled");
		return;
	}

	if (fw_dump.dump_active) {
		rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
							  fadump_kobj,
							  "release_mem",
							  "fadump_release_mem");
		if (rc)
			pr_err("unable to create fadump_release_mem symlink (%d)\n",
			       rc);
	}
}

/*
 * Prepare for firmware-assisted dump.
 */
int __init setup_fadump(void)
{
	if (!fw_dump.fadump_supported)
		return 0;

	fadump_init_files();
	fadump_show_config();

	if (!fw_dump.fadump_enabled)
		return 1;

	/*
	 * If dump data is available then see if it is valid and prepare for
	 * saving it to the disk.
	 */
	if (fw_dump.dump_active) {
		/*
		 * If the dump process fails then invalidate the registration
		 * and release memory before proceeding with re-registration.
		 */
		if (fw_dump.ops->fadump_process(&fw_dump) < 0)
			fadump_invalidate_release_mem();
	}
	/* Initialize the kernel dump memory structure and register with f/w */
	else if (fw_dump.reserve_dump_area_size) {
		fw_dump.ops->fadump_init_mem_struct(&fw_dump);
		register_fadump();
	}

	/*
	 * In case of panic, fadump is triggered via ppc_panic_event()
	 * panic notifier. Setting crash_kexec_post_notifiers to 'true'
	 * lets panic() take a crash friendly path before panic
	 * notifiers are invoked.
	 */
	crash_kexec_post_notifiers = true;

	return 1;
}
/*
 * Use subsys_initcall_sync() here because there is a dependency on
 * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo
 * is initialized before registering with f/w.
 */
subsys_initcall_sync(setup_fadump);
#else /* !CONFIG_PRESERVE_FA_DUMP */

/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
				      int depth, void *data)
{
	if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
		return 0;

	opal_fadump_dt_scan(&fw_dump, node);
	return 1;
}

/*
 * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
 * preserve crash data. The subsequent memory preserving kernel boot
 * is likely to process this crash data.
 */
int __init fadump_reserve_mem(void)
{
	if (fw_dump.dump_active) {
		/*
		 * If last boot has crashed then reserve all the memory
		 * above boot memory to preserve crash data.
		 */
		pr_info("Preserving crash data for processing in next boot.\n");
		fadump_reserve_crash_area(fw_dump.boot_mem_top);
	} else
		pr_debug("FADump-aware kernel.\n");

	return 1;
}
#endif /* CONFIG_PRESERVE_FA_DUMP */

/* Preserve everything above the base address */
static void __init fadump_reserve_crash_area(u64 base)
{
	u64 i, mstart, mend, msize;

	for_each_mem_range(i, &mstart, &mend) {
		msize = mend - mstart;

		if ((mstart + msize) < base)
			continue;

		if (mstart < base) {
			msize -= (base - mstart);
			mstart = base;
		}

		pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data\n",
			(msize >> 20), mstart);
		memblock_reserve(mstart, msize);
	}
}

/* Report the amount of memory reserved, in pages. */
unsigned long __init arch_reserved_kernel_pages(void)
{
	return memblock_reserved_size() / PAGE_SIZE;
}