// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
 * dump with assistance from firmware. This approach does not use kexec,
 * instead firmware assists in booting the kdump kernel while preserving
 * memory contents. Most of the code implementation has been adapted
 * from phyp assisted dump implementation written by Linas Vepstas and
 * Manish Ahuja
 *
 * Copyright 2011 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "fadump: " fmt

#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/cma.h>
#include <linux/hugetlb.h>

#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/fadump.h>
#include <asm/fadump-internal.h>
#include <asm/setup.h>

/* Firmware-assisted dump state shared by every function in this file. */
static struct fw_dump fw_dump;

/*
 * Taken by the sysfs register handler, the debugfs region dump and the
 * invalidate/release path.
 */
static DEFINE_MUTEX(fadump_mutex);
/* Array of crash memory ranges, grown in PAGE_SIZE steps with krealloc(). */
struct fad_crash_memory_ranges *crash_memory_ranges;
/* Bytes currently allocated for crash_memory_ranges. */
int crash_memory_ranges_size;
/* Number of entries of crash_memory_ranges that are in use. */
int crash_mem_ranges;
/* Number of entries that fit in the current allocation. */
int max_crash_mem_ranges;

#ifdef CONFIG_CMA
/* CMA area set up by fadump_cma_init(). */
static struct cma *fadump_cma;

/*
 * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
 *
 * This function initializes CMA area from fadump reserved memory.
 * The total size of fadump reserved memory covers for boot memory size
 * + cpu data size + hpte size and metadata.
 * Initialize only the area equivalent to boot memory size for CMA use.
 * The remaining portion of fadump reserved memory will be not given
 * to CMA and pages for those will stay reserved. boot memory size is
 * aligned per CMA requirement to satisfy cma_init_reserved_mem() call.
56 * But for some reason even if it fails we still have the memory reservation 57 * with us and we can still continue doing fadump. 58 */ 59 int __init fadump_cma_init(void) 60 { 61 unsigned long long base, size; 62 int rc; 63 64 if (!fw_dump.fadump_enabled) 65 return 0; 66 67 /* 68 * Do not use CMA if user has provided fadump=nocma kernel parameter. 69 * Return 1 to continue with fadump old behaviour. 70 */ 71 if (fw_dump.nocma) 72 return 1; 73 74 base = fw_dump.reserve_dump_area_start; 75 size = fw_dump.boot_memory_size; 76 77 if (!size) 78 return 0; 79 80 rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma); 81 if (rc) { 82 pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc); 83 /* 84 * Though the CMA init has failed we still have memory 85 * reservation with us. The reserved memory will be 86 * blocked from production system usage. Hence return 1, 87 * so that we can continue with fadump. 88 */ 89 return 1; 90 } 91 92 /* 93 * So we now have successfully initialized cma area for fadump. 94 */ 95 pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx " 96 "bytes of memory reserved for firmware-assisted dump\n", 97 cma_get_size(fadump_cma), 98 (unsigned long)cma_get_base(fadump_cma) >> 20, 99 fw_dump.reserve_dump_area_size); 100 return 1; 101 } 102 #else 103 static int __init fadump_cma_init(void) { return 1; } 104 #endif /* CONFIG_CMA */ 105 106 /* Scan the Firmware Assisted dump configuration details. */ 107 int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, 108 int depth, void *data) 109 { 110 if (depth != 1) 111 return 0; 112 113 if (strcmp(uname, "rtas") == 0) { 114 rtas_fadump_dt_scan(&fw_dump, node); 115 return 1; 116 } 117 118 if (strcmp(uname, "ibm,opal") == 0) { 119 opal_fadump_dt_scan(&fw_dump, node); 120 return 1; 121 } 122 123 return 0; 124 } 125 126 /* 127 * If fadump is registered, check if the memory provided 128 * falls within boot memory area and reserved memory area. 
129 */ 130 int is_fadump_memory_area(u64 addr, ulong size) 131 { 132 u64 d_start = fw_dump.reserve_dump_area_start; 133 u64 d_end = d_start + fw_dump.reserve_dump_area_size; 134 135 if (!fw_dump.dump_registered) 136 return 0; 137 138 if (((addr + size) > d_start) && (addr <= d_end)) 139 return 1; 140 141 return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size; 142 } 143 144 int should_fadump_crash(void) 145 { 146 if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) 147 return 0; 148 return 1; 149 } 150 151 int is_fadump_active(void) 152 { 153 return fw_dump.dump_active; 154 } 155 156 /* 157 * Returns true, if there are no holes in memory area between d_start to d_end, 158 * false otherwise. 159 */ 160 static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end) 161 { 162 struct memblock_region *reg; 163 bool ret = false; 164 u64 start, end; 165 166 for_each_memblock(memory, reg) { 167 start = max_t(u64, d_start, reg->base); 168 end = min_t(u64, d_end, (reg->base + reg->size)); 169 if (d_start < end) { 170 /* Memory hole from d_start to start */ 171 if (start > d_start) 172 break; 173 174 if (end == d_end) { 175 ret = true; 176 break; 177 } 178 179 d_start = end + 1; 180 } 181 } 182 183 return ret; 184 } 185 186 /* 187 * Returns true, if there are no holes in boot memory area, 188 * false otherwise. 189 */ 190 bool is_fadump_boot_mem_contiguous(void) 191 { 192 return is_fadump_mem_area_contiguous(0, fw_dump.boot_memory_size); 193 } 194 195 /* 196 * Returns true, if there are no holes in reserved memory area, 197 * false otherwise. 198 */ 199 bool is_fadump_reserved_mem_contiguous(void) 200 { 201 u64 d_start, d_end; 202 203 d_start = fw_dump.reserve_dump_area_start; 204 d_end = d_start + fw_dump.reserve_dump_area_size; 205 return is_fadump_mem_area_contiguous(d_start, d_end); 206 } 207 208 /* Print firmware assisted dump configurations for debugging purpose. 
*/ 209 static void fadump_show_config(void) 210 { 211 pr_debug("Support for firmware-assisted dump (fadump): %s\n", 212 (fw_dump.fadump_supported ? "present" : "no support")); 213 214 if (!fw_dump.fadump_supported) 215 return; 216 217 pr_debug("Fadump enabled : %s\n", 218 (fw_dump.fadump_enabled ? "yes" : "no")); 219 pr_debug("Dump Active : %s\n", 220 (fw_dump.dump_active ? "yes" : "no")); 221 pr_debug("Dump section sizes:\n"); 222 pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); 223 pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); 224 pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size); 225 } 226 227 /** 228 * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM 229 * 230 * Function to find the largest memory size we need to reserve during early 231 * boot process. This will be the size of the memory that is required for a 232 * kernel to boot successfully. 233 * 234 * This function has been taken from phyp-assisted dump feature implementation. 235 * 236 * returns larger of 256MB or 5% rounded down to multiples of 256MB. 237 * 238 * TODO: Come up with better approach to find out more accurate memory size 239 * that is required for a kernel to boot successfully. 240 * 241 */ 242 static inline unsigned long fadump_calculate_reserve_size(void) 243 { 244 int ret; 245 unsigned long long base, size; 246 247 if (fw_dump.reserve_bootvar) 248 pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n"); 249 250 /* 251 * Check if the size is specified through crashkernel= cmdline 252 * option. If yes, then use that but ignore base as fadump reserves 253 * memory at a predefined offset. 
254 */ 255 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), 256 &size, &base); 257 if (ret == 0 && size > 0) { 258 unsigned long max_size; 259 260 if (fw_dump.reserve_bootvar) 261 pr_info("Using 'crashkernel=' parameter for memory reservation.\n"); 262 263 fw_dump.reserve_bootvar = (unsigned long)size; 264 265 /* 266 * Adjust if the boot memory size specified is above 267 * the upper limit. 268 */ 269 max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO; 270 if (fw_dump.reserve_bootvar > max_size) { 271 fw_dump.reserve_bootvar = max_size; 272 pr_info("Adjusted boot memory size to %luMB\n", 273 (fw_dump.reserve_bootvar >> 20)); 274 } 275 276 return fw_dump.reserve_bootvar; 277 } else if (fw_dump.reserve_bootvar) { 278 /* 279 * 'fadump_reserve_mem=' is being used to reserve memory 280 * for firmware-assisted dump. 281 */ 282 return fw_dump.reserve_bootvar; 283 } 284 285 /* divide by 20 to get 5% of value */ 286 size = memblock_phys_mem_size() / 20; 287 288 /* round it down in multiples of 256 */ 289 size = size & ~0x0FFFFFFFUL; 290 291 /* Truncate to memory_limit. We don't want to over reserve the memory.*/ 292 if (memory_limit && size > memory_limit) 293 size = memory_limit; 294 295 return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM); 296 } 297 298 /* 299 * Calculate the total memory size required to be reserved for 300 * firmware-assisted dump registration. 301 */ 302 static unsigned long get_fadump_area_size(void) 303 { 304 unsigned long size = 0; 305 306 size += fw_dump.cpu_state_data_size; 307 size += fw_dump.hpte_region_size; 308 size += fw_dump.boot_memory_size; 309 size += sizeof(struct fadump_crash_info_header); 310 size += sizeof(struct elfhdr); /* ELF core header.*/ 311 size += sizeof(struct elf_phdr); /* place holder for cpu notes */ 312 /* Program headers for crash memory regions. 
*/ 313 size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); 314 315 size = PAGE_ALIGN(size); 316 317 /* This is to hold kernel metadata on platforms that support it */ 318 size += (fw_dump.ops->fadump_get_metadata_size ? 319 fw_dump.ops->fadump_get_metadata_size() : 0); 320 return size; 321 } 322 323 static void __init fadump_reserve_crash_area(unsigned long base, 324 unsigned long size) 325 { 326 struct memblock_region *reg; 327 unsigned long mstart, mend, msize; 328 329 for_each_memblock(memory, reg) { 330 mstart = max_t(unsigned long, base, reg->base); 331 mend = reg->base + reg->size; 332 mend = min(base + size, mend); 333 334 if (mstart < mend) { 335 msize = mend - mstart; 336 memblock_reserve(mstart, msize); 337 pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n", 338 (msize >> 20), mstart); 339 } 340 } 341 } 342 343 int __init fadump_reserve_mem(void) 344 { 345 u64 base, size, mem_boundary; 346 int ret = 1; 347 348 if (!fw_dump.fadump_enabled) 349 return 0; 350 351 if (!fw_dump.fadump_supported) { 352 pr_info("Firmware-Assisted Dump is not supported on this hardware\n"); 353 goto error_out; 354 } 355 356 /* 357 * Initialize boot memory size 358 * If dump is active then we have already calculated the size during 359 * first kernel. 360 */ 361 if (!fw_dump.dump_active) { 362 fw_dump.boot_memory_size = 363 PAGE_ALIGN(fadump_calculate_reserve_size()); 364 #ifdef CONFIG_CMA 365 if (!fw_dump.nocma) 366 fw_dump.boot_memory_size = 367 ALIGN(fw_dump.boot_memory_size, 368 FADUMP_CMA_ALIGNMENT); 369 #endif 370 } 371 372 /* 373 * Calculate the memory boundary. 374 * If memory_limit is less than actual memory boundary then reserve 375 * the memory for fadump beyond the memory_limit and adjust the 376 * memory_limit accordingly, so that the running kernel can run with 377 * specified memory_limit. 
378 */ 379 if (memory_limit && memory_limit < memblock_end_of_DRAM()) { 380 size = get_fadump_area_size(); 381 if ((memory_limit + size) < memblock_end_of_DRAM()) 382 memory_limit += size; 383 else 384 memory_limit = memblock_end_of_DRAM(); 385 printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" 386 " dump, now %#016llx\n", memory_limit); 387 } 388 if (memory_limit) 389 mem_boundary = memory_limit; 390 else 391 mem_boundary = memblock_end_of_DRAM(); 392 393 base = fw_dump.boot_memory_size; 394 size = get_fadump_area_size(); 395 fw_dump.reserve_dump_area_size = size; 396 if (fw_dump.dump_active) { 397 pr_info("Firmware-assisted dump is active.\n"); 398 399 #ifdef CONFIG_HUGETLB_PAGE 400 /* 401 * FADump capture kernel doesn't care much about hugepages. 402 * In fact, handling hugepages in capture kernel is asking for 403 * trouble. So, disable HugeTLB support when fadump is active. 404 */ 405 hugetlb_disabled = true; 406 #endif 407 /* 408 * If last boot has crashed then reserve all the memory 409 * above boot_memory_size so that we don't touch it until 410 * dump is written to disk by userspace tool. This memory 411 * will be released for general use once the dump is saved. 412 */ 413 size = mem_boundary - base; 414 fadump_reserve_crash_area(base, size); 415 416 pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr); 417 pr_debug("Reserve dump area start address: 0x%lx\n", 418 fw_dump.reserve_dump_area_start); 419 } else { 420 /* 421 * Reserve memory at an offset closer to bottom of the RAM to 422 * minimize the impact of memory hot-remove operation. We can't 423 * use memblock_find_in_range() here since it doesn't allocate 424 * from bottom to top. 
425 */ 426 while (base <= (mem_boundary - size)) { 427 if (memblock_is_region_memory(base, size) && 428 !memblock_is_region_reserved(base, size)) 429 break; 430 431 base += size; 432 } 433 434 if (base > (mem_boundary - size)) { 435 pr_err("Failed to find memory chunk for reservation!\n"); 436 goto error_out; 437 } 438 fw_dump.reserve_dump_area_start = base; 439 440 /* 441 * Calculate the kernel metadata address and register it with 442 * f/w if the platform supports. 443 */ 444 if (fw_dump.ops->fadump_setup_metadata && 445 (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0)) 446 goto error_out; 447 448 if (memblock_reserve(base, size)) { 449 pr_err("Failed to reserve memory!\n"); 450 goto error_out; 451 } 452 453 pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n", 454 (size >> 20), base, (memblock_phys_mem_size() >> 20)); 455 456 ret = fadump_cma_init(); 457 } 458 459 return ret; 460 error_out: 461 fw_dump.fadump_enabled = 0; 462 return 0; 463 } 464 465 unsigned long __init arch_reserved_kernel_pages(void) 466 { 467 return memblock_reserved_size() / PAGE_SIZE; 468 } 469 470 /* Look for fadump= cmdline option. */ 471 static int __init early_fadump_param(char *p) 472 { 473 if (!p) 474 return 1; 475 476 if (strncmp(p, "on", 2) == 0) 477 fw_dump.fadump_enabled = 1; 478 else if (strncmp(p, "off", 3) == 0) 479 fw_dump.fadump_enabled = 0; 480 else if (strncmp(p, "nocma", 5) == 0) { 481 fw_dump.fadump_enabled = 1; 482 fw_dump.nocma = 1; 483 } 484 485 return 0; 486 } 487 early_param("fadump", early_fadump_param); 488 489 /* 490 * Look for fadump_reserve_mem= cmdline option 491 * TODO: Remove references to 'fadump_reserve_mem=' parameter, 492 * the sooner 'crashkernel=' parameter is accustomed to. 
493 */ 494 static int __init early_fadump_reserve_mem(char *p) 495 { 496 if (p) 497 fw_dump.reserve_bootvar = memparse(p, &p); 498 return 0; 499 } 500 early_param("fadump_reserve_mem", early_fadump_reserve_mem); 501 502 void crash_fadump(struct pt_regs *regs, const char *str) 503 { 504 struct fadump_crash_info_header *fdh = NULL; 505 int old_cpu, this_cpu; 506 507 if (!should_fadump_crash()) 508 return; 509 510 /* 511 * old_cpu == -1 means this is the first CPU which has come here, 512 * go ahead and trigger fadump. 513 * 514 * old_cpu != -1 means some other CPU has already on it's way 515 * to trigger fadump, just keep looping here. 516 */ 517 this_cpu = smp_processor_id(); 518 old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); 519 520 if (old_cpu != -1) { 521 /* 522 * We can't loop here indefinitely. Wait as long as fadump 523 * is in force. If we race with fadump un-registration this 524 * loop will break and then we go down to normal panic path 525 * and reboot. If fadump is in force the first crashing 526 * cpu will definitely trigger fadump. 527 */ 528 while (fw_dump.dump_registered) 529 cpu_relax(); 530 return; 531 } 532 533 fdh = __va(fw_dump.fadumphdr_addr); 534 fdh->crashing_cpu = crashing_cpu; 535 crash_save_vmcoreinfo(); 536 537 if (regs) 538 fdh->regs = *regs; 539 else 540 ppc_save_regs(&fdh->regs); 541 542 fdh->online_mask = *cpu_online_mask; 543 544 fw_dump.ops->fadump_trigger(fdh, str); 545 } 546 547 u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) 548 { 549 struct elf_prstatus prstatus; 550 551 memset(&prstatus, 0, sizeof(prstatus)); 552 /* 553 * FIXME: How do i get PID? Do I really need it? 554 * prstatus.pr_pid = ???? 
555 */ 556 elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); 557 buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, 558 &prstatus, sizeof(prstatus)); 559 return buf; 560 } 561 562 void fadump_update_elfcore_header(char *bufp) 563 { 564 struct elfhdr *elf; 565 struct elf_phdr *phdr; 566 567 elf = (struct elfhdr *)bufp; 568 bufp += sizeof(struct elfhdr); 569 570 /* First note is a place holder for cpu notes info. */ 571 phdr = (struct elf_phdr *)bufp; 572 573 if (phdr->p_type == PT_NOTE) { 574 phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr); 575 phdr->p_offset = phdr->p_paddr; 576 phdr->p_filesz = fw_dump.cpu_notes_buf_size; 577 phdr->p_memsz = fw_dump.cpu_notes_buf_size; 578 } 579 return; 580 } 581 582 static void *fadump_alloc_buffer(unsigned long size) 583 { 584 unsigned long count, i; 585 struct page *page; 586 void *vaddr; 587 588 vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); 589 if (!vaddr) 590 return NULL; 591 592 count = PAGE_ALIGN(size) / PAGE_SIZE; 593 page = virt_to_page(vaddr); 594 for (i = 0; i < count; i++) 595 mark_page_reserved(page + i); 596 return vaddr; 597 } 598 599 static void fadump_free_buffer(unsigned long vaddr, unsigned long size) 600 { 601 free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL); 602 } 603 604 s32 fadump_setup_cpu_notes_buf(u32 num_cpus) 605 { 606 /* Allocate buffer to hold cpu crash notes. 
*/ 607 fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); 608 fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size); 609 fw_dump.cpu_notes_buf_vaddr = 610 (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size); 611 if (!fw_dump.cpu_notes_buf_vaddr) { 612 pr_err("Failed to allocate %ld bytes for CPU notes buffer\n", 613 fw_dump.cpu_notes_buf_size); 614 return -ENOMEM; 615 } 616 617 pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n", 618 fw_dump.cpu_notes_buf_size, 619 fw_dump.cpu_notes_buf_vaddr); 620 return 0; 621 } 622 623 void fadump_free_cpu_notes_buf(void) 624 { 625 if (!fw_dump.cpu_notes_buf_vaddr) 626 return; 627 628 fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr, 629 fw_dump.cpu_notes_buf_size); 630 fw_dump.cpu_notes_buf_vaddr = 0; 631 fw_dump.cpu_notes_buf_size = 0; 632 } 633 634 static void free_crash_memory_ranges(void) 635 { 636 kfree(crash_memory_ranges); 637 crash_memory_ranges = NULL; 638 crash_memory_ranges_size = 0; 639 max_crash_mem_ranges = 0; 640 } 641 642 /* 643 * Allocate or reallocate crash memory ranges array in incremental units 644 * of PAGE_SIZE. 
645 */ 646 static int allocate_crash_memory_ranges(void) 647 { 648 struct fad_crash_memory_ranges *new_array; 649 u64 new_size; 650 651 new_size = crash_memory_ranges_size + PAGE_SIZE; 652 pr_debug("Allocating %llu bytes of memory for crash memory ranges\n", 653 new_size); 654 655 new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL); 656 if (new_array == NULL) { 657 pr_err("Insufficient memory for setting up crash memory ranges\n"); 658 free_crash_memory_ranges(); 659 return -ENOMEM; 660 } 661 662 crash_memory_ranges = new_array; 663 crash_memory_ranges_size = new_size; 664 max_crash_mem_ranges = (new_size / 665 sizeof(struct fad_crash_memory_ranges)); 666 return 0; 667 } 668 669 static inline int fadump_add_crash_memory(unsigned long long base, 670 unsigned long long end) 671 { 672 u64 start, size; 673 bool is_adjacent = false; 674 675 if (base == end) 676 return 0; 677 678 /* 679 * Fold adjacent memory ranges to bring down the memory ranges/ 680 * PT_LOAD segments count. 681 */ 682 if (crash_mem_ranges) { 683 start = crash_memory_ranges[crash_mem_ranges - 1].base; 684 size = crash_memory_ranges[crash_mem_ranges - 1].size; 685 686 if ((start + size) == base) 687 is_adjacent = true; 688 } 689 if (!is_adjacent) { 690 /* resize the array on reaching the limit */ 691 if (crash_mem_ranges == max_crash_mem_ranges) { 692 int ret; 693 694 ret = allocate_crash_memory_ranges(); 695 if (ret) 696 return ret; 697 } 698 699 start = base; 700 crash_memory_ranges[crash_mem_ranges].base = start; 701 crash_mem_ranges++; 702 } 703 704 crash_memory_ranges[crash_mem_ranges - 1].size = (end - start); 705 pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n", 706 (crash_mem_ranges - 1), start, end - 1, (end - start)); 707 return 0; 708 } 709 710 static int fadump_exclude_reserved_area(unsigned long long start, 711 unsigned long long end) 712 { 713 unsigned long long ra_start, ra_end; 714 int ret = 0; 715 716 ra_start = fw_dump.reserve_dump_area_start; 717 
ra_end = ra_start + fw_dump.reserve_dump_area_size; 718 719 if ((ra_start < end) && (ra_end > start)) { 720 if ((start < ra_start) && (end > ra_end)) { 721 ret = fadump_add_crash_memory(start, ra_start); 722 if (ret) 723 return ret; 724 725 ret = fadump_add_crash_memory(ra_end, end); 726 } else if (start < ra_start) { 727 ret = fadump_add_crash_memory(start, ra_start); 728 } else if (ra_end < end) { 729 ret = fadump_add_crash_memory(ra_end, end); 730 } 731 } else 732 ret = fadump_add_crash_memory(start, end); 733 734 return ret; 735 } 736 737 static int fadump_init_elfcore_header(char *bufp) 738 { 739 struct elfhdr *elf; 740 741 elf = (struct elfhdr *) bufp; 742 bufp += sizeof(struct elfhdr); 743 memcpy(elf->e_ident, ELFMAG, SELFMAG); 744 elf->e_ident[EI_CLASS] = ELF_CLASS; 745 elf->e_ident[EI_DATA] = ELF_DATA; 746 elf->e_ident[EI_VERSION] = EV_CURRENT; 747 elf->e_ident[EI_OSABI] = ELF_OSABI; 748 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); 749 elf->e_type = ET_CORE; 750 elf->e_machine = ELF_ARCH; 751 elf->e_version = EV_CURRENT; 752 elf->e_entry = 0; 753 elf->e_phoff = sizeof(struct elfhdr); 754 elf->e_shoff = 0; 755 #if defined(_CALL_ELF) 756 elf->e_flags = _CALL_ELF; 757 #else 758 elf->e_flags = 0; 759 #endif 760 elf->e_ehsize = sizeof(struct elfhdr); 761 elf->e_phentsize = sizeof(struct elf_phdr); 762 elf->e_phnum = 0; 763 elf->e_shentsize = 0; 764 elf->e_shnum = 0; 765 elf->e_shstrndx = 0; 766 767 return 0; 768 } 769 770 /* 771 * Traverse through memblock structure and setup crash memory ranges. These 772 * ranges will be used create PT_LOAD program headers in elfcore header. 773 */ 774 static int fadump_setup_crash_memory_ranges(void) 775 { 776 struct memblock_region *reg; 777 unsigned long long start, end; 778 int ret; 779 780 pr_debug("Setup crash memory ranges.\n"); 781 crash_mem_ranges = 0; 782 783 /* 784 * add the first memory chunk (RMA_START through boot_memory_size) as 785 * a separate memory chunk. 
The reason is, at the time crash firmware 786 * will move the content of this memory chunk to different location 787 * specified during fadump registration. We need to create a separate 788 * program header for this chunk with the correct offset. 789 */ 790 ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); 791 if (ret) 792 return ret; 793 794 for_each_memblock(memory, reg) { 795 start = (unsigned long long)reg->base; 796 end = start + (unsigned long long)reg->size; 797 798 /* 799 * skip the first memory chunk that is already added (RMA_START 800 * through boot_memory_size). This logic needs a relook if and 801 * when RMA_START changes to a non-zero value. 802 */ 803 BUILD_BUG_ON(RMA_START != 0); 804 if (start < fw_dump.boot_memory_size) { 805 if (end > fw_dump.boot_memory_size) 806 start = fw_dump.boot_memory_size; 807 else 808 continue; 809 } 810 811 /* add this range excluding the reserved dump area. */ 812 ret = fadump_exclude_reserved_area(start, end); 813 if (ret) 814 return ret; 815 } 816 817 return 0; 818 } 819 820 /* 821 * If the given physical address falls within the boot memory region then 822 * return the relocated address that points to the dump region reserved 823 * for saving initial boot memory contents. 824 */ 825 static inline unsigned long fadump_relocate(unsigned long paddr) 826 { 827 if (paddr > RMA_START && paddr < fw_dump.boot_memory_size) 828 return fw_dump.boot_mem_dest_addr + paddr; 829 else 830 return paddr; 831 } 832 833 static int fadump_create_elfcore_headers(char *bufp) 834 { 835 struct elfhdr *elf; 836 struct elf_phdr *phdr; 837 int i; 838 839 fadump_init_elfcore_header(bufp); 840 elf = (struct elfhdr *)bufp; 841 bufp += sizeof(struct elfhdr); 842 843 /* 844 * setup ELF PT_NOTE, place holder for cpu notes info. The notes info 845 * will be populated during second kernel boot after crash. Hence 846 * this PT_NOTE will always be the first elf note. 
847 * 848 * NOTE: Any new ELF note addition should be placed after this note. 849 */ 850 phdr = (struct elf_phdr *)bufp; 851 bufp += sizeof(struct elf_phdr); 852 phdr->p_type = PT_NOTE; 853 phdr->p_flags = 0; 854 phdr->p_vaddr = 0; 855 phdr->p_align = 0; 856 857 phdr->p_offset = 0; 858 phdr->p_paddr = 0; 859 phdr->p_filesz = 0; 860 phdr->p_memsz = 0; 861 862 (elf->e_phnum)++; 863 864 /* setup ELF PT_NOTE for vmcoreinfo */ 865 phdr = (struct elf_phdr *)bufp; 866 bufp += sizeof(struct elf_phdr); 867 phdr->p_type = PT_NOTE; 868 phdr->p_flags = 0; 869 phdr->p_vaddr = 0; 870 phdr->p_align = 0; 871 872 phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); 873 phdr->p_offset = phdr->p_paddr; 874 phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE; 875 876 /* Increment number of program headers. */ 877 (elf->e_phnum)++; 878 879 /* setup PT_LOAD sections. */ 880 881 for (i = 0; i < crash_mem_ranges; i++) { 882 unsigned long long mbase, msize; 883 mbase = crash_memory_ranges[i].base; 884 msize = crash_memory_ranges[i].size; 885 886 if (!msize) 887 continue; 888 889 phdr = (struct elf_phdr *)bufp; 890 bufp += sizeof(struct elf_phdr); 891 phdr->p_type = PT_LOAD; 892 phdr->p_flags = PF_R|PF_W|PF_X; 893 phdr->p_offset = mbase; 894 895 if (mbase == RMA_START) { 896 /* 897 * The entire RMA region will be moved by firmware 898 * to the specified destination_address. Hence set 899 * the correct offset. 900 */ 901 phdr->p_offset = fw_dump.boot_mem_dest_addr; 902 } 903 904 phdr->p_paddr = mbase; 905 phdr->p_vaddr = (unsigned long)__va(mbase); 906 phdr->p_filesz = msize; 907 phdr->p_memsz = msize; 908 phdr->p_align = 0; 909 910 /* Increment number of program headers. 
*/ 911 (elf->e_phnum)++; 912 } 913 return 0; 914 } 915 916 static unsigned long init_fadump_header(unsigned long addr) 917 { 918 struct fadump_crash_info_header *fdh; 919 920 if (!addr) 921 return 0; 922 923 fdh = __va(addr); 924 addr += sizeof(struct fadump_crash_info_header); 925 926 memset(fdh, 0, sizeof(struct fadump_crash_info_header)); 927 fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; 928 fdh->elfcorehdr_addr = addr; 929 /* We will set the crashing cpu id in crash_fadump() during crash. */ 930 fdh->crashing_cpu = FADUMP_CPU_UNKNOWN; 931 932 return addr; 933 } 934 935 static int register_fadump(void) 936 { 937 unsigned long addr; 938 void *vaddr; 939 int ret; 940 941 /* 942 * If no memory is reserved then we can not register for firmware- 943 * assisted dump. 944 */ 945 if (!fw_dump.reserve_dump_area_size) 946 return -ENODEV; 947 948 ret = fadump_setup_crash_memory_ranges(); 949 if (ret) 950 return ret; 951 952 addr = fw_dump.fadumphdr_addr; 953 954 /* Initialize fadump crash info header. */ 955 addr = init_fadump_header(addr); 956 vaddr = __va(addr); 957 958 pr_debug("Creating ELF core headers at %#016lx\n", addr); 959 fadump_create_elfcore_headers(vaddr); 960 961 /* register the future kernel dump with firmware. */ 962 pr_debug("Registering for firmware-assisted kernel dump...\n"); 963 return fw_dump.ops->fadump_register(&fw_dump); 964 } 965 966 void fadump_cleanup(void) 967 { 968 /* Invalidate the registration only if dump is active. */ 969 if (fw_dump.dump_active) { 970 pr_debug("Invalidating firmware-assisted dump registration\n"); 971 fw_dump.ops->fadump_invalidate(&fw_dump); 972 } else if (fw_dump.dump_registered) { 973 /* Un-register Firmware-assisted dump if it was registered. 
*/ 974 fw_dump.ops->fadump_unregister(&fw_dump); 975 free_crash_memory_ranges(); 976 } 977 } 978 979 static void fadump_free_reserved_memory(unsigned long start_pfn, 980 unsigned long end_pfn) 981 { 982 unsigned long pfn; 983 unsigned long time_limit = jiffies + HZ; 984 985 pr_info("freeing reserved memory (0x%llx - 0x%llx)\n", 986 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); 987 988 for (pfn = start_pfn; pfn < end_pfn; pfn++) { 989 free_reserved_page(pfn_to_page(pfn)); 990 991 if (time_after(jiffies, time_limit)) { 992 cond_resched(); 993 time_limit = jiffies + HZ; 994 } 995 } 996 } 997 998 /* 999 * Skip memory holes and free memory that was actually reserved. 1000 */ 1001 static void fadump_release_reserved_area(unsigned long start, unsigned long end) 1002 { 1003 struct memblock_region *reg; 1004 unsigned long tstart, tend; 1005 unsigned long start_pfn = PHYS_PFN(start); 1006 unsigned long end_pfn = PHYS_PFN(end); 1007 1008 for_each_memblock(memory, reg) { 1009 tstart = max(start_pfn, memblock_region_memory_base_pfn(reg)); 1010 tend = min(end_pfn, memblock_region_memory_end_pfn(reg)); 1011 if (tstart < tend) { 1012 fadump_free_reserved_memory(tstart, tend); 1013 1014 if (tend == end_pfn) 1015 break; 1016 1017 start_pfn = tend + 1; 1018 } 1019 } 1020 } 1021 1022 /* 1023 * Release the memory that was reserved in early boot to preserve the memory 1024 * contents. The released memory will be available for general use. 1025 */ 1026 static void fadump_release_memory(unsigned long begin, unsigned long end) 1027 { 1028 unsigned long ra_start, ra_end; 1029 1030 ra_start = fw_dump.reserve_dump_area_start; 1031 ra_end = ra_start + fw_dump.reserve_dump_area_size; 1032 1033 /* 1034 * exclude the dump reserve area. Will reuse it for next 1035 * fadump registration. 
1036 */ 1037 if (begin < ra_end && end > ra_start) { 1038 if (begin < ra_start) 1039 fadump_release_reserved_area(begin, ra_start); 1040 if (end > ra_end) 1041 fadump_release_reserved_area(ra_end, end); 1042 } else 1043 fadump_release_reserved_area(begin, end); 1044 } 1045 1046 static void fadump_invalidate_release_mem(void) 1047 { 1048 mutex_lock(&fadump_mutex); 1049 if (!fw_dump.dump_active) { 1050 mutex_unlock(&fadump_mutex); 1051 return; 1052 } 1053 1054 fadump_cleanup(); 1055 mutex_unlock(&fadump_mutex); 1056 1057 fadump_release_memory(fw_dump.boot_memory_size, memblock_end_of_DRAM()); 1058 fadump_free_cpu_notes_buf(); 1059 1060 /* Initialize the kernel dump memory structure for FAD registration. */ 1061 fw_dump.ops->fadump_init_mem_struct(&fw_dump); 1062 } 1063 1064 static ssize_t fadump_release_memory_store(struct kobject *kobj, 1065 struct kobj_attribute *attr, 1066 const char *buf, size_t count) 1067 { 1068 int input = -1; 1069 1070 if (!fw_dump.dump_active) 1071 return -EPERM; 1072 1073 if (kstrtoint(buf, 0, &input)) 1074 return -EINVAL; 1075 1076 if (input == 1) { 1077 /* 1078 * Take away the '/proc/vmcore'. We are releasing the dump 1079 * memory, hence it will not be valid anymore. 
1080 */ 1081 #ifdef CONFIG_PROC_VMCORE 1082 vmcore_cleanup(); 1083 #endif 1084 fadump_invalidate_release_mem(); 1085 1086 } else 1087 return -EINVAL; 1088 return count; 1089 } 1090 1091 static ssize_t fadump_enabled_show(struct kobject *kobj, 1092 struct kobj_attribute *attr, 1093 char *buf) 1094 { 1095 return sprintf(buf, "%d\n", fw_dump.fadump_enabled); 1096 } 1097 1098 static ssize_t fadump_register_show(struct kobject *kobj, 1099 struct kobj_attribute *attr, 1100 char *buf) 1101 { 1102 return sprintf(buf, "%d\n", fw_dump.dump_registered); 1103 } 1104 1105 static ssize_t fadump_register_store(struct kobject *kobj, 1106 struct kobj_attribute *attr, 1107 const char *buf, size_t count) 1108 { 1109 int ret = 0; 1110 int input = -1; 1111 1112 if (!fw_dump.fadump_enabled || fw_dump.dump_active) 1113 return -EPERM; 1114 1115 if (kstrtoint(buf, 0, &input)) 1116 return -EINVAL; 1117 1118 mutex_lock(&fadump_mutex); 1119 1120 switch (input) { 1121 case 0: 1122 if (fw_dump.dump_registered == 0) { 1123 goto unlock_out; 1124 } 1125 1126 /* Un-register Firmware-assisted dump */ 1127 pr_debug("Un-register firmware-assisted dump\n"); 1128 fw_dump.ops->fadump_unregister(&fw_dump); 1129 break; 1130 case 1: 1131 if (fw_dump.dump_registered == 1) { 1132 /* Un-register Firmware-assisted dump */ 1133 fw_dump.ops->fadump_unregister(&fw_dump); 1134 } 1135 /* Register Firmware-assisted dump */ 1136 ret = register_fadump(); 1137 break; 1138 default: 1139 ret = -EINVAL; 1140 break; 1141 } 1142 1143 unlock_out: 1144 mutex_unlock(&fadump_mutex); 1145 return ret < 0 ? 
ret : count; 1146 } 1147 1148 static int fadump_region_show(struct seq_file *m, void *private) 1149 { 1150 if (!fw_dump.fadump_enabled) 1151 return 0; 1152 1153 mutex_lock(&fadump_mutex); 1154 fw_dump.ops->fadump_region_show(&fw_dump, m); 1155 mutex_unlock(&fadump_mutex); 1156 return 0; 1157 } 1158 1159 static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem, 1160 0200, NULL, 1161 fadump_release_memory_store); 1162 static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled, 1163 0444, fadump_enabled_show, 1164 NULL); 1165 static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered, 1166 0644, fadump_register_show, 1167 fadump_register_store); 1168 1169 DEFINE_SHOW_ATTRIBUTE(fadump_region); 1170 1171 static void fadump_init_files(void) 1172 { 1173 struct dentry *debugfs_file; 1174 int rc = 0; 1175 1176 rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr); 1177 if (rc) 1178 printk(KERN_ERR "fadump: unable to create sysfs file" 1179 " fadump_enabled (%d)\n", rc); 1180 1181 rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr); 1182 if (rc) 1183 printk(KERN_ERR "fadump: unable to create sysfs file" 1184 " fadump_registered (%d)\n", rc); 1185 1186 debugfs_file = debugfs_create_file("fadump_region", 0444, 1187 powerpc_debugfs_root, NULL, 1188 &fadump_region_fops); 1189 if (!debugfs_file) 1190 printk(KERN_ERR "fadump: unable to create debugfs file" 1191 " fadump_region\n"); 1192 1193 if (fw_dump.dump_active) { 1194 rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr); 1195 if (rc) 1196 printk(KERN_ERR "fadump: unable to create sysfs file" 1197 " fadump_release_mem (%d)\n", rc); 1198 } 1199 return; 1200 } 1201 1202 /* 1203 * Prepare for firmware-assisted dump. 
1204 */ 1205 int __init setup_fadump(void) 1206 { 1207 if (!fw_dump.fadump_enabled) 1208 return 0; 1209 1210 if (!fw_dump.fadump_supported) { 1211 printk(KERN_ERR "Firmware-assisted dump is not supported on" 1212 " this hardware\n"); 1213 return 0; 1214 } 1215 1216 fadump_show_config(); 1217 /* 1218 * If dump data is available then see if it is valid and prepare for 1219 * saving it to the disk. 1220 */ 1221 if (fw_dump.dump_active) { 1222 /* 1223 * if dump process fails then invalidate the registration 1224 * and release memory before proceeding for re-registration. 1225 */ 1226 if (fw_dump.ops->fadump_process(&fw_dump) < 0) 1227 fadump_invalidate_release_mem(); 1228 } 1229 /* Initialize the kernel dump memory structure for FAD registration. */ 1230 else if (fw_dump.reserve_dump_area_size) 1231 fw_dump.ops->fadump_init_mem_struct(&fw_dump); 1232 1233 fadump_init_files(); 1234 1235 return 1; 1236 } 1237 subsys_initcall(setup_fadump); 1238