// SPDX-License-Identifier: GPL-2.0-only
/*
 * crash.c - kernel crash support code.
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/buildid.h>
#include <linux/crash_core.h>
#include <linux/init.h>
#include <linux/utsname.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/kexec.h>
#include <linux/memory.h>
#include <linux/cpuhotplug.h>

#include <asm/page.h>
#include <asm/sections.h>

#include <crypto/sha1.h>

#include "kallsyms_internal.h"
#include "kexec_internal.h"

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

/* vmcoreinfo stuff */
unsigned char *vmcoreinfo_data;
size_t vmcoreinfo_size;
u32 *vmcoreinfo_note;

/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
static unsigned char *vmcoreinfo_data_safecopy;

/*
 * parsing the "crashkernel" commandline
 *
 * this code is intended to be called from architecture specific code
 */


/*
 * This function parses command lines in the format
 *
 *	crashkernel=ramsize-range:size[,...][@offset]
 *
 * The function returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_mem(char *cmdline,
					unsigned long long system_ram,
					unsigned long long *crash_size,
					unsigned long long *crash_base)
{
	char *cur = cmdline, *tmp;
	unsigned long long total_mem = system_ram;

	/*
	 * Firmware sometimes reserves some memory regions for its own use,
	 * so the system memory size is less than the actual physical memory
	 * size. Work around this by rounding up the total size to 128M,
	 * which is enough for most test cases.
	 */
	total_mem = roundup(total_mem, SZ_128M);

	/* for each entry of the comma-separated list */
	do {
		unsigned long long start, end = ULLONG_MAX, size;

		/* get the start of the range */
		start = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (*cur != '-') {
			pr_warn("crashkernel: '-' expected\n");
			return -EINVAL;
		}
		cur++;

		/* if no ':' is here, then we read the end */
		if (*cur != ':') {
			end = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("crashkernel: Memory value expected\n");
				return -EINVAL;
			}
			cur = tmp;
			if (end <= start) {
				pr_warn("crashkernel: end <= start\n");
				return -EINVAL;
			}
		}

		if (*cur != ':') {
			pr_warn("crashkernel: ':' expected\n");
			return -EINVAL;
		}
		cur++;

		size = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (size >= total_mem) {
			pr_warn("crashkernel: invalid size\n");
			return -EINVAL;
		}

		/* does the system RAM match this range? */
		if (total_mem >= start && total_mem < end) {
			*crash_size = size;
			break;
		}
	} while (*cur++ == ',');

	if (*crash_size > 0) {
		while (*cur && *cur != ' ' && *cur != '@')
			cur++;
		if (*cur == '@') {
			cur++;
			*crash_base = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("crashkernel: Memory value expected after '@'\n");
				return -EINVAL;
			}
		}
	} else
		pr_info("crashkernel size resulted in zero bytes\n");

	return 0;
}
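
/*
 * A worked example of the range syntax above (hypothetical command line):
 * with
 *
 *	crashkernel=512M-2G:64M,2G-:128M@16M
 *
 * a machine with 1G of RAM matches the 512M-2G range and reserves 64M,
 * while a machine with 4G of RAM matches the open-ended 2G- range and
 * reserves 128M; the trailing @16M asks for the reservation to start at
 * physical address 16M.
 */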

/*
 * This function parses "simple" (old) crashkernel command lines like
 *
 *	crashkernel=size[@offset]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_simple(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	if (*cur == '@')
		*crash_base = memparse(cur+1, &cur);
	else if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

#define SUFFIX_HIGH 0
#define SUFFIX_LOW  1
#define SUFFIX_NULL 2
static __initdata char *suffix_tbl[] = {
	[SUFFIX_HIGH] = ",high",
	[SUFFIX_LOW]  = ",low",
	[SUFFIX_NULL] = NULL,
};

/*
 * This function parses "suffix" crashkernel command lines like
 *
 *	crashkernel=size,[high|low]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_suffix(char *cmdline,
					   unsigned long long *crash_size,
					   const char *suffix)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	/* check with suffix */
	if (strncmp(cur, suffix, strlen(suffix))) {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}
	cur += strlen(suffix);
	if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

static __init char *get_last_crashkernel(char *cmdline,
					 const char *name,
					 const char *suffix)
{
	char *p = cmdline, *ck_cmdline = NULL;

	/* find crashkernel and use the last one if there are several */
	p = strstr(p, name);
	while (p) {
		char *end_p = strchr(p, ' ');
		char *q;

		if (!end_p)
			end_p = p + strlen(p);

		if (!suffix) {
			int i;

			/* skip the one with any known suffix */
			for (i = 0; suffix_tbl[i]; i++) {
				q = end_p - strlen(suffix_tbl[i]);
				if (!strncmp(q, suffix_tbl[i],
					     strlen(suffix_tbl[i])))
					goto next;
			}
			ck_cmdline = p;
		} else {
			q = end_p - strlen(suffix);
			if (!strncmp(q, suffix, strlen(suffix)))
				ck_cmdline = p;
		}
next:
		p = strstr(p+1, name);
	}

	return ck_cmdline;
}

static int __init __parse_crashkernel(char *cmdline,
				      unsigned long long system_ram,
				      unsigned long long *crash_size,
				      unsigned long long *crash_base,
				      const char *name,
				      const char *suffix)
{
	char *first_colon, *first_space;
	char *ck_cmdline;

	BUG_ON(!crash_size || !crash_base);
	*crash_size = 0;
	*crash_base = 0;

	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
	if (!ck_cmdline)
		return -ENOENT;

	ck_cmdline += strlen(name);

	if (suffix)
		return parse_crashkernel_suffix(ck_cmdline, crash_size,
						suffix);
	/*
	 * if the commandline contains a ':', then that's the extended
	 * syntax -- if not, it must be the classic syntax
	 */
	first_colon = strchr(ck_cmdline, ':');
	first_space = strchr(ck_cmdline, ' ');
	if (first_colon && (!first_space || first_colon < first_space))
		return parse_crashkernel_mem(ck_cmdline, system_ram,
					     crash_size, crash_base);

	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
}
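
/*
 * A sketch of how selection works (hypothetical command line): given
 *
 *	crashkernel=256M crashkernel=512M crashkernel=128M,low
 *
 * get_last_crashkernel() with a NULL suffix skips the ",low" entry and
 * returns the last plain one (512M), whereas a ",low" suffix matches
 * only the entry that ends in ",low" (128M).
 */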

/*
 * This function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				   "crashkernel=", NULL);
}

int __init parse_crashkernel_high(char *cmdline,
				  unsigned long long system_ram,
				  unsigned long long *crash_size,
				  unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				   "crashkernel=", suffix_tbl[SUFFIX_HIGH]);
}

int __init parse_crashkernel_low(char *cmdline,
				 unsigned long long system_ram,
				 unsigned long long *crash_size,
				 unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				   "crashkernel=", suffix_tbl[SUFFIX_LOW]);
}

/*
 * Add a dummy early_param handler to mark crashkernel= as a known command line
 * parameter and suppress incorrect warnings in init/main.c.
 */
static int __init parse_crashkernel_dummy(char *arg)
{
	return 0;
}
early_param("crashkernel", parse_crashkernel_dummy);

int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
				void **addr, unsigned long *sz)
{
	Elf64_Ehdr *ehdr;
	Elf64_Phdr *phdr;
	unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
	unsigned char *buf;
	unsigned int cpu, i;
	unsigned long long notes_addr;
	unsigned long mstart, mend;

	/* extra phdr for vmcoreinfo ELF note */
	nr_phdr = nr_cpus + 1;
	nr_phdr += mem->nr_ranges;
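
	/*
	 * A worked example of the count so far (hypothetical machine):
	 * with 8 possible CPUs and 16 memory ranges, nr_phdr is
	 * 8 + 1 + 16 = 25 at this point; the extra kernel-text PT_LOAD
	 * accounted for below brings it to 26.
	 */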

	/*
	 * kexec-tools creates an extra PT_LOAD phdr for the kernel text
	 * mapping area (for example, ffffffff80000000 - ffffffffa0000000
	 * on x86_64), which tools like gdb appear to require. The same
	 * physical memory is therefore described by two ELF headers: one
	 * with kernel text virtual addresses and one with __va(physical)
	 * addresses.
	 */
	nr_phdr++;
	elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
	elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);

	buf = vzalloc(elf_sz);
	if (!buf)
		return -ENOMEM;

	ehdr = (Elf64_Ehdr *)buf;
	phdr = (Elf64_Phdr *)(ehdr + 1);
	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
	ehdr->e_ident[EI_OSABI] = ELF_OSABI;
	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
	ehdr->e_type = ET_CORE;
	ehdr->e_machine = ELF_ARCH;
	ehdr->e_version = EV_CURRENT;
	ehdr->e_phoff = sizeof(Elf64_Ehdr);
	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
	ehdr->e_phentsize = sizeof(Elf64_Phdr);

	/* Prepare one phdr of type PT_NOTE for each possible CPU */
	for_each_possible_cpu(cpu) {
		phdr->p_type = PT_NOTE;
		notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
		phdr->p_offset = phdr->p_paddr = notes_addr;
		phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
		(ehdr->e_phnum)++;
		phdr++;
	}

	/* Prepare one PT_NOTE header for vmcoreinfo */
	phdr->p_type = PT_NOTE;
	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
	(ehdr->e_phnum)++;
	phdr++;

	/* Prepare PT_LOAD type program header for kernel text region */
	if (need_kernel_map) {
		phdr->p_type = PT_LOAD;
		phdr->p_flags = PF_R|PF_W|PF_X;
		phdr->p_vaddr = (unsigned long) _text;
		phdr->p_filesz = phdr->p_memsz = _end - _text;
		phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
		ehdr->e_phnum++;
		phdr++;
	}

	/* Go through all the ranges in mem->ranges[] and prepare phdr */
	for (i = 0; i < mem->nr_ranges; i++) {
		mstart = mem->ranges[i].start;
		mend = mem->ranges[i].end;

		phdr->p_type = PT_LOAD;
		phdr->p_flags = PF_R|PF_W|PF_X;
		phdr->p_offset = mstart;

		phdr->p_paddr = mstart;
		phdr->p_vaddr = (unsigned long) __va(mstart);
		phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
		phdr->p_align = 0;
		ehdr->e_phnum++;
		pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
			 phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
			 ehdr->e_phnum, phdr->p_offset);
		phdr++;
	}

	*addr = buf;
	*sz = elf_sz;
	return 0;
}
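
/*
 * For a hypothetical image with two possible CPUs, a kernel-text mapping
 * and two memory ranges, the table built above is, in order:
 *
 *	PT_NOTE  crash_notes for CPU0
 *	PT_NOTE  crash_notes for CPU1
 *	PT_NOTE  vmcoreinfo
 *	PT_LOAD  kernel text (kernel virtual addresses)
 *	PT_LOAD  range 0 (__va addresses)
 *	PT_LOAD  range 1 (__va addresses)
 */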

int crash_exclude_mem_range(struct crash_mem *mem,
			    unsigned long long mstart, unsigned long long mend)
{
	int i, j;
	unsigned long long start, end, p_start, p_end;
	struct range temp_range = {0, 0};

	for (i = 0; i < mem->nr_ranges; i++) {
		start = mem->ranges[i].start;
		end = mem->ranges[i].end;
		p_start = mstart;
		p_end = mend;

		if (mstart > end || mend < start)
			continue;

		/* Truncate any area outside of range */
		if (mstart < start)
			p_start = start;
		if (mend > end)
			p_end = end;

		/* Found completely overlapping range */
		if (p_start == start && p_end == end) {
			mem->ranges[i].start = 0;
			mem->ranges[i].end = 0;
			if (i < mem->nr_ranges - 1) {
				/* Shift rest of the ranges to left */
				for (j = i; j < mem->nr_ranges - 1; j++) {
					mem->ranges[j].start =
						mem->ranges[j+1].start;
					mem->ranges[j].end =
						mem->ranges[j+1].end;
				}

				/*
				 * Continue to check if there are any other
				 * overlapping ranges from the current
				 * position because of shifting the above
				 * mem ranges.
				 */
				i--;
				mem->nr_ranges--;
				continue;
			}
			mem->nr_ranges--;
			return 0;
		}

		if (p_start > start && p_end < end) {
			/* Split original range */
			mem->ranges[i].end = p_start - 1;
			temp_range.start = p_end + 1;
			temp_range.end = end;
		} else if (p_start != start)
			mem->ranges[i].end = p_start - 1;
		else
			mem->ranges[i].start = p_end + 1;
		break;
	}

	/* If no split happened, we are done */
	if (!temp_range.end)
		return 0;

	/* Split happened: make sure there is room for the new range */
	if (i == mem->max_nr_ranges - 1)
		return -ENOMEM;

	/* Location where new range should go */
	j = i + 1;
	if (j < mem->nr_ranges) {
		/* Move over all ranges one slot towards the end */
		for (i = mem->nr_ranges - 1; i >= j; i--)
			mem->ranges[i + 1] = mem->ranges[i];
	}

	mem->ranges[j].start = temp_range.start;
	mem->ranges[j].end = temp_range.end;
	mem->nr_ranges++;
	return 0;
}
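
/*
 * A small worked example (hypothetical addresses): excluding the span
 * [5G, 6G - 1] from a single entry covering [4G, 8G - 1] shrinks that
 * entry to end at 5G - 1 and inserts a new [6G, 8G - 1] entry after it;
 * excluding a span that covers an entry exactly drops the entry and
 * shifts the rest of the array left.
 */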

Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
			  void *data, size_t data_len)
{
	struct elf_note *note = (struct elf_note *)buf;

	note->n_namesz = strlen(name) + 1;
	note->n_descsz = data_len;
	note->n_type   = type;
	buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
	memcpy(buf, name, note->n_namesz);
	buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
	memcpy(buf, data, data_len);
	buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));

	return buf;
}

void final_note(Elf_Word *buf)
{
	memset(buf, 0, sizeof(struct elf_note));
}
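
/*
 * Layout sketch (sizes are illustrative): append_elf_note() emits the
 * standard ELF note record,
 *
 *	n_namesz | n_descsz | n_type | name... | data...
 *
 * with the name and data each padded to Elf_Word (4-byte) alignment;
 * e.g. a 5-byte name such as "CORE" occupies 8 bytes in the buffer.
 */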

static void update_vmcoreinfo_note(void)
{
	u32 *buf = vmcoreinfo_note;

	if (!vmcoreinfo_size)
		return;
	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
			      vmcoreinfo_size);
	final_note(buf);
}

void crash_update_vmcoreinfo_safecopy(void *ptr)
{
	if (ptr)
		memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);

	vmcoreinfo_data_safecopy = ptr;
}

void crash_save_vmcoreinfo(void)
{
	if (!vmcoreinfo_note)
		return;

	/* Use the safe copy to generate the vmcoreinfo note if it exists */
	if (vmcoreinfo_data_safecopy)
		vmcoreinfo_data = vmcoreinfo_data_safecopy;

	vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
	update_vmcoreinfo_note();
}

void vmcoreinfo_append_str(const char *fmt, ...)
{
	va_list args;
	char buf[0x50];
	size_t r;

	va_start(args, fmt);
	r = vscnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);

	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);

	vmcoreinfo_size += r;

	WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES,
		  "vmcoreinfo data exceeds allocated size, truncating");
}

/*
 * provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}

phys_addr_t __weak paddr_vmcoreinfo_note(void)
{
	return __pa(vmcoreinfo_note);
}
EXPORT_SYMBOL(paddr_vmcoreinfo_note);

static int __init crash_save_vmcoreinfo_init(void)
{
	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
	if (!vmcoreinfo_data) {
		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
		return -ENOMEM;
	}

	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
					    GFP_KERNEL | __GFP_ZERO);
	if (!vmcoreinfo_note) {
		free_page((unsigned long)vmcoreinfo_data);
		vmcoreinfo_data = NULL;
		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
		return -ENOMEM;
	}

	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_BUILD_ID();
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_OFFSET(uts_namespace, name);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

#ifndef CONFIG_NUMA
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL_ARRAY(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
	VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
	VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _refcount);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(page, compound_head);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLATMEM
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER + 1);
	log_buf_vmcoreinfo_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_swapbacked);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
#define PAGE_BUDDY_MAPCOUNT_VALUE	(~PG_buddy)
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_HUGETLB_PAGE
	VMCOREINFO_NUMBER(PG_hugetlb);
#define PAGE_OFFLINE_MAPCOUNT_VALUE	(~PG_offline)
	VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
#endif

#ifdef CONFIG_KALLSYMS
	VMCOREINFO_SYMBOL(kallsyms_names);
	VMCOREINFO_SYMBOL(kallsyms_num_syms);
	VMCOREINFO_SYMBOL(kallsyms_token_table);
	VMCOREINFO_SYMBOL(kallsyms_token_index);
#ifdef CONFIG_KALLSYMS_BASE_RELATIVE
	VMCOREINFO_SYMBOL(kallsyms_offsets);
	VMCOREINFO_SYMBOL(kallsyms_relative_base);
#else
	VMCOREINFO_SYMBOL(kallsyms_addresses);
#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */
#endif /* CONFIG_KALLSYMS */

	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

subsys_initcall(crash_save_vmcoreinfo_init);

static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	size_t size, align;

	/*
	 * crash_notes could be allocated across two vmalloc pages when
	 * percpu is vmalloc based. vmalloc doesn't guarantee that two
	 * contiguous vmalloc pages are also on two contiguous physical
	 * pages. In that case the second part of crash_notes in the
	 * second page could be lost, since only the starting address
	 * and size of crash_notes are exported through sysfs. Here,
	 * round up the size of crash_notes to the nearest power of two
	 * and pass it to __alloc_percpu as the align value. This makes
	 * sure crash_notes is allocated inside one physical page.
	 */
	size = sizeof(note_buf_t);
	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);

	/*
	 * Break the build if size is bigger than PAGE_SIZE, since
	 * crash_notes would then definitely span two pages.
	 */
	BUILD_BUG_ON(size > PAGE_SIZE);

	crash_notes = __alloc_percpu(size, align);
	if (!crash_notes) {
		pr_warn("Memory allocation for saving cpu register states failed\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(crash_notes_memory_init);
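
/*
 * A worked example of the sizing rule above (hypothetical size): if
 * sizeof(note_buf_t) were 356 bytes, roundup_pow_of_two() would give a
 * 512-byte alignment, and a 512-byte-aligned object of at most 512
 * bytes can never straddle a page boundary.
 */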

#ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt

/*
 * Unlike kexec/kdump loading/unloading/jumping/shrinking, which
 * rarely happen, many crash hotplug events may be notified during
 * one short period, e.g. when one memory board is hot added and its
 * memory regions are onlined. So the mutex __crash_hotplug_lock is
 * used to serialize the crash hotplug handling specifically.
 */
DEFINE_MUTEX(__crash_hotplug_lock);
#define crash_hotplug_lock()   mutex_lock(&__crash_hotplug_lock)
#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock)

/*
 * This routine is utilized when the crash_hotplug sysfs node is read.
 * It reflects the kernel's ability/permission to update the crash
 * elfcorehdr directly.
 */
int crash_check_update_elfcorehdr(void)
{
	int rc = 0;

	crash_hotplug_lock();
	/* Obtain lock while reading crash information */
	if (!kexec_trylock()) {
		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
		crash_hotplug_unlock();
		return 0;
	}
	if (kexec_crash_image) {
		if (kexec_crash_image->file_mode)
			rc = 1;
		else
			rc = kexec_crash_image->update_elfcorehdr;
	}
	/* Release lock now that the read is complete */
	kexec_unlock();
	crash_hotplug_unlock();

	return rc;
}

/*
 * To accurately reflect hot un/plug changes of cpu and memory resources
 * (including onlining and offlining of those resources), the elfcorehdr
 * (which is passed to the crash kernel via the elfcorehdr= parameter)
 * must be updated with the new list of CPUs and memories.
 *
 * In order to make changes to elfcorehdr, two conditions are needed:
 * First, the segment containing the elfcorehdr must be large enough
 * to permit a growing number of resources; the elfcorehdr memory size
 * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
 * Second, purgatory must explicitly exclude the elfcorehdr from the
 * list of segments it checks (since the elfcorehdr changes and thus
 * would require an update to purgatory itself to update the digest).
 */
static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
{
	struct kimage *image;

	crash_hotplug_lock();
	/* Obtain lock while changing crash information */
	if (!kexec_trylock()) {
		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
		crash_hotplug_unlock();
		return;
	}

	/* Nothing to do if kdump is not loaded */
	if (!kexec_crash_image)
		goto out;

	image = kexec_crash_image;

	/* Check that updating elfcorehdr is permitted */
	if (!(image->file_mode || image->update_elfcorehdr))
		goto out;

	if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
	    hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
		pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
	else
		pr_debug("hp_action %u\n", hp_action);

	/*
	 * The elfcorehdr_index is set to -1 when the struct kimage
	 * is allocated. Find the segment containing the elfcorehdr,
	 * if not already found.
	 */
	if (image->elfcorehdr_index < 0) {
		unsigned long mem;
		unsigned char *ptr;
		unsigned int n;

		for (n = 0; n < image->nr_segments; n++) {
			mem = image->segment[n].mem;
			ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
			if (ptr) {
				/* The segment containing elfcorehdr */
				if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
					image->elfcorehdr_index = (int)n;
				kunmap_local(ptr);
			}
		}
	}

	if (image->elfcorehdr_index < 0) {
		pr_err("unable to locate elfcorehdr segment\n");
		goto out;
	}

	/* Needed in order for the segments to be updated */
	arch_kexec_unprotect_crashkres();

	/* Differentiate between normal load and hotplug update */
	image->hp_action = hp_action;

	/* Now invoke arch-specific update handler */
	arch_crash_handle_hotplug_event(image);

	/* No longer handling a hotplug event */
	image->hp_action = KEXEC_CRASH_HP_NONE;
	image->elfcorehdr_updated = true;

	/* Change back to read-only */
	arch_kexec_protect_crashkres();

	/* Errors in the callback are not a reason to roll back state */
out:
	/* Release lock now that update complete */
	kexec_unlock();
	crash_hotplug_unlock();
}

static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
{
	switch (val) {
	case MEM_ONLINE:
		crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
					   KEXEC_CRASH_HP_INVALID_CPU);
		break;

	case MEM_OFFLINE:
		crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
					   KEXEC_CRASH_HP_INVALID_CPU);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block crash_memhp_nb = {
	.notifier_call = crash_memhp_notifier,
	.priority = 0
};

static int crash_cpuhp_online(unsigned int cpu)
{
	crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
	return 0;
}

static int crash_cpuhp_offline(unsigned int cpu)
{
	crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
	return 0;
}

static int __init crash_hotplug_init(void)
{
	int result = 0;

	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		register_memory_notifier(&crash_memhp_nb);

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
			"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
	}

	return result;
}

subsys_initcall(crash_hotplug_init);
#endif