// SPDX-License-Identifier: GPL-2.0
/*
 *	fs/proc/kcore.c kernel ELF core dumper
 *
 *	Modelled on fs/exec.c:aout_core_dump()
 *	Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *	ELF version written by David Howells <David.Howells@nexor.co.uk>
 *	Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *	Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *	Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */

#include <linux/crash_core.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
#include <linux/user.h>
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/notifier.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
#include <linux/sched/task.h>
#include <asm/sections.h>
#include "internal.h"

#define CORE_STR "CORE"

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

static struct proc_dir_entry *proc_root_kcore;


#ifndef kc_vaddr_to_offset
#define	kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef	kc_offset_to_vaddr
#define	kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

static LIST_HEAD(kclist_head);
static DECLARE_RWSEM(kclist_lock);
static int kcore_need_update = 1;

/* This doesn't grab kclist_lock, so it should only be used at init time. */
void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
		       int type)
{
	new->addr = (unsigned long)addr;
	new->size = size;
	new->type = type;

	list_add_tail(&new->list, &kclist_head);
}

/*
 * Compute the total size of the /proc/kcore file: ELF header, program
 * headers and note segment, plus the highest offset covered by any entry
 * on kclist_head.  The individual lengths and the page-aligned start of
 * the data area are returned through the pointer arguments.
 */
static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
			     size_t *data_offset)
{
	size_t try, size;
	struct kcore_list *m;

	*nphdr = 1; /* PT_NOTE */
	size = 0;

	list_for_each_entry(m, &kclist_head, list) {
		try = kc_vaddr_to_offset((size_t)m->addr + m->size);
		if (try > size)
			size = try;
		*nphdr = *nphdr + 1;
	}

	*phdrs_len = *nphdr * sizeof(struct elf_phdr);
	*notes_len = (4 * sizeof(struct elf_note) +
		      3 * ALIGN(sizeof(CORE_STR), 4) +
		      VMCOREINFO_NOTE_NAME_BYTES +
		      ALIGN(sizeof(struct elf_prstatus), 4) +
		      ALIGN(sizeof(struct elf_prpsinfo), 4) +
		      ALIGN(arch_task_struct_size, 4) +
		      ALIGN(vmcoreinfo_size, 4));
	*data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
				  *notes_len);
	return *data_offset + size;
}

#ifdef CONFIG_HIGHMEM
/*
 * With HIGHMEM, assume [0...max_low_pfn) is one continuous range of memory,
 * because the holes in low memory are not as big as in the !HIGHMEM case.
 * (HIGHMEM is special because part of memory is _invisible_ to the kernel.)
 */
static int kcore_ram_list(struct list_head *head)
{
	struct kcore_list *ent;

	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
	if (!ent)
		return -ENOMEM;
	ent->addr = (unsigned long)__va(0);
	ent->size = max_low_pfn << PAGE_SHIFT;
	ent->type = KCORE_RAM;
	list_add(&ent->list, head);
	return 0;
}

#else /* !CONFIG_HIGHMEM */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* Calculate the vmemmap address range for the given System RAM range and register it. */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
	unsigned long nr_pages = ent->size >> PAGE_SHIFT;
	unsigned long start, end;
	struct kcore_list *vmm, *tmp;

	start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
	end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
	end = PAGE_ALIGN(end);
	/* overlap check (because we have to page-align the range) */
	list_for_each_entry(tmp, head, list) {
		if (tmp->type != KCORE_VMEMMAP)
			continue;
		if (start < tmp->addr + tmp->size)
			if (end > tmp->addr)
				end = tmp->addr;
	}
	if (start < end) {
		vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
		if (!vmm)
			return 0;
		vmm->addr = start;
		vmm->size = end - start;
		vmm->type = KCORE_VMEMMAP;
		list_add_tail(&vmm->list, head);
	}
	return 1;
}
#else
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	return 1;
}

#endif

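/*
 * walk_system_ram_range() callback: register one System RAM range (given
 * as pfn/nr_pages) as a KCORE_RAM entry covering the corresponding
 * direct-map addresses, plus its vmemmap range when SPARSEMEM_VMEMMAP is
 * enabled.  Returns 0 on success, 1 to skip the range, -ENOMEM on
 * allocation failure.
 */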
static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
	struct list_head *head = (struct list_head *)arg;
	struct kcore_list *ent;
	struct page *p;

	if (!pfn_valid(pfn))
		return 1;

	p = pfn_to_page(pfn);
	if (!memmap_valid_within(pfn, p, page_zone(p)))
		return 1;

	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
	if (!ent)
		return -ENOMEM;
	ent->addr = (unsigned long)page_to_virt(p);
	ent->size = nr_pages << PAGE_SHIFT;

	if (!virt_addr_valid(ent->addr))
		goto free_out;

	/* Cut off any area that is not mapped (taken from the ppc32 code). */
	if (ULONG_MAX - ent->addr < ent->size)
		ent->size = ULONG_MAX - ent->addr;

	/*
	 * We've already checked virt_addr_valid so we know this address
	 * is a valid pointer, therefore we can check against it to determine
	 * if we need to trim
	 */
	if (VMALLOC_START > ent->addr) {
		if (VMALLOC_START - ent->addr < ent->size)
			ent->size = VMALLOC_START - ent->addr;
	}

	ent->type = KCORE_RAM;
	list_add_tail(&ent->list, head);

	if (!get_sparsemem_vmemmap_info(ent, head)) {
		list_del(&ent->list);
		goto free_out;
	}

	return 0;
free_out:
	kfree(ent);
	return 1;
}

static int kcore_ram_list(struct list_head *list)
{
	int nid, ret;
	unsigned long end_pfn;

	/* Not initialized yet; update now. */
	/* find out "max pfn" */
	end_pfn = 0;
	for_each_node_state(nid, N_MEMORY) {
		unsigned long node_end;
		node_end = node_end_pfn(nid);
		if (end_pfn < node_end)
			end_pfn = node_end;
	}
	/* scan 0 to max_pfn */
	ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
	if (ret)
		return -ENOMEM;
	return 0;
}
#endif /* CONFIG_HIGHMEM */

/*
 * Rebuild the KCORE_RAM (and KCORE_VMEMMAP) entries on kclist_head from
 * the current System RAM map and refresh the reported size of /proc/kcore.
 */
static int kcore_update_ram(void)
{
	LIST_HEAD(list);
	LIST_HEAD(garbage);
	int nphdr;
	size_t phdrs_len, notes_len, data_offset;
	struct kcore_list *tmp, *pos;
	int ret = 0;

	down_write(&kclist_lock);
	if (!xchg(&kcore_need_update, 0))
		goto out;

	ret = kcore_ram_list(&list);
	if (ret) {
		/* Couldn't get the RAM list, try again next time. */
		WRITE_ONCE(kcore_need_update, 1);
		list_splice_tail(&list, &garbage);
		goto out;
	}

	list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
		if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
			list_move(&pos->list, &garbage);
	}
	list_splice_tail(&list, &kclist_head);

	proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, &notes_len,
					       &data_offset);

out:
	up_write(&kclist_lock);
	list_for_each_entry_safe(pos, tmp, &garbage, list) {
		list_del(&pos->list);
		kfree(pos);
	}
	return ret;
}

static void append_kcore_note(char *notes, size_t *i, const char *name,
			      unsigned int type, const void *desc,
			      size_t descsz)
{
	struct elf_note *note = (struct elf_note *)&notes[*i];

	note->n_namesz = strlen(name) + 1;
	note->n_descsz = descsz;
	note->n_type = type;
	*i += sizeof(*note);
	memcpy(&notes[*i], name, note->n_namesz);
	*i = ALIGN(*i + note->n_namesz, 4);
	memcpy(&notes[*i], desc, descsz);
	*i = ALIGN(*i + descsz, 4);
}

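/*
 * read(2) handler for /proc/kcore.  The file layout is: ELF header,
 * program headers, note segment, then (starting at the page-aligned
 * data_offset) the memory contents themselves, where a file offset maps
 * to a kernel virtual address via kc_offset_to_vaddr().  Each part is
 * generated on the fly as the read crosses into it.
 */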
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
	char *buf = file->private_data;
	size_t phdrs_offset, notes_offset, data_offset;
	size_t phdrs_len, notes_len;
	struct kcore_list *m;
	size_t tsz;
	int nphdr;
	unsigned long start;
	size_t orig_buflen = buflen;
	int ret = 0;

	down_read(&kclist_lock);

	get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
	phdrs_offset = sizeof(struct elfhdr);
	notes_offset = phdrs_offset + phdrs_len;

	/* ELF file header. */
	if (buflen && *fpos < sizeof(struct elfhdr)) {
		struct elfhdr ehdr = {
			.e_ident = {
				[EI_MAG0] = ELFMAG0,
				[EI_MAG1] = ELFMAG1,
				[EI_MAG2] = ELFMAG2,
				[EI_MAG3] = ELFMAG3,
				[EI_CLASS] = ELF_CLASS,
				[EI_DATA] = ELF_DATA,
				[EI_VERSION] = EV_CURRENT,
				[EI_OSABI] = ELF_OSABI,
			},
			.e_type = ET_CORE,
			.e_machine = ELF_ARCH,
			.e_version = EV_CURRENT,
			.e_phoff = sizeof(struct elfhdr),
			.e_flags = ELF_CORE_EFLAGS,
			.e_ehsize = sizeof(struct elfhdr),
			.e_phentsize = sizeof(struct elf_phdr),
			.e_phnum = nphdr,
		};

		tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
		if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) {
			ret = -EFAULT;
			goto out;
		}

		buffer += tsz;
		buflen -= tsz;
		*fpos += tsz;
	}

	/* ELF program headers. */
	if (buflen && *fpos < phdrs_offset + phdrs_len) {
		struct elf_phdr *phdrs, *phdr;

		phdrs = kzalloc(phdrs_len, GFP_KERNEL);
		if (!phdrs) {
			ret = -ENOMEM;
			goto out;
		}

		phdrs[0].p_type = PT_NOTE;
		phdrs[0].p_offset = notes_offset;
		phdrs[0].p_filesz = notes_len;

		phdr = &phdrs[1];
		list_for_each_entry(m, &kclist_head, list) {
			phdr->p_type = PT_LOAD;
			phdr->p_flags = PF_R | PF_W | PF_X;
			phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
			if (m->type == KCORE_REMAP)
				phdr->p_vaddr = (size_t)m->vaddr;
			else
				phdr->p_vaddr = (size_t)m->addr;
			if (m->type == KCORE_RAM || m->type == KCORE_REMAP)
				phdr->p_paddr = __pa(m->addr);
			else if (m->type == KCORE_TEXT)
				phdr->p_paddr = __pa_symbol(m->addr);
			else
				phdr->p_paddr = (elf_addr_t)-1;
			phdr->p_filesz = phdr->p_memsz = m->size;
			phdr->p_align = PAGE_SIZE;
			phdr++;
		}

		tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
		if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset,
				 tsz)) {
			kfree(phdrs);
			ret = -EFAULT;
			goto out;
		}
		kfree(phdrs);

		buffer += tsz;
		buflen -= tsz;
		*fpos += tsz;
	}

	/* ELF note segment. */
	if (buflen && *fpos < notes_offset + notes_len) {
		struct elf_prstatus prstatus = {};
		struct elf_prpsinfo prpsinfo = {
			.pr_sname = 'R',
			.pr_fname = "vmlinux",
		};
		char *notes;
		size_t i = 0;

		strlcpy(prpsinfo.pr_psargs, saved_command_line,
			sizeof(prpsinfo.pr_psargs));

		notes = kzalloc(notes_len, GFP_KERNEL);
		if (!notes) {
			ret = -ENOMEM;
			goto out;
		}

		append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus,
				  sizeof(prstatus));
		append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo,
				  sizeof(prpsinfo));
		append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
				  arch_task_struct_size);
		/*
		 * vmcoreinfo_size is mostly constant after init time, but it
		 * can be changed by crash_save_vmcoreinfo(). Racing here with a
		 * panic on another CPU before the machine goes down is insanely
		 * unlikely, but it's better to not leave potential buffer
		 * overflows lying around, regardless.
		 */
		append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
				  vmcoreinfo_data,
				  min(vmcoreinfo_size, notes_len - i));

		tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
		if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
			kfree(notes);
			ret = -EFAULT;
			goto out;
		}
		kfree(notes);

		buffer += tsz;
		buflen -= tsz;
		*fpos += tsz;
	}

	/*
	 * Check to see if our file offset matches with any of
	 * the addresses in the elf_phdr on our list.
	 */
	start = kc_offset_to_vaddr(*fpos - data_offset);
	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
		tsz = buflen;

	m = NULL;
	while (buflen) {
		/*
		 * If this is the first iteration or the address is not within
		 * the previous entry, search for a matching entry.
		 */
		if (!m || start < m->addr || start >= m->addr + m->size) {
			list_for_each_entry(m, &kclist_head, list) {
				if (start >= m->addr &&
				    start < m->addr + m->size)
					break;
			}
		}

		if (&m->list == &kclist_head) {
			if (clear_user(buffer, tsz)) {
				ret = -EFAULT;
				goto out;
			}
			m = NULL;	/* skip the list anchor */
		} else if (m->type == KCORE_VMALLOC) {
			vread(buf, (char *)start, tsz);
			/* we have to zero-fill user buffer even if no read */
			if (copy_to_user(buffer, buf, tsz)) {
				ret = -EFAULT;
				goto out;
			}
		} else if (m->type == KCORE_USER) {
			/* User page is handled prior to normal kernel page: */
			if (copy_to_user(buffer, (char *)start, tsz)) {
				ret = -EFAULT;
				goto out;
			}
		} else {
			if (kern_addr_valid(start)) {
				/*
				 * Using bounce buffer to bypass the
				 * hardened user copy kernel text checks.
				 */
				if (probe_kernel_read(buf, (void *) start, tsz)) {
					if (clear_user(buffer, tsz)) {
						ret = -EFAULT;
						goto out;
					}
				} else {
					if (copy_to_user(buffer, buf, tsz)) {
						ret = -EFAULT;
						goto out;
					}
				}
			} else {
				if (clear_user(buffer, tsz)) {
					ret = -EFAULT;
					goto out;
				}
			}
		}
		buflen -= tsz;
		*fpos += tsz;
		buffer += tsz;
		start += tsz;
		tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
	}

out:
	up_read(&kclist_lock);
	if (ret)
		return ret;
	return orig_buflen - buflen;
}

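/*
 * Opening /proc/kcore requires CAP_SYS_RAWIO.  A one-page bounce buffer
 * is allocated for read_kcore(), and the RAM list and reported file size
 * are refreshed if a memory hotplug event has made them stale.
 */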
static int open_kcore(struct inode *inode, struct file *filp)
{
	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;

	if (kcore_need_update)
		kcore_update_ram();
	if (i_size_read(inode) != proc_root_kcore->size) {
		inode_lock(inode);
		i_size_write(inode, proc_root_kcore->size);
		inode_unlock(inode);
	}
	return 0;
}

static int release_kcore(struct inode *inode, struct file *file)
{
	kfree(file->private_data);
	return 0;
}

static const struct file_operations proc_kcore_operations = {
	.read = read_kcore,
	.open = open_kcore,
	.release = release_kcore,
	.llseek = default_llseek,
};

/* just remember that we have to update kcore */
static int __meminit kcore_callback(struct notifier_block *self,
				    unsigned long action, void *arg)
{
	switch (action) {
	case MEM_ONLINE:
	case MEM_OFFLINE:
		kcore_need_update = 1;
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block kcore_callback_nb __meminitdata = {
	.notifier_call = kcore_callback,
	.priority = 0,
};

static struct kcore_list kcore_vmalloc;

#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * If CONFIG_ARCH_PROC_KCORE_TEXT is defined, a dedicated segment is used
 * for mapping the kernel text instead of the direct-map area, so we need
 * to create a special TEXT section for it.
 */
static void __init proc_kcore_text_init(void)
{
	kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
#else
static void __init proc_kcore_text_init(void)
{
}
#endif

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
/*
 * MODULES_VADDR has no intersection with VMALLOC_ADDR.
 */
struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
	if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
		kclist_add(&kcore_modules, (void *)MODULES_VADDR,
			   MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
	}
}
#else
static void __init add_modules_range(void)
{
}
#endif

static int __init proc_kcore_init(void)
{
	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
				      &proc_kcore_operations);
	if (!proc_root_kcore) {
		pr_err("couldn't create /proc/kcore\n");
		return 0; /* Always returns 0. */
	}
	/* Store text area if it's special */
	proc_kcore_text_init();
	/* Store vmalloc area */
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
	add_modules_range();
	/* Store direct-map area from physical memory map */
	kcore_update_ram();
	register_hotmemory_notifier(&kcore_callback_nb);

	return 0;
}
fs_initcall(proc_kcore_init);