// SPDX-License-Identifier: GPL-2.0
/*
 *	fs/proc/kcore.c		kernel ELF core dumper
 *
 *	Modelled on fs/exec.c:aout_core_dump()
 *	Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *	ELF version written by David Howells <David.Howells@nexor.co.uk>
 *	Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *	Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *	Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */

#include <linux/crash_core.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
#include <linux/user.h>
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/notifier.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
#include <linux/sched/task.h>
#include <asm/sections.h>
#include "internal.h"

#define CORE_STR "CORE"

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

static struct proc_dir_entry *proc_root_kcore;

#ifndef kc_vaddr_to_offset
#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef kc_offset_to_vaddr
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

static LIST_HEAD(kclist_head);
static DECLARE_RWSEM(kclist_lock);
static int kcore_need_update = 1;

/* This doesn't grab kclist_lock, so it should only be used at init time. */
void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
		       int type)
{
	new->addr = (unsigned long)addr;
	new->size = size;
	new->type = type;

	list_add_tail(&new->list, &kclist_head);
}

static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
			     size_t *data_offset)
{
	size_t try, size;
	struct kcore_list *m;

	*nphdr = 1; /* PT_NOTE */
	size = 0;

	list_for_each_entry(m, &kclist_head, list) {
		try = kc_vaddr_to_offset((size_t)m->addr + m->size);
		if (try > size)
			size = try;
		*nphdr = *nphdr + 1;
	}

	*phdrs_len = *nphdr * sizeof(struct elf_phdr);
	*notes_len = (4 * sizeof(struct elf_note) +
		      3 * ALIGN(sizeof(CORE_STR), 4) +
		      VMCOREINFO_NOTE_NAME_BYTES +
		      ALIGN(sizeof(struct elf_prstatus), 4) +
		      ALIGN(sizeof(struct elf_prpsinfo), 4) +
		      ALIGN(arch_task_struct_size, 4) +
		      ALIGN(vmcoreinfo_size, 4));
	*data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
				  *notes_len);
	return *data_offset + size;
}
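
/*
 * Resulting layout of the /proc/kcore image, using the offsets computed above:
 *
 *	0			ELF header
 *	sizeof(struct elfhdr)	program headers (nphdr entries, phdrs_len bytes)
 *	 + phdrs_len		ELF note segment (notes_len bytes)
 *	data_offset		page-aligned start of the memory contents; each
 *				kclist entry is emitted at
 *				data_offset + kc_vaddr_to_offset(addr)
 */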

#ifdef CONFIG_HIGHMEM
/*
 * If there is no highmem, we can assume [0...max_low_pfn) is a continuous
 * range of memory, because the memory holes are not as big as in the
 * !HIGHMEM case. (HIGHMEM is special because part of memory is _invisible_
 * to the kernel.)
 */
static int kcore_ram_list(struct list_head *head)
{
	struct kcore_list *ent;

	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
	if (!ent)
		return -ENOMEM;
	ent->addr = (unsigned long)__va(0);
	ent->size = max_low_pfn << PAGE_SHIFT;
	ent->type = KCORE_RAM;
	list_add(&ent->list, head);
	return 0;
}

#else /* !CONFIG_HIGHMEM */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* calculate the vmemmap address for the given System RAM pfn range and register it */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
	unsigned long nr_pages = ent->size >> PAGE_SHIFT;
	unsigned long start, end;
	struct kcore_list *vmm, *tmp;

	start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
	end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
	end = PAGE_ALIGN(end);
	/* overlap check (needed because we page-align the range) */
	list_for_each_entry(tmp, head, list) {
		if (tmp->type != KCORE_VMEMMAP)
			continue;
		if (start < tmp->addr + tmp->size)
			if (end > tmp->addr)
				end = tmp->addr;
	}
	if (start < end) {
		vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
		if (!vmm)
			return 0;
		vmm->addr = start;
		vmm->size = end - start;
		vmm->type = KCORE_VMEMMAP;
		list_add_tail(&vmm->list, head);
	}
	return 1;
}
#else
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	return 1;
}

#endif
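
/*
 * Callback for walk_system_ram_range(): register each System RAM range as a
 * KCORE_RAM entry (clipped so it stays within the direct mapping) and, with
 * CONFIG_SPARSEMEM_VMEMMAP, the vmemmap range that describes it.
 */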
static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
	struct list_head *head = (struct list_head *)arg;
	struct kcore_list *ent;
	struct page *p;

	if (!pfn_valid(pfn))
		return 1;

	p = pfn_to_page(pfn);
	if (!memmap_valid_within(pfn, p, page_zone(p)))
		return 1;

	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
	if (!ent)
		return -ENOMEM;
	ent->addr = (unsigned long)page_to_virt(p);
	ent->size = nr_pages << PAGE_SHIFT;

	if (!virt_addr_valid(ent->addr))
		goto free_out;

	/* Trim off the area that is not mapped (originally from the ppc-32 code). */
	if (ULONG_MAX - ent->addr < ent->size)
		ent->size = ULONG_MAX - ent->addr;

	/*
	 * We've already checked virt_addr_valid(), so we know this address
	 * is a valid pointer; therefore we can check against it to determine
	 * whether we need to trim.
	 */
	if (VMALLOC_START > ent->addr) {
		if (VMALLOC_START - ent->addr < ent->size)
			ent->size = VMALLOC_START - ent->addr;
	}

	ent->type = KCORE_RAM;
	list_add_tail(&ent->list, head);

	if (!get_sparsemem_vmemmap_info(ent, head)) {
		list_del(&ent->list);
		goto free_out;
	}

	return 0;
free_out:
	kfree(ent);
	return 1;
}

static int kcore_ram_list(struct list_head *list)
{
	int nid, ret;
	unsigned long end_pfn;

	/* Not initialized... update now */
	/* find out "max pfn" */
	end_pfn = 0;
	for_each_node_state(nid, N_MEMORY) {
		unsigned long node_end;
		node_end = node_end_pfn(nid);
		if (end_pfn < node_end)
			end_pfn = node_end;
	}
	/* scan 0 to max_pfn */
	ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
	if (ret)
		return -ENOMEM;
	return 0;
}
#endif /* CONFIG_HIGHMEM */

static int kcore_update_ram(void)
{
	LIST_HEAD(list);
	LIST_HEAD(garbage);
	int nphdr;
	size_t phdrs_len, notes_len, data_offset;
	struct kcore_list *tmp, *pos;
	int ret = 0;

	down_write(&kclist_lock);
	if (!xchg(&kcore_need_update, 0))
		goto out;

	ret = kcore_ram_list(&list);
	if (ret) {
		/* Couldn't get the RAM list, try again next time. */
		WRITE_ONCE(kcore_need_update, 1);
		list_splice_tail(&list, &garbage);
		goto out;
	}

	list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
		if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
			list_move(&pos->list, &garbage);
	}
	list_splice_tail(&list, &kclist_head);

	proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, &notes_len,
					       &data_offset);

out:
	up_write(&kclist_lock);
	list_for_each_entry_safe(pos, tmp, &garbage, list) {
		list_del(&pos->list);
		kfree(pos);
	}
	return ret;
}

static void append_kcore_note(char *notes, size_t *i, const char *name,
			      unsigned int type, const void *desc,
			      size_t descsz)
{
	struct elf_note *note = (struct elf_note *)&notes[*i];

	note->n_namesz = strlen(name) + 1;
	note->n_descsz = descsz;
	note->n_type = type;
	*i += sizeof(*note);
	memcpy(&notes[*i], name, note->n_namesz);
	*i = ALIGN(*i + note->n_namesz, 4);
	memcpy(&notes[*i], desc, descsz);
	*i = ALIGN(*i + descsz, 4);
}
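
/*
 * Synthesize the ELF core image on the fly: depending on *fpos, emit the ELF
 * header, the program headers, the note segment, and finally the memory
 * contents described by the kclist entries, copied out at most one page at a
 * time.
 */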
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
	char *buf = file->private_data;
	size_t phdrs_offset, notes_offset, data_offset;
	size_t phdrs_len, notes_len;
	struct kcore_list *m;
	size_t tsz;
	int nphdr;
	unsigned long start;
	size_t orig_buflen = buflen;
	int ret = 0;

	down_read(&kclist_lock);

	get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
	phdrs_offset = sizeof(struct elfhdr);
	notes_offset = phdrs_offset + phdrs_len;

	/* ELF file header. */
	if (buflen && *fpos < sizeof(struct elfhdr)) {
		struct elfhdr ehdr = {
			.e_ident = {
				[EI_MAG0] = ELFMAG0,
				[EI_MAG1] = ELFMAG1,
				[EI_MAG2] = ELFMAG2,
				[EI_MAG3] = ELFMAG3,
				[EI_CLASS] = ELF_CLASS,
				[EI_DATA] = ELF_DATA,
				[EI_VERSION] = EV_CURRENT,
				[EI_OSABI] = ELF_OSABI,
			},
			.e_type = ET_CORE,
			.e_machine = ELF_ARCH,
			.e_version = EV_CURRENT,
			.e_phoff = sizeof(struct elfhdr),
			.e_flags = ELF_CORE_EFLAGS,
			.e_ehsize = sizeof(struct elfhdr),
			.e_phentsize = sizeof(struct elf_phdr),
			.e_phnum = nphdr,
		};

		tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
		if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) {
			ret = -EFAULT;
			goto out;
		}

		buffer += tsz;
		buflen -= tsz;
		*fpos += tsz;
	}

	/* ELF program headers. */
	if (buflen && *fpos < phdrs_offset + phdrs_len) {
		struct elf_phdr *phdrs, *phdr;

		phdrs = kzalloc(phdrs_len, GFP_KERNEL);
		if (!phdrs) {
			ret = -ENOMEM;
			goto out;
		}

		phdrs[0].p_type = PT_NOTE;
		phdrs[0].p_offset = notes_offset;
		phdrs[0].p_filesz = notes_len;

		phdr = &phdrs[1];
		list_for_each_entry(m, &kclist_head, list) {
			phdr->p_type = PT_LOAD;
			phdr->p_flags = PF_R | PF_W | PF_X;
			phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
			phdr->p_vaddr = (size_t)m->addr;
			if (m->type == KCORE_RAM)
				phdr->p_paddr = __pa(m->addr);
			else if (m->type == KCORE_TEXT)
				phdr->p_paddr = __pa_symbol(m->addr);
			else
				phdr->p_paddr = (elf_addr_t)-1;
			phdr->p_filesz = phdr->p_memsz = m->size;
			phdr->p_align = PAGE_SIZE;
			phdr++;
		}

		tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
		if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset,
				 tsz)) {
			kfree(phdrs);
			ret = -EFAULT;
			goto out;
		}
		kfree(phdrs);

		buffer += tsz;
		buflen -= tsz;
		*fpos += tsz;
	}

	/* ELF note segment. */
	if (buflen && *fpos < notes_offset + notes_len) {
		struct elf_prstatus prstatus = {};
		struct elf_prpsinfo prpsinfo = {
			.pr_sname = 'R',
			.pr_fname = "vmlinux",
		};
		char *notes;
		size_t i = 0;

		strlcpy(prpsinfo.pr_psargs, saved_command_line,
			sizeof(prpsinfo.pr_psargs));

		notes = kzalloc(notes_len, GFP_KERNEL);
		if (!notes) {
			ret = -ENOMEM;
			goto out;
		}

		append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus,
				  sizeof(prstatus));
		append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo,
				  sizeof(prpsinfo));
		append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
				  arch_task_struct_size);
		/*
		 * vmcoreinfo_size is mostly constant after init time, but it
		 * can be changed by crash_save_vmcoreinfo(). Racing here with a
		 * panic on another CPU before the machine goes down is insanely
		 * unlikely, but it's better to not leave potential buffer
		 * overflows lying around, regardless.
		 */
		append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
				  vmcoreinfo_data,
				  min(vmcoreinfo_size, notes_len - i));

		tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
		if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
			kfree(notes);
			ret = -EFAULT;
			goto out;
		}
		kfree(notes);

		buffer += tsz;
		buflen -= tsz;
		*fpos += tsz;
	}

	/*
	 * Check to see if our file offset matches with any of
	 * the addresses in the elf_phdr on our list.
	 */
	start = kc_offset_to_vaddr(*fpos - data_offset);
	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
		tsz = buflen;

	m = NULL;
	while (buflen) {
		/*
		 * If this is the first iteration or the address is not within
		 * the previous entry, search for a matching entry.
		 */
		if (!m || start < m->addr || start >= m->addr + m->size) {
			list_for_each_entry(m, &kclist_head, list) {
				if (start >= m->addr &&
				    start < m->addr + m->size)
					break;
			}
		}

		if (&m->list == &kclist_head) {
			if (clear_user(buffer, tsz)) {
				ret = -EFAULT;
				goto out;
			}
		} else if (m->type == KCORE_VMALLOC) {
			vread(buf, (char *)start, tsz);
			/* we have to zero-fill the user buffer even if nothing was read */
			if (copy_to_user(buffer, buf, tsz)) {
				ret = -EFAULT;
				goto out;
			}
		} else if (m->type == KCORE_USER) {
			/* User page is handled prior to normal kernel page: */
			if (copy_to_user(buffer, (char *)start, tsz)) {
				ret = -EFAULT;
				goto out;
			}
		} else {
			if (kern_addr_valid(start)) {
				/*
				 * Use a bounce buffer to bypass the hardened
				 * user-copy kernel text checks.
				 */
				if (probe_kernel_read(buf, (void *)start, tsz)) {
					if (clear_user(buffer, tsz)) {
						ret = -EFAULT;
						goto out;
					}
				} else {
					if (copy_to_user(buffer, buf, tsz)) {
						ret = -EFAULT;
						goto out;
					}
				}
			} else {
				if (clear_user(buffer, tsz)) {
					ret = -EFAULT;
					goto out;
				}
			}
		}
		buflen -= tsz;
		*fpos += tsz;
		buffer += tsz;
		start += tsz;
		tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
	}

out:
	up_read(&kclist_lock);
	if (ret)
		return ret;
	return orig_buflen - buflen;
}

static int open_kcore(struct inode *inode, struct file *filp)
{
	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;

	if (kcore_need_update)
		kcore_update_ram();
	if (i_size_read(inode) != proc_root_kcore->size) {
		inode_lock(inode);
		i_size_write(inode, proc_root_kcore->size);
		inode_unlock(inode);
	}
	return 0;
}

static int release_kcore(struct inode *inode, struct file *file)
{
	kfree(file->private_data);
	return 0;
}

static const struct file_operations proc_kcore_operations = {
	.read		= read_kcore,
	.open		= open_kcore,
	.release	= release_kcore,
	.llseek		= default_llseek,
};

/* Just remember that we have to update /proc/kcore. */
static int __meminit kcore_callback(struct notifier_block *self,
				    unsigned long action, void *arg)
{
	switch (action) {
	case MEM_ONLINE:
	case MEM_OFFLINE:
		kcore_need_update = 1;
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block kcore_callback_nb __meminitdata = {
	.notifier_call = kcore_callback,
	.priority = 0,
};

static struct kcore_list kcore_vmalloc;

#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * If defined, a special segment is used for mapping the kernel text instead
 * of the direct-map area, so we need to create a special TEXT section.
 */
static void __init proc_kcore_text_init(void)
{
	kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
#else
static void __init proc_kcore_text_init(void)
{
}
#endif

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
/*
 * MODULES_VADDR has no intersection with the vmalloc area.
 */
struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
	if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
		kclist_add(&kcore_modules, (void *)MODULES_VADDR,
			   MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
	}
}
#else
static void __init add_modules_range(void)
{
}
#endif

static int __init proc_kcore_init(void)
{
	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
				      &proc_kcore_operations);
	if (!proc_root_kcore) {
		pr_err("couldn't create /proc/kcore\n");
		return 0; /* Always returns 0. */
	}
	/* Store text area if it's special */
	proc_kcore_text_init();
	/* Store vmalloc area */
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
	add_modules_range();
	/* Store direct-map area from physical memory map */
	kcore_update_ram();
	register_hotmemory_notifier(&kcore_callback_nb);

	return 0;
}
fs_initcall(proc_kcore_init);
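
/*
 * Illustrative userspace sketch (not part of this file, kept in a comment so
 * it does not affect the build): one way a reader might walk the PT_LOAD
 * segments that read_kcore() advertises. It assumes a 64-bit ELF target and
 * must run with CAP_SYS_RAWIO, since open_kcore() requires it.
 *
 *	#include <elf.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	int main(void)
 *	{
 *		FILE *f = fopen("/proc/kcore", "r");
 *		Elf64_Ehdr eh;
 *		Elf64_Phdr *ph;
 *		size_t i;
 *
 *		if (!f || fread(&eh, sizeof(eh), 1, f) != 1)
 *			return 1;
 *		ph = calloc(eh.e_phnum, sizeof(*ph));
 *		if (!ph || fseek(f, eh.e_phoff, SEEK_SET) ||
 *		    fread(ph, sizeof(*ph), eh.e_phnum, f) != eh.e_phnum)
 *			return 1;
 *		for (i = 0; i < eh.e_phnum; i++)
 *			if (ph[i].p_type == PT_LOAD)
 *				printf("vaddr %#llx size %#llx at offset %#llx\n",
 *				       (unsigned long long)ph[i].p_vaddr,
 *				       (unsigned long long)ph[i].p_memsz,
 *				       (unsigned long long)ph[i].p_offset);
 *		return 0;
 *	}
 */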