1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/slab.h> 3 #include <linux/file.h> 4 #include <linux/fdtable.h> 5 #include <linux/freezer.h> 6 #include <linux/mm.h> 7 #include <linux/stat.h> 8 #include <linux/fcntl.h> 9 #include <linux/swap.h> 10 #include <linux/ctype.h> 11 #include <linux/string.h> 12 #include <linux/init.h> 13 #include <linux/pagemap.h> 14 #include <linux/perf_event.h> 15 #include <linux/highmem.h> 16 #include <linux/spinlock.h> 17 #include <linux/key.h> 18 #include <linux/personality.h> 19 #include <linux/binfmts.h> 20 #include <linux/coredump.h> 21 #include <linux/sched/coredump.h> 22 #include <linux/sched/signal.h> 23 #include <linux/sched/task_stack.h> 24 #include <linux/utsname.h> 25 #include <linux/pid_namespace.h> 26 #include <linux/module.h> 27 #include <linux/namei.h> 28 #include <linux/mount.h> 29 #include <linux/security.h> 30 #include <linux/syscalls.h> 31 #include <linux/tsacct_kern.h> 32 #include <linux/cn_proc.h> 33 #include <linux/audit.h> 34 #include <linux/tracehook.h> 35 #include <linux/kmod.h> 36 #include <linux/fsnotify.h> 37 #include <linux/fs_struct.h> 38 #include <linux/pipe_fs_i.h> 39 #include <linux/oom.h> 40 #include <linux/compat.h> 41 #include <linux/fs.h> 42 #include <linux/path.h> 43 #include <linux/timekeeping.h> 44 #include <linux/elf.h> 45 46 #include <linux/uaccess.h> 47 #include <asm/mmu_context.h> 48 #include <asm/tlb.h> 49 #include <asm/exec.h> 50 51 #include <trace/events/task.h> 52 #include "internal.h" 53 54 #include <trace/events/sched.h> 55 56 static bool dump_vma_snapshot(struct coredump_params *cprm); 57 static void free_vma_snapshot(struct coredump_params *cprm); 58 59 int core_uses_pid; 60 unsigned int core_pipe_limit; 61 char core_pattern[CORENAME_MAX_SIZE] = "core"; 62 static int core_name_size = CORENAME_MAX_SIZE; 63 64 struct core_name { 65 char *corename; 66 int used, size; 67 }; 68 69 /* The maximal length of core_pattern is also specified in sysctl.c */ 70 71 static int expand_corename(struct core_name *cn, int size) 72 { 73 char *corename = krealloc(cn->corename, size, GFP_KERNEL); 74 75 if (!corename) 76 return -ENOMEM; 77 78 if (size > core_name_size) /* racy but harmless */ 79 core_name_size = size; 80 81 cn->size = ksize(corename); 82 cn->corename = corename; 83 return 0; 84 } 85 86 static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt, 87 va_list arg) 88 { 89 int free, need; 90 va_list arg_copy; 91 92 again: 93 free = cn->size - cn->used; 94 95 va_copy(arg_copy, arg); 96 need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy); 97 va_end(arg_copy); 98 99 if (need < free) { 100 cn->used += need; 101 return 0; 102 } 103 104 if (!expand_corename(cn, cn->size + need - free + 1)) 105 goto again; 106 107 return -ENOMEM; 108 } 109 110 static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...) 111 { 112 va_list arg; 113 int ret; 114 115 va_start(arg, fmt); 116 ret = cn_vprintf(cn, fmt, arg); 117 va_end(arg); 118 119 return ret; 120 } 121 122 static __printf(2, 3) 123 int cn_esc_printf(struct core_name *cn, const char *fmt, ...) 124 { 125 int cur = cn->used; 126 va_list arg; 127 int ret; 128 129 va_start(arg, fmt); 130 ret = cn_vprintf(cn, fmt, arg); 131 va_end(arg); 132 133 if (ret == 0) { 134 /* 135 * Ensure that this coredump name component can't cause the 136 * resulting corefile path to consist of a ".." or ".". 137 */ 138 if ((cn->used - cur == 1 && cn->corename[cur] == '.') || 139 (cn->used - cur == 2 && cn->corename[cur] == '.' 140 && cn->corename[cur+1] == '.')) 141 cn->corename[cur] = '!'; 142 143 /* 144 * Empty names are fishy and could be used to create a "//" in a 145 * corefile name, causing the coredump to happen one directory 146 * level too high. Enforce that all components of the core 147 * pattern are at least one character long. 148 */ 149 if (cn->used == cur) 150 ret = cn_printf(cn, "!"); 151 } 152 153 for (; cur < cn->used; ++cur) { 154 if (cn->corename[cur] == '/') 155 cn->corename[cur] = '!'; 156 } 157 return ret; 158 } 159 160 static int cn_print_exe_file(struct core_name *cn, bool name_only) 161 { 162 struct file *exe_file; 163 char *pathbuf, *path, *ptr; 164 int ret; 165 166 exe_file = get_mm_exe_file(current->mm); 167 if (!exe_file) 168 return cn_esc_printf(cn, "%s (path unknown)", current->comm); 169 170 pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); 171 if (!pathbuf) { 172 ret = -ENOMEM; 173 goto put_exe_file; 174 } 175 176 path = file_path(exe_file, pathbuf, PATH_MAX); 177 if (IS_ERR(path)) { 178 ret = PTR_ERR(path); 179 goto free_buf; 180 } 181 182 if (name_only) { 183 ptr = strrchr(path, '/'); 184 if (ptr) 185 path = ptr + 1; 186 } 187 ret = cn_esc_printf(cn, "%s", path); 188 189 free_buf: 190 kfree(pathbuf); 191 put_exe_file: 192 fput(exe_file); 193 return ret; 194 } 195 196 /* format_corename will inspect the pattern parameter, and output a 197 * name into corename, which must have space for at least 198 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 199 */ 200 static int format_corename(struct core_name *cn, struct coredump_params *cprm, 201 size_t **argv, int *argc) 202 { 203 const struct cred *cred = current_cred(); 204 const char *pat_ptr = core_pattern; 205 int ispipe = (*pat_ptr == '|'); 206 bool was_space = false; 207 int pid_in_pattern = 0; 208 int err = 0; 209 210 cn->used = 0; 211 cn->corename = NULL; 212 if (expand_corename(cn, core_name_size)) 213 return -ENOMEM; 214 cn->corename[0] = '\0'; 215 216 if (ispipe) { 217 int argvs = sizeof(core_pattern) / 2; 218 (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL); 219 if (!(*argv)) 220 return -ENOMEM; 221 (*argv)[(*argc)++] = 0; 222 ++pat_ptr; 223 if (!(*pat_ptr)) 224 return -ENOMEM; 225 } 226 227 /* Repeat as long as we have more pattern to process and more output 228 space */ 229 while (*pat_ptr) { 230 /* 231 * Split on spaces before doing template expansion so that 232 * %e and %E don't get split if they have spaces in them 233 */ 234 if (ispipe) { 235 if (isspace(*pat_ptr)) { 236 if (cn->used != 0) 237 was_space = true; 238 pat_ptr++; 239 continue; 240 } else if (was_space) { 241 was_space = false; 242 err = cn_printf(cn, "%c", '\0'); 243 if (err) 244 return err; 245 (*argv)[(*argc)++] = cn->used; 246 } 247 } 248 if (*pat_ptr != '%') { 249 err = cn_printf(cn, "%c", *pat_ptr++); 250 } else { 251 switch (*++pat_ptr) { 252 /* single % at the end, drop that */ 253 case 0: 254 goto out; 255 /* Double percent, output one percent */ 256 case '%': 257 err = cn_printf(cn, "%c", '%'); 258 break; 259 /* pid */ 260 case 'p': 261 pid_in_pattern = 1; 262 err = cn_printf(cn, "%d", 263 task_tgid_vnr(current)); 264 break; 265 /* global pid */ 266 case 'P': 267 err = cn_printf(cn, "%d", 268 task_tgid_nr(current)); 269 break; 270 case 'i': 271 err = cn_printf(cn, "%d", 272 task_pid_vnr(current)); 273 break; 274 case 'I': 275 err = cn_printf(cn, "%d", 276 task_pid_nr(current)); 277 break; 278 /* uid */ 279 case 'u': 280 err = cn_printf(cn, "%u", 281 from_kuid(&init_user_ns, 282 cred->uid)); 283 break; 284 /* gid */ 285 case 'g': 286 err = cn_printf(cn, "%u", 287 from_kgid(&init_user_ns, 288 cred->gid)); 289 break; 290 case 'd': 291 err = cn_printf(cn, "%d", 292 __get_dumpable(cprm->mm_flags)); 293 break; 294 /* signal that caused the coredump */ 295 case 's': 296 err = cn_printf(cn, "%d", 297 cprm->siginfo->si_signo); 298 break; 299 /* UNIX time of coredump */ 300 case 't': { 301 time64_t time; 302 303 time = ktime_get_real_seconds(); 304 err = cn_printf(cn, "%lld", time); 305 break; 306 } 307 /* hostname */ 308 case 'h': 309 down_read(&uts_sem); 310 err = cn_esc_printf(cn, "%s", 311 utsname()->nodename); 312 up_read(&uts_sem); 313 break; 314 /* executable, could be changed by prctl PR_SET_NAME etc */ 315 case 'e': 316 err = cn_esc_printf(cn, "%s", current->comm); 317 break; 318 /* file name of executable */ 319 case 'f': 320 err = cn_print_exe_file(cn, true); 321 break; 322 case 'E': 323 err = cn_print_exe_file(cn, false); 324 break; 325 /* core limit size */ 326 case 'c': 327 err = cn_printf(cn, "%lu", 328 rlimit(RLIMIT_CORE)); 329 break; 330 default: 331 break; 332 } 333 ++pat_ptr; 334 } 335 336 if (err) 337 return err; 338 } 339 340 out: 341 /* Backward compatibility with core_uses_pid: 342 * 343 * If core_pattern does not include a %p (as is the default) 344 * and core_uses_pid is set, then .%pid will be appended to 345 * the filename. Do not do this for piped commands. */ 346 if (!ispipe && !pid_in_pattern && core_uses_pid) { 347 err = cn_printf(cn, ".%d", task_tgid_vnr(current)); 348 if (err) 349 return err; 350 } 351 return ispipe; 352 } 353 354 static int zap_process(struct task_struct *start, int exit_code, int flags) 355 { 356 struct task_struct *t; 357 int nr = 0; 358 359 /* ignore all signals except SIGKILL, see prepare_signal() */ 360 start->signal->flags = SIGNAL_GROUP_COREDUMP | flags; 361 start->signal->group_exit_code = exit_code; 362 start->signal->group_stop_count = 0; 363 364 for_each_thread(start, t) { 365 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 366 if (t != current && t->mm) { 367 sigaddset(&t->pending.signal, SIGKILL); 368 signal_wake_up(t, 1); 369 nr++; 370 } 371 } 372 373 return nr; 374 } 375 376 static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 377 struct core_state *core_state, int exit_code) 378 { 379 struct task_struct *g, *p; 380 unsigned long flags; 381 int nr = -EAGAIN; 382 383 spin_lock_irq(&tsk->sighand->siglock); 384 if (!signal_group_exit(tsk->signal)) { 385 mm->core_state = core_state; 386 tsk->signal->group_exit_task = tsk; 387 nr = zap_process(tsk, exit_code, 0); 388 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 389 } 390 spin_unlock_irq(&tsk->sighand->siglock); 391 if (unlikely(nr < 0)) 392 return nr; 393 394 tsk->flags |= PF_DUMPCORE; 395 if (atomic_read(&mm->mm_users) == nr + 1) 396 goto done; 397 /* 398 * We should find and kill all tasks which use this mm, and we should 399 * count them correctly into ->nr_threads. We don't take tasklist 400 * lock, but this is safe wrt: 401 * 402 * fork: 403 * None of sub-threads can fork after zap_process(leader). All 404 * processes which were created before this point should be 405 * visible to zap_threads() because copy_process() adds the new 406 * process to the tail of init_task.tasks list, and lock/unlock 407 * of ->siglock provides a memory barrier. 408 * 409 * do_exit: 410 * The caller holds mm->mmap_lock. This means that the task which 411 * uses this mm can't pass exit_mm(), so it can't exit or clear 412 * its ->mm. 413 * 414 * de_thread: 415 * It does list_replace_rcu(&leader->tasks, ¤t->tasks), 416 * we must see either old or new leader, this does not matter. 417 * However, it can change p->sighand, so lock_task_sighand(p) 418 * must be used. Since p->mm != NULL and we hold ->mmap_lock 419 * it can't fail. 420 * 421 * Note also that "g" can be the old leader with ->mm == NULL 422 * and already unhashed and thus removed from ->thread_group. 423 * This is OK, __unhash_process()->list_del_rcu() does not 424 * clear the ->next pointer, we will find the new leader via 425 * next_thread(). 426 */ 427 rcu_read_lock(); 428 for_each_process(g) { 429 if (g == tsk->group_leader) 430 continue; 431 if (g->flags & PF_KTHREAD) 432 continue; 433 434 for_each_thread(g, p) { 435 if (unlikely(!p->mm)) 436 continue; 437 if (unlikely(p->mm == mm)) { 438 lock_task_sighand(p, &flags); 439 nr += zap_process(p, exit_code, 440 SIGNAL_GROUP_EXIT); 441 unlock_task_sighand(p, &flags); 442 } 443 break; 444 } 445 } 446 rcu_read_unlock(); 447 done: 448 atomic_set(&core_state->nr_threads, nr); 449 return nr; 450 } 451 452 static int coredump_wait(int exit_code, struct core_state *core_state) 453 { 454 struct task_struct *tsk = current; 455 struct mm_struct *mm = tsk->mm; 456 int core_waiters = -EBUSY; 457 458 init_completion(&core_state->startup); 459 core_state->dumper.task = tsk; 460 core_state->dumper.next = NULL; 461 462 if (mmap_write_lock_killable(mm)) 463 return -EINTR; 464 465 if (!mm->core_state) 466 core_waiters = zap_threads(tsk, mm, core_state, exit_code); 467 mmap_write_unlock(mm); 468 469 if (core_waiters > 0) { 470 struct core_thread *ptr; 471 472 freezer_do_not_count(); 473 wait_for_completion(&core_state->startup); 474 freezer_count(); 475 /* 476 * Wait for all the threads to become inactive, so that 477 * all the thread context (extended register state, like 478 * fpu etc) gets copied to the memory. 479 */ 480 ptr = core_state->dumper.next; 481 while (ptr != NULL) { 482 wait_task_inactive(ptr->task, 0); 483 ptr = ptr->next; 484 } 485 } 486 487 return core_waiters; 488 } 489 490 static void coredump_finish(struct mm_struct *mm, bool core_dumped) 491 { 492 struct core_thread *curr, *next; 493 struct task_struct *task; 494 495 spin_lock_irq(¤t->sighand->siglock); 496 if (core_dumped && !__fatal_signal_pending(current)) 497 current->signal->group_exit_code |= 0x80; 498 current->signal->group_exit_task = NULL; 499 current->signal->flags = SIGNAL_GROUP_EXIT; 500 spin_unlock_irq(¤t->sighand->siglock); 501 502 next = mm->core_state->dumper.next; 503 while ((curr = next) != NULL) { 504 next = curr->next; 505 task = curr->task; 506 /* 507 * see exit_mm(), curr->task must not see 508 * ->task == NULL before we read ->next. 509 */ 510 smp_mb(); 511 curr->task = NULL; 512 wake_up_process(task); 513 } 514 515 mm->core_state = NULL; 516 } 517 518 static bool dump_interrupted(void) 519 { 520 /* 521 * SIGKILL or freezing() interrupt the coredumping. Perhaps we 522 * can do try_to_freeze() and check __fatal_signal_pending(), 523 * but then we need to teach dump_write() to restart and clear 524 * TIF_SIGPENDING. 525 */ 526 return fatal_signal_pending(current) || freezing(current); 527 } 528 529 static void wait_for_dump_helpers(struct file *file) 530 { 531 struct pipe_inode_info *pipe = file->private_data; 532 533 pipe_lock(pipe); 534 pipe->readers++; 535 pipe->writers--; 536 wake_up_interruptible_sync(&pipe->rd_wait); 537 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 538 pipe_unlock(pipe); 539 540 /* 541 * We actually want wait_event_freezable() but then we need 542 * to clear TIF_SIGPENDING and improve dump_interrupted(). 543 */ 544 wait_event_interruptible(pipe->rd_wait, pipe->readers == 1); 545 546 pipe_lock(pipe); 547 pipe->readers--; 548 pipe->writers++; 549 pipe_unlock(pipe); 550 } 551 552 /* 553 * umh_pipe_setup 554 * helper function to customize the process used 555 * to collect the core in userspace. Specifically 556 * it sets up a pipe and installs it as fd 0 (stdin) 557 * for the process. Returns 0 on success, or 558 * PTR_ERR on failure. 559 * Note that it also sets the core limit to 1. This 560 * is a special value that we use to trap recursive 561 * core dumps 562 */ 563 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 564 { 565 struct file *files[2]; 566 struct coredump_params *cp = (struct coredump_params *)info->data; 567 int err = create_pipe_files(files, 0); 568 if (err) 569 return err; 570 571 cp->file = files[1]; 572 573 err = replace_fd(0, files[0], 0); 574 fput(files[0]); 575 /* and disallow core files too */ 576 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 577 578 return err; 579 } 580 581 void do_coredump(const kernel_siginfo_t *siginfo) 582 { 583 struct core_state core_state; 584 struct core_name cn; 585 struct mm_struct *mm = current->mm; 586 struct linux_binfmt * binfmt; 587 const struct cred *old_cred; 588 struct cred *cred; 589 int retval = 0; 590 int ispipe; 591 size_t *argv = NULL; 592 int argc = 0; 593 /* require nonrelative corefile path and be extra careful */ 594 bool need_suid_safe = false; 595 bool core_dumped = false; 596 static atomic_t core_dump_count = ATOMIC_INIT(0); 597 struct coredump_params cprm = { 598 .siginfo = siginfo, 599 .regs = signal_pt_regs(), 600 .limit = rlimit(RLIMIT_CORE), 601 /* 602 * We must use the same mm->flags while dumping core to avoid 603 * inconsistency of bit flags, since this flag is not protected 604 * by any locks. 605 */ 606 .mm_flags = mm->flags, 607 .vma_meta = NULL, 608 }; 609 610 audit_core_dumps(siginfo->si_signo); 611 612 binfmt = mm->binfmt; 613 if (!binfmt || !binfmt->core_dump) 614 goto fail; 615 if (!__get_dumpable(cprm.mm_flags)) 616 goto fail; 617 618 cred = prepare_creds(); 619 if (!cred) 620 goto fail; 621 /* 622 * We cannot trust fsuid as being the "true" uid of the process 623 * nor do we know its entire history. We only know it was tainted 624 * so we dump it as root in mode 2, and only into a controlled 625 * environment (pipe handler or fully qualified path). 626 */ 627 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { 628 /* Setuid core dump mode */ 629 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 630 need_suid_safe = true; 631 } 632 633 retval = coredump_wait(siginfo->si_signo, &core_state); 634 if (retval < 0) 635 goto fail_creds; 636 637 old_cred = override_creds(cred); 638 639 ispipe = format_corename(&cn, &cprm, &argv, &argc); 640 641 if (ispipe) { 642 int argi; 643 int dump_count; 644 char **helper_argv; 645 struct subprocess_info *sub_info; 646 647 if (ispipe < 0) { 648 printk(KERN_WARNING "format_corename failed\n"); 649 printk(KERN_WARNING "Aborting core\n"); 650 goto fail_unlock; 651 } 652 653 if (cprm.limit == 1) { 654 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 655 * 656 * Normally core limits are irrelevant to pipes, since 657 * we're not writing to the file system, but we use 658 * cprm.limit of 1 here as a special value, this is a 659 * consistent way to catch recursive crashes. 660 * We can still crash if the core_pattern binary sets 661 * RLIM_CORE = !1, but it runs as root, and can do 662 * lots of stupid things. 663 * 664 * Note that we use task_tgid_vnr here to grab the pid 665 * of the process group leader. That way we get the 666 * right pid if a thread in a multi-threaded 667 * core_pattern process dies. 668 */ 669 printk(KERN_WARNING 670 "Process %d(%s) has RLIMIT_CORE set to 1\n", 671 task_tgid_vnr(current), current->comm); 672 printk(KERN_WARNING "Aborting core\n"); 673 goto fail_unlock; 674 } 675 cprm.limit = RLIM_INFINITY; 676 677 dump_count = atomic_inc_return(&core_dump_count); 678 if (core_pipe_limit && (core_pipe_limit < dump_count)) { 679 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", 680 task_tgid_vnr(current), current->comm); 681 printk(KERN_WARNING "Skipping core dump\n"); 682 goto fail_dropcount; 683 } 684 685 helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), 686 GFP_KERNEL); 687 if (!helper_argv) { 688 printk(KERN_WARNING "%s failed to allocate memory\n", 689 __func__); 690 goto fail_dropcount; 691 } 692 for (argi = 0; argi < argc; argi++) 693 helper_argv[argi] = cn.corename + argv[argi]; 694 helper_argv[argi] = NULL; 695 696 retval = -ENOMEM; 697 sub_info = call_usermodehelper_setup(helper_argv[0], 698 helper_argv, NULL, GFP_KERNEL, 699 umh_pipe_setup, NULL, &cprm); 700 if (sub_info) 701 retval = call_usermodehelper_exec(sub_info, 702 UMH_WAIT_EXEC); 703 704 kfree(helper_argv); 705 if (retval) { 706 printk(KERN_INFO "Core dump to |%s pipe failed\n", 707 cn.corename); 708 goto close_fail; 709 } 710 } else { 711 struct user_namespace *mnt_userns; 712 struct inode *inode; 713 int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW | 714 O_LARGEFILE | O_EXCL; 715 716 if (cprm.limit < binfmt->min_coredump) 717 goto fail_unlock; 718 719 if (need_suid_safe && cn.corename[0] != '/') { 720 printk(KERN_WARNING "Pid %d(%s) can only dump core "\ 721 "to fully qualified path!\n", 722 task_tgid_vnr(current), current->comm); 723 printk(KERN_WARNING "Skipping core dump\n"); 724 goto fail_unlock; 725 } 726 727 /* 728 * Unlink the file if it exists unless this is a SUID 729 * binary - in that case, we're running around with root 730 * privs and don't want to unlink another user's coredump. 731 */ 732 if (!need_suid_safe) { 733 /* 734 * If it doesn't exist, that's fine. If there's some 735 * other problem, we'll catch it at the filp_open(). 736 */ 737 do_unlinkat(AT_FDCWD, getname_kernel(cn.corename)); 738 } 739 740 /* 741 * There is a race between unlinking and creating the 742 * file, but if that causes an EEXIST here, that's 743 * fine - another process raced with us while creating 744 * the corefile, and the other process won. To userspace, 745 * what matters is that at least one of the two processes 746 * writes its coredump successfully, not which one. 747 */ 748 if (need_suid_safe) { 749 /* 750 * Using user namespaces, normal user tasks can change 751 * their current->fs->root to point to arbitrary 752 * directories. Since the intention of the "only dump 753 * with a fully qualified path" rule is to control where 754 * coredumps may be placed using root privileges, 755 * current->fs->root must not be used. Instead, use the 756 * root directory of init_task. 757 */ 758 struct path root; 759 760 task_lock(&init_task); 761 get_fs_root(init_task.fs, &root); 762 task_unlock(&init_task); 763 cprm.file = file_open_root(&root, cn.corename, 764 open_flags, 0600); 765 path_put(&root); 766 } else { 767 cprm.file = filp_open(cn.corename, open_flags, 0600); 768 } 769 if (IS_ERR(cprm.file)) 770 goto fail_unlock; 771 772 inode = file_inode(cprm.file); 773 if (inode->i_nlink > 1) 774 goto close_fail; 775 if (d_unhashed(cprm.file->f_path.dentry)) 776 goto close_fail; 777 /* 778 * AK: actually i see no reason to not allow this for named 779 * pipes etc, but keep the previous behaviour for now. 780 */ 781 if (!S_ISREG(inode->i_mode)) 782 goto close_fail; 783 /* 784 * Don't dump core if the filesystem changed owner or mode 785 * of the file during file creation. This is an issue when 786 * a process dumps core while its cwd is e.g. on a vfat 787 * filesystem. 788 */ 789 mnt_userns = file_mnt_user_ns(cprm.file); 790 if (!uid_eq(i_uid_into_mnt(mnt_userns, inode), 791 current_fsuid())) { 792 pr_info_ratelimited("Core dump to %s aborted: cannot preserve file owner\n", 793 cn.corename); 794 goto close_fail; 795 } 796 if ((inode->i_mode & 0677) != 0600) { 797 pr_info_ratelimited("Core dump to %s aborted: cannot preserve file permissions\n", 798 cn.corename); 799 goto close_fail; 800 } 801 if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) 802 goto close_fail; 803 if (do_truncate(mnt_userns, cprm.file->f_path.dentry, 804 0, 0, cprm.file)) 805 goto close_fail; 806 } 807 808 /* get us an unshared descriptor table; almost always a no-op */ 809 /* The cell spufs coredump code reads the file descriptor tables */ 810 retval = unshare_files(); 811 if (retval) 812 goto close_fail; 813 if (!dump_interrupted()) { 814 /* 815 * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would 816 * have this set to NULL. 817 */ 818 if (!cprm.file) { 819 pr_info("Core dump to |%s disabled\n", cn.corename); 820 goto close_fail; 821 } 822 if (!dump_vma_snapshot(&cprm)) 823 goto close_fail; 824 825 file_start_write(cprm.file); 826 core_dumped = binfmt->core_dump(&cprm); 827 /* 828 * Ensures that file size is big enough to contain the current 829 * file postion. This prevents gdb from complaining about 830 * a truncated file if the last "write" to the file was 831 * dump_skip. 832 */ 833 if (cprm.to_skip) { 834 cprm.to_skip--; 835 dump_emit(&cprm, "", 1); 836 } 837 file_end_write(cprm.file); 838 free_vma_snapshot(&cprm); 839 } 840 if (ispipe && core_pipe_limit) 841 wait_for_dump_helpers(cprm.file); 842 close_fail: 843 if (cprm.file) 844 filp_close(cprm.file, NULL); 845 fail_dropcount: 846 if (ispipe) 847 atomic_dec(&core_dump_count); 848 fail_unlock: 849 kfree(argv); 850 kfree(cn.corename); 851 coredump_finish(mm, core_dumped); 852 revert_creds(old_cred); 853 fail_creds: 854 put_cred(cred); 855 fail: 856 return; 857 } 858 859 /* 860 * Core dumping helper functions. These are the only things you should 861 * do on a core-file: use only these functions to write out all the 862 * necessary info. 863 */ 864 static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr) 865 { 866 struct file *file = cprm->file; 867 loff_t pos = file->f_pos; 868 ssize_t n; 869 if (cprm->written + nr > cprm->limit) 870 return 0; 871 872 873 if (dump_interrupted()) 874 return 0; 875 n = __kernel_write(file, addr, nr, &pos); 876 if (n != nr) 877 return 0; 878 file->f_pos = pos; 879 cprm->written += n; 880 cprm->pos += n; 881 882 return 1; 883 } 884 885 static int __dump_skip(struct coredump_params *cprm, size_t nr) 886 { 887 static char zeroes[PAGE_SIZE]; 888 struct file *file = cprm->file; 889 if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 890 if (dump_interrupted() || 891 file->f_op->llseek(file, nr, SEEK_CUR) < 0) 892 return 0; 893 cprm->pos += nr; 894 return 1; 895 } else { 896 while (nr > PAGE_SIZE) { 897 if (!__dump_emit(cprm, zeroes, PAGE_SIZE)) 898 return 0; 899 nr -= PAGE_SIZE; 900 } 901 return __dump_emit(cprm, zeroes, nr); 902 } 903 } 904 905 int dump_emit(struct coredump_params *cprm, const void *addr, int nr) 906 { 907 if (cprm->to_skip) { 908 if (!__dump_skip(cprm, cprm->to_skip)) 909 return 0; 910 cprm->to_skip = 0; 911 } 912 return __dump_emit(cprm, addr, nr); 913 } 914 EXPORT_SYMBOL(dump_emit); 915 916 void dump_skip_to(struct coredump_params *cprm, unsigned long pos) 917 { 918 cprm->to_skip = pos - cprm->pos; 919 } 920 EXPORT_SYMBOL(dump_skip_to); 921 922 void dump_skip(struct coredump_params *cprm, size_t nr) 923 { 924 cprm->to_skip += nr; 925 } 926 EXPORT_SYMBOL(dump_skip); 927 928 #ifdef CONFIG_ELF_CORE 929 int dump_user_range(struct coredump_params *cprm, unsigned long start, 930 unsigned long len) 931 { 932 unsigned long addr; 933 934 for (addr = start; addr < start + len; addr += PAGE_SIZE) { 935 struct page *page; 936 int stop; 937 938 /* 939 * To avoid having to allocate page tables for virtual address 940 * ranges that have never been used yet, and also to make it 941 * easy to generate sparse core files, use a helper that returns 942 * NULL when encountering an empty page table entry that would 943 * otherwise have been filled with the zero page. 944 */ 945 page = get_dump_page(addr); 946 if (page) { 947 void *kaddr = kmap_local_page(page); 948 949 stop = !dump_emit(cprm, kaddr, PAGE_SIZE); 950 kunmap_local(kaddr); 951 put_page(page); 952 if (stop) 953 return 0; 954 } else { 955 dump_skip(cprm, PAGE_SIZE); 956 } 957 } 958 return 1; 959 } 960 #endif 961 962 int dump_align(struct coredump_params *cprm, int align) 963 { 964 unsigned mod = (cprm->pos + cprm->to_skip) & (align - 1); 965 if (align & (align - 1)) 966 return 0; 967 if (mod) 968 cprm->to_skip += align - mod; 969 return 1; 970 } 971 EXPORT_SYMBOL(dump_align); 972 973 /* 974 * The purpose of always_dump_vma() is to make sure that special kernel mappings 975 * that are useful for post-mortem analysis are included in every core dump. 976 * In that way we ensure that the core dump is fully interpretable later 977 * without matching up the same kernel and hardware config to see what PC values 978 * meant. These special mappings include - vDSO, vsyscall, and other 979 * architecture specific mappings 980 */ 981 static bool always_dump_vma(struct vm_area_struct *vma) 982 { 983 /* Any vsyscall mappings? */ 984 if (vma == get_gate_vma(vma->vm_mm)) 985 return true; 986 987 /* 988 * Assume that all vmas with a .name op should always be dumped. 989 * If this changes, a new vm_ops field can easily be added. 990 */ 991 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma)) 992 return true; 993 994 /* 995 * arch_vma_name() returns non-NULL for special architecture mappings, 996 * such as vDSO sections. 997 */ 998 if (arch_vma_name(vma)) 999 return true; 1000 1001 return false; 1002 } 1003 1004 #define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1 1005 1006 /* 1007 * Decide how much of @vma's contents should be included in a core dump. 1008 */ 1009 static unsigned long vma_dump_size(struct vm_area_struct *vma, 1010 unsigned long mm_flags) 1011 { 1012 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) 1013 1014 /* always dump the vdso and vsyscall sections */ 1015 if (always_dump_vma(vma)) 1016 goto whole; 1017 1018 if (vma->vm_flags & VM_DONTDUMP) 1019 return 0; 1020 1021 /* support for DAX */ 1022 if (vma_is_dax(vma)) { 1023 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED)) 1024 goto whole; 1025 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE)) 1026 goto whole; 1027 return 0; 1028 } 1029 1030 /* Hugetlb memory check */ 1031 if (is_vm_hugetlb_page(vma)) { 1032 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) 1033 goto whole; 1034 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) 1035 goto whole; 1036 return 0; 1037 } 1038 1039 /* Do not dump I/O mapped devices or special mappings */ 1040 if (vma->vm_flags & VM_IO) 1041 return 0; 1042 1043 /* By default, dump shared memory if mapped from an anonymous file. */ 1044 if (vma->vm_flags & VM_SHARED) { 1045 if (file_inode(vma->vm_file)->i_nlink == 0 ? 1046 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED)) 1047 goto whole; 1048 return 0; 1049 } 1050 1051 /* Dump segments that have been written to. */ 1052 if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE)) 1053 goto whole; 1054 if (vma->vm_file == NULL) 1055 return 0; 1056 1057 if (FILTER(MAPPED_PRIVATE)) 1058 goto whole; 1059 1060 /* 1061 * If this is the beginning of an executable file mapping, 1062 * dump the first page to aid in determining what was mapped here. 1063 */ 1064 if (FILTER(ELF_HEADERS) && 1065 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { 1066 if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0) 1067 return PAGE_SIZE; 1068 1069 /* 1070 * ELF libraries aren't always executable. 1071 * We'll want to check whether the mapping starts with the ELF 1072 * magic, but not now - we're holding the mmap lock, 1073 * so copy_from_user() doesn't work here. 1074 * Use a placeholder instead, and fix it up later in 1075 * dump_vma_snapshot(). 1076 */ 1077 return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER; 1078 } 1079 1080 #undef FILTER 1081 1082 return 0; 1083 1084 whole: 1085 return vma->vm_end - vma->vm_start; 1086 } 1087 1088 static struct vm_area_struct *first_vma(struct task_struct *tsk, 1089 struct vm_area_struct *gate_vma) 1090 { 1091 struct vm_area_struct *ret = tsk->mm->mmap; 1092 1093 if (ret) 1094 return ret; 1095 return gate_vma; 1096 } 1097 1098 /* 1099 * Helper function for iterating across a vma list. It ensures that the caller 1100 * will visit `gate_vma' prior to terminating the search. 1101 */ 1102 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, 1103 struct vm_area_struct *gate_vma) 1104 { 1105 struct vm_area_struct *ret; 1106 1107 ret = this_vma->vm_next; 1108 if (ret) 1109 return ret; 1110 if (this_vma == gate_vma) 1111 return NULL; 1112 return gate_vma; 1113 } 1114 1115 static void free_vma_snapshot(struct coredump_params *cprm) 1116 { 1117 if (cprm->vma_meta) { 1118 int i; 1119 for (i = 0; i < cprm->vma_count; i++) { 1120 struct file *file = cprm->vma_meta[i].file; 1121 if (file) 1122 fput(file); 1123 } 1124 kvfree(cprm->vma_meta); 1125 cprm->vma_meta = NULL; 1126 } 1127 } 1128 1129 /* 1130 * Under the mmap_lock, take a snapshot of relevant information about the task's 1131 * VMAs. 1132 */ 1133 static bool dump_vma_snapshot(struct coredump_params *cprm) 1134 { 1135 struct vm_area_struct *vma, *gate_vma; 1136 struct mm_struct *mm = current->mm; 1137 int i; 1138 1139 /* 1140 * Once the stack expansion code is fixed to not change VMA bounds 1141 * under mmap_lock in read mode, this can be changed to take the 1142 * mmap_lock in read mode. 1143 */ 1144 if (mmap_write_lock_killable(mm)) 1145 return false; 1146 1147 cprm->vma_data_size = 0; 1148 gate_vma = get_gate_vma(mm); 1149 cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0); 1150 1151 cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL); 1152 if (!cprm->vma_meta) { 1153 mmap_write_unlock(mm); 1154 return false; 1155 } 1156 1157 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; 1158 vma = next_vma(vma, gate_vma), i++) { 1159 struct core_vma_metadata *m = cprm->vma_meta + i; 1160 1161 m->start = vma->vm_start; 1162 m->end = vma->vm_end; 1163 m->flags = vma->vm_flags; 1164 m->dump_size = vma_dump_size(vma, cprm->mm_flags); 1165 m->pgoff = vma->vm_pgoff; 1166 1167 m->file = vma->vm_file; 1168 if (m->file) 1169 get_file(m->file); 1170 } 1171 1172 mmap_write_unlock(mm); 1173 1174 for (i = 0; i < cprm->vma_count; i++) { 1175 struct core_vma_metadata *m = cprm->vma_meta + i; 1176 1177 if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) { 1178 char elfmag[SELFMAG]; 1179 1180 if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) || 1181 memcmp(elfmag, ELFMAG, SELFMAG) != 0) { 1182 m->dump_size = 0; 1183 } else { 1184 m->dump_size = PAGE_SIZE; 1185 } 1186 } 1187 1188 cprm->vma_data_size += m->dump_size; 1189 } 1190 1191 return true; 1192 } 1193