1 #include <linux/slab.h> 2 #include <linux/file.h> 3 #include <linux/fdtable.h> 4 #include <linux/mm.h> 5 #include <linux/stat.h> 6 #include <linux/fcntl.h> 7 #include <linux/swap.h> 8 #include <linux/string.h> 9 #include <linux/init.h> 10 #include <linux/pagemap.h> 11 #include <linux/perf_event.h> 12 #include <linux/highmem.h> 13 #include <linux/spinlock.h> 14 #include <linux/key.h> 15 #include <linux/personality.h> 16 #include <linux/binfmts.h> 17 #include <linux/coredump.h> 18 #include <linux/utsname.h> 19 #include <linux/pid_namespace.h> 20 #include <linux/module.h> 21 #include <linux/namei.h> 22 #include <linux/mount.h> 23 #include <linux/security.h> 24 #include <linux/syscalls.h> 25 #include <linux/tsacct_kern.h> 26 #include <linux/cn_proc.h> 27 #include <linux/audit.h> 28 #include <linux/tracehook.h> 29 #include <linux/kmod.h> 30 #include <linux/fsnotify.h> 31 #include <linux/fs_struct.h> 32 #include <linux/pipe_fs_i.h> 33 #include <linux/oom.h> 34 #include <linux/compat.h> 35 36 #include <asm/uaccess.h> 37 #include <asm/mmu_context.h> 38 #include <asm/tlb.h> 39 #include <asm/exec.h> 40 41 #include <trace/events/task.h> 42 #include "internal.h" 43 44 #include <trace/events/sched.h> 45 46 int core_uses_pid; 47 unsigned int core_pipe_limit; 48 char core_pattern[CORENAME_MAX_SIZE] = "core"; 49 static int core_name_size = CORENAME_MAX_SIZE; 50 51 struct core_name { 52 char *corename; 53 int used, size; 54 }; 55 56 /* The maximal length of core_pattern is also specified in sysctl.c */ 57 58 static int expand_corename(struct core_name *cn, int size) 59 { 60 char *corename = krealloc(cn->corename, size, GFP_KERNEL); 61 62 if (!corename) 63 return -ENOMEM; 64 65 if (size > core_name_size) /* racy but harmless */ 66 core_name_size = size; 67 68 cn->size = ksize(corename); 69 cn->corename = corename; 70 return 0; 71 } 72 73 static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) 74 { 75 int free, need; 76 va_list arg_copy; 77 78 again: 79 free = cn->size - cn->used; 80 81 va_copy(arg_copy, arg); 82 need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy); 83 va_end(arg_copy); 84 85 if (need < free) { 86 cn->used += need; 87 return 0; 88 } 89 90 if (!expand_corename(cn, cn->size + need - free + 1)) 91 goto again; 92 93 return -ENOMEM; 94 } 95 96 static int cn_printf(struct core_name *cn, const char *fmt, ...) 97 { 98 va_list arg; 99 int ret; 100 101 va_start(arg, fmt); 102 ret = cn_vprintf(cn, fmt, arg); 103 va_end(arg); 104 105 return ret; 106 } 107 108 static int cn_esc_printf(struct core_name *cn, const char *fmt, ...) 109 { 110 int cur = cn->used; 111 va_list arg; 112 int ret; 113 114 va_start(arg, fmt); 115 ret = cn_vprintf(cn, fmt, arg); 116 va_end(arg); 117 118 for (; cur < cn->used; ++cur) { 119 if (cn->corename[cur] == '/') 120 cn->corename[cur] = '!'; 121 } 122 return ret; 123 } 124 125 static int cn_print_exe_file(struct core_name *cn) 126 { 127 struct file *exe_file; 128 char *pathbuf, *path; 129 int ret; 130 131 exe_file = get_mm_exe_file(current->mm); 132 if (!exe_file) 133 return cn_esc_printf(cn, "%s (path unknown)", current->comm); 134 135 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); 136 if (!pathbuf) { 137 ret = -ENOMEM; 138 goto put_exe_file; 139 } 140 141 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); 142 if (IS_ERR(path)) { 143 ret = PTR_ERR(path); 144 goto free_buf; 145 } 146 147 ret = cn_esc_printf(cn, "%s", path); 148 149 free_buf: 150 kfree(pathbuf); 151 put_exe_file: 152 fput(exe_file); 153 return ret; 154 } 155 156 /* format_corename will inspect the pattern parameter, and output a 157 * name into corename, which must have space for at least 158 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 159 */ 160 static int format_corename(struct core_name *cn, struct coredump_params *cprm) 161 { 162 const struct cred *cred = current_cred(); 163 const char *pat_ptr = core_pattern; 164 int ispipe = (*pat_ptr == '|'); 165 int pid_in_pattern = 0; 166 int err = 0; 167 168 cn->used = 0; 169 cn->corename = NULL; 170 if (expand_corename(cn, core_name_size)) 171 return -ENOMEM; 172 cn->corename[0] = '\0'; 173 174 if (ispipe) 175 ++pat_ptr; 176 177 /* Repeat as long as we have more pattern to process and more output 178 space */ 179 while (*pat_ptr) { 180 if (*pat_ptr != '%') { 181 err = cn_printf(cn, "%c", *pat_ptr++); 182 } else { 183 switch (*++pat_ptr) { 184 /* single % at the end, drop that */ 185 case 0: 186 goto out; 187 /* Double percent, output one percent */ 188 case '%': 189 err = cn_printf(cn, "%c", '%'); 190 break; 191 /* pid */ 192 case 'p': 193 pid_in_pattern = 1; 194 err = cn_printf(cn, "%d", 195 task_tgid_vnr(current)); 196 break; 197 /* global pid */ 198 case 'P': 199 err = cn_printf(cn, "%d", 200 task_tgid_nr(current)); 201 break; 202 case 'i': 203 err = cn_printf(cn, "%d", 204 task_pid_vnr(current)); 205 break; 206 case 'I': 207 err = cn_printf(cn, "%d", 208 task_pid_nr(current)); 209 break; 210 /* uid */ 211 case 'u': 212 err = cn_printf(cn, "%d", cred->uid); 213 break; 214 /* gid */ 215 case 'g': 216 err = cn_printf(cn, "%d", cred->gid); 217 break; 218 case 'd': 219 err = cn_printf(cn, "%d", 220 __get_dumpable(cprm->mm_flags)); 221 break; 222 /* signal that caused the coredump */ 223 case 's': 224 err = cn_printf(cn, "%ld", cprm->siginfo->si_signo); 225 break; 226 /* UNIX time of coredump */ 227 case 't': { 228 struct timeval tv; 229 do_gettimeofday(&tv); 230 err = cn_printf(cn, "%lu", tv.tv_sec); 231 break; 232 } 233 /* hostname */ 234 case 'h': 235 down_read(&uts_sem); 236 err = cn_esc_printf(cn, "%s", 237 utsname()->nodename); 238 up_read(&uts_sem); 239 break; 240 /* executable */ 241 case 'e': 242 err = cn_esc_printf(cn, "%s", current->comm); 243 break; 244 case 'E': 245 err = cn_print_exe_file(cn); 246 break; 247 /* core limit size */ 248 case 'c': 249 err = cn_printf(cn, "%lu", 250 rlimit(RLIMIT_CORE)); 251 break; 252 default: 253 break; 254 } 255 ++pat_ptr; 256 } 257 258 if (err) 259 return err; 260 } 261 262 out: 263 /* Backward compatibility with core_uses_pid: 264 * 265 * If core_pattern does not include a %p (as is the default) 266 * and core_uses_pid is set, then .%pid will be appended to 267 * the filename. Do not do this for piped commands. */ 268 if (!ispipe && !pid_in_pattern && core_uses_pid) { 269 err = cn_printf(cn, ".%d", task_tgid_vnr(current)); 270 if (err) 271 return err; 272 } 273 return ispipe; 274 } 275 276 static int zap_process(struct task_struct *start, int exit_code) 277 { 278 struct task_struct *t; 279 int nr = 0; 280 281 start->signal->group_exit_code = exit_code; 282 start->signal->group_stop_count = 0; 283 284 t = start; 285 do { 286 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 287 if (t != current && t->mm) { 288 sigaddset(&t->pending.signal, SIGKILL); 289 signal_wake_up(t, 1); 290 nr++; 291 } 292 } while_each_thread(start, t); 293 294 return nr; 295 } 296 297 static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 298 struct core_state *core_state, int exit_code) 299 { 300 struct task_struct *g, *p; 301 unsigned long flags; 302 int nr = -EAGAIN; 303 304 spin_lock_irq(&tsk->sighand->siglock); 305 if (!signal_group_exit(tsk->signal)) { 306 mm->core_state = core_state; 307 nr = zap_process(tsk, exit_code); 308 tsk->signal->group_exit_task = tsk; 309 /* ignore all signals except SIGKILL, see prepare_signal() */ 310 tsk->signal->flags = SIGNAL_GROUP_COREDUMP; 311 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 312 } 313 spin_unlock_irq(&tsk->sighand->siglock); 314 if (unlikely(nr < 0)) 315 return nr; 316 317 tsk->flags |= PF_DUMPCORE; 318 if (atomic_read(&mm->mm_users) == nr + 1) 319 goto done; 320 /* 321 * We should find and kill all tasks which use this mm, and we should 322 * count them correctly into ->nr_threads. We don't take tasklist 323 * lock, but this is safe wrt: 324 * 325 * fork: 326 * None of sub-threads can fork after zap_process(leader). All 327 * processes which were created before this point should be 328 * visible to zap_threads() because copy_process() adds the new 329 * process to the tail of init_task.tasks list, and lock/unlock 330 * of ->siglock provides a memory barrier. 331 * 332 * do_exit: 333 * The caller holds mm->mmap_sem. This means that the task which 334 * uses this mm can't pass exit_mm(), so it can't exit or clear 335 * its ->mm. 336 * 337 * de_thread: 338 * It does list_replace_rcu(&leader->tasks, ¤t->tasks), 339 * we must see either old or new leader, this does not matter. 340 * However, it can change p->sighand, so lock_task_sighand(p) 341 * must be used. Since p->mm != NULL and we hold ->mmap_sem 342 * it can't fail. 343 * 344 * Note also that "g" can be the old leader with ->mm == NULL 345 * and already unhashed and thus removed from ->thread_group. 346 * This is OK, __unhash_process()->list_del_rcu() does not 347 * clear the ->next pointer, we will find the new leader via 348 * next_thread(). 349 */ 350 rcu_read_lock(); 351 for_each_process(g) { 352 if (g == tsk->group_leader) 353 continue; 354 if (g->flags & PF_KTHREAD) 355 continue; 356 p = g; 357 do { 358 if (p->mm) { 359 if (unlikely(p->mm == mm)) { 360 lock_task_sighand(p, &flags); 361 nr += zap_process(p, exit_code); 362 p->signal->flags = SIGNAL_GROUP_EXIT; 363 unlock_task_sighand(p, &flags); 364 } 365 break; 366 } 367 } while_each_thread(g, p); 368 } 369 rcu_read_unlock(); 370 done: 371 atomic_set(&core_state->nr_threads, nr); 372 return nr; 373 } 374 375 static int coredump_wait(int exit_code, struct core_state *core_state) 376 { 377 struct task_struct *tsk = current; 378 struct mm_struct *mm = tsk->mm; 379 int core_waiters = -EBUSY; 380 381 init_completion(&core_state->startup); 382 core_state->dumper.task = tsk; 383 core_state->dumper.next = NULL; 384 385 down_write(&mm->mmap_sem); 386 if (!mm->core_state) 387 core_waiters = zap_threads(tsk, mm, core_state, exit_code); 388 up_write(&mm->mmap_sem); 389 390 if (core_waiters > 0) { 391 struct core_thread *ptr; 392 393 wait_for_completion(&core_state->startup); 394 /* 395 * Wait for all the threads to become inactive, so that 396 * all the thread context (extended register state, like 397 * fpu etc) gets copied to the memory. 398 */ 399 ptr = core_state->dumper.next; 400 while (ptr != NULL) { 401 wait_task_inactive(ptr->task, 0); 402 ptr = ptr->next; 403 } 404 } 405 406 return core_waiters; 407 } 408 409 static void coredump_finish(struct mm_struct *mm, bool core_dumped) 410 { 411 struct core_thread *curr, *next; 412 struct task_struct *task; 413 414 spin_lock_irq(¤t->sighand->siglock); 415 if (core_dumped && !__fatal_signal_pending(current)) 416 current->signal->group_exit_code |= 0x80; 417 current->signal->group_exit_task = NULL; 418 current->signal->flags = SIGNAL_GROUP_EXIT; 419 spin_unlock_irq(¤t->sighand->siglock); 420 421 next = mm->core_state->dumper.next; 422 while ((curr = next) != NULL) { 423 next = curr->next; 424 task = curr->task; 425 /* 426 * see exit_mm(), curr->task must not see 427 * ->task == NULL before we read ->next. 428 */ 429 smp_mb(); 430 curr->task = NULL; 431 wake_up_process(task); 432 } 433 434 mm->core_state = NULL; 435 } 436 437 static bool dump_interrupted(void) 438 { 439 /* 440 * SIGKILL or freezing() interrupt the coredumping. Perhaps we 441 * can do try_to_freeze() and check __fatal_signal_pending(), 442 * but then we need to teach dump_write() to restart and clear 443 * TIF_SIGPENDING. 444 */ 445 return signal_pending(current); 446 } 447 448 static void wait_for_dump_helpers(struct file *file) 449 { 450 struct pipe_inode_info *pipe = file->private_data; 451 452 pipe_lock(pipe); 453 pipe->readers++; 454 pipe->writers--; 455 wake_up_interruptible_sync(&pipe->wait); 456 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 457 pipe_unlock(pipe); 458 459 /* 460 * We actually want wait_event_freezable() but then we need 461 * to clear TIF_SIGPENDING and improve dump_interrupted(). 462 */ 463 wait_event_interruptible(pipe->wait, pipe->readers == 1); 464 465 pipe_lock(pipe); 466 pipe->readers--; 467 pipe->writers++; 468 pipe_unlock(pipe); 469 } 470 471 /* 472 * umh_pipe_setup 473 * helper function to customize the process used 474 * to collect the core in userspace. Specifically 475 * it sets up a pipe and installs it as fd 0 (stdin) 476 * for the process. Returns 0 on success, or 477 * PTR_ERR on failure. 478 * Note that it also sets the core limit to 1. This 479 * is a special value that we use to trap recursive 480 * core dumps 481 */ 482 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 483 { 484 struct file *files[2]; 485 struct coredump_params *cp = (struct coredump_params *)info->data; 486 int err = create_pipe_files(files, 0); 487 if (err) 488 return err; 489 490 cp->file = files[1]; 491 492 err = replace_fd(0, files[0], 0); 493 fput(files[0]); 494 /* and disallow core files too */ 495 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 496 497 return err; 498 } 499 500 void do_coredump(const siginfo_t *siginfo) 501 { 502 struct core_state core_state; 503 struct core_name cn; 504 struct mm_struct *mm = current->mm; 505 struct linux_binfmt * binfmt; 506 const struct cred *old_cred; 507 struct cred *cred; 508 int retval = 0; 509 int flag = 0; 510 int ispipe; 511 struct files_struct *displaced; 512 bool need_nonrelative = false; 513 bool core_dumped = false; 514 static atomic_t core_dump_count = ATOMIC_INIT(0); 515 struct coredump_params cprm = { 516 .siginfo = siginfo, 517 .regs = signal_pt_regs(), 518 .limit = rlimit(RLIMIT_CORE), 519 /* 520 * We must use the same mm->flags while dumping core to avoid 521 * inconsistency of bit flags, since this flag is not protected 522 * by any locks. 523 */ 524 .mm_flags = mm->flags, 525 }; 526 527 audit_core_dumps(siginfo->si_signo); 528 529 binfmt = mm->binfmt; 530 if (!binfmt || !binfmt->core_dump) 531 goto fail; 532 if (!__get_dumpable(cprm.mm_flags)) 533 goto fail; 534 535 cred = prepare_creds(); 536 if (!cred) 537 goto fail; 538 /* 539 * We cannot trust fsuid as being the "true" uid of the process 540 * nor do we know its entire history. We only know it was tainted 541 * so we dump it as root in mode 2, and only into a controlled 542 * environment (pipe handler or fully qualified path). 543 */ 544 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { 545 /* Setuid core dump mode */ 546 flag = O_EXCL; /* Stop rewrite attacks */ 547 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 548 need_nonrelative = true; 549 } 550 551 retval = coredump_wait(siginfo->si_signo, &core_state); 552 if (retval < 0) 553 goto fail_creds; 554 555 old_cred = override_creds(cred); 556 557 ispipe = format_corename(&cn, &cprm); 558 559 if (ispipe) { 560 int dump_count; 561 char **helper_argv; 562 struct subprocess_info *sub_info; 563 564 if (ispipe < 0) { 565 printk(KERN_WARNING "format_corename failed\n"); 566 printk(KERN_WARNING "Aborting core\n"); 567 goto fail_unlock; 568 } 569 570 if (cprm.limit == 1) { 571 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 572 * 573 * Normally core limits are irrelevant to pipes, since 574 * we're not writing to the file system, but we use 575 * cprm.limit of 1 here as a special value, this is a 576 * consistent way to catch recursive crashes. 577 * We can still crash if the core_pattern binary sets 578 * RLIM_CORE = !1, but it runs as root, and can do 579 * lots of stupid things. 580 * 581 * Note that we use task_tgid_vnr here to grab the pid 582 * of the process group leader. That way we get the 583 * right pid if a thread in a multi-threaded 584 * core_pattern process dies. 585 */ 586 printk(KERN_WARNING 587 "Process %d(%s) has RLIMIT_CORE set to 1\n", 588 task_tgid_vnr(current), current->comm); 589 printk(KERN_WARNING "Aborting core\n"); 590 goto fail_unlock; 591 } 592 cprm.limit = RLIM_INFINITY; 593 594 dump_count = atomic_inc_return(&core_dump_count); 595 if (core_pipe_limit && (core_pipe_limit < dump_count)) { 596 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", 597 task_tgid_vnr(current), current->comm); 598 printk(KERN_WARNING "Skipping core dump\n"); 599 goto fail_dropcount; 600 } 601 602 helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); 603 if (!helper_argv) { 604 printk(KERN_WARNING "%s failed to allocate memory\n", 605 __func__); 606 goto fail_dropcount; 607 } 608 609 retval = -ENOMEM; 610 sub_info = call_usermodehelper_setup(helper_argv[0], 611 helper_argv, NULL, GFP_KERNEL, 612 umh_pipe_setup, NULL, &cprm); 613 if (sub_info) 614 retval = call_usermodehelper_exec(sub_info, 615 UMH_WAIT_EXEC); 616 617 argv_free(helper_argv); 618 if (retval) { 619 printk(KERN_INFO "Core dump to |%s pipe failed\n", 620 cn.corename); 621 goto close_fail; 622 } 623 } else { 624 struct inode *inode; 625 626 if (cprm.limit < binfmt->min_coredump) 627 goto fail_unlock; 628 629 if (need_nonrelative && cn.corename[0] != '/') { 630 printk(KERN_WARNING "Pid %d(%s) can only dump core "\ 631 "to fully qualified path!\n", 632 task_tgid_vnr(current), current->comm); 633 printk(KERN_WARNING "Skipping core dump\n"); 634 goto fail_unlock; 635 } 636 637 cprm.file = filp_open(cn.corename, 638 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 639 0600); 640 if (IS_ERR(cprm.file)) 641 goto fail_unlock; 642 643 inode = file_inode(cprm.file); 644 if (inode->i_nlink > 1) 645 goto close_fail; 646 if (d_unhashed(cprm.file->f_path.dentry)) 647 goto close_fail; 648 /* 649 * AK: actually i see no reason to not allow this for named 650 * pipes etc, but keep the previous behaviour for now. 651 */ 652 if (!S_ISREG(inode->i_mode)) 653 goto close_fail; 654 /* 655 * Dont allow local users get cute and trick others to coredump 656 * into their pre-created files. 657 */ 658 if (!uid_eq(inode->i_uid, current_fsuid())) 659 goto close_fail; 660 if (!cprm.file->f_op->write) 661 goto close_fail; 662 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 663 goto close_fail; 664 } 665 666 /* get us an unshared descriptor table; almost always a no-op */ 667 retval = unshare_files(&displaced); 668 if (retval) 669 goto close_fail; 670 if (displaced) 671 put_files_struct(displaced); 672 if (!dump_interrupted()) { 673 file_start_write(cprm.file); 674 core_dumped = binfmt->core_dump(&cprm); 675 file_end_write(cprm.file); 676 } 677 if (ispipe && core_pipe_limit) 678 wait_for_dump_helpers(cprm.file); 679 close_fail: 680 if (cprm.file) 681 filp_close(cprm.file, NULL); 682 fail_dropcount: 683 if (ispipe) 684 atomic_dec(&core_dump_count); 685 fail_unlock: 686 kfree(cn.corename); 687 coredump_finish(mm, core_dumped); 688 revert_creds(old_cred); 689 fail_creds: 690 put_cred(cred); 691 fail: 692 return; 693 } 694 695 /* 696 * Core dumping helper functions. These are the only things you should 697 * do on a core-file: use only these functions to write out all the 698 * necessary info. 699 */ 700 int dump_emit(struct coredump_params *cprm, const void *addr, int nr) 701 { 702 struct file *file = cprm->file; 703 loff_t pos = file->f_pos; 704 ssize_t n; 705 if (cprm->written + nr > cprm->limit) 706 return 0; 707 while (nr) { 708 if (dump_interrupted()) 709 return 0; 710 n = __kernel_write(file, addr, nr, &pos); 711 if (n <= 0) 712 return 0; 713 file->f_pos = pos; 714 cprm->written += n; 715 nr -= n; 716 } 717 return 1; 718 } 719 EXPORT_SYMBOL(dump_emit); 720 721 int dump_skip(struct coredump_params *cprm, size_t nr) 722 { 723 static char zeroes[PAGE_SIZE]; 724 struct file *file = cprm->file; 725 if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 726 if (cprm->written + nr > cprm->limit) 727 return 0; 728 if (dump_interrupted() || 729 file->f_op->llseek(file, nr, SEEK_CUR) < 0) 730 return 0; 731 cprm->written += nr; 732 return 1; 733 } else { 734 while (nr > PAGE_SIZE) { 735 if (!dump_emit(cprm, zeroes, PAGE_SIZE)) 736 return 0; 737 nr -= PAGE_SIZE; 738 } 739 return dump_emit(cprm, zeroes, nr); 740 } 741 } 742 EXPORT_SYMBOL(dump_skip); 743 744 int dump_align(struct coredump_params *cprm, int align) 745 { 746 unsigned mod = cprm->written & (align - 1); 747 if (align & (align - 1)) 748 return 0; 749 return mod ? dump_skip(cprm, align - mod) : 1; 750 } 751 EXPORT_SYMBOL(dump_align); 752