1 #include <linux/slab.h> 2 #include <linux/file.h> 3 #include <linux/fdtable.h> 4 #include <linux/mm.h> 5 #include <linux/stat.h> 6 #include <linux/fcntl.h> 7 #include <linux/swap.h> 8 #include <linux/string.h> 9 #include <linux/init.h> 10 #include <linux/pagemap.h> 11 #include <linux/perf_event.h> 12 #include <linux/highmem.h> 13 #include <linux/spinlock.h> 14 #include <linux/key.h> 15 #include <linux/personality.h> 16 #include <linux/binfmts.h> 17 #include <linux/coredump.h> 18 #include <linux/utsname.h> 19 #include <linux/pid_namespace.h> 20 #include <linux/module.h> 21 #include <linux/namei.h> 22 #include <linux/mount.h> 23 #include <linux/security.h> 24 #include <linux/syscalls.h> 25 #include <linux/tsacct_kern.h> 26 #include <linux/cn_proc.h> 27 #include <linux/audit.h> 28 #include <linux/tracehook.h> 29 #include <linux/kmod.h> 30 #include <linux/fsnotify.h> 31 #include <linux/fs_struct.h> 32 #include <linux/pipe_fs_i.h> 33 #include <linux/oom.h> 34 #include <linux/compat.h> 35 36 #include <asm/uaccess.h> 37 #include <asm/mmu_context.h> 38 #include <asm/tlb.h> 39 #include <asm/exec.h> 40 41 #include <trace/events/task.h> 42 #include "internal.h" 43 44 #include <trace/events/sched.h> 45 46 int core_uses_pid; 47 unsigned int core_pipe_limit; 48 char core_pattern[CORENAME_MAX_SIZE] = "core"; 49 static int core_name_size = CORENAME_MAX_SIZE; 50 51 struct core_name { 52 char *corename; 53 int used, size; 54 }; 55 56 /* The maximal length of core_pattern is also specified in sysctl.c */ 57 58 static int expand_corename(struct core_name *cn, int size) 59 { 60 char *corename = krealloc(cn->corename, size, GFP_KERNEL); 61 62 if (!corename) 63 return -ENOMEM; 64 65 if (size > core_name_size) /* racy but harmless */ 66 core_name_size = size; 67 68 cn->size = ksize(corename); 69 cn->corename = corename; 70 return 0; 71 } 72 73 static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) 74 { 75 int free, need; 76 va_list arg_copy; 77 78 again: 79 free = cn->size - cn->used; 80 81 va_copy(arg_copy, arg); 82 need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy); 83 va_end(arg_copy); 84 85 if (need < free) { 86 cn->used += need; 87 return 0; 88 } 89 90 if (!expand_corename(cn, cn->size + need - free + 1)) 91 goto again; 92 93 return -ENOMEM; 94 } 95 96 static int cn_printf(struct core_name *cn, const char *fmt, ...) 97 { 98 va_list arg; 99 int ret; 100 101 va_start(arg, fmt); 102 ret = cn_vprintf(cn, fmt, arg); 103 va_end(arg); 104 105 return ret; 106 } 107 108 static int cn_esc_printf(struct core_name *cn, const char *fmt, ...) 109 { 110 int cur = cn->used; 111 va_list arg; 112 int ret; 113 114 va_start(arg, fmt); 115 ret = cn_vprintf(cn, fmt, arg); 116 va_end(arg); 117 118 for (; cur < cn->used; ++cur) { 119 if (cn->corename[cur] == '/') 120 cn->corename[cur] = '!'; 121 } 122 return ret; 123 } 124 125 static int cn_print_exe_file(struct core_name *cn) 126 { 127 struct file *exe_file; 128 char *pathbuf, *path; 129 int ret; 130 131 exe_file = get_mm_exe_file(current->mm); 132 if (!exe_file) 133 return cn_esc_printf(cn, "%s (path unknown)", current->comm); 134 135 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); 136 if (!pathbuf) { 137 ret = -ENOMEM; 138 goto put_exe_file; 139 } 140 141 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); 142 if (IS_ERR(path)) { 143 ret = PTR_ERR(path); 144 goto free_buf; 145 } 146 147 ret = cn_esc_printf(cn, "%s", path); 148 149 free_buf: 150 kfree(pathbuf); 151 put_exe_file: 152 fput(exe_file); 153 return ret; 154 } 155 156 /* format_corename will inspect the pattern parameter, and output a 157 * name into corename, which must have space for at least 158 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 159 */ 160 static int format_corename(struct core_name *cn, struct coredump_params *cprm) 161 { 162 const struct cred *cred = current_cred(); 163 const char *pat_ptr = core_pattern; 164 int ispipe = (*pat_ptr == '|'); 165 int pid_in_pattern = 0; 166 int err = 0; 167 168 cn->used = 0; 169 cn->corename = NULL; 170 if (expand_corename(cn, core_name_size)) 171 return -ENOMEM; 172 cn->corename[0] = '\0'; 173 174 if (ispipe) 175 ++pat_ptr; 176 177 /* Repeat as long as we have more pattern to process and more output 178 space */ 179 while (*pat_ptr) { 180 if (*pat_ptr != '%') { 181 err = cn_printf(cn, "%c", *pat_ptr++); 182 } else { 183 switch (*++pat_ptr) { 184 /* single % at the end, drop that */ 185 case 0: 186 goto out; 187 /* Double percent, output one percent */ 188 case '%': 189 err = cn_printf(cn, "%c", '%'); 190 break; 191 /* pid */ 192 case 'p': 193 pid_in_pattern = 1; 194 err = cn_printf(cn, "%d", 195 task_tgid_vnr(current)); 196 break; 197 /* global pid */ 198 case 'P': 199 err = cn_printf(cn, "%d", 200 task_tgid_nr(current)); 201 break; 202 /* uid */ 203 case 'u': 204 err = cn_printf(cn, "%d", cred->uid); 205 break; 206 /* gid */ 207 case 'g': 208 err = cn_printf(cn, "%d", cred->gid); 209 break; 210 case 'd': 211 err = cn_printf(cn, "%d", 212 __get_dumpable(cprm->mm_flags)); 213 break; 214 /* signal that caused the coredump */ 215 case 's': 216 err = cn_printf(cn, "%ld", cprm->siginfo->si_signo); 217 break; 218 /* UNIX time of coredump */ 219 case 't': { 220 struct timeval tv; 221 do_gettimeofday(&tv); 222 err = cn_printf(cn, "%lu", tv.tv_sec); 223 break; 224 } 225 /* hostname */ 226 case 'h': 227 down_read(&uts_sem); 228 err = cn_esc_printf(cn, "%s", 229 utsname()->nodename); 230 up_read(&uts_sem); 231 break; 232 /* executable */ 233 case 'e': 234 err = cn_esc_printf(cn, "%s", current->comm); 235 break; 236 case 'E': 237 err = cn_print_exe_file(cn); 238 break; 239 /* core limit size */ 240 case 'c': 241 err = cn_printf(cn, "%lu", 242 rlimit(RLIMIT_CORE)); 243 break; 244 default: 245 break; 246 } 247 ++pat_ptr; 248 } 249 250 if (err) 251 return err; 252 } 253 254 out: 255 /* Backward compatibility with core_uses_pid: 256 * 257 * If core_pattern does not include a %p (as is the default) 258 * and core_uses_pid is set, then .%pid will be appended to 259 * the filename. Do not do this for piped commands. */ 260 if (!ispipe && !pid_in_pattern && core_uses_pid) { 261 err = cn_printf(cn, ".%d", task_tgid_vnr(current)); 262 if (err) 263 return err; 264 } 265 return ispipe; 266 } 267 268 static int zap_process(struct task_struct *start, int exit_code) 269 { 270 struct task_struct *t; 271 int nr = 0; 272 273 start->signal->group_exit_code = exit_code; 274 start->signal->group_stop_count = 0; 275 276 t = start; 277 do { 278 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 279 if (t != current && t->mm) { 280 sigaddset(&t->pending.signal, SIGKILL); 281 signal_wake_up(t, 1); 282 nr++; 283 } 284 } while_each_thread(start, t); 285 286 return nr; 287 } 288 289 static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 290 struct core_state *core_state, int exit_code) 291 { 292 struct task_struct *g, *p; 293 unsigned long flags; 294 int nr = -EAGAIN; 295 296 spin_lock_irq(&tsk->sighand->siglock); 297 if (!signal_group_exit(tsk->signal)) { 298 mm->core_state = core_state; 299 nr = zap_process(tsk, exit_code); 300 tsk->signal->group_exit_task = tsk; 301 /* ignore all signals except SIGKILL, see prepare_signal() */ 302 tsk->signal->flags = SIGNAL_GROUP_COREDUMP; 303 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 304 } 305 spin_unlock_irq(&tsk->sighand->siglock); 306 if (unlikely(nr < 0)) 307 return nr; 308 309 tsk->flags = PF_DUMPCORE; 310 if (atomic_read(&mm->mm_users) == nr + 1) 311 goto done; 312 /* 313 * We should find and kill all tasks which use this mm, and we should 314 * count them correctly into ->nr_threads. We don't take tasklist 315 * lock, but this is safe wrt: 316 * 317 * fork: 318 * None of sub-threads can fork after zap_process(leader). All 319 * processes which were created before this point should be 320 * visible to zap_threads() because copy_process() adds the new 321 * process to the tail of init_task.tasks list, and lock/unlock 322 * of ->siglock provides a memory barrier. 323 * 324 * do_exit: 325 * The caller holds mm->mmap_sem. This means that the task which 326 * uses this mm can't pass exit_mm(), so it can't exit or clear 327 * its ->mm. 328 * 329 * de_thread: 330 * It does list_replace_rcu(&leader->tasks, ¤t->tasks), 331 * we must see either old or new leader, this does not matter. 332 * However, it can change p->sighand, so lock_task_sighand(p) 333 * must be used. Since p->mm != NULL and we hold ->mmap_sem 334 * it can't fail. 335 * 336 * Note also that "g" can be the old leader with ->mm == NULL 337 * and already unhashed and thus removed from ->thread_group. 338 * This is OK, __unhash_process()->list_del_rcu() does not 339 * clear the ->next pointer, we will find the new leader via 340 * next_thread(). 341 */ 342 rcu_read_lock(); 343 for_each_process(g) { 344 if (g == tsk->group_leader) 345 continue; 346 if (g->flags & PF_KTHREAD) 347 continue; 348 p = g; 349 do { 350 if (p->mm) { 351 if (unlikely(p->mm == mm)) { 352 lock_task_sighand(p, &flags); 353 nr += zap_process(p, exit_code); 354 p->signal->flags = SIGNAL_GROUP_EXIT; 355 unlock_task_sighand(p, &flags); 356 } 357 break; 358 } 359 } while_each_thread(g, p); 360 } 361 rcu_read_unlock(); 362 done: 363 atomic_set(&core_state->nr_threads, nr); 364 return nr; 365 } 366 367 static int coredump_wait(int exit_code, struct core_state *core_state) 368 { 369 struct task_struct *tsk = current; 370 struct mm_struct *mm = tsk->mm; 371 int core_waiters = -EBUSY; 372 373 init_completion(&core_state->startup); 374 core_state->dumper.task = tsk; 375 core_state->dumper.next = NULL; 376 377 down_write(&mm->mmap_sem); 378 if (!mm->core_state) 379 core_waiters = zap_threads(tsk, mm, core_state, exit_code); 380 up_write(&mm->mmap_sem); 381 382 if (core_waiters > 0) { 383 struct core_thread *ptr; 384 385 wait_for_completion(&core_state->startup); 386 /* 387 * Wait for all the threads to become inactive, so that 388 * all the thread context (extended register state, like 389 * fpu etc) gets copied to the memory. 390 */ 391 ptr = core_state->dumper.next; 392 while (ptr != NULL) { 393 wait_task_inactive(ptr->task, 0); 394 ptr = ptr->next; 395 } 396 } 397 398 return core_waiters; 399 } 400 401 static void coredump_finish(struct mm_struct *mm, bool core_dumped) 402 { 403 struct core_thread *curr, *next; 404 struct task_struct *task; 405 406 spin_lock_irq(¤t->sighand->siglock); 407 if (core_dumped && !__fatal_signal_pending(current)) 408 current->signal->group_exit_code |= 0x80; 409 current->signal->group_exit_task = NULL; 410 current->signal->flags = SIGNAL_GROUP_EXIT; 411 spin_unlock_irq(¤t->sighand->siglock); 412 413 next = mm->core_state->dumper.next; 414 while ((curr = next) != NULL) { 415 next = curr->next; 416 task = curr->task; 417 /* 418 * see exit_mm(), curr->task must not see 419 * ->task == NULL before we read ->next. 420 */ 421 smp_mb(); 422 curr->task = NULL; 423 wake_up_process(task); 424 } 425 426 mm->core_state = NULL; 427 } 428 429 static bool dump_interrupted(void) 430 { 431 /* 432 * SIGKILL or freezing() interrupt the coredumping. Perhaps we 433 * can do try_to_freeze() and check __fatal_signal_pending(), 434 * but then we need to teach dump_write() to restart and clear 435 * TIF_SIGPENDING. 436 */ 437 return signal_pending(current); 438 } 439 440 static void wait_for_dump_helpers(struct file *file) 441 { 442 struct pipe_inode_info *pipe = file->private_data; 443 444 pipe_lock(pipe); 445 pipe->readers++; 446 pipe->writers--; 447 wake_up_interruptible_sync(&pipe->wait); 448 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 449 pipe_unlock(pipe); 450 451 /* 452 * We actually want wait_event_freezable() but then we need 453 * to clear TIF_SIGPENDING and improve dump_interrupted(). 454 */ 455 wait_event_interruptible(pipe->wait, pipe->readers == 1); 456 457 pipe_lock(pipe); 458 pipe->readers--; 459 pipe->writers++; 460 pipe_unlock(pipe); 461 } 462 463 /* 464 * umh_pipe_setup 465 * helper function to customize the process used 466 * to collect the core in userspace. Specifically 467 * it sets up a pipe and installs it as fd 0 (stdin) 468 * for the process. Returns 0 on success, or 469 * PTR_ERR on failure. 470 * Note that it also sets the core limit to 1. This 471 * is a special value that we use to trap recursive 472 * core dumps 473 */ 474 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 475 { 476 struct file *files[2]; 477 struct coredump_params *cp = (struct coredump_params *)info->data; 478 int err = create_pipe_files(files, 0); 479 if (err) 480 return err; 481 482 cp->file = files[1]; 483 484 err = replace_fd(0, files[0], 0); 485 fput(files[0]); 486 /* and disallow core files too */ 487 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 488 489 return err; 490 } 491 492 void do_coredump(const siginfo_t *siginfo) 493 { 494 struct core_state core_state; 495 struct core_name cn; 496 struct mm_struct *mm = current->mm; 497 struct linux_binfmt * binfmt; 498 const struct cred *old_cred; 499 struct cred *cred; 500 int retval = 0; 501 int flag = 0; 502 int ispipe; 503 struct files_struct *displaced; 504 bool need_nonrelative = false; 505 bool core_dumped = false; 506 static atomic_t core_dump_count = ATOMIC_INIT(0); 507 struct coredump_params cprm = { 508 .siginfo = siginfo, 509 .regs = signal_pt_regs(), 510 .limit = rlimit(RLIMIT_CORE), 511 /* 512 * We must use the same mm->flags while dumping core to avoid 513 * inconsistency of bit flags, since this flag is not protected 514 * by any locks. 515 */ 516 .mm_flags = mm->flags, 517 }; 518 519 audit_core_dumps(siginfo->si_signo); 520 521 binfmt = mm->binfmt; 522 if (!binfmt || !binfmt->core_dump) 523 goto fail; 524 if (!__get_dumpable(cprm.mm_flags)) 525 goto fail; 526 527 cred = prepare_creds(); 528 if (!cred) 529 goto fail; 530 /* 531 * We cannot trust fsuid as being the "true" uid of the process 532 * nor do we know its entire history. We only know it was tainted 533 * so we dump it as root in mode 2, and only into a controlled 534 * environment (pipe handler or fully qualified path). 535 */ 536 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { 537 /* Setuid core dump mode */ 538 flag = O_EXCL; /* Stop rewrite attacks */ 539 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 540 need_nonrelative = true; 541 } 542 543 retval = coredump_wait(siginfo->si_signo, &core_state); 544 if (retval < 0) 545 goto fail_creds; 546 547 old_cred = override_creds(cred); 548 549 ispipe = format_corename(&cn, &cprm); 550 551 if (ispipe) { 552 int dump_count; 553 char **helper_argv; 554 struct subprocess_info *sub_info; 555 556 if (ispipe < 0) { 557 printk(KERN_WARNING "format_corename failed\n"); 558 printk(KERN_WARNING "Aborting core\n"); 559 goto fail_unlock; 560 } 561 562 if (cprm.limit == 1) { 563 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 564 * 565 * Normally core limits are irrelevant to pipes, since 566 * we're not writing to the file system, but we use 567 * cprm.limit of 1 here as a speacial value, this is a 568 * consistent way to catch recursive crashes. 569 * We can still crash if the core_pattern binary sets 570 * RLIM_CORE = !1, but it runs as root, and can do 571 * lots of stupid things. 572 * 573 * Note that we use task_tgid_vnr here to grab the pid 574 * of the process group leader. That way we get the 575 * right pid if a thread in a multi-threaded 576 * core_pattern process dies. 577 */ 578 printk(KERN_WARNING 579 "Process %d(%s) has RLIMIT_CORE set to 1\n", 580 task_tgid_vnr(current), current->comm); 581 printk(KERN_WARNING "Aborting core\n"); 582 goto fail_unlock; 583 } 584 cprm.limit = RLIM_INFINITY; 585 586 dump_count = atomic_inc_return(&core_dump_count); 587 if (core_pipe_limit && (core_pipe_limit < dump_count)) { 588 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", 589 task_tgid_vnr(current), current->comm); 590 printk(KERN_WARNING "Skipping core dump\n"); 591 goto fail_dropcount; 592 } 593 594 helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); 595 if (!helper_argv) { 596 printk(KERN_WARNING "%s failed to allocate memory\n", 597 __func__); 598 goto fail_dropcount; 599 } 600 601 retval = -ENOMEM; 602 sub_info = call_usermodehelper_setup(helper_argv[0], 603 helper_argv, NULL, GFP_KERNEL, 604 umh_pipe_setup, NULL, &cprm); 605 if (sub_info) 606 retval = call_usermodehelper_exec(sub_info, 607 UMH_WAIT_EXEC); 608 609 argv_free(helper_argv); 610 if (retval) { 611 printk(KERN_INFO "Core dump to |%s pipe failed\n", 612 cn.corename); 613 goto close_fail; 614 } 615 } else { 616 struct inode *inode; 617 618 if (cprm.limit < binfmt->min_coredump) 619 goto fail_unlock; 620 621 if (need_nonrelative && cn.corename[0] != '/') { 622 printk(KERN_WARNING "Pid %d(%s) can only dump core "\ 623 "to fully qualified path!\n", 624 task_tgid_vnr(current), current->comm); 625 printk(KERN_WARNING "Skipping core dump\n"); 626 goto fail_unlock; 627 } 628 629 cprm.file = filp_open(cn.corename, 630 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 631 0600); 632 if (IS_ERR(cprm.file)) 633 goto fail_unlock; 634 635 inode = file_inode(cprm.file); 636 if (inode->i_nlink > 1) 637 goto close_fail; 638 if (d_unhashed(cprm.file->f_path.dentry)) 639 goto close_fail; 640 /* 641 * AK: actually i see no reason to not allow this for named 642 * pipes etc, but keep the previous behaviour for now. 643 */ 644 if (!S_ISREG(inode->i_mode)) 645 goto close_fail; 646 /* 647 * Dont allow local users get cute and trick others to coredump 648 * into their pre-created files. 649 */ 650 if (!uid_eq(inode->i_uid, current_fsuid())) 651 goto close_fail; 652 if (!cprm.file->f_op->write) 653 goto close_fail; 654 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 655 goto close_fail; 656 } 657 658 /* get us an unshared descriptor table; almost always a no-op */ 659 retval = unshare_files(&displaced); 660 if (retval) 661 goto close_fail; 662 if (displaced) 663 put_files_struct(displaced); 664 if (!dump_interrupted()) { 665 file_start_write(cprm.file); 666 core_dumped = binfmt->core_dump(&cprm); 667 file_end_write(cprm.file); 668 } 669 if (ispipe && core_pipe_limit) 670 wait_for_dump_helpers(cprm.file); 671 close_fail: 672 if (cprm.file) 673 filp_close(cprm.file, NULL); 674 fail_dropcount: 675 if (ispipe) 676 atomic_dec(&core_dump_count); 677 fail_unlock: 678 kfree(cn.corename); 679 coredump_finish(mm, core_dumped); 680 revert_creds(old_cred); 681 fail_creds: 682 put_cred(cred); 683 fail: 684 return; 685 } 686 687 /* 688 * Core dumping helper functions. These are the only things you should 689 * do on a core-file: use only these functions to write out all the 690 * necessary info. 691 */ 692 int dump_emit(struct coredump_params *cprm, const void *addr, int nr) 693 { 694 struct file *file = cprm->file; 695 loff_t pos = file->f_pos; 696 ssize_t n; 697 if (cprm->written + nr > cprm->limit) 698 return 0; 699 while (nr) { 700 if (dump_interrupted()) 701 return 0; 702 n = __kernel_write(file, addr, nr, &pos); 703 if (n <= 0) 704 return 0; 705 file->f_pos = pos; 706 cprm->written += n; 707 nr -= n; 708 } 709 return 1; 710 } 711 EXPORT_SYMBOL(dump_emit); 712 713 int dump_skip(struct coredump_params *cprm, size_t nr) 714 { 715 static char zeroes[PAGE_SIZE]; 716 struct file *file = cprm->file; 717 if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 718 if (cprm->written + nr > cprm->limit) 719 return 0; 720 if (dump_interrupted() || 721 file->f_op->llseek(file, nr, SEEK_CUR) < 0) 722 return 0; 723 cprm->written += nr; 724 return 1; 725 } else { 726 while (nr > PAGE_SIZE) { 727 if (!dump_emit(cprm, zeroes, PAGE_SIZE)) 728 return 0; 729 nr -= PAGE_SIZE; 730 } 731 return dump_emit(cprm, zeroes, nr); 732 } 733 } 734 EXPORT_SYMBOL(dump_skip); 735 736 int dump_align(struct coredump_params *cprm, int align) 737 { 738 unsigned mod = cprm->written & (align - 1); 739 if (align & (align - 1)) 740 return 0; 741 return mod ? dump_skip(cprm, align - mod) : 1; 742 } 743 EXPORT_SYMBOL(dump_align); 744