1 #include <linux/slab.h> 2 #include <linux/file.h> 3 #include <linux/fdtable.h> 4 #include <linux/mm.h> 5 #include <linux/stat.h> 6 #include <linux/fcntl.h> 7 #include <linux/swap.h> 8 #include <linux/string.h> 9 #include <linux/init.h> 10 #include <linux/pagemap.h> 11 #include <linux/perf_event.h> 12 #include <linux/highmem.h> 13 #include <linux/spinlock.h> 14 #include <linux/key.h> 15 #include <linux/personality.h> 16 #include <linux/binfmts.h> 17 #include <linux/coredump.h> 18 #include <linux/utsname.h> 19 #include <linux/pid_namespace.h> 20 #include <linux/module.h> 21 #include <linux/namei.h> 22 #include <linux/mount.h> 23 #include <linux/security.h> 24 #include <linux/syscalls.h> 25 #include <linux/tsacct_kern.h> 26 #include <linux/cn_proc.h> 27 #include <linux/audit.h> 28 #include <linux/tracehook.h> 29 #include <linux/kmod.h> 30 #include <linux/fsnotify.h> 31 #include <linux/fs_struct.h> 32 #include <linux/pipe_fs_i.h> 33 #include <linux/oom.h> 34 #include <linux/compat.h> 35 36 #include <asm/uaccess.h> 37 #include <asm/mmu_context.h> 38 #include <asm/tlb.h> 39 #include <asm/exec.h> 40 41 #include <trace/events/task.h> 42 #include "internal.h" 43 44 #include <trace/events/sched.h> 45 46 int core_uses_pid; 47 unsigned int core_pipe_limit; 48 char core_pattern[CORENAME_MAX_SIZE] = "core"; 49 static int core_name_size = CORENAME_MAX_SIZE; 50 51 struct core_name { 52 char *corename; 53 int used, size; 54 }; 55 56 /* The maximal length of core_pattern is also specified in sysctl.c */ 57 58 static int expand_corename(struct core_name *cn, int size) 59 { 60 char *corename = krealloc(cn->corename, size, GFP_KERNEL); 61 62 if (!corename) 63 return -ENOMEM; 64 65 if (size > core_name_size) /* racy but harmless */ 66 core_name_size = size; 67 68 cn->size = ksize(corename); 69 cn->corename = corename; 70 return 0; 71 } 72 73 static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) 74 { 75 int free, need; 76 77 again: 78 free = cn->size - cn->used; 79 need = vsnprintf(cn->corename + cn->used, free, fmt, arg); 80 if (need < free) { 81 cn->used += need; 82 return 0; 83 } 84 85 if (!expand_corename(cn, cn->size + need - free + 1)) 86 goto again; 87 88 return -ENOMEM; 89 } 90 91 static int cn_printf(struct core_name *cn, const char *fmt, ...) 92 { 93 va_list arg; 94 int ret; 95 96 va_start(arg, fmt); 97 ret = cn_vprintf(cn, fmt, arg); 98 va_end(arg); 99 100 return ret; 101 } 102 103 static int cn_esc_printf(struct core_name *cn, const char *fmt, ...) 104 { 105 int cur = cn->used; 106 va_list arg; 107 int ret; 108 109 va_start(arg, fmt); 110 ret = cn_vprintf(cn, fmt, arg); 111 va_end(arg); 112 113 for (; cur < cn->used; ++cur) { 114 if (cn->corename[cur] == '/') 115 cn->corename[cur] = '!'; 116 } 117 return ret; 118 } 119 120 static int cn_print_exe_file(struct core_name *cn) 121 { 122 struct file *exe_file; 123 char *pathbuf, *path; 124 int ret; 125 126 exe_file = get_mm_exe_file(current->mm); 127 if (!exe_file) 128 return cn_esc_printf(cn, "%s (path unknown)", current->comm); 129 130 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); 131 if (!pathbuf) { 132 ret = -ENOMEM; 133 goto put_exe_file; 134 } 135 136 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); 137 if (IS_ERR(path)) { 138 ret = PTR_ERR(path); 139 goto free_buf; 140 } 141 142 ret = cn_esc_printf(cn, "%s", path); 143 144 free_buf: 145 kfree(pathbuf); 146 put_exe_file: 147 fput(exe_file); 148 return ret; 149 } 150 151 /* format_corename will inspect the pattern parameter, and output a 152 * name into corename, which must have space for at least 153 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 154 */ 155 static int format_corename(struct core_name *cn, struct coredump_params *cprm) 156 { 157 const struct cred *cred = current_cred(); 158 const char *pat_ptr = core_pattern; 159 int ispipe = (*pat_ptr == '|'); 160 int pid_in_pattern = 0; 161 int err = 0; 162 163 cn->used = 0; 164 cn->corename = NULL; 165 if (expand_corename(cn, core_name_size)) 166 return -ENOMEM; 167 cn->corename[0] = '\0'; 168 169 if (ispipe) 170 ++pat_ptr; 171 172 /* Repeat as long as we have more pattern to process and more output 173 space */ 174 while (*pat_ptr) { 175 if (*pat_ptr != '%') { 176 err = cn_printf(cn, "%c", *pat_ptr++); 177 } else { 178 switch (*++pat_ptr) { 179 /* single % at the end, drop that */ 180 case 0: 181 goto out; 182 /* Double percent, output one percent */ 183 case '%': 184 err = cn_printf(cn, "%c", '%'); 185 break; 186 /* pid */ 187 case 'p': 188 pid_in_pattern = 1; 189 err = cn_printf(cn, "%d", 190 task_tgid_vnr(current)); 191 break; 192 /* global pid */ 193 case 'P': 194 err = cn_printf(cn, "%d", 195 task_tgid_nr(current)); 196 break; 197 /* uid */ 198 case 'u': 199 err = cn_printf(cn, "%d", cred->uid); 200 break; 201 /* gid */ 202 case 'g': 203 err = cn_printf(cn, "%d", cred->gid); 204 break; 205 case 'd': 206 err = cn_printf(cn, "%d", 207 __get_dumpable(cprm->mm_flags)); 208 break; 209 /* signal that caused the coredump */ 210 case 's': 211 err = cn_printf(cn, "%ld", cprm->siginfo->si_signo); 212 break; 213 /* UNIX time of coredump */ 214 case 't': { 215 struct timeval tv; 216 do_gettimeofday(&tv); 217 err = cn_printf(cn, "%lu", tv.tv_sec); 218 break; 219 } 220 /* hostname */ 221 case 'h': 222 down_read(&uts_sem); 223 err = cn_esc_printf(cn, "%s", 224 utsname()->nodename); 225 up_read(&uts_sem); 226 break; 227 /* executable */ 228 case 'e': 229 err = cn_esc_printf(cn, "%s", current->comm); 230 break; 231 case 'E': 232 err = cn_print_exe_file(cn); 233 break; 234 /* core limit size */ 235 case 'c': 236 err = cn_printf(cn, "%lu", 237 rlimit(RLIMIT_CORE)); 238 break; 239 default: 240 break; 241 } 242 ++pat_ptr; 243 } 244 245 if (err) 246 return err; 247 } 248 249 out: 250 /* Backward compatibility with core_uses_pid: 251 * 252 * If core_pattern does not include a %p (as is the default) 253 * and core_uses_pid is set, then .%pid will be appended to 254 * the filename. Do not do this for piped commands. */ 255 if (!ispipe && !pid_in_pattern && core_uses_pid) { 256 err = cn_printf(cn, ".%d", task_tgid_vnr(current)); 257 if (err) 258 return err; 259 } 260 return ispipe; 261 } 262 263 static int zap_process(struct task_struct *start, int exit_code) 264 { 265 struct task_struct *t; 266 int nr = 0; 267 268 start->signal->group_exit_code = exit_code; 269 start->signal->group_stop_count = 0; 270 271 t = start; 272 do { 273 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 274 if (t != current && t->mm) { 275 sigaddset(&t->pending.signal, SIGKILL); 276 signal_wake_up(t, 1); 277 nr++; 278 } 279 } while_each_thread(start, t); 280 281 return nr; 282 } 283 284 static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 285 struct core_state *core_state, int exit_code) 286 { 287 struct task_struct *g, *p; 288 unsigned long flags; 289 int nr = -EAGAIN; 290 291 spin_lock_irq(&tsk->sighand->siglock); 292 if (!signal_group_exit(tsk->signal)) { 293 mm->core_state = core_state; 294 nr = zap_process(tsk, exit_code); 295 tsk->signal->group_exit_task = tsk; 296 /* ignore all signals except SIGKILL, see prepare_signal() */ 297 tsk->signal->flags = SIGNAL_GROUP_COREDUMP; 298 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 299 } 300 spin_unlock_irq(&tsk->sighand->siglock); 301 if (unlikely(nr < 0)) 302 return nr; 303 304 tsk->flags = PF_DUMPCORE; 305 if (atomic_read(&mm->mm_users) == nr + 1) 306 goto done; 307 /* 308 * We should find and kill all tasks which use this mm, and we should 309 * count them correctly into ->nr_threads. We don't take tasklist 310 * lock, but this is safe wrt: 311 * 312 * fork: 313 * None of sub-threads can fork after zap_process(leader). All 314 * processes which were created before this point should be 315 * visible to zap_threads() because copy_process() adds the new 316 * process to the tail of init_task.tasks list, and lock/unlock 317 * of ->siglock provides a memory barrier. 318 * 319 * do_exit: 320 * The caller holds mm->mmap_sem. This means that the task which 321 * uses this mm can't pass exit_mm(), so it can't exit or clear 322 * its ->mm. 323 * 324 * de_thread: 325 * It does list_replace_rcu(&leader->tasks, ¤t->tasks), 326 * we must see either old or new leader, this does not matter. 327 * However, it can change p->sighand, so lock_task_sighand(p) 328 * must be used. Since p->mm != NULL and we hold ->mmap_sem 329 * it can't fail. 330 * 331 * Note also that "g" can be the old leader with ->mm == NULL 332 * and already unhashed and thus removed from ->thread_group. 333 * This is OK, __unhash_process()->list_del_rcu() does not 334 * clear the ->next pointer, we will find the new leader via 335 * next_thread(). 336 */ 337 rcu_read_lock(); 338 for_each_process(g) { 339 if (g == tsk->group_leader) 340 continue; 341 if (g->flags & PF_KTHREAD) 342 continue; 343 p = g; 344 do { 345 if (p->mm) { 346 if (unlikely(p->mm == mm)) { 347 lock_task_sighand(p, &flags); 348 nr += zap_process(p, exit_code); 349 p->signal->flags = SIGNAL_GROUP_EXIT; 350 unlock_task_sighand(p, &flags); 351 } 352 break; 353 } 354 } while_each_thread(g, p); 355 } 356 rcu_read_unlock(); 357 done: 358 atomic_set(&core_state->nr_threads, nr); 359 return nr; 360 } 361 362 static int coredump_wait(int exit_code, struct core_state *core_state) 363 { 364 struct task_struct *tsk = current; 365 struct mm_struct *mm = tsk->mm; 366 int core_waiters = -EBUSY; 367 368 init_completion(&core_state->startup); 369 core_state->dumper.task = tsk; 370 core_state->dumper.next = NULL; 371 372 down_write(&mm->mmap_sem); 373 if (!mm->core_state) 374 core_waiters = zap_threads(tsk, mm, core_state, exit_code); 375 up_write(&mm->mmap_sem); 376 377 if (core_waiters > 0) { 378 struct core_thread *ptr; 379 380 wait_for_completion(&core_state->startup); 381 /* 382 * Wait for all the threads to become inactive, so that 383 * all the thread context (extended register state, like 384 * fpu etc) gets copied to the memory. 385 */ 386 ptr = core_state->dumper.next; 387 while (ptr != NULL) { 388 wait_task_inactive(ptr->task, 0); 389 ptr = ptr->next; 390 } 391 } 392 393 return core_waiters; 394 } 395 396 static void coredump_finish(struct mm_struct *mm, bool core_dumped) 397 { 398 struct core_thread *curr, *next; 399 struct task_struct *task; 400 401 spin_lock_irq(¤t->sighand->siglock); 402 if (core_dumped && !__fatal_signal_pending(current)) 403 current->signal->group_exit_code |= 0x80; 404 current->signal->group_exit_task = NULL; 405 current->signal->flags = SIGNAL_GROUP_EXIT; 406 spin_unlock_irq(¤t->sighand->siglock); 407 408 next = mm->core_state->dumper.next; 409 while ((curr = next) != NULL) { 410 next = curr->next; 411 task = curr->task; 412 /* 413 * see exit_mm(), curr->task must not see 414 * ->task == NULL before we read ->next. 415 */ 416 smp_mb(); 417 curr->task = NULL; 418 wake_up_process(task); 419 } 420 421 mm->core_state = NULL; 422 } 423 424 static bool dump_interrupted(void) 425 { 426 /* 427 * SIGKILL or freezing() interrupt the coredumping. Perhaps we 428 * can do try_to_freeze() and check __fatal_signal_pending(), 429 * but then we need to teach dump_write() to restart and clear 430 * TIF_SIGPENDING. 431 */ 432 return signal_pending(current); 433 } 434 435 static void wait_for_dump_helpers(struct file *file) 436 { 437 struct pipe_inode_info *pipe = file->private_data; 438 439 pipe_lock(pipe); 440 pipe->readers++; 441 pipe->writers--; 442 wake_up_interruptible_sync(&pipe->wait); 443 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 444 pipe_unlock(pipe); 445 446 /* 447 * We actually want wait_event_freezable() but then we need 448 * to clear TIF_SIGPENDING and improve dump_interrupted(). 449 */ 450 wait_event_interruptible(pipe->wait, pipe->readers == 1); 451 452 pipe_lock(pipe); 453 pipe->readers--; 454 pipe->writers++; 455 pipe_unlock(pipe); 456 } 457 458 /* 459 * umh_pipe_setup 460 * helper function to customize the process used 461 * to collect the core in userspace. Specifically 462 * it sets up a pipe and installs it as fd 0 (stdin) 463 * for the process. Returns 0 on success, or 464 * PTR_ERR on failure. 465 * Note that it also sets the core limit to 1. This 466 * is a special value that we use to trap recursive 467 * core dumps 468 */ 469 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 470 { 471 struct file *files[2]; 472 struct coredump_params *cp = (struct coredump_params *)info->data; 473 int err = create_pipe_files(files, 0); 474 if (err) 475 return err; 476 477 cp->file = files[1]; 478 479 err = replace_fd(0, files[0], 0); 480 fput(files[0]); 481 /* and disallow core files too */ 482 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 483 484 return err; 485 } 486 487 void do_coredump(const siginfo_t *siginfo) 488 { 489 struct core_state core_state; 490 struct core_name cn; 491 struct mm_struct *mm = current->mm; 492 struct linux_binfmt * binfmt; 493 const struct cred *old_cred; 494 struct cred *cred; 495 int retval = 0; 496 int flag = 0; 497 int ispipe; 498 struct files_struct *displaced; 499 bool need_nonrelative = false; 500 bool core_dumped = false; 501 static atomic_t core_dump_count = ATOMIC_INIT(0); 502 struct coredump_params cprm = { 503 .siginfo = siginfo, 504 .regs = signal_pt_regs(), 505 .limit = rlimit(RLIMIT_CORE), 506 /* 507 * We must use the same mm->flags while dumping core to avoid 508 * inconsistency of bit flags, since this flag is not protected 509 * by any locks. 510 */ 511 .mm_flags = mm->flags, 512 }; 513 514 audit_core_dumps(siginfo->si_signo); 515 516 binfmt = mm->binfmt; 517 if (!binfmt || !binfmt->core_dump) 518 goto fail; 519 if (!__get_dumpable(cprm.mm_flags)) 520 goto fail; 521 522 cred = prepare_creds(); 523 if (!cred) 524 goto fail; 525 /* 526 * We cannot trust fsuid as being the "true" uid of the process 527 * nor do we know its entire history. We only know it was tainted 528 * so we dump it as root in mode 2, and only into a controlled 529 * environment (pipe handler or fully qualified path). 530 */ 531 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { 532 /* Setuid core dump mode */ 533 flag = O_EXCL; /* Stop rewrite attacks */ 534 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 535 need_nonrelative = true; 536 } 537 538 retval = coredump_wait(siginfo->si_signo, &core_state); 539 if (retval < 0) 540 goto fail_creds; 541 542 old_cred = override_creds(cred); 543 544 ispipe = format_corename(&cn, &cprm); 545 546 if (ispipe) { 547 int dump_count; 548 char **helper_argv; 549 struct subprocess_info *sub_info; 550 551 if (ispipe < 0) { 552 printk(KERN_WARNING "format_corename failed\n"); 553 printk(KERN_WARNING "Aborting core\n"); 554 goto fail_unlock; 555 } 556 557 if (cprm.limit == 1) { 558 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 559 * 560 * Normally core limits are irrelevant to pipes, since 561 * we're not writing to the file system, but we use 562 * cprm.limit of 1 here as a speacial value, this is a 563 * consistent way to catch recursive crashes. 564 * We can still crash if the core_pattern binary sets 565 * RLIM_CORE = !1, but it runs as root, and can do 566 * lots of stupid things. 567 * 568 * Note that we use task_tgid_vnr here to grab the pid 569 * of the process group leader. That way we get the 570 * right pid if a thread in a multi-threaded 571 * core_pattern process dies. 572 */ 573 printk(KERN_WARNING 574 "Process %d(%s) has RLIMIT_CORE set to 1\n", 575 task_tgid_vnr(current), current->comm); 576 printk(KERN_WARNING "Aborting core\n"); 577 goto fail_unlock; 578 } 579 cprm.limit = RLIM_INFINITY; 580 581 dump_count = atomic_inc_return(&core_dump_count); 582 if (core_pipe_limit && (core_pipe_limit < dump_count)) { 583 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", 584 task_tgid_vnr(current), current->comm); 585 printk(KERN_WARNING "Skipping core dump\n"); 586 goto fail_dropcount; 587 } 588 589 helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); 590 if (!helper_argv) { 591 printk(KERN_WARNING "%s failed to allocate memory\n", 592 __func__); 593 goto fail_dropcount; 594 } 595 596 retval = -ENOMEM; 597 sub_info = call_usermodehelper_setup(helper_argv[0], 598 helper_argv, NULL, GFP_KERNEL, 599 umh_pipe_setup, NULL, &cprm); 600 if (sub_info) 601 retval = call_usermodehelper_exec(sub_info, 602 UMH_WAIT_EXEC); 603 604 argv_free(helper_argv); 605 if (retval) { 606 printk(KERN_INFO "Core dump to |%s pipe failed\n", 607 cn.corename); 608 goto close_fail; 609 } 610 } else { 611 struct inode *inode; 612 613 if (cprm.limit < binfmt->min_coredump) 614 goto fail_unlock; 615 616 if (need_nonrelative && cn.corename[0] != '/') { 617 printk(KERN_WARNING "Pid %d(%s) can only dump core "\ 618 "to fully qualified path!\n", 619 task_tgid_vnr(current), current->comm); 620 printk(KERN_WARNING "Skipping core dump\n"); 621 goto fail_unlock; 622 } 623 624 cprm.file = filp_open(cn.corename, 625 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 626 0600); 627 if (IS_ERR(cprm.file)) 628 goto fail_unlock; 629 630 inode = file_inode(cprm.file); 631 if (inode->i_nlink > 1) 632 goto close_fail; 633 if (d_unhashed(cprm.file->f_path.dentry)) 634 goto close_fail; 635 /* 636 * AK: actually i see no reason to not allow this for named 637 * pipes etc, but keep the previous behaviour for now. 638 */ 639 if (!S_ISREG(inode->i_mode)) 640 goto close_fail; 641 /* 642 * Dont allow local users get cute and trick others to coredump 643 * into their pre-created files. 644 */ 645 if (!uid_eq(inode->i_uid, current_fsuid())) 646 goto close_fail; 647 if (!cprm.file->f_op->write) 648 goto close_fail; 649 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 650 goto close_fail; 651 } 652 653 /* get us an unshared descriptor table; almost always a no-op */ 654 retval = unshare_files(&displaced); 655 if (retval) 656 goto close_fail; 657 if (displaced) 658 put_files_struct(displaced); 659 if (!dump_interrupted()) { 660 file_start_write(cprm.file); 661 core_dumped = binfmt->core_dump(&cprm); 662 file_end_write(cprm.file); 663 } 664 if (ispipe && core_pipe_limit) 665 wait_for_dump_helpers(cprm.file); 666 close_fail: 667 if (cprm.file) 668 filp_close(cprm.file, NULL); 669 fail_dropcount: 670 if (ispipe) 671 atomic_dec(&core_dump_count); 672 fail_unlock: 673 kfree(cn.corename); 674 coredump_finish(mm, core_dumped); 675 revert_creds(old_cred); 676 fail_creds: 677 put_cred(cred); 678 fail: 679 return; 680 } 681 682 /* 683 * Core dumping helper functions. These are the only things you should 684 * do on a core-file: use only these functions to write out all the 685 * necessary info. 686 */ 687 int dump_emit(struct coredump_params *cprm, const void *addr, int nr) 688 { 689 struct file *file = cprm->file; 690 loff_t pos = file->f_pos; 691 ssize_t n; 692 if (cprm->written + nr > cprm->limit) 693 return 0; 694 while (nr) { 695 if (dump_interrupted()) 696 return 0; 697 n = __kernel_write(file, addr, nr, &pos); 698 if (n <= 0) 699 return 0; 700 file->f_pos = pos; 701 cprm->written += n; 702 nr -= n; 703 } 704 return 1; 705 } 706 EXPORT_SYMBOL(dump_emit); 707 708 int dump_skip(struct coredump_params *cprm, size_t nr) 709 { 710 static char zeroes[PAGE_SIZE]; 711 struct file *file = cprm->file; 712 if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 713 if (cprm->written + nr > cprm->limit) 714 return 0; 715 if (dump_interrupted() || 716 file->f_op->llseek(file, nr, SEEK_CUR) < 0) 717 return 0; 718 cprm->written += nr; 719 return 1; 720 } else { 721 while (nr > PAGE_SIZE) { 722 if (!dump_emit(cprm, zeroes, PAGE_SIZE)) 723 return 0; 724 nr -= PAGE_SIZE; 725 } 726 return dump_emit(cprm, zeroes, nr); 727 } 728 } 729 EXPORT_SYMBOL(dump_skip); 730 731 int dump_align(struct coredump_params *cprm, int align) 732 { 733 unsigned mod = cprm->written & (align - 1); 734 if (align & (align - 1)) 735 return 0; 736 return mod ? dump_skip(cprm, align - mod) : 1; 737 } 738 EXPORT_SYMBOL(dump_align); 739