1 #include <linux/slab.h> 2 #include <linux/file.h> 3 #include <linux/fdtable.h> 4 #include <linux/mm.h> 5 #include <linux/stat.h> 6 #include <linux/fcntl.h> 7 #include <linux/swap.h> 8 #include <linux/string.h> 9 #include <linux/init.h> 10 #include <linux/pagemap.h> 11 #include <linux/perf_event.h> 12 #include <linux/highmem.h> 13 #include <linux/spinlock.h> 14 #include <linux/key.h> 15 #include <linux/personality.h> 16 #include <linux/binfmts.h> 17 #include <linux/coredump.h> 18 #include <linux/utsname.h> 19 #include <linux/pid_namespace.h> 20 #include <linux/module.h> 21 #include <linux/namei.h> 22 #include <linux/mount.h> 23 #include <linux/security.h> 24 #include <linux/syscalls.h> 25 #include <linux/tsacct_kern.h> 26 #include <linux/cn_proc.h> 27 #include <linux/audit.h> 28 #include <linux/tracehook.h> 29 #include <linux/kmod.h> 30 #include <linux/fsnotify.h> 31 #include <linux/fs_struct.h> 32 #include <linux/pipe_fs_i.h> 33 #include <linux/oom.h> 34 #include <linux/compat.h> 35 36 #include <asm/uaccess.h> 37 #include <asm/mmu_context.h> 38 #include <asm/tlb.h> 39 #include <asm/exec.h> 40 41 #include <trace/events/task.h> 42 #include "internal.h" 43 #include "coredump.h" 44 45 #include <trace/events/sched.h> 46 47 int core_uses_pid; 48 unsigned int core_pipe_limit; 49 char core_pattern[CORENAME_MAX_SIZE] = "core"; 50 static int core_name_size = CORENAME_MAX_SIZE; 51 52 struct core_name { 53 char *corename; 54 int used, size; 55 }; 56 57 /* The maximal length of core_pattern is also specified in sysctl.c */ 58 59 static int expand_corename(struct core_name *cn, int size) 60 { 61 char *corename = krealloc(cn->corename, size, GFP_KERNEL); 62 63 if (!corename) 64 return -ENOMEM; 65 66 if (size > core_name_size) /* racy but harmless */ 67 core_name_size = size; 68 69 cn->size = ksize(corename); 70 cn->corename = corename; 71 return 0; 72 } 73 74 static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) 75 { 76 int free, need; 77 78 again: 79 free = cn->size - cn->used; 80 need = vsnprintf(cn->corename + cn->used, free, fmt, arg); 81 if (need < free) { 82 cn->used += need; 83 return 0; 84 } 85 86 if (!expand_corename(cn, cn->size + need - free + 1)) 87 goto again; 88 89 return -ENOMEM; 90 } 91 92 static int cn_printf(struct core_name *cn, const char *fmt, ...) 93 { 94 va_list arg; 95 int ret; 96 97 va_start(arg, fmt); 98 ret = cn_vprintf(cn, fmt, arg); 99 va_end(arg); 100 101 return ret; 102 } 103 104 static int cn_esc_printf(struct core_name *cn, const char *fmt, ...) 105 { 106 int cur = cn->used; 107 va_list arg; 108 int ret; 109 110 va_start(arg, fmt); 111 ret = cn_vprintf(cn, fmt, arg); 112 va_end(arg); 113 114 for (; cur < cn->used; ++cur) { 115 if (cn->corename[cur] == '/') 116 cn->corename[cur] = '!'; 117 } 118 return ret; 119 } 120 121 static int cn_print_exe_file(struct core_name *cn) 122 { 123 struct file *exe_file; 124 char *pathbuf, *path; 125 int ret; 126 127 exe_file = get_mm_exe_file(current->mm); 128 if (!exe_file) 129 return cn_esc_printf(cn, "%s (path unknown)", current->comm); 130 131 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); 132 if (!pathbuf) { 133 ret = -ENOMEM; 134 goto put_exe_file; 135 } 136 137 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); 138 if (IS_ERR(path)) { 139 ret = PTR_ERR(path); 140 goto free_buf; 141 } 142 143 ret = cn_esc_printf(cn, "%s", path); 144 145 free_buf: 146 kfree(pathbuf); 147 put_exe_file: 148 fput(exe_file); 149 return ret; 150 } 151 152 /* format_corename will inspect the pattern parameter, and output a 153 * name into corename, which must have space for at least 154 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 155 */ 156 static int format_corename(struct core_name *cn, struct coredump_params *cprm) 157 { 158 const struct cred *cred = current_cred(); 159 const char *pat_ptr = core_pattern; 160 int ispipe = (*pat_ptr == '|'); 161 int pid_in_pattern = 0; 162 int err = 0; 163 164 cn->used = 0; 165 cn->corename = NULL; 166 if (expand_corename(cn, core_name_size)) 167 return -ENOMEM; 168 cn->corename[0] = '\0'; 169 170 if (ispipe) 171 ++pat_ptr; 172 173 /* Repeat as long as we have more pattern to process and more output 174 space */ 175 while (*pat_ptr) { 176 if (*pat_ptr != '%') { 177 err = cn_printf(cn, "%c", *pat_ptr++); 178 } else { 179 switch (*++pat_ptr) { 180 /* single % at the end, drop that */ 181 case 0: 182 goto out; 183 /* Double percent, output one percent */ 184 case '%': 185 err = cn_printf(cn, "%c", '%'); 186 break; 187 /* pid */ 188 case 'p': 189 pid_in_pattern = 1; 190 err = cn_printf(cn, "%d", 191 task_tgid_vnr(current)); 192 break; 193 /* global pid */ 194 case 'P': 195 err = cn_printf(cn, "%d", 196 task_tgid_nr(current)); 197 break; 198 /* uid */ 199 case 'u': 200 err = cn_printf(cn, "%d", cred->uid); 201 break; 202 /* gid */ 203 case 'g': 204 err = cn_printf(cn, "%d", cred->gid); 205 break; 206 case 'd': 207 err = cn_printf(cn, "%d", 208 __get_dumpable(cprm->mm_flags)); 209 break; 210 /* signal that caused the coredump */ 211 case 's': 212 err = cn_printf(cn, "%ld", cprm->siginfo->si_signo); 213 break; 214 /* UNIX time of coredump */ 215 case 't': { 216 struct timeval tv; 217 do_gettimeofday(&tv); 218 err = cn_printf(cn, "%lu", tv.tv_sec); 219 break; 220 } 221 /* hostname */ 222 case 'h': 223 down_read(&uts_sem); 224 err = cn_esc_printf(cn, "%s", 225 utsname()->nodename); 226 up_read(&uts_sem); 227 break; 228 /* executable */ 229 case 'e': 230 err = cn_esc_printf(cn, "%s", current->comm); 231 break; 232 case 'E': 233 err = cn_print_exe_file(cn); 234 break; 235 /* core limit size */ 236 case 'c': 237 err = cn_printf(cn, "%lu", 238 rlimit(RLIMIT_CORE)); 239 break; 240 default: 241 break; 242 } 243 ++pat_ptr; 244 } 245 246 if (err) 247 return err; 248 } 249 250 out: 251 /* Backward compatibility with core_uses_pid: 252 * 253 * If core_pattern does not include a %p (as is the default) 254 * and core_uses_pid is set, then .%pid will be appended to 255 * the filename. Do not do this for piped commands. */ 256 if (!ispipe && !pid_in_pattern && core_uses_pid) { 257 err = cn_printf(cn, ".%d", task_tgid_vnr(current)); 258 if (err) 259 return err; 260 } 261 return ispipe; 262 } 263 264 static int zap_process(struct task_struct *start, int exit_code) 265 { 266 struct task_struct *t; 267 int nr = 0; 268 269 start->signal->group_exit_code = exit_code; 270 start->signal->group_stop_count = 0; 271 272 t = start; 273 do { 274 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); 275 if (t != current && t->mm) { 276 sigaddset(&t->pending.signal, SIGKILL); 277 signal_wake_up(t, 1); 278 nr++; 279 } 280 } while_each_thread(start, t); 281 282 return nr; 283 } 284 285 static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 286 struct core_state *core_state, int exit_code) 287 { 288 struct task_struct *g, *p; 289 unsigned long flags; 290 int nr = -EAGAIN; 291 292 spin_lock_irq(&tsk->sighand->siglock); 293 if (!signal_group_exit(tsk->signal)) { 294 mm->core_state = core_state; 295 nr = zap_process(tsk, exit_code); 296 tsk->signal->group_exit_task = tsk; 297 /* ignore all signals except SIGKILL, see prepare_signal() */ 298 tsk->signal->flags = SIGNAL_GROUP_COREDUMP; 299 clear_tsk_thread_flag(tsk, TIF_SIGPENDING); 300 } 301 spin_unlock_irq(&tsk->sighand->siglock); 302 if (unlikely(nr < 0)) 303 return nr; 304 305 tsk->flags = PF_DUMPCORE; 306 if (atomic_read(&mm->mm_users) == nr + 1) 307 goto done; 308 /* 309 * We should find and kill all tasks which use this mm, and we should 310 * count them correctly into ->nr_threads. We don't take tasklist 311 * lock, but this is safe wrt: 312 * 313 * fork: 314 * None of sub-threads can fork after zap_process(leader). All 315 * processes which were created before this point should be 316 * visible to zap_threads() because copy_process() adds the new 317 * process to the tail of init_task.tasks list, and lock/unlock 318 * of ->siglock provides a memory barrier. 319 * 320 * do_exit: 321 * The caller holds mm->mmap_sem. This means that the task which 322 * uses this mm can't pass exit_mm(), so it can't exit or clear 323 * its ->mm. 324 * 325 * de_thread: 326 * It does list_replace_rcu(&leader->tasks, ¤t->tasks), 327 * we must see either old or new leader, this does not matter. 328 * However, it can change p->sighand, so lock_task_sighand(p) 329 * must be used. Since p->mm != NULL and we hold ->mmap_sem 330 * it can't fail. 331 * 332 * Note also that "g" can be the old leader with ->mm == NULL 333 * and already unhashed and thus removed from ->thread_group. 334 * This is OK, __unhash_process()->list_del_rcu() does not 335 * clear the ->next pointer, we will find the new leader via 336 * next_thread(). 337 */ 338 rcu_read_lock(); 339 for_each_process(g) { 340 if (g == tsk->group_leader) 341 continue; 342 if (g->flags & PF_KTHREAD) 343 continue; 344 p = g; 345 do { 346 if (p->mm) { 347 if (unlikely(p->mm == mm)) { 348 lock_task_sighand(p, &flags); 349 nr += zap_process(p, exit_code); 350 p->signal->flags = SIGNAL_GROUP_EXIT; 351 unlock_task_sighand(p, &flags); 352 } 353 break; 354 } 355 } while_each_thread(g, p); 356 } 357 rcu_read_unlock(); 358 done: 359 atomic_set(&core_state->nr_threads, nr); 360 return nr; 361 } 362 363 static int coredump_wait(int exit_code, struct core_state *core_state) 364 { 365 struct task_struct *tsk = current; 366 struct mm_struct *mm = tsk->mm; 367 int core_waiters = -EBUSY; 368 369 init_completion(&core_state->startup); 370 core_state->dumper.task = tsk; 371 core_state->dumper.next = NULL; 372 373 down_write(&mm->mmap_sem); 374 if (!mm->core_state) 375 core_waiters = zap_threads(tsk, mm, core_state, exit_code); 376 up_write(&mm->mmap_sem); 377 378 if (core_waiters > 0) { 379 struct core_thread *ptr; 380 381 wait_for_completion(&core_state->startup); 382 /* 383 * Wait for all the threads to become inactive, so that 384 * all the thread context (extended register state, like 385 * fpu etc) gets copied to the memory. 386 */ 387 ptr = core_state->dumper.next; 388 while (ptr != NULL) { 389 wait_task_inactive(ptr->task, 0); 390 ptr = ptr->next; 391 } 392 } 393 394 return core_waiters; 395 } 396 397 static void coredump_finish(struct mm_struct *mm, bool core_dumped) 398 { 399 struct core_thread *curr, *next; 400 struct task_struct *task; 401 402 spin_lock_irq(¤t->sighand->siglock); 403 if (core_dumped && !__fatal_signal_pending(current)) 404 current->signal->group_exit_code |= 0x80; 405 current->signal->group_exit_task = NULL; 406 current->signal->flags = SIGNAL_GROUP_EXIT; 407 spin_unlock_irq(¤t->sighand->siglock); 408 409 next = mm->core_state->dumper.next; 410 while ((curr = next) != NULL) { 411 next = curr->next; 412 task = curr->task; 413 /* 414 * see exit_mm(), curr->task must not see 415 * ->task == NULL before we read ->next. 416 */ 417 smp_mb(); 418 curr->task = NULL; 419 wake_up_process(task); 420 } 421 422 mm->core_state = NULL; 423 } 424 425 static bool dump_interrupted(void) 426 { 427 /* 428 * SIGKILL or freezing() interrupt the coredumping. Perhaps we 429 * can do try_to_freeze() and check __fatal_signal_pending(), 430 * but then we need to teach dump_write() to restart and clear 431 * TIF_SIGPENDING. 432 */ 433 return signal_pending(current); 434 } 435 436 static void wait_for_dump_helpers(struct file *file) 437 { 438 struct pipe_inode_info *pipe = file->private_data; 439 440 pipe_lock(pipe); 441 pipe->readers++; 442 pipe->writers--; 443 wake_up_interruptible_sync(&pipe->wait); 444 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 445 pipe_unlock(pipe); 446 447 /* 448 * We actually want wait_event_freezable() but then we need 449 * to clear TIF_SIGPENDING and improve dump_interrupted(). 450 */ 451 wait_event_interruptible(pipe->wait, pipe->readers == 1); 452 453 pipe_lock(pipe); 454 pipe->readers--; 455 pipe->writers++; 456 pipe_unlock(pipe); 457 } 458 459 /* 460 * umh_pipe_setup 461 * helper function to customize the process used 462 * to collect the core in userspace. Specifically 463 * it sets up a pipe and installs it as fd 0 (stdin) 464 * for the process. Returns 0 on success, or 465 * PTR_ERR on failure. 466 * Note that it also sets the core limit to 1. This 467 * is a special value that we use to trap recursive 468 * core dumps 469 */ 470 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 471 { 472 struct file *files[2]; 473 struct coredump_params *cp = (struct coredump_params *)info->data; 474 int err = create_pipe_files(files, 0); 475 if (err) 476 return err; 477 478 cp->file = files[1]; 479 480 err = replace_fd(0, files[0], 0); 481 fput(files[0]); 482 /* and disallow core files too */ 483 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 484 485 return err; 486 } 487 488 void do_coredump(siginfo_t *siginfo) 489 { 490 struct core_state core_state; 491 struct core_name cn; 492 struct mm_struct *mm = current->mm; 493 struct linux_binfmt * binfmt; 494 const struct cred *old_cred; 495 struct cred *cred; 496 int retval = 0; 497 int flag = 0; 498 int ispipe; 499 struct files_struct *displaced; 500 bool need_nonrelative = false; 501 bool core_dumped = false; 502 static atomic_t core_dump_count = ATOMIC_INIT(0); 503 struct coredump_params cprm = { 504 .siginfo = siginfo, 505 .regs = signal_pt_regs(), 506 .limit = rlimit(RLIMIT_CORE), 507 /* 508 * We must use the same mm->flags while dumping core to avoid 509 * inconsistency of bit flags, since this flag is not protected 510 * by any locks. 511 */ 512 .mm_flags = mm->flags, 513 }; 514 515 audit_core_dumps(siginfo->si_signo); 516 517 binfmt = mm->binfmt; 518 if (!binfmt || !binfmt->core_dump) 519 goto fail; 520 if (!__get_dumpable(cprm.mm_flags)) 521 goto fail; 522 523 cred = prepare_creds(); 524 if (!cred) 525 goto fail; 526 /* 527 * We cannot trust fsuid as being the "true" uid of the process 528 * nor do we know its entire history. We only know it was tainted 529 * so we dump it as root in mode 2, and only into a controlled 530 * environment (pipe handler or fully qualified path). 531 */ 532 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { 533 /* Setuid core dump mode */ 534 flag = O_EXCL; /* Stop rewrite attacks */ 535 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 536 need_nonrelative = true; 537 } 538 539 retval = coredump_wait(siginfo->si_signo, &core_state); 540 if (retval < 0) 541 goto fail_creds; 542 543 old_cred = override_creds(cred); 544 545 ispipe = format_corename(&cn, &cprm); 546 547 if (ispipe) { 548 int dump_count; 549 char **helper_argv; 550 struct subprocess_info *sub_info; 551 552 if (ispipe < 0) { 553 printk(KERN_WARNING "format_corename failed\n"); 554 printk(KERN_WARNING "Aborting core\n"); 555 goto fail_unlock; 556 } 557 558 if (cprm.limit == 1) { 559 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. 560 * 561 * Normally core limits are irrelevant to pipes, since 562 * we're not writing to the file system, but we use 563 * cprm.limit of 1 here as a speacial value, this is a 564 * consistent way to catch recursive crashes. 565 * We can still crash if the core_pattern binary sets 566 * RLIM_CORE = !1, but it runs as root, and can do 567 * lots of stupid things. 568 * 569 * Note that we use task_tgid_vnr here to grab the pid 570 * of the process group leader. That way we get the 571 * right pid if a thread in a multi-threaded 572 * core_pattern process dies. 573 */ 574 printk(KERN_WARNING 575 "Process %d(%s) has RLIMIT_CORE set to 1\n", 576 task_tgid_vnr(current), current->comm); 577 printk(KERN_WARNING "Aborting core\n"); 578 goto fail_unlock; 579 } 580 cprm.limit = RLIM_INFINITY; 581 582 dump_count = atomic_inc_return(&core_dump_count); 583 if (core_pipe_limit && (core_pipe_limit < dump_count)) { 584 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", 585 task_tgid_vnr(current), current->comm); 586 printk(KERN_WARNING "Skipping core dump\n"); 587 goto fail_dropcount; 588 } 589 590 helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); 591 if (!helper_argv) { 592 printk(KERN_WARNING "%s failed to allocate memory\n", 593 __func__); 594 goto fail_dropcount; 595 } 596 597 retval = -ENOMEM; 598 sub_info = call_usermodehelper_setup(helper_argv[0], 599 helper_argv, NULL, GFP_KERNEL, 600 umh_pipe_setup, NULL, &cprm); 601 if (sub_info) 602 retval = call_usermodehelper_exec(sub_info, 603 UMH_WAIT_EXEC); 604 605 argv_free(helper_argv); 606 if (retval) { 607 printk(KERN_INFO "Core dump to |%s pipe failed\n", 608 cn.corename); 609 goto close_fail; 610 } 611 } else { 612 struct inode *inode; 613 614 if (cprm.limit < binfmt->min_coredump) 615 goto fail_unlock; 616 617 if (need_nonrelative && cn.corename[0] != '/') { 618 printk(KERN_WARNING "Pid %d(%s) can only dump core "\ 619 "to fully qualified path!\n", 620 task_tgid_vnr(current), current->comm); 621 printk(KERN_WARNING "Skipping core dump\n"); 622 goto fail_unlock; 623 } 624 625 cprm.file = filp_open(cn.corename, 626 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 627 0600); 628 if (IS_ERR(cprm.file)) 629 goto fail_unlock; 630 631 inode = file_inode(cprm.file); 632 if (inode->i_nlink > 1) 633 goto close_fail; 634 if (d_unhashed(cprm.file->f_path.dentry)) 635 goto close_fail; 636 /* 637 * AK: actually i see no reason to not allow this for named 638 * pipes etc, but keep the previous behaviour for now. 639 */ 640 if (!S_ISREG(inode->i_mode)) 641 goto close_fail; 642 /* 643 * Dont allow local users get cute and trick others to coredump 644 * into their pre-created files. 645 */ 646 if (!uid_eq(inode->i_uid, current_fsuid())) 647 goto close_fail; 648 if (!cprm.file->f_op || !cprm.file->f_op->write) 649 goto close_fail; 650 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 651 goto close_fail; 652 } 653 654 /* get us an unshared descriptor table; almost always a no-op */ 655 retval = unshare_files(&displaced); 656 if (retval) 657 goto close_fail; 658 if (displaced) 659 put_files_struct(displaced); 660 if (!dump_interrupted()) { 661 file_start_write(cprm.file); 662 core_dumped = binfmt->core_dump(&cprm); 663 file_end_write(cprm.file); 664 } 665 if (ispipe && core_pipe_limit) 666 wait_for_dump_helpers(cprm.file); 667 close_fail: 668 if (cprm.file) 669 filp_close(cprm.file, NULL); 670 fail_dropcount: 671 if (ispipe) 672 atomic_dec(&core_dump_count); 673 fail_unlock: 674 kfree(cn.corename); 675 coredump_finish(mm, core_dumped); 676 revert_creds(old_cred); 677 fail_creds: 678 put_cred(cred); 679 fail: 680 return; 681 } 682 683 /* 684 * Core dumping helper functions. These are the only things you should 685 * do on a core-file: use only these functions to write out all the 686 * necessary info. 687 */ 688 int dump_write(struct file *file, const void *addr, int nr) 689 { 690 return !dump_interrupted() && 691 access_ok(VERIFY_READ, addr, nr) && 692 file->f_op->write(file, addr, nr, &file->f_pos) == nr; 693 } 694 EXPORT_SYMBOL(dump_write); 695 696 int dump_seek(struct file *file, loff_t off) 697 { 698 int ret = 1; 699 700 if (file->f_op->llseek && file->f_op->llseek != no_llseek) { 701 if (dump_interrupted() || 702 file->f_op->llseek(file, off, SEEK_CUR) < 0) 703 return 0; 704 } else { 705 char *buf = (char *)get_zeroed_page(GFP_KERNEL); 706 707 if (!buf) 708 return 0; 709 while (off > 0) { 710 unsigned long n = off; 711 712 if (n > PAGE_SIZE) 713 n = PAGE_SIZE; 714 if (!dump_write(file, buf, n)) { 715 ret = 0; 716 break; 717 } 718 off -= n; 719 } 720 free_page((unsigned long)buf); 721 } 722 return ret; 723 } 724 EXPORT_SYMBOL(dump_seek); 725