/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object,
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't use
 *  the rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision of smaps.
 */

#include <linux/uaccess.h>

#include <linux/errno.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/init.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
#include <linux/mnt_namespace.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/stacktrace.h>
#include <linux/resource.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/printk.h>
#include <linux/cgroup.h>
#include <linux/cpuset.h>
#include <linux/audit.h>
#include <linux/poll.h>
#include <linux/nsproxy.h>
#include <linux/oom.h>
#include <linux/elf.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
#include <linux/slab.h>
#include <linux/flex_array.h>
#include <linux/posix-timers.h>
#ifdef CONFIG_HARDWALL
#include <asm/hardwall.h>
#endif
#include <trace/events/oom.h>
#include "internal.h"
#include "fd.h"

/* NOTE:
 * Implementing inode permission operations in /proc is almost
 * certainly an error. Permission checks need to happen during
 * each system call, not at open time. The reason is that most of
 * what we wish to check for permissions in /proc varies at runtime.
 *
 * The classic example of a problem is opening file descriptors
 * in /proc for a task before it execs a suid executable.
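 *
 * For example, a descriptor opened under /proc/<pid>/ while the task
 * is still unprivileged remains usable after the task execs a suid
 * executable; a check performed only at open(2) time would never see
 * the privilege change, so each system call must re-check.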
 */

static u8 nlink_tid;
static u8 nlink_tgid;

struct pid_entry {
	const char *name;
	unsigned int len;
	umode_t mode;
	const struct inode_operations *iop;
	const struct file_operations *fop;
	union proc_op op;
};

#define NOD(NAME, MODE, IOP, FOP, OP) {	\
	.name = (NAME),			\
	.len  = sizeof(NAME) - 1,	\
	.mode = MODE,			\
	.iop  = IOP,			\
	.fop  = FOP,			\
	.op   = OP,			\
}

#define DIR(NAME, MODE, iops, fops)	\
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
#define LNK(NAME, get_link)				\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),			\
		&proc_pid_link_inode_operations, NULL,	\
		{ .proc_get_link = get_link } )
#define REG(NAME, MODE, fops)			\
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
#define ONE(NAME, MODE, show)			\
	NOD(NAME, (S_IFREG|(MODE)),		\
		NULL, &proc_single_file_operations, \
		{ .proc_show = show } )

/*
 * Count the number of hardlinks for the pid_entry table, excluding the .
 * and .. links.
 */
static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
	unsigned int n)
{
	unsigned int i;
	unsigned int count;

	count = 2;
	for (i = 0; i < n; ++i) {
		if (S_ISDIR(entries[i].mode))
			++count;
	}

	return count;
}

static int get_task_root(struct task_struct *task, struct path *root)
{
	int result = -ENOENT;

	task_lock(task);
	if (task->fs) {
		get_fs_root(task->fs, root);
		result = 0;
	}
	task_unlock(task);
	return result;
}

static int proc_cwd_link(struct dentry *dentry, struct path *path)
{
	struct task_struct *task = get_proc_task(d_inode(dentry));
	int result = -ENOENT;

	if (task) {
		task_lock(task);
		if (task->fs) {
			get_fs_pwd(task->fs, path);
			result = 0;
		}
		task_unlock(task);
		put_task_struct(task);
	}
	return result;
}

static int proc_root_link(struct dentry *dentry, struct path *path)
{
	struct task_struct *task = get_proc_task(d_inode(dentry));
	int result = -ENOENT;

	if (task) {
		result = get_task_root(task, path);
		put_task_struct(task);
	}
	return result;
}

static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
				     size_t _count, loff_t *pos)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	char *page;
	unsigned long count = _count;
	unsigned long arg_start, arg_end, env_start, env_end;
	unsigned long len1, len2, len;
	unsigned long p;
	char c;
	ssize_t rv;

	BUG_ON(*pos < 0);

	tsk = get_proc_task(file_inode(file));
	if (!tsk)
		return -ESRCH;
	mm = get_task_mm(tsk);
	put_task_struct(tsk);
	if (!mm)
		return 0;
	/* Check if process spawned far enough to have cmdline. */
	if (!mm->env_end) {
		rv = 0;
		goto out_mmput;
	}

	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page) {
		rv = -ENOMEM;
		goto out_mmput;
	}

	down_read(&mm->mmap_sem);
	arg_start = mm->arg_start;
	arg_end = mm->arg_end;
	env_start = mm->env_start;
	env_end = mm->env_end;
	up_read(&mm->mmap_sem);

	BUG_ON(arg_start > arg_end);
	BUG_ON(env_start > env_end);

	len1 = arg_end - arg_start;
	len2 = env_end - env_start;

	/* Empty ARGV. */
	if (len1 == 0) {
		rv = 0;
		goto out_free_page;
	}
	/*
	 * Inherently racy -- command line shares address space
	 * with code and data.
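	 * A task that execs, or one that rewrites its own argv strings
	 * in place, can change the bytes between the length snapshot
	 * above and the copies below, so the result is only a
	 * best-effort snapshot.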
	 */
	rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
	if (rv <= 0)
		goto out_free_page;

	rv = 0;

	if (c == '\0') {
		/* Command line (set of strings) occupies whole ARGV. */
		if (len1 <= *pos)
			goto out_free_page;

		p = arg_start + *pos;
		len = len1 - *pos;
		while (count > 0 && len > 0) {
			unsigned int _count;
			int nr_read;

			_count = min3(count, len, PAGE_SIZE);
			nr_read = access_remote_vm(mm, p, page, _count, 0);
			if (nr_read < 0)
				rv = nr_read;
			if (nr_read <= 0)
				goto out_free_page;

			if (copy_to_user(buf, page, nr_read)) {
				rv = -EFAULT;
				goto out_free_page;
			}

			p += nr_read;
			len -= nr_read;
			buf += nr_read;
			count -= nr_read;
			rv += nr_read;
		}
	} else {
		/*
		 * Command line (1 string) occupies ARGV and
		 * extends into ENVP.
		 */
		struct {
			unsigned long p;
			unsigned long len;
		} cmdline[2] = {
			{ .p = arg_start, .len = len1 },
			{ .p = env_start, .len = len2 },
		};
		loff_t pos1 = *pos;
		unsigned int i;

		i = 0;
		while (i < 2 && pos1 >= cmdline[i].len) {
			pos1 -= cmdline[i].len;
			i++;
		}
		while (i < 2) {
			p = cmdline[i].p + pos1;
			len = cmdline[i].len - pos1;
			while (count > 0 && len > 0) {
				unsigned int _count, l;
				int nr_read;
				bool final;

				_count = min3(count, len, PAGE_SIZE);
				nr_read = access_remote_vm(mm, p, page, _count, 0);
				if (nr_read < 0)
					rv = nr_read;
				if (nr_read <= 0)
					goto out_free_page;

				/*
				 * Command line can be shorter than whole ARGV
				 * even if last "marker" byte says it is not.
				 */
				final = false;
				l = strnlen(page, nr_read);
				if (l < nr_read) {
					nr_read = l;
					final = true;
				}

				if (copy_to_user(buf, page, nr_read)) {
					rv = -EFAULT;
					goto out_free_page;
				}

				p += nr_read;
				len -= nr_read;
				buf += nr_read;
				count -= nr_read;
				rv += nr_read;

				if (final)
					goto out_free_page;
			}

			/* Only first chunk can be read partially. */
			pos1 = 0;
			i++;
		}
	}

out_free_page:
	free_page((unsigned long)page);
out_mmput:
	mmput(mm);
	if (rv > 0)
		*pos += rv;
	return rv;
}

static const struct file_operations proc_pid_cmdline_ops = {
	.read	= proc_pid_cmdline_read,
	.llseek	= generic_file_llseek,
};

#ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 * Returns the resolved symbol. If that fails, print '0' instead.
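 * E.g. a task sleeping in poll(2) typically resolves to a symbol such
 * as do_sys_poll, while a running task reads as "0".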
378 */ 379 static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, 380 struct pid *pid, struct task_struct *task) 381 { 382 unsigned long wchan; 383 char symname[KSYM_NAME_LEN]; 384 385 wchan = get_wchan(task); 386 387 if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) 388 && !lookup_symbol_name(wchan, symname)) 389 seq_printf(m, "%s", symname); 390 else 391 seq_putc(m, '0'); 392 393 return 0; 394 } 395 #endif /* CONFIG_KALLSYMS */ 396 397 static int lock_trace(struct task_struct *task) 398 { 399 int err = mutex_lock_killable(&task->signal->cred_guard_mutex); 400 if (err) 401 return err; 402 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { 403 mutex_unlock(&task->signal->cred_guard_mutex); 404 return -EPERM; 405 } 406 return 0; 407 } 408 409 static void unlock_trace(struct task_struct *task) 410 { 411 mutex_unlock(&task->signal->cred_guard_mutex); 412 } 413 414 #ifdef CONFIG_STACKTRACE 415 416 #define MAX_STACK_TRACE_DEPTH 64 417 418 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, 419 struct pid *pid, struct task_struct *task) 420 { 421 struct stack_trace trace; 422 unsigned long *entries; 423 int err; 424 int i; 425 426 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 427 if (!entries) 428 return -ENOMEM; 429 430 trace.nr_entries = 0; 431 trace.max_entries = MAX_STACK_TRACE_DEPTH; 432 trace.entries = entries; 433 trace.skip = 0; 434 435 err = lock_trace(task); 436 if (!err) { 437 save_stack_trace_tsk(task, &trace); 438 439 for (i = 0; i < trace.nr_entries; i++) { 440 seq_printf(m, "[<%pK>] %pB\n", 441 (void *)entries[i], (void *)entries[i]); 442 } 443 unlock_trace(task); 444 } 445 kfree(entries); 446 447 return err; 448 } 449 #endif 450 451 #ifdef CONFIG_SCHED_INFO 452 /* 453 * Provides /proc/PID/schedstat 454 */ 455 static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, 456 struct pid *pid, struct task_struct *task) 457 { 458 if (unlikely(!sched_info_on())) 459 seq_printf(m, "0 0 0\n"); 460 else 461 seq_printf(m, "%llu %llu %lu\n", 462 (unsigned long long)task->se.sum_exec_runtime, 463 (unsigned long long)task->sched_info.run_delay, 464 task->sched_info.pcount); 465 466 return 0; 467 } 468 #endif 469 470 #ifdef CONFIG_LATENCYTOP 471 static int lstats_show_proc(struct seq_file *m, void *v) 472 { 473 int i; 474 struct inode *inode = m->private; 475 struct task_struct *task = get_proc_task(inode); 476 477 if (!task) 478 return -ESRCH; 479 seq_puts(m, "Latency Top version : v0.1\n"); 480 for (i = 0; i < 32; i++) { 481 struct latency_record *lr = &task->latency_record[i]; 482 if (lr->backtrace[0]) { 483 int q; 484 seq_printf(m, "%i %li %li", 485 lr->count, lr->time, lr->max); 486 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 487 unsigned long bt = lr->backtrace[q]; 488 if (!bt) 489 break; 490 if (bt == ULONG_MAX) 491 break; 492 seq_printf(m, " %ps", (void *)bt); 493 } 494 seq_putc(m, '\n'); 495 } 496 497 } 498 put_task_struct(task); 499 return 0; 500 } 501 502 static int lstats_open(struct inode *inode, struct file *file) 503 { 504 return single_open(file, lstats_show_proc, inode); 505 } 506 507 static ssize_t lstats_write(struct file *file, const char __user *buf, 508 size_t count, loff_t *offs) 509 { 510 struct task_struct *task = get_proc_task(file_inode(file)); 511 512 if (!task) 513 return -ESRCH; 514 clear_all_latency_tracing(task); 515 put_task_struct(task); 516 517 return count; 518 } 519 520 static const struct file_operations proc_lstats_operations = { 521 .open = lstats_open, 522 
	.read		= seq_read,
	.write		= lstats_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#endif

static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	unsigned long totalpages = totalram_pages + total_swap_pages;
	unsigned long points = 0;

	points = oom_badness(task, NULL, NULL, totalpages) *
					1000 / totalpages;
	seq_printf(m, "%lu\n", points);

	return 0;
}

struct limit_names {
	const char *name;
	const char *unit;
};

static const struct limit_names lnames[RLIM_NLIMITS] = {
	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
	[RLIMIT_DATA] = {"Max data size", "bytes"},
	[RLIMIT_STACK] = {"Max stack size", "bytes"},
	[RLIMIT_CORE] = {"Max core file size", "bytes"},
	[RLIMIT_RSS] = {"Max resident set", "bytes"},
	[RLIMIT_NPROC] = {"Max processes", "processes"},
	[RLIMIT_NOFILE] = {"Max open files", "files"},
	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
	[RLIMIT_AS] = {"Max address space", "bytes"},
	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
	[RLIMIT_NICE] = {"Max nice priority", NULL},
	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
};

/* Display limits for a process */
static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
			   struct pid *pid, struct task_struct *task)
{
	unsigned int i;
	unsigned long flags;

	struct rlimit rlim[RLIM_NLIMITS];

	if (!lock_task_sighand(task, &flags))
		return 0;
	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
	unlock_task_sighand(task, &flags);

	/*
	 * print the file header
	 */
	seq_printf(m, "%-25s %-20s %-20s %-10s\n",
		   "Limit", "Soft Limit", "Hard Limit", "Units");

	for (i = 0; i < RLIM_NLIMITS; i++) {
		if (rlim[i].rlim_cur == RLIM_INFINITY)
			seq_printf(m, "%-25s %-20s ",
				   lnames[i].name, "unlimited");
		else
			seq_printf(m, "%-25s %-20lu ",
				   lnames[i].name, rlim[i].rlim_cur);

		if (rlim[i].rlim_max == RLIM_INFINITY)
			seq_printf(m, "%-20s ", "unlimited");
		else
			seq_printf(m, "%-20lu ", rlim[i].rlim_max);

		if (lnames[i].unit)
			seq_printf(m, "%-10s\n", lnames[i].unit);
		else
			seq_putc(m, '\n');
	}

	return 0;
}

#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *task)
{
	long nr;
	unsigned long args[6], sp, pc;
	int res;

	res = lock_trace(task);
	if (res)
		return res;

	if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
		seq_puts(m, "running\n");
	else if (nr < 0)
		seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
	else
		seq_printf(m,
			   "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
			   nr,
			   args[0], args[1], args[2], args[3], args[4], args[5],
			   sp, pc);
	unlock_trace(task);

	return 0;
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */

/************************************************************************/
/* Here the fs part begins                                              */
/************************************************************************/

/* permission checks */
static int
proc_fd_access_allowed(struct inode *inode)
{
	struct task_struct *task;
	int allowed = 0;
	/* Allow access to a task's file descriptors if it is us or we
	 * may use ptrace to attach to the process and find out that
	 * information.
	 */
	task = get_proc_task(inode);
	if (task) {
		allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
		put_task_struct(task);
	}
	return allowed;
}

int proc_setattr(struct dentry *dentry, struct iattr *attr)
{
	int error;
	struct inode *inode = d_inode(dentry);

	if (attr->ia_valid & ATTR_MODE)
		return -EPERM;

	error = setattr_prepare(dentry, attr);
	if (error)
		return error;

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);
	return 0;
}

/*
 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
 * or euid/egid (for hide_pid_min=2)?
 */
static bool has_pid_permissions(struct pid_namespace *pid,
				struct task_struct *task,
				int hide_pid_min)
{
	if (pid->hide_pid < hide_pid_min)
		return true;
	if (in_group_p(pid->pid_gid))
		return true;
	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
}


static int proc_pid_permission(struct inode *inode, int mask)
{
	struct pid_namespace *pid = inode->i_sb->s_fs_info;
	struct task_struct *task;
	bool has_perms;

	task = get_proc_task(inode);
	if (!task)
		return -ESRCH;
	has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
	put_task_struct(task);

	if (!has_perms) {
		if (pid->hide_pid == HIDEPID_INVISIBLE) {
			/*
			 * Let's make getdents(), stat(), and open()
			 * consistent with each other.  If a process
			 * may not stat() a file, it shouldn't be seen
			 * in procfs at all.
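			 * With hidepid=2 this means an unrelated
			 * user's stat() of /proc/<pid> fails with
			 * -ENOENT, matching the entry's absence from
			 * that user's readdir() of /proc.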
710 */ 711 return -ENOENT; 712 } 713 714 return -EPERM; 715 } 716 return generic_permission(inode, mask); 717 } 718 719 720 721 static const struct inode_operations proc_def_inode_operations = { 722 .setattr = proc_setattr, 723 }; 724 725 static int proc_single_show(struct seq_file *m, void *v) 726 { 727 struct inode *inode = m->private; 728 struct pid_namespace *ns; 729 struct pid *pid; 730 struct task_struct *task; 731 int ret; 732 733 ns = inode->i_sb->s_fs_info; 734 pid = proc_pid(inode); 735 task = get_pid_task(pid, PIDTYPE_PID); 736 if (!task) 737 return -ESRCH; 738 739 ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); 740 741 put_task_struct(task); 742 return ret; 743 } 744 745 static int proc_single_open(struct inode *inode, struct file *filp) 746 { 747 return single_open(filp, proc_single_show, inode); 748 } 749 750 static const struct file_operations proc_single_file_operations = { 751 .open = proc_single_open, 752 .read = seq_read, 753 .llseek = seq_lseek, 754 .release = single_release, 755 }; 756 757 758 struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) 759 { 760 struct task_struct *task = get_proc_task(inode); 761 struct mm_struct *mm = ERR_PTR(-ESRCH); 762 763 if (task) { 764 mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); 765 put_task_struct(task); 766 767 if (!IS_ERR_OR_NULL(mm)) { 768 /* ensure this mm_struct can't be freed */ 769 atomic_inc(&mm->mm_count); 770 /* but do not pin its memory */ 771 mmput(mm); 772 } 773 } 774 775 return mm; 776 } 777 778 static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) 779 { 780 struct mm_struct *mm = proc_mem_open(inode, mode); 781 782 if (IS_ERR(mm)) 783 return PTR_ERR(mm); 784 785 file->private_data = mm; 786 return 0; 787 } 788 789 static int mem_open(struct inode *inode, struct file *file) 790 { 791 int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); 792 793 /* OK to pass negative loff_t, we can catch out-of-range */ 794 file->f_mode |= FMODE_UNSIGNED_OFFSET; 795 796 return ret; 797 } 798 799 static ssize_t mem_rw(struct file *file, char __user *buf, 800 size_t count, loff_t *ppos, int write) 801 { 802 struct mm_struct *mm = file->private_data; 803 unsigned long addr = *ppos; 804 ssize_t copied; 805 char *page; 806 unsigned int flags; 807 808 if (!mm) 809 return 0; 810 811 page = (char *)__get_free_page(GFP_TEMPORARY); 812 if (!page) 813 return -ENOMEM; 814 815 copied = 0; 816 if (!atomic_inc_not_zero(&mm->mm_users)) 817 goto free; 818 819 /* Maybe we should limit FOLL_FORCE to actual ptrace users? 
	 */
	flags = FOLL_FORCE;
	if (write)
		flags |= FOLL_WRITE;

	while (count > 0) {
		int this_len = min_t(int, count, PAGE_SIZE);

		if (write && copy_from_user(page, buf, this_len)) {
			copied = -EFAULT;
			break;
		}

		this_len = access_remote_vm(mm, addr, page, this_len, flags);
		if (!this_len) {
			if (!copied)
				copied = -EIO;
			break;
		}

		if (!write && copy_to_user(buf, page, this_len)) {
			copied = -EFAULT;
			break;
		}

		buf += this_len;
		addr += this_len;
		copied += this_len;
		count -= this_len;
	}
	*ppos = addr;

	mmput(mm);
free:
	free_page((unsigned long) page);
	return copied;
}

static ssize_t mem_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	return mem_rw(file, buf, count, ppos, 0);
}

static ssize_t mem_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	return mem_rw(file, (char __user *)buf, count, ppos, 1);
}

loff_t mem_lseek(struct file *file, loff_t offset, int orig)
{
	switch (orig) {
	case 0:
		file->f_pos = offset;
		break;
	case 1:
		file->f_pos += offset;
		break;
	default:
		return -EINVAL;
	}
	force_successful_syscall_return();
	return file->f_pos;
}

static int mem_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;
	if (mm)
		mmdrop(mm);
	return 0;
}

static const struct file_operations proc_mem_operations = {
	.llseek		= mem_lseek,
	.read		= mem_read,
	.write		= mem_write,
	.open		= mem_open,
	.release	= mem_release,
};

static int environ_open(struct inode *inode, struct file *file)
{
	return __mem_open(inode, file, PTRACE_MODE_READ);
}

static ssize_t environ_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	char *page;
	unsigned long src = *ppos;
	int ret = 0;
	struct mm_struct *mm = file->private_data;
	unsigned long env_start, env_end;

	/* Ensure the process spawned far enough to have an environment.
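	 * env_end is only set once exec() has finished building the new
	 * stack, so a task caught in the middle of exec reads as empty
	 * rather than exposing a half-built environment.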
	 */
	if (!mm || !mm->env_end)
		return 0;

	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page)
		return -ENOMEM;

	ret = 0;
	if (!atomic_inc_not_zero(&mm->mm_users))
		goto free;

	down_read(&mm->mmap_sem);
	env_start = mm->env_start;
	env_end = mm->env_end;
	up_read(&mm->mmap_sem);

	while (count > 0) {
		size_t this_len, max_len;
		int retval;

		if (src >= (env_end - env_start))
			break;

		this_len = env_end - (env_start + src);

		max_len = min_t(size_t, PAGE_SIZE, count);
		this_len = min(max_len, this_len);

		retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);

		if (retval <= 0) {
			ret = retval;
			break;
		}

		if (copy_to_user(buf, page, retval)) {
			ret = -EFAULT;
			break;
		}

		ret += retval;
		src += retval;
		buf += retval;
		count -= retval;
	}
	*ppos = src;
	mmput(mm);

free:
	free_page((unsigned long) page);
	return ret;
}

static const struct file_operations proc_environ_operations = {
	.open		= environ_open,
	.read		= environ_read,
	.llseek		= generic_file_llseek,
	.release	= mem_release,
};

static int auxv_open(struct inode *inode, struct file *file)
{
	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
}

static ssize_t auxv_read(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	struct mm_struct *mm = file->private_data;
	unsigned int nwords = 0;

	if (!mm)
		return 0;
	do {
		nwords += 2;
	} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
	return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
				       nwords * sizeof(mm->saved_auxv[0]));
}

static const struct file_operations proc_auxv_operations = {
	.open		= auxv_open,
	.read		= auxv_read,
	.llseek		= generic_file_llseek,
	.release	= mem_release,
};

static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	char buffer[PROC_NUMBUF];
	int oom_adj = OOM_ADJUST_MIN;
	size_t len;

	if (!task)
		return -ESRCH;
	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
		oom_adj = OOM_ADJUST_MAX;
	else
		oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
			  OOM_SCORE_ADJ_MAX;
	put_task_struct(task);
	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
	return simple_read_from_buffer(buf, count, ppos, buffer, len);
}

static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
{
	static DEFINE_MUTEX(oom_adj_mutex);
	struct mm_struct *mm = NULL;
	struct task_struct *task;
	int err = 0;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;

	mutex_lock(&oom_adj_mutex);
	if (legacy) {
		if (oom_adj < task->signal->oom_score_adj &&
		    !capable(CAP_SYS_RESOURCE)) {
			err = -EACCES;
			goto err_unlock;
		}
		/*
		 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
		 * /proc/pid/oom_score_adj instead.
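		 * The legacy scale [-16, 15] (plus OOM_DISABLE, -17) maps
		 * linearly onto oom_score_adj's [-1000, 1000]: roughly
		 * oom_adj * 1000 / 17, so e.g. writing 9 here yields an
		 * oom_score_adj of 9 * 1000 / 17 = 529.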
1044 */ 1045 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 1046 current->comm, task_pid_nr(current), task_pid_nr(task), 1047 task_pid_nr(task)); 1048 } else { 1049 if ((short)oom_adj < task->signal->oom_score_adj_min && 1050 !capable(CAP_SYS_RESOURCE)) { 1051 err = -EACCES; 1052 goto err_unlock; 1053 } 1054 } 1055 1056 /* 1057 * Make sure we will check other processes sharing the mm if this is 1058 * not vfrok which wants its own oom_score_adj. 1059 * pin the mm so it doesn't go away and get reused after task_unlock 1060 */ 1061 if (!task->vfork_done) { 1062 struct task_struct *p = find_lock_task_mm(task); 1063 1064 if (p) { 1065 if (atomic_read(&p->mm->mm_users) > 1) { 1066 mm = p->mm; 1067 atomic_inc(&mm->mm_count); 1068 } 1069 task_unlock(p); 1070 } 1071 } 1072 1073 task->signal->oom_score_adj = oom_adj; 1074 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1075 task->signal->oom_score_adj_min = (short)oom_adj; 1076 trace_oom_score_adj_update(task); 1077 1078 if (mm) { 1079 struct task_struct *p; 1080 1081 rcu_read_lock(); 1082 for_each_process(p) { 1083 if (same_thread_group(task, p)) 1084 continue; 1085 1086 /* do not touch kernel threads or the global init */ 1087 if (p->flags & PF_KTHREAD || is_global_init(p)) 1088 continue; 1089 1090 task_lock(p); 1091 if (!p->vfork_done && process_shares_mm(p, mm)) { 1092 pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n", 1093 task_pid_nr(p), p->comm, 1094 p->signal->oom_score_adj, oom_adj, 1095 task_pid_nr(task), task->comm); 1096 p->signal->oom_score_adj = oom_adj; 1097 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1098 p->signal->oom_score_adj_min = (short)oom_adj; 1099 } 1100 task_unlock(p); 1101 } 1102 rcu_read_unlock(); 1103 mmdrop(mm); 1104 } 1105 err_unlock: 1106 mutex_unlock(&oom_adj_mutex); 1107 put_task_struct(task); 1108 return err; 1109 } 1110 1111 /* 1112 * /proc/pid/oom_adj exists solely for backwards compatibility with previous 1113 * kernels. The effective policy is defined by oom_score_adj, which has a 1114 * different scale: oom_adj grew exponentially and oom_score_adj grows linearly. 1115 * Values written to oom_adj are simply mapped linearly to oom_score_adj. 1116 * Processes that become oom disabled via oom_adj will still be oom disabled 1117 * with this implementation. 1118 * 1119 * oom_adj cannot be removed since existing userspace binaries use it. 1120 */ 1121 static ssize_t oom_adj_write(struct file *file, const char __user *buf, 1122 size_t count, loff_t *ppos) 1123 { 1124 char buffer[PROC_NUMBUF]; 1125 int oom_adj; 1126 int err; 1127 1128 memset(buffer, 0, sizeof(buffer)); 1129 if (count > sizeof(buffer) - 1) 1130 count = sizeof(buffer) - 1; 1131 if (copy_from_user(buffer, buf, count)) { 1132 err = -EFAULT; 1133 goto out; 1134 } 1135 1136 err = kstrtoint(strstrip(buffer), 0, &oom_adj); 1137 if (err) 1138 goto out; 1139 if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && 1140 oom_adj != OOM_DISABLE) { 1141 err = -EINVAL; 1142 goto out; 1143 } 1144 1145 /* 1146 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum 1147 * value is always attainable. 1148 */ 1149 if (oom_adj == OOM_ADJUST_MAX) 1150 oom_adj = OOM_SCORE_ADJ_MAX; 1151 else 1152 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; 1153 1154 err = __set_oom_adj(file, oom_adj, true); 1155 out: 1156 return err < 0 ? 
	       err : count;
}

static const struct file_operations proc_oom_adj_operations = {
	.read		= oom_adj_read,
	.write		= oom_adj_write,
	.llseek		= generic_file_llseek,
};

static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
				  size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	char buffer[PROC_NUMBUF];
	short oom_score_adj = OOM_SCORE_ADJ_MIN;
	size_t len;

	if (!task)
		return -ESRCH;
	oom_score_adj = task->signal->oom_score_adj;
	put_task_struct(task);
	len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
	return simple_read_from_buffer(buf, count, ppos, buffer, len);
}

static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
				   size_t count, loff_t *ppos)
{
	char buffer[PROC_NUMBUF];
	int oom_score_adj;
	int err;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count)) {
		err = -EFAULT;
		goto out;
	}

	err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
	if (err)
		goto out;
	if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
	    oom_score_adj > OOM_SCORE_ADJ_MAX) {
		err = -EINVAL;
		goto out;
	}

	err = __set_oom_adj(file, oom_score_adj, false);
out:
	return err < 0 ? err : count;
}

static const struct file_operations proc_oom_score_adj_operations = {
	.read		= oom_score_adj_read,
	.write		= oom_score_adj_write,
	.llseek		= default_llseek,
};

#ifdef CONFIG_AUDITSYSCALL
#define TMPBUFLEN 11
static ssize_t proc_loginuid_read(struct file *file, char __user *buf,
				  size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(file);
	struct task_struct *task = get_proc_task(inode);
	ssize_t length;
	char tmpbuf[TMPBUFLEN];

	if (!task)
		return -ESRCH;
	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
			   from_kuid(file->f_cred->user_ns,
				     audit_get_loginuid(task)));
	put_task_struct(task);
	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}

static ssize_t proc_loginuid_write(struct file *file, const char __user *buf,
				   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(file);
	uid_t loginuid;
	kuid_t kloginuid;
	int rv;

	rcu_read_lock();
	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
		rcu_read_unlock();
		return -EPERM;
	}
	rcu_read_unlock();

	if (*ppos != 0) {
		/* No partial writes. */
		return -EINVAL;
	}

	rv = kstrtou32_from_user(buf, count, 10, &loginuid);
	if (rv < 0)
		return rv;

	/* is userspace trying to explicitly UNSET the loginuid?
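	 * AUDIT_UID_UNSET is the sentinel (unsigned int)-1, so writing
	 * 4294967295 clears the loginuid rather than being rejected as
	 * an invalid uid.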
	 */
	if (loginuid == AUDIT_UID_UNSET) {
		kloginuid = INVALID_UID;
	} else {
		kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
		if (!uid_valid(kloginuid))
			return -EINVAL;
	}

	rv = audit_set_loginuid(kloginuid);
	if (rv < 0)
		return rv;
	return count;
}

static const struct file_operations proc_loginuid_operations = {
	.read		= proc_loginuid_read,
	.write		= proc_loginuid_write,
	.llseek		= generic_file_llseek,
};

static ssize_t proc_sessionid_read(struct file *file, char __user *buf,
				   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(file);
	struct task_struct *task = get_proc_task(inode);
	ssize_t length;
	char tmpbuf[TMPBUFLEN];

	if (!task)
		return -ESRCH;
	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
			   audit_get_sessionid(task));
	put_task_struct(task);
	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}

static const struct file_operations proc_sessionid_operations = {
	.read		= proc_sessionid_read,
	.llseek		= generic_file_llseek,
};
#endif

#ifdef CONFIG_FAULT_INJECTION
static ssize_t proc_fault_inject_read(struct file *file, char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	char buffer[PROC_NUMBUF];
	size_t len;
	int make_it_fail;

	if (!task)
		return -ESRCH;
	make_it_fail = task->make_it_fail;
	put_task_struct(task);

	len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);

	return simple_read_from_buffer(buf, count, ppos, buffer, len);
}

static ssize_t proc_fault_inject_write(struct file *file,
				       const char __user *buf, size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF];
	int make_it_fail;
	int rv;

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;
	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;
	rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
	if (rv < 0)
		return rv;
	if (make_it_fail < 0 || make_it_fail > 1)
		return -EINVAL;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;
	task->make_it_fail = make_it_fail;
	put_task_struct(task);

	return count;
}

static const struct file_operations proc_fault_inject_operations = {
	.read		= proc_fault_inject_read,
	.write		= proc_fault_inject_write,
	.llseek		= generic_file_llseek,
};
#endif


#ifdef CONFIG_SCHED_DEBUG
/*
 * Print out various scheduling related per-task fields:
 */
static int sched_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct task_struct *p;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;
	proc_sched_show_task(p, m);

	put_task_struct(p);

	return 0;
}

static ssize_t
sched_write(struct file *file, const char __user *buf,
	    size_t count, loff_t *offset)
{
	struct inode *inode = file_inode(file);
	struct task_struct *p;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;
	proc_sched_set_task(p);

	put_task_struct(p);

	return count;
}

static int
sched_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_show, inode);
}

static const struct file_operations proc_pid_sched_operations = {
	.open		= sched_open,
	.read		= seq_read,
	.write		= sched_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#endif

#ifdef CONFIG_SCHED_AUTOGROUP
/*
 * Print out autogroup related information:
 */
static int sched_autogroup_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct task_struct *p;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;
	proc_sched_autogroup_show_task(p, m);

	put_task_struct(p);

	return 0;
}

static ssize_t
sched_autogroup_write(struct file *file, const char __user *buf,
		      size_t count, loff_t *offset)
{
	struct inode *inode = file_inode(file);
	struct task_struct *p;
	char buffer[PROC_NUMBUF];
	int nice;
	int err;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;

	err = kstrtoint(strstrip(buffer), 0, &nice);
	if (err < 0)
		return err;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	err = proc_sched_autogroup_set_nice(p, nice);
	if (err)
		count = err;

	put_task_struct(p);

	return count;
}

static int sched_autogroup_open(struct inode *inode, struct file *filp)
{
	int ret;

	ret = single_open(filp, sched_autogroup_show, NULL);
	if (!ret) {
		struct seq_file *m = filp->private_data;

		m->private = inode;
	}
	return ret;
}

static const struct file_operations proc_pid_sched_autogroup_operations = {
	.open		= sched_autogroup_open,
	.read		= seq_read,
	.write		= sched_autogroup_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#endif /* CONFIG_SCHED_AUTOGROUP */

static ssize_t comm_write(struct file *file, const char __user *buf,
			  size_t count, loff_t *offset)
{
	struct inode *inode = file_inode(file);
	struct task_struct *p;
	char buffer[TASK_COMM_LEN];
	const size_t maxlen = sizeof(buffer) - 1;

	memset(buffer, 0, sizeof(buffer));
	if (copy_from_user(buffer, buf, count > maxlen ?
				maxlen : count))
		return -EFAULT;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	if (same_thread_group(current, p))
		set_task_comm(p, buffer);
	else
		count = -EINVAL;

	put_task_struct(p);

	return count;
}

static int comm_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct task_struct *p;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	task_lock(p);
	seq_printf(m, "%s\n", p->comm);
	task_unlock(p);

	put_task_struct(p);

	return 0;
}

static int comm_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, comm_show, inode);
}

static const struct file_operations proc_pid_set_comm_operations = {
	.open		= comm_open,
	.read		= seq_read,
	.write		= comm_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
{
	struct task_struct *task;
	struct file *exe_file;

	task = get_proc_task(d_inode(dentry));
	if (!task)
		return -ENOENT;
	exe_file = get_task_exe_file(task);
	put_task_struct(task);
	if (exe_file) {
		*exe_path = exe_file->f_path;
		path_get(&exe_file->f_path);
		fput(exe_file);
		return 0;
	} else
		return -ENOENT;
}

static const char *proc_pid_get_link(struct dentry *dentry,
				     struct inode *inode,
				     struct delayed_call *done)
{
	struct path path;
	int error = -EACCES;

	if (!dentry)
		return ERR_PTR(-ECHILD);

	/* Are we allowed to snoop on the tasks file descriptors? */
	if (!proc_fd_access_allowed(inode))
		goto out;

	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
	if (error)
		goto out;

	nd_jump_link(&path);
	return NULL;
out:
	return ERR_PTR(error);
}

static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
{
	char *tmp = (char *)__get_free_page(GFP_TEMPORARY);
	char *pathname;
	int len;

	if (!tmp)
		return -ENOMEM;

	pathname = d_path(path, tmp, PAGE_SIZE);
	len = PTR_ERR(pathname);
	if (IS_ERR(pathname))
		goto out;
	len = tmp + PAGE_SIZE - 1 - pathname;

	if (len > buflen)
		len = buflen;
	if (copy_to_user(buffer, pathname, len))
		len = -EFAULT;
out:
	free_page((unsigned long)tmp);
	return len;
}

static int proc_pid_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
	int error = -EACCES;
	struct inode *inode = d_inode(dentry);
	struct path path;

	/* Are we allowed to snoop on the tasks file descriptors? */
	if (!proc_fd_access_allowed(inode))
		goto out;

	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
	if (error)
		goto out;

	error = do_proc_readlink(&path, buffer, buflen);
	path_put(&path);
out:
	return error;
}

const struct inode_operations proc_pid_link_inode_operations = {
	.readlink	= proc_pid_readlink,
	.get_link	= proc_pid_get_link,
	.setattr	= proc_setattr,
};


/* building an inode */

void task_dump_owner(struct task_struct *task, mode_t mode,
		     kuid_t *ruid, kgid_t *rgid)
{
	/* Depending on the state of dumpable compute who should own a
	 * proc file for a task.
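	 * A task whose mm is dumpable keeps its effective uid/gid; a
	 * non-dumpable one (e.g. after execing a setuid binary) is
	 * handed to root in the mm's user namespace so its /proc files
	 * cannot be read by the original owner.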
1643 */ 1644 const struct cred *cred; 1645 kuid_t uid; 1646 kgid_t gid; 1647 1648 /* Default to the tasks effective ownership */ 1649 rcu_read_lock(); 1650 cred = __task_cred(task); 1651 uid = cred->euid; 1652 gid = cred->egid; 1653 rcu_read_unlock(); 1654 1655 /* 1656 * Before the /proc/pid/status file was created the only way to read 1657 * the effective uid of a /process was to stat /proc/pid. Reading 1658 * /proc/pid/status is slow enough that procps and other packages 1659 * kept stating /proc/pid. To keep the rules in /proc simple I have 1660 * made this apply to all per process world readable and executable 1661 * directories. 1662 */ 1663 if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) { 1664 struct mm_struct *mm; 1665 task_lock(task); 1666 mm = task->mm; 1667 /* Make non-dumpable tasks owned by some root */ 1668 if (mm) { 1669 if (get_dumpable(mm) != SUID_DUMP_USER) { 1670 struct user_namespace *user_ns = mm->user_ns; 1671 1672 uid = make_kuid(user_ns, 0); 1673 if (!uid_valid(uid)) 1674 uid = GLOBAL_ROOT_UID; 1675 1676 gid = make_kgid(user_ns, 0); 1677 if (!gid_valid(gid)) 1678 gid = GLOBAL_ROOT_GID; 1679 } 1680 } else { 1681 uid = GLOBAL_ROOT_UID; 1682 gid = GLOBAL_ROOT_GID; 1683 } 1684 task_unlock(task); 1685 } 1686 *ruid = uid; 1687 *rgid = gid; 1688 } 1689 1690 struct inode *proc_pid_make_inode(struct super_block * sb, 1691 struct task_struct *task, umode_t mode) 1692 { 1693 struct inode * inode; 1694 struct proc_inode *ei; 1695 1696 /* We need a new inode */ 1697 1698 inode = new_inode(sb); 1699 if (!inode) 1700 goto out; 1701 1702 /* Common stuff */ 1703 ei = PROC_I(inode); 1704 inode->i_mode = mode; 1705 inode->i_ino = get_next_ino(); 1706 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); 1707 inode->i_op = &proc_def_inode_operations; 1708 1709 /* 1710 * grab the reference to task. 1711 */ 1712 ei->pid = get_task_pid(task, PIDTYPE_PID); 1713 if (!ei->pid) 1714 goto out_unlock; 1715 1716 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); 1717 security_task_to_inode(task, inode); 1718 1719 out: 1720 return inode; 1721 1722 out_unlock: 1723 iput(inode); 1724 return NULL; 1725 } 1726 1727 int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1728 { 1729 struct inode *inode = d_inode(dentry); 1730 struct task_struct *task; 1731 struct pid_namespace *pid = dentry->d_sb->s_fs_info; 1732 1733 generic_fillattr(inode, stat); 1734 1735 rcu_read_lock(); 1736 stat->uid = GLOBAL_ROOT_UID; 1737 stat->gid = GLOBAL_ROOT_GID; 1738 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1739 if (task) { 1740 if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { 1741 rcu_read_unlock(); 1742 /* 1743 * This doesn't prevent learning whether PID exists, 1744 * it only makes getattr() consistent with readdir(). 1745 */ 1746 return -ENOENT; 1747 } 1748 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); 1749 } 1750 rcu_read_unlock(); 1751 return 0; 1752 } 1753 1754 /* dentry stuff */ 1755 1756 /* 1757 * Exceptional case: normally we are not allowed to unhash a busy 1758 * directory. In this case, however, we can do it - no aliasing problems 1759 * due to the way we treat inodes. 1760 * 1761 * Rewrite the inode's ownerships here because the owning task may have 1762 * performed a setuid(), etc. 
1763 * 1764 */ 1765 int pid_revalidate(struct dentry *dentry, unsigned int flags) 1766 { 1767 struct inode *inode; 1768 struct task_struct *task; 1769 1770 if (flags & LOOKUP_RCU) 1771 return -ECHILD; 1772 1773 inode = d_inode(dentry); 1774 task = get_proc_task(inode); 1775 1776 if (task) { 1777 task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); 1778 1779 inode->i_mode &= ~(S_ISUID | S_ISGID); 1780 security_task_to_inode(task, inode); 1781 put_task_struct(task); 1782 return 1; 1783 } 1784 return 0; 1785 } 1786 1787 static inline bool proc_inode_is_dead(struct inode *inode) 1788 { 1789 return !proc_pid(inode)->tasks[PIDTYPE_PID].first; 1790 } 1791 1792 int pid_delete_dentry(const struct dentry *dentry) 1793 { 1794 /* Is the task we represent dead? 1795 * If so, then don't put the dentry on the lru list, 1796 * kill it immediately. 1797 */ 1798 return proc_inode_is_dead(d_inode(dentry)); 1799 } 1800 1801 const struct dentry_operations pid_dentry_operations = 1802 { 1803 .d_revalidate = pid_revalidate, 1804 .d_delete = pid_delete_dentry, 1805 }; 1806 1807 /* Lookups */ 1808 1809 /* 1810 * Fill a directory entry. 1811 * 1812 * If possible create the dcache entry and derive our inode number and 1813 * file type from dcache entry. 1814 * 1815 * Since all of the proc inode numbers are dynamically generated, the inode 1816 * numbers do not exist until the inode is cache. This means creating the 1817 * the dcache entry in readdir is necessary to keep the inode numbers 1818 * reported by readdir in sync with the inode numbers reported 1819 * by stat. 1820 */ 1821 bool proc_fill_cache(struct file *file, struct dir_context *ctx, 1822 const char *name, int len, 1823 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1824 { 1825 struct dentry *child, *dir = file->f_path.dentry; 1826 struct qstr qname = QSTR_INIT(name, len); 1827 struct inode *inode; 1828 unsigned type; 1829 ino_t ino; 1830 1831 child = d_hash_and_lookup(dir, &qname); 1832 if (!child) { 1833 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); 1834 child = d_alloc_parallel(dir, &qname, &wq); 1835 if (IS_ERR(child)) 1836 goto end_instantiate; 1837 if (d_in_lookup(child)) { 1838 int err = instantiate(d_inode(dir), child, task, ptr); 1839 d_lookup_done(child); 1840 if (err < 0) { 1841 dput(child); 1842 goto end_instantiate; 1843 } 1844 } 1845 } 1846 inode = d_inode(child); 1847 ino = inode->i_ino; 1848 type = inode->i_mode >> 12; 1849 dput(child); 1850 return dir_emit(ctx, name, len, ino, type); 1851 1852 end_instantiate: 1853 return dir_emit(ctx, name, len, 1, DT_UNKNOWN); 1854 } 1855 1856 /* 1857 * dname_to_vma_addr - maps a dentry name into two unsigned longs 1858 * which represent vma start and end addresses. 
1859 */ 1860 static int dname_to_vma_addr(struct dentry *dentry, 1861 unsigned long *start, unsigned long *end) 1862 { 1863 if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) 1864 return -EINVAL; 1865 1866 return 0; 1867 } 1868 1869 static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) 1870 { 1871 unsigned long vm_start, vm_end; 1872 bool exact_vma_exists = false; 1873 struct mm_struct *mm = NULL; 1874 struct task_struct *task; 1875 struct inode *inode; 1876 int status = 0; 1877 1878 if (flags & LOOKUP_RCU) 1879 return -ECHILD; 1880 1881 inode = d_inode(dentry); 1882 task = get_proc_task(inode); 1883 if (!task) 1884 goto out_notask; 1885 1886 mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); 1887 if (IS_ERR_OR_NULL(mm)) 1888 goto out; 1889 1890 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { 1891 down_read(&mm->mmap_sem); 1892 exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); 1893 up_read(&mm->mmap_sem); 1894 } 1895 1896 mmput(mm); 1897 1898 if (exact_vma_exists) { 1899 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); 1900 1901 security_task_to_inode(task, inode); 1902 status = 1; 1903 } 1904 1905 out: 1906 put_task_struct(task); 1907 1908 out_notask: 1909 return status; 1910 } 1911 1912 static const struct dentry_operations tid_map_files_dentry_operations = { 1913 .d_revalidate = map_files_d_revalidate, 1914 .d_delete = pid_delete_dentry, 1915 }; 1916 1917 static int map_files_get_link(struct dentry *dentry, struct path *path) 1918 { 1919 unsigned long vm_start, vm_end; 1920 struct vm_area_struct *vma; 1921 struct task_struct *task; 1922 struct mm_struct *mm; 1923 int rc; 1924 1925 rc = -ENOENT; 1926 task = get_proc_task(d_inode(dentry)); 1927 if (!task) 1928 goto out; 1929 1930 mm = get_task_mm(task); 1931 put_task_struct(task); 1932 if (!mm) 1933 goto out; 1934 1935 rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); 1936 if (rc) 1937 goto out_mmput; 1938 1939 rc = -ENOENT; 1940 down_read(&mm->mmap_sem); 1941 vma = find_exact_vma(mm, vm_start, vm_end); 1942 if (vma && vma->vm_file) { 1943 *path = vma->vm_file->f_path; 1944 path_get(path); 1945 rc = 0; 1946 } 1947 up_read(&mm->mmap_sem); 1948 1949 out_mmput: 1950 mmput(mm); 1951 out: 1952 return rc; 1953 } 1954 1955 struct map_files_info { 1956 fmode_t mode; 1957 unsigned int len; 1958 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ 1959 }; 1960 1961 /* 1962 * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the 1963 * symlinks may be used to bypass permissions on ancestor directories in the 1964 * path to the file in question. 1965 */ 1966 static const char * 1967 proc_map_files_get_link(struct dentry *dentry, 1968 struct inode *inode, 1969 struct delayed_call *done) 1970 { 1971 if (!capable(CAP_SYS_ADMIN)) 1972 return ERR_PTR(-EPERM); 1973 1974 return proc_pid_get_link(dentry, inode, done); 1975 } 1976 1977 /* 1978 * Identical to proc_pid_link_inode_operations except for get_link() 1979 */ 1980 static const struct inode_operations proc_map_files_link_inode_operations = { 1981 .readlink = proc_pid_readlink, 1982 .get_link = proc_map_files_get_link, 1983 .setattr = proc_setattr, 1984 }; 1985 1986 static int 1987 proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, 1988 struct task_struct *task, const void *ptr) 1989 { 1990 fmode_t mode = (fmode_t)(unsigned long)ptr; 1991 struct proc_inode *ei; 1992 struct inode *inode; 1993 1994 inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | 1995 ((mode & FMODE_READ ) ? 
				     S_IRUSR : 0) |
				    ((mode & FMODE_WRITE) ? S_IWUSR : 0));
	if (!inode)
		return -ENOENT;

	ei = PROC_I(inode);
	ei->op.proc_get_link = map_files_get_link;

	inode->i_op = &proc_map_files_link_inode_operations;
	inode->i_size = 64;

	d_set_d_op(dentry, &tid_map_files_dentry_operations);
	d_add(dentry, inode);

	return 0;
}

static struct dentry *proc_map_files_lookup(struct inode *dir,
		struct dentry *dentry, unsigned int flags)
{
	unsigned long vm_start, vm_end;
	struct vm_area_struct *vma;
	struct task_struct *task;
	int result;
	struct mm_struct *mm;

	result = -ENOENT;
	task = get_proc_task(dir);
	if (!task)
		goto out;

	result = -EACCES;
	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		goto out_put_task;

	result = -ENOENT;
	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
		goto out_put_task;

	mm = get_task_mm(task);
	if (!mm)
		goto out_put_task;

	down_read(&mm->mmap_sem);
	vma = find_exact_vma(mm, vm_start, vm_end);
	if (!vma)
		goto out_no_vma;

	if (vma->vm_file)
		result = proc_map_files_instantiate(dir, dentry, task,
				(void *)(unsigned long)vma->vm_file->f_mode);

out_no_vma:
	up_read(&mm->mmap_sem);
	mmput(mm);
out_put_task:
	put_task_struct(task);
out:
	return ERR_PTR(result);
}

static const struct inode_operations proc_map_files_inode_operations = {
	.lookup		= proc_map_files_lookup,
	.permission	= proc_fd_permission,
	.setattr	= proc_setattr,
};

static int
proc_map_files_readdir(struct file *file, struct dir_context *ctx)
{
	struct vm_area_struct *vma;
	struct task_struct *task;
	struct mm_struct *mm;
	unsigned long nr_files, pos, i;
	struct flex_array *fa = NULL;
	struct map_files_info info;
	struct map_files_info *p;
	int ret;

	ret = -ENOENT;
	task = get_proc_task(file_inode(file));
	if (!task)
		goto out;

	ret = -EACCES;
	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		goto out_put_task;

	ret = 0;
	if (!dir_emit_dots(file, ctx))
		goto out_put_task;

	mm = get_task_mm(task);
	if (!mm)
		goto out_put_task;
	down_read(&mm->mmap_sem);

	nr_files = 0;

	/*
	 * We need two passes here:
	 *
	 * 1) Collect vmas of mapped files with mmap_sem taken
	 * 2) Release mmap_sem and instantiate entries
	 *
	 * otherwise lockdep complains, since the filldir() routine might
	 * require mmap_sem taken in might_fault().
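	 * (dir_emit() copies each name to a user buffer, and the
	 * resulting page fault can take mmap_sem again, e.g. when a
	 * task reads its own map_files directory.)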
2102 */ 2103 2104 for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { 2105 if (vma->vm_file && ++pos > ctx->pos) 2106 nr_files++; 2107 } 2108 2109 if (nr_files) { 2110 fa = flex_array_alloc(sizeof(info), nr_files, 2111 GFP_KERNEL); 2112 if (!fa || flex_array_prealloc(fa, 0, nr_files, 2113 GFP_KERNEL)) { 2114 ret = -ENOMEM; 2115 if (fa) 2116 flex_array_free(fa); 2117 up_read(&mm->mmap_sem); 2118 mmput(mm); 2119 goto out_put_task; 2120 } 2121 for (i = 0, vma = mm->mmap, pos = 2; vma; 2122 vma = vma->vm_next) { 2123 if (!vma->vm_file) 2124 continue; 2125 if (++pos <= ctx->pos) 2126 continue; 2127 2128 info.mode = vma->vm_file->f_mode; 2129 info.len = snprintf(info.name, 2130 sizeof(info.name), "%lx-%lx", 2131 vma->vm_start, vma->vm_end); 2132 if (flex_array_put(fa, i++, &info, GFP_KERNEL)) 2133 BUG(); 2134 } 2135 } 2136 up_read(&mm->mmap_sem); 2137 2138 for (i = 0; i < nr_files; i++) { 2139 p = flex_array_get(fa, i); 2140 if (!proc_fill_cache(file, ctx, 2141 p->name, p->len, 2142 proc_map_files_instantiate, 2143 task, 2144 (void *)(unsigned long)p->mode)) 2145 break; 2146 ctx->pos++; 2147 } 2148 if (fa) 2149 flex_array_free(fa); 2150 mmput(mm); 2151 2152 out_put_task: 2153 put_task_struct(task); 2154 out: 2155 return ret; 2156 } 2157 2158 static const struct file_operations proc_map_files_operations = { 2159 .read = generic_read_dir, 2160 .iterate_shared = proc_map_files_readdir, 2161 .llseek = generic_file_llseek, 2162 }; 2163 2164 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) 2165 struct timers_private { 2166 struct pid *pid; 2167 struct task_struct *task; 2168 struct sighand_struct *sighand; 2169 struct pid_namespace *ns; 2170 unsigned long flags; 2171 }; 2172 2173 static void *timers_start(struct seq_file *m, loff_t *pos) 2174 { 2175 struct timers_private *tp = m->private; 2176 2177 tp->task = get_pid_task(tp->pid, PIDTYPE_PID); 2178 if (!tp->task) 2179 return ERR_PTR(-ESRCH); 2180 2181 tp->sighand = lock_task_sighand(tp->task, &tp->flags); 2182 if (!tp->sighand) 2183 return ERR_PTR(-ESRCH); 2184 2185 return seq_list_start(&tp->task->signal->posix_timers, *pos); 2186 } 2187 2188 static void *timers_next(struct seq_file *m, void *v, loff_t *pos) 2189 { 2190 struct timers_private *tp = m->private; 2191 return seq_list_next(v, &tp->task->signal->posix_timers, pos); 2192 } 2193 2194 static void timers_stop(struct seq_file *m, void *v) 2195 { 2196 struct timers_private *tp = m->private; 2197 2198 if (tp->sighand) { 2199 unlock_task_sighand(tp->task, &tp->flags); 2200 tp->sighand = NULL; 2201 } 2202 2203 if (tp->task) { 2204 put_task_struct(tp->task); 2205 tp->task = NULL; 2206 } 2207 } 2208 2209 static int show_timer(struct seq_file *m, void *v) 2210 { 2211 struct k_itimer *timer; 2212 struct timers_private *tp = m->private; 2213 int notify; 2214 static const char * const nstr[] = { 2215 [SIGEV_SIGNAL] = "signal", 2216 [SIGEV_NONE] = "none", 2217 [SIGEV_THREAD] = "thread", 2218 }; 2219 2220 timer = list_entry((struct list_head *)v, struct k_itimer, list); 2221 notify = timer->it_sigev_notify; 2222 2223 seq_printf(m, "ID: %d\n", timer->it_id); 2224 seq_printf(m, "signal: %d/%p\n", 2225 timer->sigq->info.si_signo, 2226 timer->sigq->info.si_value.sival_ptr); 2227 seq_printf(m, "notify: %s/%s.%d\n", 2228 nstr[notify & ~SIGEV_THREAD_ID], 2229 (notify & SIGEV_THREAD_ID) ? 
"tid" : "pid", 2230 pid_nr_ns(timer->it_pid, tp->ns)); 2231 seq_printf(m, "ClockID: %d\n", timer->it_clock); 2232 2233 return 0; 2234 } 2235 2236 static const struct seq_operations proc_timers_seq_ops = { 2237 .start = timers_start, 2238 .next = timers_next, 2239 .stop = timers_stop, 2240 .show = show_timer, 2241 }; 2242 2243 static int proc_timers_open(struct inode *inode, struct file *file) 2244 { 2245 struct timers_private *tp; 2246 2247 tp = __seq_open_private(file, &proc_timers_seq_ops, 2248 sizeof(struct timers_private)); 2249 if (!tp) 2250 return -ENOMEM; 2251 2252 tp->pid = proc_pid(inode); 2253 tp->ns = inode->i_sb->s_fs_info; 2254 return 0; 2255 } 2256 2257 static const struct file_operations proc_timers_operations = { 2258 .open = proc_timers_open, 2259 .read = seq_read, 2260 .llseek = seq_lseek, 2261 .release = seq_release_private, 2262 }; 2263 #endif 2264 2265 static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, 2266 size_t count, loff_t *offset) 2267 { 2268 struct inode *inode = file_inode(file); 2269 struct task_struct *p; 2270 u64 slack_ns; 2271 int err; 2272 2273 err = kstrtoull_from_user(buf, count, 10, &slack_ns); 2274 if (err < 0) 2275 return err; 2276 2277 p = get_proc_task(inode); 2278 if (!p) 2279 return -ESRCH; 2280 2281 if (p != current) { 2282 if (!capable(CAP_SYS_NICE)) { 2283 count = -EPERM; 2284 goto out; 2285 } 2286 2287 err = security_task_setscheduler(p); 2288 if (err) { 2289 count = err; 2290 goto out; 2291 } 2292 } 2293 2294 task_lock(p); 2295 if (slack_ns == 0) 2296 p->timer_slack_ns = p->default_timer_slack_ns; 2297 else 2298 p->timer_slack_ns = slack_ns; 2299 task_unlock(p); 2300 2301 out: 2302 put_task_struct(p); 2303 2304 return count; 2305 } 2306 2307 static int timerslack_ns_show(struct seq_file *m, void *v) 2308 { 2309 struct inode *inode = m->private; 2310 struct task_struct *p; 2311 int err = 0; 2312 2313 p = get_proc_task(inode); 2314 if (!p) 2315 return -ESRCH; 2316 2317 if (p != current) { 2318 2319 if (!capable(CAP_SYS_NICE)) { 2320 err = -EPERM; 2321 goto out; 2322 } 2323 err = security_task_getscheduler(p); 2324 if (err) 2325 goto out; 2326 } 2327 2328 task_lock(p); 2329 seq_printf(m, "%llu\n", p->timer_slack_ns); 2330 task_unlock(p); 2331 2332 out: 2333 put_task_struct(p); 2334 2335 return err; 2336 } 2337 2338 static int timerslack_ns_open(struct inode *inode, struct file *filp) 2339 { 2340 return single_open(filp, timerslack_ns_show, inode); 2341 } 2342 2343 static const struct file_operations proc_pid_set_timerslack_ns_operations = { 2344 .open = timerslack_ns_open, 2345 .read = seq_read, 2346 .write = timerslack_ns_write, 2347 .llseek = seq_lseek, 2348 .release = single_release, 2349 }; 2350 2351 static int proc_pident_instantiate(struct inode *dir, 2352 struct dentry *dentry, struct task_struct *task, const void *ptr) 2353 { 2354 const struct pid_entry *p = ptr; 2355 struct inode *inode; 2356 struct proc_inode *ei; 2357 2358 inode = proc_pid_make_inode(dir->i_sb, task, p->mode); 2359 if (!inode) 2360 goto out; 2361 2362 ei = PROC_I(inode); 2363 if (S_ISDIR(inode->i_mode)) 2364 set_nlink(inode, 2); /* Use getattr to fix if necessary */ 2365 if (p->iop) 2366 inode->i_op = p->iop; 2367 if (p->fop) 2368 inode->i_fop = p->fop; 2369 ei->op = p->op; 2370 d_set_d_op(dentry, &pid_dentry_operations); 2371 d_add(dentry, inode); 2372 /* Close the race of the process dying before we return the dentry */ 2373 if (pid_revalidate(dentry, 0)) 2374 return 0; 2375 out: 2376 return -ENOENT; 2377 } 2378 2379 static struct dentry 
*proc_pident_lookup(struct inode *dir, 2380 struct dentry *dentry, 2381 const struct pid_entry *ents, 2382 unsigned int nents) 2383 { 2384 int error; 2385 struct task_struct *task = get_proc_task(dir); 2386 const struct pid_entry *p, *last; 2387 2388 error = -ENOENT; 2389 2390 if (!task) 2391 goto out_no_task; 2392 2393 /* 2394 * Yes, it does not scale. And it should not. Don't add 2395 * new entries into /proc/<tgid>/ without very good reasons. 2396 */ 2397 last = &ents[nents]; 2398 for (p = ents; p < last; p++) { 2399 if (p->len != dentry->d_name.len) 2400 continue; 2401 if (!memcmp(dentry->d_name.name, p->name, p->len)) 2402 break; 2403 } 2404 if (p >= last) 2405 goto out; 2406 2407 error = proc_pident_instantiate(dir, dentry, task, p); 2408 out: 2409 put_task_struct(task); 2410 out_no_task: 2411 return ERR_PTR(error); 2412 } 2413 2414 static int proc_pident_readdir(struct file *file, struct dir_context *ctx, 2415 const struct pid_entry *ents, unsigned int nents) 2416 { 2417 struct task_struct *task = get_proc_task(file_inode(file)); 2418 const struct pid_entry *p; 2419 2420 if (!task) 2421 return -ENOENT; 2422 2423 if (!dir_emit_dots(file, ctx)) 2424 goto out; 2425 2426 if (ctx->pos >= nents + 2) 2427 goto out; 2428 2429 for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { 2430 if (!proc_fill_cache(file, ctx, p->name, p->len, 2431 proc_pident_instantiate, task, p)) 2432 break; 2433 ctx->pos++; 2434 } 2435 out: 2436 put_task_struct(task); 2437 return 0; 2438 } 2439 2440 #ifdef CONFIG_SECURITY 2441 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 2442 size_t count, loff_t *ppos) 2443 { 2444 struct inode * inode = file_inode(file); 2445 char *p = NULL; 2446 ssize_t length; 2447 struct task_struct *task = get_proc_task(inode); 2448 2449 if (!task) 2450 return -ESRCH; 2451 2452 length = security_getprocattr(task, 2453 (char*)file->f_path.dentry->d_name.name, 2454 &p); 2455 put_task_struct(task); 2456 if (length > 0) 2457 length = simple_read_from_buffer(buf, count, ppos, p, length); 2458 kfree(p); 2459 return length; 2460 } 2461 2462 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 2463 size_t count, loff_t *ppos) 2464 { 2465 struct inode * inode = file_inode(file); 2466 void *page; 2467 ssize_t length; 2468 struct task_struct *task = get_proc_task(inode); 2469 2470 length = -ESRCH; 2471 if (!task) 2472 goto out_no_task; 2473 2474 /* A task may only write its own attributes. */ 2475 length = -EACCES; 2476 if (current != task) 2477 goto out; 2478 2479 if (count > PAGE_SIZE) 2480 count = PAGE_SIZE; 2481 2482 /* No partial writes. 
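 * A write must supply the entire attribute value in a single
 * write(2) starting at offset zero; a nonzero *ppos is rejected
 * with -EINVAL just below.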
*/ 2483 length = -EINVAL; 2484 if (*ppos != 0) 2485 goto out; 2486 2487 page = memdup_user(buf, count); 2488 if (IS_ERR(page)) { 2489 length = PTR_ERR(page); 2490 goto out; 2491 } 2492 2493 /* Guard against adverse ptrace interaction */ 2494 length = mutex_lock_interruptible(&current->signal->cred_guard_mutex); 2495 if (length < 0) 2496 goto out_free; 2497 2498 length = security_setprocattr(file->f_path.dentry->d_name.name, 2499 page, count); 2500 mutex_unlock(&current->signal->cred_guard_mutex); 2501 out_free: 2502 kfree(page); 2503 out: 2504 put_task_struct(task); 2505 out_no_task: 2506 return length; 2507 } 2508 2509 static const struct file_operations proc_pid_attr_operations = { 2510 .read = proc_pid_attr_read, 2511 .write = proc_pid_attr_write, 2512 .llseek = generic_file_llseek, 2513 }; 2514 2515 static const struct pid_entry attr_dir_stuff[] = { 2516 REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2517 REG("prev", S_IRUGO, proc_pid_attr_operations), 2518 REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2519 REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2520 REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2521 REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2522 }; 2523 2524 static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) 2525 { 2526 return proc_pident_readdir(file, ctx, 2527 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2528 } 2529 2530 static const struct file_operations proc_attr_dir_operations = { 2531 .read = generic_read_dir, 2532 .iterate_shared = proc_attr_dir_readdir, 2533 .llseek = generic_file_llseek, 2534 }; 2535 2536 static struct dentry *proc_attr_dir_lookup(struct inode *dir, 2537 struct dentry *dentry, unsigned int flags) 2538 { 2539 return proc_pident_lookup(dir, dentry, 2540 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2541 } 2542 2543 static const struct inode_operations proc_attr_dir_inode_operations = { 2544 .lookup = proc_attr_dir_lookup, 2545 .getattr = pid_getattr, 2546 .setattr = proc_setattr, 2547 }; 2548 2549 #endif 2550 2551 #ifdef CONFIG_ELF_CORE 2552 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2553 size_t count, loff_t *ppos) 2554 { 2555 struct task_struct *task = get_proc_task(file_inode(file)); 2556 struct mm_struct *mm; 2557 char buffer[PROC_NUMBUF]; 2558 size_t len; 2559 int ret; 2560 2561 if (!task) 2562 return -ESRCH; 2563 2564 ret = 0; 2565 mm = get_task_mm(task); 2566 if (mm) { 2567 len = snprintf(buffer, sizeof(buffer), "%08lx\n", 2568 ((mm->flags & MMF_DUMP_FILTER_MASK) >> 2569 MMF_DUMP_FILTER_SHIFT)); 2570 mmput(mm); 2571 ret = simple_read_from_buffer(buf, count, ppos, buffer, len); 2572 } 2573 2574 put_task_struct(task); 2575 2576 return ret; 2577 } 2578 2579 static ssize_t proc_coredump_filter_write(struct file *file, 2580 const char __user *buf, 2581 size_t count, 2582 loff_t *ppos) 2583 { 2584 struct task_struct *task; 2585 struct mm_struct *mm; 2586 unsigned int val; 2587 int ret; 2588 int i; 2589 unsigned long mask; 2590 2591 ret = kstrtouint_from_user(buf, count, 0, &val); 2592 if (ret < 0) 2593 return ret; 2594 2595 ret = -ESRCH; 2596 task = get_proc_task(file_inode(file)); 2597 if (!task) 2598 goto out_no_task; 2599 2600 mm = get_task_mm(task); 2601 if (!mm) 2602 goto out_no_mm; 2603 ret = 0; 2604 2605 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { 2606 if (val & mask) 2607 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2608 else 2609 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2610 } 2611 2612
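	/*
	 * Sketch of what the loop above just did: bit i of the value
	 * written by the user maps to bit (i + MMF_DUMP_FILTER_SHIFT)
	 * of mm->flags.  E.g.
	 *
	 *	echo 0x11 > /proc/<pid>/coredump_filter
	 *
	 * sets filter bits 0 and 4 and clears the remaining
	 * MMF_DUMP_FILTER_BITS.
	 */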
mmput(mm); 2613 out_no_mm: 2614 put_task_struct(task); 2615 out_no_task: 2616 if (ret < 0) 2617 return ret; 2618 return count; 2619 } 2620 2621 static const struct file_operations proc_coredump_filter_operations = { 2622 .read = proc_coredump_filter_read, 2623 .write = proc_coredump_filter_write, 2624 .llseek = generic_file_llseek, 2625 }; 2626 #endif 2627 2628 #ifdef CONFIG_TASK_IO_ACCOUNTING 2629 static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole) 2630 { 2631 struct task_io_accounting acct = task->ioac; 2632 unsigned long flags; 2633 int result; 2634 2635 result = mutex_lock_killable(&task->signal->cred_guard_mutex); 2636 if (result) 2637 return result; 2638 2639 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { 2640 result = -EACCES; 2641 goto out_unlock; 2642 } 2643 2644 if (whole && lock_task_sighand(task, &flags)) { 2645 struct task_struct *t = task; 2646 2647 task_io_accounting_add(&acct, &task->signal->ioac); 2648 while_each_thread(task, t) 2649 task_io_accounting_add(&acct, &t->ioac); 2650 2651 unlock_task_sighand(task, &flags); 2652 } 2653 seq_printf(m, 2654 "rchar: %llu\n" 2655 "wchar: %llu\n" 2656 "syscr: %llu\n" 2657 "syscw: %llu\n" 2658 "read_bytes: %llu\n" 2659 "write_bytes: %llu\n" 2660 "cancelled_write_bytes: %llu\n", 2661 (unsigned long long)acct.rchar, 2662 (unsigned long long)acct.wchar, 2663 (unsigned long long)acct.syscr, 2664 (unsigned long long)acct.syscw, 2665 (unsigned long long)acct.read_bytes, 2666 (unsigned long long)acct.write_bytes, 2667 (unsigned long long)acct.cancelled_write_bytes); 2668 result = 0; 2669 2670 out_unlock: 2671 mutex_unlock(&task->signal->cred_guard_mutex); 2672 return result; 2673 } 2674 2675 static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns, 2676 struct pid *pid, struct task_struct *task) 2677 { 2678 return do_io_accounting(task, m, 0); 2679 } 2680 2681 static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns, 2682 struct pid *pid, struct task_struct *task) 2683 { 2684 return do_io_accounting(task, m, 1); 2685 } 2686 #endif /* CONFIG_TASK_IO_ACCOUNTING */ 2687 2688 #ifdef CONFIG_USER_NS 2689 static int proc_id_map_open(struct inode *inode, struct file *file, 2690 const struct seq_operations *seq_ops) 2691 { 2692 struct user_namespace *ns = NULL; 2693 struct task_struct *task; 2694 struct seq_file *seq; 2695 int ret = -EINVAL; 2696 2697 task = get_proc_task(inode); 2698 if (task) { 2699 rcu_read_lock(); 2700 ns = get_user_ns(task_cred_xxx(task, user_ns)); 2701 rcu_read_unlock(); 2702 put_task_struct(task); 2703 } 2704 if (!ns) 2705 goto err; 2706 2707 ret = seq_open(file, seq_ops); 2708 if (ret) 2709 goto err_put_ns; 2710 2711 seq = file->private_data; 2712 seq->private = ns; 2713 2714 return 0; 2715 err_put_ns: 2716 put_user_ns(ns); 2717 err: 2718 return ret; 2719 } 2720 2721 static int proc_id_map_release(struct inode *inode, struct file *file) 2722 { 2723 struct seq_file *seq = file->private_data; 2724 struct user_namespace *ns = seq->private; 2725 put_user_ns(ns); 2726 return seq_release(inode, file); 2727 } 2728 2729 static int proc_uid_map_open(struct inode *inode, struct file *file) 2730 { 2731 return proc_id_map_open(inode, file, &proc_uid_seq_operations); 2732 } 2733 2734 static int proc_gid_map_open(struct inode *inode, struct file *file) 2735 { 2736 return proc_id_map_open(inode, file, &proc_gid_seq_operations); 2737 } 2738 2739 static int proc_projid_map_open(struct inode *inode, struct file *file) 2740 { 2741 return 
proc_id_map_open(inode, file, &proc_projid_seq_operations); 2742 } 2743 2744 static const struct file_operations proc_uid_map_operations = { 2745 .open = proc_uid_map_open, 2746 .write = proc_uid_map_write, 2747 .read = seq_read, 2748 .llseek = seq_lseek, 2749 .release = proc_id_map_release, 2750 }; 2751 2752 static const struct file_operations proc_gid_map_operations = { 2753 .open = proc_gid_map_open, 2754 .write = proc_gid_map_write, 2755 .read = seq_read, 2756 .llseek = seq_lseek, 2757 .release = proc_id_map_release, 2758 }; 2759 2760 static const struct file_operations proc_projid_map_operations = { 2761 .open = proc_projid_map_open, 2762 .write = proc_projid_map_write, 2763 .read = seq_read, 2764 .llseek = seq_lseek, 2765 .release = proc_id_map_release, 2766 }; 2767 2768 static int proc_setgroups_open(struct inode *inode, struct file *file) 2769 { 2770 struct user_namespace *ns = NULL; 2771 struct task_struct *task; 2772 int ret; 2773 2774 ret = -ESRCH; 2775 task = get_proc_task(inode); 2776 if (task) { 2777 rcu_read_lock(); 2778 ns = get_user_ns(task_cred_xxx(task, user_ns)); 2779 rcu_read_unlock(); 2780 put_task_struct(task); 2781 } 2782 if (!ns) 2783 goto err; 2784 2785 if (file->f_mode & FMODE_WRITE) { 2786 ret = -EACCES; 2787 if (!ns_capable(ns, CAP_SYS_ADMIN)) 2788 goto err_put_ns; 2789 } 2790 2791 ret = single_open(file, &proc_setgroups_show, ns); 2792 if (ret) 2793 goto err_put_ns; 2794 2795 return 0; 2796 err_put_ns: 2797 put_user_ns(ns); 2798 err: 2799 return ret; 2800 } 2801 2802 static int proc_setgroups_release(struct inode *inode, struct file *file) 2803 { 2804 struct seq_file *seq = file->private_data; 2805 struct user_namespace *ns = seq->private; 2806 int ret = single_release(inode, file); 2807 put_user_ns(ns); 2808 return ret; 2809 } 2810 2811 static const struct file_operations proc_setgroups_operations = { 2812 .open = proc_setgroups_open, 2813 .write = proc_setgroups_write, 2814 .read = seq_read, 2815 .llseek = seq_lseek, 2816 .release = proc_setgroups_release, 2817 }; 2818 #endif /* CONFIG_USER_NS */ 2819 2820 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2821 struct pid *pid, struct task_struct *task) 2822 { 2823 int err = lock_trace(task); 2824 if (!err) { 2825 seq_printf(m, "%08x\n", task->personality); 2826 unlock_trace(task); 2827 } 2828 return err; 2829 } 2830 2831 /* 2832 * Thread groups 2833 */ 2834 static const struct file_operations proc_task_operations; 2835 static const struct inode_operations proc_task_inode_operations; 2836 2837 static const struct pid_entry tgid_base_stuff[] = { 2838 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2839 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2840 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), 2841 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2842 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 2843 #ifdef CONFIG_NET 2844 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2845 #endif 2846 REG("environ", S_IRUSR, proc_environ_operations), 2847 REG("auxv", S_IRUSR, proc_auxv_operations), 2848 ONE("status", S_IRUGO, proc_pid_status), 2849 ONE("personality", S_IRUSR, proc_pid_personality), 2850 ONE("limits", S_IRUGO, proc_pid_limits), 2851 #ifdef CONFIG_SCHED_DEBUG 2852 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2853 #endif 2854 #ifdef CONFIG_SCHED_AUTOGROUP 
2855 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), 2856 #endif 2857 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2858 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2859 ONE("syscall", S_IRUSR, proc_pid_syscall), 2860 #endif 2861 REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), 2862 ONE("stat", S_IRUGO, proc_tgid_stat), 2863 ONE("statm", S_IRUGO, proc_pid_statm), 2864 REG("maps", S_IRUGO, proc_pid_maps_operations), 2865 #ifdef CONFIG_NUMA 2866 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), 2867 #endif 2868 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2869 LNK("cwd", proc_cwd_link), 2870 LNK("root", proc_root_link), 2871 LNK("exe", proc_exe_link), 2872 REG("mounts", S_IRUGO, proc_mounts_operations), 2873 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 2874 REG("mountstats", S_IRUSR, proc_mountstats_operations), 2875 #ifdef CONFIG_PROC_PAGE_MONITOR 2876 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2877 REG("smaps", S_IRUGO, proc_pid_smaps_operations), 2878 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2879 #endif 2880 #ifdef CONFIG_SECURITY 2881 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2882 #endif 2883 #ifdef CONFIG_KALLSYMS 2884 ONE("wchan", S_IRUGO, proc_pid_wchan), 2885 #endif 2886 #ifdef CONFIG_STACKTRACE 2887 ONE("stack", S_IRUSR, proc_pid_stack), 2888 #endif 2889 #ifdef CONFIG_SCHED_INFO 2890 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 2891 #endif 2892 #ifdef CONFIG_LATENCYTOP 2893 REG("latency", S_IRUGO, proc_lstats_operations), 2894 #endif 2895 #ifdef CONFIG_PROC_PID_CPUSET 2896 ONE("cpuset", S_IRUGO, proc_cpuset_show), 2897 #endif 2898 #ifdef CONFIG_CGROUPS 2899 ONE("cgroup", S_IRUGO, proc_cgroup_show), 2900 #endif 2901 ONE("oom_score", S_IRUGO, proc_oom_score), 2902 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 2903 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2904 #ifdef CONFIG_AUDITSYSCALL 2905 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2906 REG("sessionid", S_IRUGO, proc_sessionid_operations), 2907 #endif 2908 #ifdef CONFIG_FAULT_INJECTION 2909 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2910 #endif 2911 #ifdef CONFIG_ELF_CORE 2912 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2913 #endif 2914 #ifdef CONFIG_TASK_IO_ACCOUNTING 2915 ONE("io", S_IRUSR, proc_tgid_io_accounting), 2916 #endif 2917 #ifdef CONFIG_HARDWALL 2918 ONE("hardwall", S_IRUGO, proc_pid_hardwall), 2919 #endif 2920 #ifdef CONFIG_USER_NS 2921 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2922 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 2923 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 2924 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), 2925 #endif 2926 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) 2927 REG("timers", S_IRUGO, proc_timers_operations), 2928 #endif 2929 REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), 2930 }; 2931 2932 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) 2933 { 2934 return proc_pident_readdir(file, ctx, 2935 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 2936 } 2937 2938 static const struct file_operations proc_tgid_base_operations = { 2939 .read = generic_read_dir, 2940 .iterate_shared = proc_tgid_base_readdir, 2941 .llseek = generic_file_llseek, 2942 }; 2943 2944 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct 
dentry *dentry, unsigned int flags) 2945 { 2946 return proc_pident_lookup(dir, dentry, 2947 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 2948 } 2949 2950 static const struct inode_operations proc_tgid_base_inode_operations = { 2951 .lookup = proc_tgid_base_lookup, 2952 .getattr = pid_getattr, 2953 .setattr = proc_setattr, 2954 .permission = proc_pid_permission, 2955 }; 2956 2957 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 2958 { 2959 struct dentry *dentry, *leader, *dir; 2960 char buf[PROC_NUMBUF]; 2961 struct qstr name; 2962 2963 name.name = buf; 2964 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2965 /* no ->d_hash() rejects on procfs */ 2966 dentry = d_hash_and_lookup(mnt->mnt_root, &name); 2967 if (dentry) { 2968 d_invalidate(dentry); 2969 dput(dentry); 2970 } 2971 2972 if (pid == tgid) 2973 return; 2974 2975 name.name = buf; 2976 name.len = snprintf(buf, sizeof(buf), "%d", tgid); 2977 leader = d_hash_and_lookup(mnt->mnt_root, &name); 2978 if (!leader) 2979 goto out; 2980 2981 name.name = "task"; 2982 name.len = strlen(name.name); 2983 dir = d_hash_and_lookup(leader, &name); 2984 if (!dir) 2985 goto out_put_leader; 2986 2987 name.name = buf; 2988 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2989 dentry = d_hash_and_lookup(dir, &name); 2990 if (dentry) { 2991 d_invalidate(dentry); 2992 dput(dentry); 2993 } 2994 2995 dput(dir); 2996 out_put_leader: 2997 dput(leader); 2998 out: 2999 return; 3000 } 3001 3002 /** 3003 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 3004 * @task: task that should be flushed. 3005 * 3006 * When flushing dentries from proc, one needs to flush them from global 3007 * proc (proc_mnt) and from all the namespaces' procs this task was seen 3008 * in. This call does all of this work. 3009 * 3010 * Looks in the dcache for 3011 * /proc/@pid 3012 * /proc/@tgid/task/@pid 3013 * if either directory is present, flushes it and all of its children 3014 * from the dcache. 3015 * 3016 * It is safe and reasonable to cache /proc entries for a task until 3017 * that task exits. After that they just clog up the dcache with 3018 * useless entries, possibly causing useful dcache entries to be 3019 * flushed instead. This routine is provided to flush those useless 3020 * dcache entries at process exit time. 3021 * 3022 * NOTE: This routine is just an optimization so it does not guarantee 3023 * that no dcache entries will exist at process exit time; it 3024 * just makes it very unlikely that any will persist.
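 *
 * For example, for a thread with pid 1234 in thread group 1200, each
 * pass over proc_flush_task_mnt() above looks up and invalidates
 * /proc/1234 and /proc/1200/task/1234 in that mount's dcache.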
3025 */ 3026 3027 void proc_flush_task(struct task_struct *task) 3028 { 3029 int i; 3030 struct pid *pid, *tgid; 3031 struct upid *upid; 3032 3033 pid = task_pid(task); 3034 tgid = task_tgid(task); 3035 3036 for (i = 0; i <= pid->level; i++) { 3037 upid = &pid->numbers[i]; 3038 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 3039 tgid->numbers[i].nr); 3040 } 3041 } 3042 3043 static int proc_pid_instantiate(struct inode *dir, 3044 struct dentry * dentry, 3045 struct task_struct *task, const void *ptr) 3046 { 3047 struct inode *inode; 3048 3049 inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); 3050 if (!inode) 3051 goto out; 3052 3053 inode->i_op = &proc_tgid_base_inode_operations; 3054 inode->i_fop = &proc_tgid_base_operations; 3055 inode->i_flags |= S_IMMUTABLE; 3056 3057 set_nlink(inode, nlink_tgid); 3058 3059 d_set_d_op(dentry, &pid_dentry_operations); 3060 3061 d_add(dentry, inode); 3062 /* Close the race of the process dying before we return the dentry */ 3063 if (pid_revalidate(dentry, 0)) 3064 return 0; 3065 out: 3066 return -ENOENT; 3067 } 3068 3069 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 3070 { 3071 int result = -ENOENT; 3072 struct task_struct *task; 3073 unsigned tgid; 3074 struct pid_namespace *ns; 3075 3076 tgid = name_to_int(&dentry->d_name); 3077 if (tgid == ~0U) 3078 goto out; 3079 3080 ns = dentry->d_sb->s_fs_info; 3081 rcu_read_lock(); 3082 task = find_task_by_pid_ns(tgid, ns); 3083 if (task) 3084 get_task_struct(task); 3085 rcu_read_unlock(); 3086 if (!task) 3087 goto out; 3088 3089 result = proc_pid_instantiate(dir, dentry, task, NULL); 3090 put_task_struct(task); 3091 out: 3092 return ERR_PTR(result); 3093 } 3094 3095 /* 3096 * Find the first task with tgid >= iter.tgid 3097 * 3098 */ 3099 struct tgid_iter { 3100 unsigned int tgid; 3101 struct task_struct *task; 3102 }; 3103 static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) 3104 { 3105 struct pid *pid; 3106 3107 if (iter.task) 3108 put_task_struct(iter.task); 3109 rcu_read_lock(); 3110 retry: 3111 iter.task = NULL; 3112 pid = find_ge_pid(iter.tgid, ns); 3113 if (pid) { 3114 iter.tgid = pid_nr_ns(pid, ns); 3115 iter.task = pid_task(pid, PIDTYPE_PID); 3116 /* What we want to know is whether the pid we have found is 3117 * the pid of a thread_group_leader. Testing for task 3118 * being a thread_group_leader is the obvious thing 3119 * to do, but there is a window when it fails, due to 3120 * the pid transfer logic in de_thread. 3121 * 3122 * So we perform the straightforward test of seeing 3123 * whether the pid we have found is the pid of a thread 3124 * group leader, and don't worry if the task we have 3125 * found doesn't happen to be a thread group leader, 3126 * as we don't care in the readdir case.
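 *
 * (Concretely: if find_ge_pid() hands us the pid of a non-leader
 * thread, the check below bumps iter.tgid by one and retries until
 * it lands on a pid that is in use as a thread group leader's pid.)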
3127 */ 3128 if (!iter.task || !has_group_leader_pid(iter.task)) { 3129 iter.tgid += 1; 3130 goto retry; 3131 } 3132 get_task_struct(iter.task); 3133 } 3134 rcu_read_unlock(); 3135 return iter; 3136 } 3137 3138 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) 3139 3140 /* for the /proc/ directory itself, after non-process stuff has been done */ 3141 int proc_pid_readdir(struct file *file, struct dir_context *ctx) 3142 { 3143 struct tgid_iter iter; 3144 struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info; 3145 loff_t pos = ctx->pos; 3146 3147 if (pos >= PID_MAX_LIMIT + TGID_OFFSET) 3148 return 0; 3149 3150 if (pos == TGID_OFFSET - 2) { 3151 struct inode *inode = d_inode(ns->proc_self); 3152 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) 3153 return 0; 3154 ctx->pos = pos = pos + 1; 3155 } 3156 if (pos == TGID_OFFSET - 1) { 3157 struct inode *inode = d_inode(ns->proc_thread_self); 3158 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) 3159 return 0; 3160 ctx->pos = pos = pos + 1; 3161 } 3162 iter.tgid = pos - TGID_OFFSET; 3163 iter.task = NULL; 3164 for (iter = next_tgid(ns, iter); 3165 iter.task; 3166 iter.tgid += 1, iter = next_tgid(ns, iter)) { 3167 char name[PROC_NUMBUF]; 3168 int len; 3169 3170 cond_resched(); 3171 if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) 3172 continue; 3173 3174 len = snprintf(name, sizeof(name), "%d", iter.tgid); 3175 ctx->pos = iter.tgid + TGID_OFFSET; 3176 if (!proc_fill_cache(file, ctx, name, len, 3177 proc_pid_instantiate, iter.task, NULL)) { 3178 put_task_struct(iter.task); 3179 return 0; 3180 } 3181 } 3182 ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; 3183 return 0; 3184 } 3185 3186 /* 3187 * proc_tid_comm_permission is a special permission function exclusively 3188 * used for the node /proc/<pid>/task/<tid>/comm. 3189 * It bypasses generic permission checks in the case where a task of the same 3190 * thread group attempts to access the node. 3191 * The rationale behind this is that glibc and bionic access this node for 3192 * cross thread naming (pthread_set/getname_np(!self)). However, if 3193 * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0, 3194 * which locks out the cross thread naming implementation. 3195 * This function makes sure that the node is always accessible for members of 3196 * the same thread group. 3197 */ 3198 static int proc_tid_comm_permission(struct inode *inode, int mask) 3199 { 3200 bool is_same_tgroup; 3201 struct task_struct *task; 3202 3203 task = get_proc_task(inode); 3204 if (!task) 3205 return -ESRCH; 3206 is_same_tgroup = same_thread_group(current, task); 3207 put_task_struct(task); 3208 3209 if (likely(is_same_tgroup && !(mask & MAY_EXEC))) { 3210 /* This file (/proc/<pid>/task/<tid>/comm) can always be 3211 * read or written by the members of the corresponding 3212 * thread group.
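		 *
		 * A minimal userspace sketch of the cross-thread rename
		 * this enables (illustrative only: error handling is
		 * omitted, and it assumes <stdio.h>, <fcntl.h> and
		 * <unistd.h> plus a `tid' from the caller's own thread
		 * group):
		 *
		 *	char path[64];
		 *	int fd;
		 *
		 *	snprintf(path, sizeof(path),
		 *		 "/proc/self/task/%d/comm", tid);
		 *	fd = open(path, O_WRONLY);
		 *	write(fd, "worker-1", 8);
		 *	close(fd);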
3213 */ 3214 return 0; 3215 } 3216 3217 return generic_permission(inode, mask); 3218 } 3219 3220 static const struct inode_operations proc_tid_comm_inode_operations = { 3221 .permission = proc_tid_comm_permission, 3222 }; 3223 3224 /* 3225 * Tasks 3226 */ 3227 static const struct pid_entry tid_base_stuff[] = { 3228 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3229 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3230 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3231 #ifdef CONFIG_NET 3232 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 3233 #endif 3234 REG("environ", S_IRUSR, proc_environ_operations), 3235 REG("auxv", S_IRUSR, proc_auxv_operations), 3236 ONE("status", S_IRUGO, proc_pid_status), 3237 ONE("personality", S_IRUSR, proc_pid_personality), 3238 ONE("limits", S_IRUGO, proc_pid_limits), 3239 #ifdef CONFIG_SCHED_DEBUG 3240 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3241 #endif 3242 NOD("comm", S_IFREG|S_IRUGO|S_IWUSR, 3243 &proc_tid_comm_inode_operations, 3244 &proc_pid_set_comm_operations, {}), 3245 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3246 ONE("syscall", S_IRUSR, proc_pid_syscall), 3247 #endif 3248 REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), 3249 ONE("stat", S_IRUGO, proc_tid_stat), 3250 ONE("statm", S_IRUGO, proc_pid_statm), 3251 REG("maps", S_IRUGO, proc_tid_maps_operations), 3252 #ifdef CONFIG_PROC_CHILDREN 3253 REG("children", S_IRUGO, proc_tid_children_operations), 3254 #endif 3255 #ifdef CONFIG_NUMA 3256 REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), 3257 #endif 3258 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3259 LNK("cwd", proc_cwd_link), 3260 LNK("root", proc_root_link), 3261 LNK("exe", proc_exe_link), 3262 REG("mounts", S_IRUGO, proc_mounts_operations), 3263 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3264 #ifdef CONFIG_PROC_PAGE_MONITOR 3265 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3266 REG("smaps", S_IRUGO, proc_tid_smaps_operations), 3267 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3268 #endif 3269 #ifdef CONFIG_SECURITY 3270 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3271 #endif 3272 #ifdef CONFIG_KALLSYMS 3273 ONE("wchan", S_IRUGO, proc_pid_wchan), 3274 #endif 3275 #ifdef CONFIG_STACKTRACE 3276 ONE("stack", S_IRUSR, proc_pid_stack), 3277 #endif 3278 #ifdef CONFIG_SCHED_INFO 3279 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 3280 #endif 3281 #ifdef CONFIG_LATENCYTOP 3282 REG("latency", S_IRUGO, proc_lstats_operations), 3283 #endif 3284 #ifdef CONFIG_PROC_PID_CPUSET 3285 ONE("cpuset", S_IRUGO, proc_cpuset_show), 3286 #endif 3287 #ifdef CONFIG_CGROUPS 3288 ONE("cgroup", S_IRUGO, proc_cgroup_show), 3289 #endif 3290 ONE("oom_score", S_IRUGO, proc_oom_score), 3291 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 3292 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3293 #ifdef CONFIG_AUDITSYSCALL 3294 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3295 REG("sessionid", S_IRUGO, proc_sessionid_operations), 3296 #endif 3297 #ifdef CONFIG_FAULT_INJECTION 3298 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3299 #endif 3300 #ifdef CONFIG_TASK_IO_ACCOUNTING 3301 ONE("io", S_IRUSR, proc_tid_io_accounting), 3302 #endif 3303 #ifdef CONFIG_HARDWALL 3304 ONE("hardwall", S_IRUGO, proc_pid_hardwall), 3305 #endif 3306 #ifdef CONFIG_USER_NS 3307 REG("uid_map", S_IRUGO|S_IWUSR, 
proc_uid_map_operations), 3308 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 3309 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 3310 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), 3311 #endif 3312 }; 3313 3314 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) 3315 { 3316 return proc_pident_readdir(file, ctx, 3317 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3318 } 3319 3320 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 3321 { 3322 return proc_pident_lookup(dir, dentry, 3323 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3324 } 3325 3326 static const struct file_operations proc_tid_base_operations = { 3327 .read = generic_read_dir, 3328 .iterate_shared = proc_tid_base_readdir, 3329 .llseek = generic_file_llseek, 3330 }; 3331 3332 static const struct inode_operations proc_tid_base_inode_operations = { 3333 .lookup = proc_tid_base_lookup, 3334 .getattr = pid_getattr, 3335 .setattr = proc_setattr, 3336 }; 3337 3338 static int proc_task_instantiate(struct inode *dir, 3339 struct dentry *dentry, struct task_struct *task, const void *ptr) 3340 { 3341 struct inode *inode; 3342 inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); 3343 3344 if (!inode) 3345 goto out; 3346 inode->i_op = &proc_tid_base_inode_operations; 3347 inode->i_fop = &proc_tid_base_operations; 3348 inode->i_flags |= S_IMMUTABLE; 3349 3350 set_nlink(inode, nlink_tid); 3351 3352 d_set_d_op(dentry, &pid_dentry_operations); 3353 3354 d_add(dentry, inode); 3355 /* Close the race of the process dying before we return the dentry */ 3356 if (pid_revalidate(dentry, 0)) 3357 return 0; 3358 out: 3359 return -ENOENT; 3360 } 3361 3362 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 3363 { 3364 int result = -ENOENT; 3365 struct task_struct *task; 3366 struct task_struct *leader = get_proc_task(dir); 3367 unsigned tid; 3368 struct pid_namespace *ns; 3369 3370 if (!leader) 3371 goto out_no_task; 3372 3373 tid = name_to_int(&dentry->d_name); 3374 if (tid == ~0U) 3375 goto out; 3376 3377 ns = dentry->d_sb->s_fs_info; 3378 rcu_read_lock(); 3379 task = find_task_by_pid_ns(tid, ns); 3380 if (task) 3381 get_task_struct(task); 3382 rcu_read_unlock(); 3383 if (!task) 3384 goto out; 3385 if (!same_thread_group(leader, task)) 3386 goto out_drop_task; 3387 3388 result = proc_task_instantiate(dir, dentry, task, NULL); 3389 out_drop_task: 3390 put_task_struct(task); 3391 out: 3392 put_task_struct(leader); 3393 out_no_task: 3394 return ERR_PTR(result); 3395 } 3396 3397 /* 3398 * Find the first tid of a thread group to return to user space. 3399 * 3400 * Usually this is just the thread group leader, but if the user's 3401 * buffer was too small or there was a seek into the middle of the 3402 * directory we have more work to do. 3403 * 3404 * In the case of a short read we start with find_task_by_pid_ns. 3405 * 3406 * In the case of a seek we start with the leader and walk nr 3407 * threads past it. 3408 */ 3409 static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, 3410 struct pid_namespace *ns) 3411 { 3412 struct task_struct *pos, *task; 3413 unsigned long nr = f_pos; 3414 3415 if (nr != f_pos) /* 32bit overflow?
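On 32-bit kernels loff_t is wider than unsigned long, so a huge seek offset would have been truncated in the assignment above; bail out rather than walk from a bogus position.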
*/ 3416 return NULL; 3417 3418 rcu_read_lock(); 3419 task = pid_task(pid, PIDTYPE_PID); 3420 if (!task) 3421 goto fail; 3422 3423 /* Attempt to start with the tid of a thread */ 3424 if (tid && nr) { 3425 pos = find_task_by_pid_ns(tid, ns); 3426 if (pos && same_thread_group(pos, task)) 3427 goto found; 3428 } 3429 3430 /* If nr exceeds the number of threads there is nothing to do */ 3431 if (nr >= get_nr_threads(task)) 3432 goto fail; 3433 3434 /* If we haven't found our starting place yet, start 3435 * with the leader and walk nr threads forward. 3436 */ 3437 pos = task = task->group_leader; 3438 do { 3439 if (!nr--) 3440 goto found; 3441 } while_each_thread(task, pos); 3442 fail: 3443 pos = NULL; 3444 goto out; 3445 found: 3446 get_task_struct(pos); 3447 out: 3448 rcu_read_unlock(); 3449 return pos; 3450 } 3451 3452 /* 3453 * Find the next thread in the thread list. 3454 * Return NULL if there is an error or no next thread. 3455 * 3456 * The reference to the input task_struct is released. 3457 */ 3458 static struct task_struct *next_tid(struct task_struct *start) 3459 { 3460 struct task_struct *pos = NULL; 3461 rcu_read_lock(); 3462 if (pid_alive(start)) { 3463 pos = next_thread(start); 3464 if (thread_group_leader(pos)) 3465 pos = NULL; 3466 else 3467 get_task_struct(pos); 3468 } 3469 rcu_read_unlock(); 3470 put_task_struct(start); 3471 return pos; 3472 } 3473 3474 /* for the /proc/TGID/task/ directories */ 3475 static int proc_task_readdir(struct file *file, struct dir_context *ctx) 3476 { 3477 struct inode *inode = file_inode(file); 3478 struct task_struct *task; 3479 struct pid_namespace *ns; 3480 int tid; 3481 3482 if (proc_inode_is_dead(inode)) 3483 return -ENOENT; 3484 3485 if (!dir_emit_dots(file, ctx)) 3486 return 0; 3487 3488 /* f_version caches the tid value that the last readdir call couldn't 3489 * return. lseek aka telldir automagically resets f_version to 0.
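	 * If proc_fill_cache() fails mid-stream, the loop below stashes
	 * the tid it could not return in f_version and resumes from it
	 * on the next readdir call.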
3490 */ 3491 ns = inode->i_sb->s_fs_info; 3492 tid = (int)file->f_version; 3493 file->f_version = 0; 3494 for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); 3495 task; 3496 task = next_tid(task), ctx->pos++) { 3497 char name[PROC_NUMBUF]; 3498 int len; 3499 tid = task_pid_nr_ns(task, ns); 3500 len = snprintf(name, sizeof(name), "%d", tid); 3501 if (!proc_fill_cache(file, ctx, name, len, 3502 proc_task_instantiate, task, NULL)) { 3503 /* returning this tid failed, save it as the first 3504 * tid for the next readdir call */ 3505 file->f_version = (u64)tid; 3506 put_task_struct(task); 3507 break; 3508 } 3509 } 3510 3511 return 0; 3512 } 3513 3514 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 3515 { 3516 struct inode *inode = d_inode(dentry); 3517 struct task_struct *p = get_proc_task(inode); 3518 generic_fillattr(inode, stat); 3519 3520 if (p) { 3521 stat->nlink += get_nr_threads(p); 3522 put_task_struct(p); 3523 } 3524 3525 return 0; 3526 } 3527 3528 static const struct inode_operations proc_task_inode_operations = { 3529 .lookup = proc_task_lookup, 3530 .getattr = proc_task_getattr, 3531 .setattr = proc_setattr, 3532 .permission = proc_pid_permission, 3533 }; 3534 3535 static const struct file_operations proc_task_operations = { 3536 .read = generic_read_dir, 3537 .iterate_shared = proc_task_readdir, 3538 .llseek = generic_file_llseek, 3539 }; 3540 3541 void __init set_proc_pid_nlink(void) 3542 { 3543 nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3544 nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3545 } 3546
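/*
 * Illustrative (non-kernel, not compiled here) sketch of consuming one
 * of the per-pid files wired up above -- /proc/<pid>/io, whose format
 * is emitted by do_io_accounting().  Assumes CONFIG_TASK_IO_ACCOUNTING
 * and ptrace read access to the target pid:
 *
 *	#include <stdio.h>
 *
 *	int main(int argc, char **argv)
 *	{
 *		char path[64], line[128];
 *		FILE *f;
 *
 *		snprintf(path, sizeof(path), "/proc/%s/io",
 *			 argc > 1 ? argv[1] : "self");
 *		f = fopen(path, "r");
 *		if (!f)
 *			return 1;
 *		while (fgets(line, sizeof(line), f))
 *			fputs(line, stdout);	// rchar:, wchar:, syscr:, ...
 *		fclose(f);
 *		return 0;
 *	}
 */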