// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't
 *  use the rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
49 */ 50 51 #include <linux/uaccess.h> 52 53 #include <linux/errno.h> 54 #include <linux/time.h> 55 #include <linux/proc_fs.h> 56 #include <linux/stat.h> 57 #include <linux/task_io_accounting_ops.h> 58 #include <linux/init.h> 59 #include <linux/capability.h> 60 #include <linux/file.h> 61 #include <linux/fdtable.h> 62 #include <linux/string.h> 63 #include <linux/seq_file.h> 64 #include <linux/namei.h> 65 #include <linux/mnt_namespace.h> 66 #include <linux/mm.h> 67 #include <linux/swap.h> 68 #include <linux/rcupdate.h> 69 #include <linux/kallsyms.h> 70 #include <linux/stacktrace.h> 71 #include <linux/resource.h> 72 #include <linux/module.h> 73 #include <linux/mount.h> 74 #include <linux/security.h> 75 #include <linux/ptrace.h> 76 #include <linux/tracehook.h> 77 #include <linux/printk.h> 78 #include <linux/cgroup.h> 79 #include <linux/cpuset.h> 80 #include <linux/audit.h> 81 #include <linux/poll.h> 82 #include <linux/nsproxy.h> 83 #include <linux/oom.h> 84 #include <linux/elf.h> 85 #include <linux/pid_namespace.h> 86 #include <linux/user_namespace.h> 87 #include <linux/fs_struct.h> 88 #include <linux/slab.h> 89 #include <linux/sched/autogroup.h> 90 #include <linux/sched/mm.h> 91 #include <linux/sched/coredump.h> 92 #include <linux/sched/debug.h> 93 #include <linux/sched/stat.h> 94 #include <linux/flex_array.h> 95 #include <linux/posix-timers.h> 96 #ifdef CONFIG_HARDWALL 97 #include <asm/hardwall.h> 98 #endif 99 #include <trace/events/oom.h> 100 #include "internal.h" 101 #include "fd.h" 102 103 /* NOTE: 104 * Implementing inode permission operations in /proc is almost 105 * certainly an error. Permission checks need to happen during 106 * each system call not at open time. The reason is that most of 107 * what we wish to check for permissions in /proc varies at runtime. 108 * 109 * The classic example of a problem is opening file descriptors 110 * in /proc for a task before it execs a suid executable. 
 */

/*
 * NOTE(review): presumably the cached link counts for the per-thread and
 * per-thread-group directories; they are only declared here, their users
 * are outside this part of the file -- confirm.
 */
static u8 nlink_tid;
static u8 nlink_tgid;

/*
 * One row of a per-process directory table: the entry's name and name
 * length, its file mode (type and permission bits), the inode and file
 * operations to attach, and an entry-specific callback stored in @op.
 */
struct pid_entry {
	const char *name;
	unsigned int len;
	umode_t mode;
	const struct inode_operations *iop;
	const struct file_operations *fop;
	union proc_op op;
};

/*
 * Generic initializer for a struct pid_entry.  NAME must be a string
 * literal (or array), because .len is derived with sizeof(NAME) - 1.
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {			\
	.name = (NAME),					\
	.len  = sizeof(NAME) - 1,			\
	.mode = MODE,					\
	.iop  = IOP,					\
	.fop  = FOP,					\
	.op   = OP,					\
}

/* Directory entry: S_IFDIR plus MODE, with its own inode and file ops. */
#define DIR(NAME, MODE, iops, fops)	\
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
/* Symlink entry: resolved through the .proc_get_link callback. */
#define LNK(NAME, get_link)					\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
		&proc_pid_link_inode_operations, NULL,		\
		{ .proc_get_link = get_link } )
/* Regular file entry with caller-supplied file operations. */
#define REG(NAME, MODE, fops)				\
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
/*
 * Regular file entry served by proc_single_file_operations, with its
 * content produced by the .proc_show callback.
 */
#define ONE(NAME, MODE, show)				\
	NOD(NAME, (S_IFREG|(MODE)), 			\
		NULL, &proc_single_file_operations,	\
		{ .proc_show = show } )

/*
 * Count the number of hardlinks for the pid_entry table: one per
 * directory entry in the table, plus two for the . and .. links.
150 */ 151 static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, 152 unsigned int n) 153 { 154 unsigned int i; 155 unsigned int count; 156 157 count = 2; 158 for (i = 0; i < n; ++i) { 159 if (S_ISDIR(entries[i].mode)) 160 ++count; 161 } 162 163 return count; 164 } 165 166 static int get_task_root(struct task_struct *task, struct path *root) 167 { 168 int result = -ENOENT; 169 170 task_lock(task); 171 if (task->fs) { 172 get_fs_root(task->fs, root); 173 result = 0; 174 } 175 task_unlock(task); 176 return result; 177 } 178 179 static int proc_cwd_link(struct dentry *dentry, struct path *path) 180 { 181 struct task_struct *task = get_proc_task(d_inode(dentry)); 182 int result = -ENOENT; 183 184 if (task) { 185 task_lock(task); 186 if (task->fs) { 187 get_fs_pwd(task->fs, path); 188 result = 0; 189 } 190 task_unlock(task); 191 put_task_struct(task); 192 } 193 return result; 194 } 195 196 static int proc_root_link(struct dentry *dentry, struct path *path) 197 { 198 struct task_struct *task = get_proc_task(d_inode(dentry)); 199 int result = -ENOENT; 200 201 if (task) { 202 result = get_task_root(task, path); 203 put_task_struct(task); 204 } 205 return result; 206 } 207 208 static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, 209 size_t _count, loff_t *pos) 210 { 211 struct task_struct *tsk; 212 struct mm_struct *mm; 213 char *page; 214 unsigned long count = _count; 215 unsigned long arg_start, arg_end, env_start, env_end; 216 unsigned long len1, len2, len; 217 unsigned long p; 218 char c; 219 ssize_t rv; 220 221 BUG_ON(*pos < 0); 222 223 tsk = get_proc_task(file_inode(file)); 224 if (!tsk) 225 return -ESRCH; 226 mm = get_task_mm(tsk); 227 put_task_struct(tsk); 228 if (!mm) 229 return 0; 230 /* Check if process spawned far enough to have cmdline. 
*/ 231 if (!mm->env_end) { 232 rv = 0; 233 goto out_mmput; 234 } 235 236 page = (char *)__get_free_page(GFP_KERNEL); 237 if (!page) { 238 rv = -ENOMEM; 239 goto out_mmput; 240 } 241 242 down_read(&mm->mmap_sem); 243 arg_start = mm->arg_start; 244 arg_end = mm->arg_end; 245 env_start = mm->env_start; 246 env_end = mm->env_end; 247 up_read(&mm->mmap_sem); 248 249 BUG_ON(arg_start > arg_end); 250 BUG_ON(env_start > env_end); 251 252 len1 = arg_end - arg_start; 253 len2 = env_end - env_start; 254 255 /* Empty ARGV. */ 256 if (len1 == 0) { 257 rv = 0; 258 goto out_free_page; 259 } 260 /* 261 * Inherently racy -- command line shares address space 262 * with code and data. 263 */ 264 rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); 265 if (rv <= 0) 266 goto out_free_page; 267 268 rv = 0; 269 270 if (c == '\0') { 271 /* Command line (set of strings) occupies whole ARGV. */ 272 if (len1 <= *pos) 273 goto out_free_page; 274 275 p = arg_start + *pos; 276 len = len1 - *pos; 277 while (count > 0 && len > 0) { 278 unsigned int _count; 279 int nr_read; 280 281 _count = min3(count, len, PAGE_SIZE); 282 nr_read = access_remote_vm(mm, p, page, _count, 0); 283 if (nr_read < 0) 284 rv = nr_read; 285 if (nr_read <= 0) 286 goto out_free_page; 287 288 if (copy_to_user(buf, page, nr_read)) { 289 rv = -EFAULT; 290 goto out_free_page; 291 } 292 293 p += nr_read; 294 len -= nr_read; 295 buf += nr_read; 296 count -= nr_read; 297 rv += nr_read; 298 } 299 } else { 300 /* 301 * Command line (1 string) occupies ARGV and 302 * extends into ENVP. 
303 */ 304 struct { 305 unsigned long p; 306 unsigned long len; 307 } cmdline[2] = { 308 { .p = arg_start, .len = len1 }, 309 { .p = env_start, .len = len2 }, 310 }; 311 loff_t pos1 = *pos; 312 unsigned int i; 313 314 i = 0; 315 while (i < 2 && pos1 >= cmdline[i].len) { 316 pos1 -= cmdline[i].len; 317 i++; 318 } 319 while (i < 2) { 320 p = cmdline[i].p + pos1; 321 len = cmdline[i].len - pos1; 322 while (count > 0 && len > 0) { 323 unsigned int _count, l; 324 int nr_read; 325 bool final; 326 327 _count = min3(count, len, PAGE_SIZE); 328 nr_read = access_remote_vm(mm, p, page, _count, 0); 329 if (nr_read < 0) 330 rv = nr_read; 331 if (nr_read <= 0) 332 goto out_free_page; 333 334 /* 335 * Command line can be shorter than whole ARGV 336 * even if last "marker" byte says it is not. 337 */ 338 final = false; 339 l = strnlen(page, nr_read); 340 if (l < nr_read) { 341 nr_read = l; 342 final = true; 343 } 344 345 if (copy_to_user(buf, page, nr_read)) { 346 rv = -EFAULT; 347 goto out_free_page; 348 } 349 350 p += nr_read; 351 len -= nr_read; 352 buf += nr_read; 353 count -= nr_read; 354 rv += nr_read; 355 356 if (final) 357 goto out_free_page; 358 } 359 360 /* Only first chunk can be read partially. */ 361 pos1 = 0; 362 i++; 363 } 364 } 365 366 out_free_page: 367 free_page((unsigned long)page); 368 out_mmput: 369 mmput(mm); 370 if (rv > 0) 371 *pos += rv; 372 return rv; 373 } 374 375 static const struct file_operations proc_pid_cmdline_ops = { 376 .read = proc_pid_cmdline_read, 377 .llseek = generic_file_llseek, 378 }; 379 380 #ifdef CONFIG_KALLSYMS 381 /* 382 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 383 * Returns the resolved symbol. If that fails, simply return the address. 
384 */ 385 static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, 386 struct pid *pid, struct task_struct *task) 387 { 388 unsigned long wchan; 389 char symname[KSYM_NAME_LEN]; 390 391 wchan = get_wchan(task); 392 393 if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) 394 && !lookup_symbol_name(wchan, symname)) 395 seq_printf(m, "%s", symname); 396 else 397 seq_putc(m, '0'); 398 399 return 0; 400 } 401 #endif /* CONFIG_KALLSYMS */ 402 403 static int lock_trace(struct task_struct *task) 404 { 405 int err = mutex_lock_killable(&task->signal->cred_guard_mutex); 406 if (err) 407 return err; 408 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { 409 mutex_unlock(&task->signal->cred_guard_mutex); 410 return -EPERM; 411 } 412 return 0; 413 } 414 415 static void unlock_trace(struct task_struct *task) 416 { 417 mutex_unlock(&task->signal->cred_guard_mutex); 418 } 419 420 #ifdef CONFIG_STACKTRACE 421 422 #define MAX_STACK_TRACE_DEPTH 64 423 424 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, 425 struct pid *pid, struct task_struct *task) 426 { 427 struct stack_trace trace; 428 unsigned long *entries; 429 int err; 430 int i; 431 432 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 433 if (!entries) 434 return -ENOMEM; 435 436 trace.nr_entries = 0; 437 trace.max_entries = MAX_STACK_TRACE_DEPTH; 438 trace.entries = entries; 439 trace.skip = 0; 440 441 err = lock_trace(task); 442 if (!err) { 443 save_stack_trace_tsk(task, &trace); 444 445 for (i = 0; i < trace.nr_entries; i++) { 446 seq_printf(m, "[<%pK>] %pB\n", 447 (void *)entries[i], (void *)entries[i]); 448 } 449 unlock_trace(task); 450 } 451 kfree(entries); 452 453 return err; 454 } 455 #endif 456 457 #ifdef CONFIG_SCHED_INFO 458 /* 459 * Provides /proc/PID/schedstat 460 */ 461 static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, 462 struct pid *pid, struct task_struct *task) 463 { 464 if (unlikely(!sched_info_on())) 
465 seq_printf(m, "0 0 0\n"); 466 else 467 seq_printf(m, "%llu %llu %lu\n", 468 (unsigned long long)task->se.sum_exec_runtime, 469 (unsigned long long)task->sched_info.run_delay, 470 task->sched_info.pcount); 471 472 return 0; 473 } 474 #endif 475 476 #ifdef CONFIG_LATENCYTOP 477 static int lstats_show_proc(struct seq_file *m, void *v) 478 { 479 int i; 480 struct inode *inode = m->private; 481 struct task_struct *task = get_proc_task(inode); 482 483 if (!task) 484 return -ESRCH; 485 seq_puts(m, "Latency Top version : v0.1\n"); 486 for (i = 0; i < 32; i++) { 487 struct latency_record *lr = &task->latency_record[i]; 488 if (lr->backtrace[0]) { 489 int q; 490 seq_printf(m, "%i %li %li", 491 lr->count, lr->time, lr->max); 492 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 493 unsigned long bt = lr->backtrace[q]; 494 if (!bt) 495 break; 496 if (bt == ULONG_MAX) 497 break; 498 seq_printf(m, " %ps", (void *)bt); 499 } 500 seq_putc(m, '\n'); 501 } 502 503 } 504 put_task_struct(task); 505 return 0; 506 } 507 508 static int lstats_open(struct inode *inode, struct file *file) 509 { 510 return single_open(file, lstats_show_proc, inode); 511 } 512 513 static ssize_t lstats_write(struct file *file, const char __user *buf, 514 size_t count, loff_t *offs) 515 { 516 struct task_struct *task = get_proc_task(file_inode(file)); 517 518 if (!task) 519 return -ESRCH; 520 clear_all_latency_tracing(task); 521 put_task_struct(task); 522 523 return count; 524 } 525 526 static const struct file_operations proc_lstats_operations = { 527 .open = lstats_open, 528 .read = seq_read, 529 .write = lstats_write, 530 .llseek = seq_lseek, 531 .release = single_release, 532 }; 533 534 #endif 535 536 static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, 537 struct pid *pid, struct task_struct *task) 538 { 539 unsigned long totalpages = totalram_pages + total_swap_pages; 540 unsigned long points = 0; 541 542 points = oom_badness(task, NULL, NULL, totalpages) * 543 1000 / totalpages; 544 
seq_printf(m, "%lu\n", points); 545 546 return 0; 547 } 548 549 struct limit_names { 550 const char *name; 551 const char *unit; 552 }; 553 554 static const struct limit_names lnames[RLIM_NLIMITS] = { 555 [RLIMIT_CPU] = {"Max cpu time", "seconds"}, 556 [RLIMIT_FSIZE] = {"Max file size", "bytes"}, 557 [RLIMIT_DATA] = {"Max data size", "bytes"}, 558 [RLIMIT_STACK] = {"Max stack size", "bytes"}, 559 [RLIMIT_CORE] = {"Max core file size", "bytes"}, 560 [RLIMIT_RSS] = {"Max resident set", "bytes"}, 561 [RLIMIT_NPROC] = {"Max processes", "processes"}, 562 [RLIMIT_NOFILE] = {"Max open files", "files"}, 563 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, 564 [RLIMIT_AS] = {"Max address space", "bytes"}, 565 [RLIMIT_LOCKS] = {"Max file locks", "locks"}, 566 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, 567 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, 568 [RLIMIT_NICE] = {"Max nice priority", NULL}, 569 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, 570 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, 571 }; 572 573 /* Display limits for a process */ 574 static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, 575 struct pid *pid, struct task_struct *task) 576 { 577 unsigned int i; 578 unsigned long flags; 579 580 struct rlimit rlim[RLIM_NLIMITS]; 581 582 if (!lock_task_sighand(task, &flags)) 583 return 0; 584 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); 585 unlock_task_sighand(task, &flags); 586 587 /* 588 * print the file header 589 */ 590 seq_printf(m, "%-25s %-20s %-20s %-10s\n", 591 "Limit", "Soft Limit", "Hard Limit", "Units"); 592 593 for (i = 0; i < RLIM_NLIMITS; i++) { 594 if (rlim[i].rlim_cur == RLIM_INFINITY) 595 seq_printf(m, "%-25s %-20s ", 596 lnames[i].name, "unlimited"); 597 else 598 seq_printf(m, "%-25s %-20lu ", 599 lnames[i].name, rlim[i].rlim_cur); 600 601 if (rlim[i].rlim_max == RLIM_INFINITY) 602 seq_printf(m, "%-20s ", "unlimited"); 603 else 604 seq_printf(m, "%-20lu ", 
rlim[i].rlim_max); 605 606 if (lnames[i].unit) 607 seq_printf(m, "%-10s\n", lnames[i].unit); 608 else 609 seq_putc(m, '\n'); 610 } 611 612 return 0; 613 } 614 615 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 616 static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, 617 struct pid *pid, struct task_struct *task) 618 { 619 long nr; 620 unsigned long args[6], sp, pc; 621 int res; 622 623 res = lock_trace(task); 624 if (res) 625 return res; 626 627 if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) 628 seq_puts(m, "running\n"); 629 else if (nr < 0) 630 seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc); 631 else 632 seq_printf(m, 633 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 634 nr, 635 args[0], args[1], args[2], args[3], args[4], args[5], 636 sp, pc); 637 unlock_trace(task); 638 639 return 0; 640 } 641 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 642 643 /************************************************************************/ 644 /* Here the fs part begins */ 645 /************************************************************************/ 646 647 /* permission checks */ 648 static int proc_fd_access_allowed(struct inode *inode) 649 { 650 struct task_struct *task; 651 int allowed = 0; 652 /* Allow access to a task's file descriptors if it is us or we 653 * may use ptrace attach to the process and find out that 654 * information. 
655 */ 656 task = get_proc_task(inode); 657 if (task) { 658 allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); 659 put_task_struct(task); 660 } 661 return allowed; 662 } 663 664 int proc_setattr(struct dentry *dentry, struct iattr *attr) 665 { 666 int error; 667 struct inode *inode = d_inode(dentry); 668 669 if (attr->ia_valid & ATTR_MODE) 670 return -EPERM; 671 672 error = setattr_prepare(dentry, attr); 673 if (error) 674 return error; 675 676 setattr_copy(inode, attr); 677 mark_inode_dirty(inode); 678 return 0; 679 } 680 681 /* 682 * May current process learn task's sched/cmdline info (for hide_pid_min=1) 683 * or euid/egid (for hide_pid_min=2)? 684 */ 685 static bool has_pid_permissions(struct pid_namespace *pid, 686 struct task_struct *task, 687 int hide_pid_min) 688 { 689 if (pid->hide_pid < hide_pid_min) 690 return true; 691 if (in_group_p(pid->pid_gid)) 692 return true; 693 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); 694 } 695 696 697 static int proc_pid_permission(struct inode *inode, int mask) 698 { 699 struct pid_namespace *pid = inode->i_sb->s_fs_info; 700 struct task_struct *task; 701 bool has_perms; 702 703 task = get_proc_task(inode); 704 if (!task) 705 return -ESRCH; 706 has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS); 707 put_task_struct(task); 708 709 if (!has_perms) { 710 if (pid->hide_pid == HIDEPID_INVISIBLE) { 711 /* 712 * Let's make getdents(), stat(), and open() 713 * consistent with each other. If a process 714 * may not stat() a file, it shouldn't be seen 715 * in procfs at all. 
716 */ 717 return -ENOENT; 718 } 719 720 return -EPERM; 721 } 722 return generic_permission(inode, mask); 723 } 724 725 726 727 static const struct inode_operations proc_def_inode_operations = { 728 .setattr = proc_setattr, 729 }; 730 731 static int proc_single_show(struct seq_file *m, void *v) 732 { 733 struct inode *inode = m->private; 734 struct pid_namespace *ns; 735 struct pid *pid; 736 struct task_struct *task; 737 int ret; 738 739 ns = inode->i_sb->s_fs_info; 740 pid = proc_pid(inode); 741 task = get_pid_task(pid, PIDTYPE_PID); 742 if (!task) 743 return -ESRCH; 744 745 ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); 746 747 put_task_struct(task); 748 return ret; 749 } 750 751 static int proc_single_open(struct inode *inode, struct file *filp) 752 { 753 return single_open(filp, proc_single_show, inode); 754 } 755 756 static const struct file_operations proc_single_file_operations = { 757 .open = proc_single_open, 758 .read = seq_read, 759 .llseek = seq_lseek, 760 .release = single_release, 761 }; 762 763 764 struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) 765 { 766 struct task_struct *task = get_proc_task(inode); 767 struct mm_struct *mm = ERR_PTR(-ESRCH); 768 769 if (task) { 770 mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); 771 put_task_struct(task); 772 773 if (!IS_ERR_OR_NULL(mm)) { 774 /* ensure this mm_struct can't be freed */ 775 mmgrab(mm); 776 /* but do not pin its memory */ 777 mmput(mm); 778 } 779 } 780 781 return mm; 782 } 783 784 static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) 785 { 786 struct mm_struct *mm = proc_mem_open(inode, mode); 787 788 if (IS_ERR(mm)) 789 return PTR_ERR(mm); 790 791 file->private_data = mm; 792 return 0; 793 } 794 795 static int mem_open(struct inode *inode, struct file *file) 796 { 797 int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); 798 799 /* OK to pass negative loff_t, we can catch out-of-range */ 800 file->f_mode |= FMODE_UNSIGNED_OFFSET; 801 802 
return ret; 803 } 804 805 static ssize_t mem_rw(struct file *file, char __user *buf, 806 size_t count, loff_t *ppos, int write) 807 { 808 struct mm_struct *mm = file->private_data; 809 unsigned long addr = *ppos; 810 ssize_t copied; 811 char *page; 812 unsigned int flags; 813 814 if (!mm) 815 return 0; 816 817 page = (char *)__get_free_page(GFP_KERNEL); 818 if (!page) 819 return -ENOMEM; 820 821 copied = 0; 822 if (!mmget_not_zero(mm)) 823 goto free; 824 825 flags = FOLL_FORCE | (write ? FOLL_WRITE : 0); 826 827 while (count > 0) { 828 int this_len = min_t(int, count, PAGE_SIZE); 829 830 if (write && copy_from_user(page, buf, this_len)) { 831 copied = -EFAULT; 832 break; 833 } 834 835 this_len = access_remote_vm(mm, addr, page, this_len, flags); 836 if (!this_len) { 837 if (!copied) 838 copied = -EIO; 839 break; 840 } 841 842 if (!write && copy_to_user(buf, page, this_len)) { 843 copied = -EFAULT; 844 break; 845 } 846 847 buf += this_len; 848 addr += this_len; 849 copied += this_len; 850 count -= this_len; 851 } 852 *ppos = addr; 853 854 mmput(mm); 855 free: 856 free_page((unsigned long) page); 857 return copied; 858 } 859 860 static ssize_t mem_read(struct file *file, char __user *buf, 861 size_t count, loff_t *ppos) 862 { 863 return mem_rw(file, buf, count, ppos, 0); 864 } 865 866 static ssize_t mem_write(struct file *file, const char __user *buf, 867 size_t count, loff_t *ppos) 868 { 869 return mem_rw(file, (char __user*)buf, count, ppos, 1); 870 } 871 872 loff_t mem_lseek(struct file *file, loff_t offset, int orig) 873 { 874 switch (orig) { 875 case 0: 876 file->f_pos = offset; 877 break; 878 case 1: 879 file->f_pos += offset; 880 break; 881 default: 882 return -EINVAL; 883 } 884 force_successful_syscall_return(); 885 return file->f_pos; 886 } 887 888 static int mem_release(struct inode *inode, struct file *file) 889 { 890 struct mm_struct *mm = file->private_data; 891 if (mm) 892 mmdrop(mm); 893 return 0; 894 } 895 896 static const struct file_operations 
proc_mem_operations = { 897 .llseek = mem_lseek, 898 .read = mem_read, 899 .write = mem_write, 900 .open = mem_open, 901 .release = mem_release, 902 }; 903 904 static int environ_open(struct inode *inode, struct file *file) 905 { 906 return __mem_open(inode, file, PTRACE_MODE_READ); 907 } 908 909 static ssize_t environ_read(struct file *file, char __user *buf, 910 size_t count, loff_t *ppos) 911 { 912 char *page; 913 unsigned long src = *ppos; 914 int ret = 0; 915 struct mm_struct *mm = file->private_data; 916 unsigned long env_start, env_end; 917 918 /* Ensure the process spawned far enough to have an environment. */ 919 if (!mm || !mm->env_end) 920 return 0; 921 922 page = (char *)__get_free_page(GFP_KERNEL); 923 if (!page) 924 return -ENOMEM; 925 926 ret = 0; 927 if (!mmget_not_zero(mm)) 928 goto free; 929 930 down_read(&mm->mmap_sem); 931 env_start = mm->env_start; 932 env_end = mm->env_end; 933 up_read(&mm->mmap_sem); 934 935 while (count > 0) { 936 size_t this_len, max_len; 937 int retval; 938 939 if (src >= (env_end - env_start)) 940 break; 941 942 this_len = env_end - (env_start + src); 943 944 max_len = min_t(size_t, PAGE_SIZE, count); 945 this_len = min(max_len, this_len); 946 947 retval = access_remote_vm(mm, (env_start + src), page, this_len, 0); 948 949 if (retval <= 0) { 950 ret = retval; 951 break; 952 } 953 954 if (copy_to_user(buf, page, retval)) { 955 ret = -EFAULT; 956 break; 957 } 958 959 ret += retval; 960 src += retval; 961 buf += retval; 962 count -= retval; 963 } 964 *ppos = src; 965 mmput(mm); 966 967 free: 968 free_page((unsigned long) page); 969 return ret; 970 } 971 972 static const struct file_operations proc_environ_operations = { 973 .open = environ_open, 974 .read = environ_read, 975 .llseek = generic_file_llseek, 976 .release = mem_release, 977 }; 978 979 static int auxv_open(struct inode *inode, struct file *file) 980 { 981 return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); 982 } 983 984 static ssize_t auxv_read(struct file 
*file, char __user *buf, 985 size_t count, loff_t *ppos) 986 { 987 struct mm_struct *mm = file->private_data; 988 unsigned int nwords = 0; 989 990 if (!mm) 991 return 0; 992 do { 993 nwords += 2; 994 } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 995 return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv, 996 nwords * sizeof(mm->saved_auxv[0])); 997 } 998 999 static const struct file_operations proc_auxv_operations = { 1000 .open = auxv_open, 1001 .read = auxv_read, 1002 .llseek = generic_file_llseek, 1003 .release = mem_release, 1004 }; 1005 1006 static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, 1007 loff_t *ppos) 1008 { 1009 struct task_struct *task = get_proc_task(file_inode(file)); 1010 char buffer[PROC_NUMBUF]; 1011 int oom_adj = OOM_ADJUST_MIN; 1012 size_t len; 1013 1014 if (!task) 1015 return -ESRCH; 1016 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) 1017 oom_adj = OOM_ADJUST_MAX; 1018 else 1019 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / 1020 OOM_SCORE_ADJ_MAX; 1021 put_task_struct(task); 1022 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); 1023 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1024 } 1025 1026 static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) 1027 { 1028 static DEFINE_MUTEX(oom_adj_mutex); 1029 struct mm_struct *mm = NULL; 1030 struct task_struct *task; 1031 int err = 0; 1032 1033 task = get_proc_task(file_inode(file)); 1034 if (!task) 1035 return -ESRCH; 1036 1037 mutex_lock(&oom_adj_mutex); 1038 if (legacy) { 1039 if (oom_adj < task->signal->oom_score_adj && 1040 !capable(CAP_SYS_RESOURCE)) { 1041 err = -EACCES; 1042 goto err_unlock; 1043 } 1044 /* 1045 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use 1046 * /proc/pid/oom_score_adj instead. 
1047 */ 1048 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 1049 current->comm, task_pid_nr(current), task_pid_nr(task), 1050 task_pid_nr(task)); 1051 } else { 1052 if ((short)oom_adj < task->signal->oom_score_adj_min && 1053 !capable(CAP_SYS_RESOURCE)) { 1054 err = -EACCES; 1055 goto err_unlock; 1056 } 1057 } 1058 1059 /* 1060 * Make sure we will check other processes sharing the mm if this is 1061 * not vfrok which wants its own oom_score_adj. 1062 * pin the mm so it doesn't go away and get reused after task_unlock 1063 */ 1064 if (!task->vfork_done) { 1065 struct task_struct *p = find_lock_task_mm(task); 1066 1067 if (p) { 1068 if (atomic_read(&p->mm->mm_users) > 1) { 1069 mm = p->mm; 1070 mmgrab(mm); 1071 } 1072 task_unlock(p); 1073 } 1074 } 1075 1076 task->signal->oom_score_adj = oom_adj; 1077 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1078 task->signal->oom_score_adj_min = (short)oom_adj; 1079 trace_oom_score_adj_update(task); 1080 1081 if (mm) { 1082 struct task_struct *p; 1083 1084 rcu_read_lock(); 1085 for_each_process(p) { 1086 if (same_thread_group(task, p)) 1087 continue; 1088 1089 /* do not touch kernel threads or the global init */ 1090 if (p->flags & PF_KTHREAD || is_global_init(p)) 1091 continue; 1092 1093 task_lock(p); 1094 if (!p->vfork_done && process_shares_mm(p, mm)) { 1095 pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). 
Report if this is unexpected.\n", 1096 task_pid_nr(p), p->comm, 1097 p->signal->oom_score_adj, oom_adj, 1098 task_pid_nr(task), task->comm); 1099 p->signal->oom_score_adj = oom_adj; 1100 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1101 p->signal->oom_score_adj_min = (short)oom_adj; 1102 } 1103 task_unlock(p); 1104 } 1105 rcu_read_unlock(); 1106 mmdrop(mm); 1107 } 1108 err_unlock: 1109 mutex_unlock(&oom_adj_mutex); 1110 put_task_struct(task); 1111 return err; 1112 } 1113 1114 /* 1115 * /proc/pid/oom_adj exists solely for backwards compatibility with previous 1116 * kernels. The effective policy is defined by oom_score_adj, which has a 1117 * different scale: oom_adj grew exponentially and oom_score_adj grows linearly. 1118 * Values written to oom_adj are simply mapped linearly to oom_score_adj. 1119 * Processes that become oom disabled via oom_adj will still be oom disabled 1120 * with this implementation. 1121 * 1122 * oom_adj cannot be removed since existing userspace binaries use it. 1123 */ 1124 static ssize_t oom_adj_write(struct file *file, const char __user *buf, 1125 size_t count, loff_t *ppos) 1126 { 1127 char buffer[PROC_NUMBUF]; 1128 int oom_adj; 1129 int err; 1130 1131 memset(buffer, 0, sizeof(buffer)); 1132 if (count > sizeof(buffer) - 1) 1133 count = sizeof(buffer) - 1; 1134 if (copy_from_user(buffer, buf, count)) { 1135 err = -EFAULT; 1136 goto out; 1137 } 1138 1139 err = kstrtoint(strstrip(buffer), 0, &oom_adj); 1140 if (err) 1141 goto out; 1142 if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && 1143 oom_adj != OOM_DISABLE) { 1144 err = -EINVAL; 1145 goto out; 1146 } 1147 1148 /* 1149 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum 1150 * value is always attainable. 1151 */ 1152 if (oom_adj == OOM_ADJUST_MAX) 1153 oom_adj = OOM_SCORE_ADJ_MAX; 1154 else 1155 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; 1156 1157 err = __set_oom_adj(file, oom_adj, true); 1158 out: 1159 return err < 0 ? 
err : count; 1160 } 1161 1162 static const struct file_operations proc_oom_adj_operations = { 1163 .read = oom_adj_read, 1164 .write = oom_adj_write, 1165 .llseek = generic_file_llseek, 1166 }; 1167 1168 static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 1169 size_t count, loff_t *ppos) 1170 { 1171 struct task_struct *task = get_proc_task(file_inode(file)); 1172 char buffer[PROC_NUMBUF]; 1173 short oom_score_adj = OOM_SCORE_ADJ_MIN; 1174 size_t len; 1175 1176 if (!task) 1177 return -ESRCH; 1178 oom_score_adj = task->signal->oom_score_adj; 1179 put_task_struct(task); 1180 len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); 1181 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1182 } 1183 1184 static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, 1185 size_t count, loff_t *ppos) 1186 { 1187 char buffer[PROC_NUMBUF]; 1188 int oom_score_adj; 1189 int err; 1190 1191 memset(buffer, 0, sizeof(buffer)); 1192 if (count > sizeof(buffer) - 1) 1193 count = sizeof(buffer) - 1; 1194 if (copy_from_user(buffer, buf, count)) { 1195 err = -EFAULT; 1196 goto out; 1197 } 1198 1199 err = kstrtoint(strstrip(buffer), 0, &oom_score_adj); 1200 if (err) 1201 goto out; 1202 if (oom_score_adj < OOM_SCORE_ADJ_MIN || 1203 oom_score_adj > OOM_SCORE_ADJ_MAX) { 1204 err = -EINVAL; 1205 goto out; 1206 } 1207 1208 err = __set_oom_adj(file, oom_score_adj, false); 1209 out: 1210 return err < 0 ? 
err : count; 1211 } 1212 1213 static const struct file_operations proc_oom_score_adj_operations = { 1214 .read = oom_score_adj_read, 1215 .write = oom_score_adj_write, 1216 .llseek = default_llseek, 1217 }; 1218 1219 #ifdef CONFIG_AUDITSYSCALL 1220 #define TMPBUFLEN 11 1221 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 1222 size_t count, loff_t *ppos) 1223 { 1224 struct inode * inode = file_inode(file); 1225 struct task_struct *task = get_proc_task(inode); 1226 ssize_t length; 1227 char tmpbuf[TMPBUFLEN]; 1228 1229 if (!task) 1230 return -ESRCH; 1231 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1232 from_kuid(file->f_cred->user_ns, 1233 audit_get_loginuid(task))); 1234 put_task_struct(task); 1235 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1236 } 1237 1238 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 1239 size_t count, loff_t *ppos) 1240 { 1241 struct inode * inode = file_inode(file); 1242 uid_t loginuid; 1243 kuid_t kloginuid; 1244 int rv; 1245 1246 rcu_read_lock(); 1247 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { 1248 rcu_read_unlock(); 1249 return -EPERM; 1250 } 1251 rcu_read_unlock(); 1252 1253 if (*ppos != 0) { 1254 /* No partial writes. */ 1255 return -EINVAL; 1256 } 1257 1258 rv = kstrtou32_from_user(buf, count, 10, &loginuid); 1259 if (rv < 0) 1260 return rv; 1261 1262 /* is userspace tring to explicitly UNSET the loginuid? 
*/ 1263 if (loginuid == AUDIT_UID_UNSET) { 1264 kloginuid = INVALID_UID; 1265 } else { 1266 kloginuid = make_kuid(file->f_cred->user_ns, loginuid); 1267 if (!uid_valid(kloginuid)) 1268 return -EINVAL; 1269 } 1270 1271 rv = audit_set_loginuid(kloginuid); 1272 if (rv < 0) 1273 return rv; 1274 return count; 1275 } 1276 1277 static const struct file_operations proc_loginuid_operations = { 1278 .read = proc_loginuid_read, 1279 .write = proc_loginuid_write, 1280 .llseek = generic_file_llseek, 1281 }; 1282 1283 static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1284 size_t count, loff_t *ppos) 1285 { 1286 struct inode * inode = file_inode(file); 1287 struct task_struct *task = get_proc_task(inode); 1288 ssize_t length; 1289 char tmpbuf[TMPBUFLEN]; 1290 1291 if (!task) 1292 return -ESRCH; 1293 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1294 audit_get_sessionid(task)); 1295 put_task_struct(task); 1296 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1297 } 1298 1299 static const struct file_operations proc_sessionid_operations = { 1300 .read = proc_sessionid_read, 1301 .llseek = generic_file_llseek, 1302 }; 1303 #endif 1304 1305 #ifdef CONFIG_FAULT_INJECTION 1306 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, 1307 size_t count, loff_t *ppos) 1308 { 1309 struct task_struct *task = get_proc_task(file_inode(file)); 1310 char buffer[PROC_NUMBUF]; 1311 size_t len; 1312 int make_it_fail; 1313 1314 if (!task) 1315 return -ESRCH; 1316 make_it_fail = task->make_it_fail; 1317 put_task_struct(task); 1318 1319 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); 1320 1321 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1322 } 1323 1324 static ssize_t proc_fault_inject_write(struct file * file, 1325 const char __user * buf, size_t count, loff_t *ppos) 1326 { 1327 struct task_struct *task; 1328 char buffer[PROC_NUMBUF]; 1329 int make_it_fail; 1330 int rv; 1331 1332 if 
(!capable(CAP_SYS_RESOURCE)) 1333 return -EPERM; 1334 memset(buffer, 0, sizeof(buffer)); 1335 if (count > sizeof(buffer) - 1) 1336 count = sizeof(buffer) - 1; 1337 if (copy_from_user(buffer, buf, count)) 1338 return -EFAULT; 1339 rv = kstrtoint(strstrip(buffer), 0, &make_it_fail); 1340 if (rv < 0) 1341 return rv; 1342 if (make_it_fail < 0 || make_it_fail > 1) 1343 return -EINVAL; 1344 1345 task = get_proc_task(file_inode(file)); 1346 if (!task) 1347 return -ESRCH; 1348 task->make_it_fail = make_it_fail; 1349 put_task_struct(task); 1350 1351 return count; 1352 } 1353 1354 static const struct file_operations proc_fault_inject_operations = { 1355 .read = proc_fault_inject_read, 1356 .write = proc_fault_inject_write, 1357 .llseek = generic_file_llseek, 1358 }; 1359 1360 static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf, 1361 size_t count, loff_t *ppos) 1362 { 1363 struct task_struct *task; 1364 int err; 1365 unsigned int n; 1366 1367 err = kstrtouint_from_user(buf, count, 0, &n); 1368 if (err) 1369 return err; 1370 1371 task = get_proc_task(file_inode(file)); 1372 if (!task) 1373 return -ESRCH; 1374 WRITE_ONCE(task->fail_nth, n); 1375 put_task_struct(task); 1376 1377 return count; 1378 } 1379 1380 static ssize_t proc_fail_nth_read(struct file *file, char __user *buf, 1381 size_t count, loff_t *ppos) 1382 { 1383 struct task_struct *task; 1384 char numbuf[PROC_NUMBUF]; 1385 ssize_t len; 1386 1387 task = get_proc_task(file_inode(file)); 1388 if (!task) 1389 return -ESRCH; 1390 len = snprintf(numbuf, sizeof(numbuf), "%u\n", 1391 READ_ONCE(task->fail_nth)); 1392 len = simple_read_from_buffer(buf, count, ppos, numbuf, len); 1393 put_task_struct(task); 1394 1395 return len; 1396 } 1397 1398 static const struct file_operations proc_fail_nth_operations = { 1399 .read = proc_fail_nth_read, 1400 .write = proc_fail_nth_write, 1401 }; 1402 #endif 1403 1404 1405 #ifdef CONFIG_SCHED_DEBUG 1406 /* 1407 * Print out various scheduling related per-task fields: 
1408 */ 1409 static int sched_show(struct seq_file *m, void *v) 1410 { 1411 struct inode *inode = m->private; 1412 struct pid_namespace *ns = inode->i_sb->s_fs_info; 1413 struct task_struct *p; 1414 1415 p = get_proc_task(inode); 1416 if (!p) 1417 return -ESRCH; 1418 proc_sched_show_task(p, ns, m); 1419 1420 put_task_struct(p); 1421 1422 return 0; 1423 } 1424 1425 static ssize_t 1426 sched_write(struct file *file, const char __user *buf, 1427 size_t count, loff_t *offset) 1428 { 1429 struct inode *inode = file_inode(file); 1430 struct task_struct *p; 1431 1432 p = get_proc_task(inode); 1433 if (!p) 1434 return -ESRCH; 1435 proc_sched_set_task(p); 1436 1437 put_task_struct(p); 1438 1439 return count; 1440 } 1441 1442 static int sched_open(struct inode *inode, struct file *filp) 1443 { 1444 return single_open(filp, sched_show, inode); 1445 } 1446 1447 static const struct file_operations proc_pid_sched_operations = { 1448 .open = sched_open, 1449 .read = seq_read, 1450 .write = sched_write, 1451 .llseek = seq_lseek, 1452 .release = single_release, 1453 }; 1454 1455 #endif 1456 1457 #ifdef CONFIG_SCHED_AUTOGROUP 1458 /* 1459 * Print out autogroup related information: 1460 */ 1461 static int sched_autogroup_show(struct seq_file *m, void *v) 1462 { 1463 struct inode *inode = m->private; 1464 struct task_struct *p; 1465 1466 p = get_proc_task(inode); 1467 if (!p) 1468 return -ESRCH; 1469 proc_sched_autogroup_show_task(p, m); 1470 1471 put_task_struct(p); 1472 1473 return 0; 1474 } 1475 1476 static ssize_t 1477 sched_autogroup_write(struct file *file, const char __user *buf, 1478 size_t count, loff_t *offset) 1479 { 1480 struct inode *inode = file_inode(file); 1481 struct task_struct *p; 1482 char buffer[PROC_NUMBUF]; 1483 int nice; 1484 int err; 1485 1486 memset(buffer, 0, sizeof(buffer)); 1487 if (count > sizeof(buffer) - 1) 1488 count = sizeof(buffer) - 1; 1489 if (copy_from_user(buffer, buf, count)) 1490 return -EFAULT; 1491 1492 err = kstrtoint(strstrip(buffer), 0, 
&nice); 1493 if (err < 0) 1494 return err; 1495 1496 p = get_proc_task(inode); 1497 if (!p) 1498 return -ESRCH; 1499 1500 err = proc_sched_autogroup_set_nice(p, nice); 1501 if (err) 1502 count = err; 1503 1504 put_task_struct(p); 1505 1506 return count; 1507 } 1508 1509 static int sched_autogroup_open(struct inode *inode, struct file *filp) 1510 { 1511 int ret; 1512 1513 ret = single_open(filp, sched_autogroup_show, NULL); 1514 if (!ret) { 1515 struct seq_file *m = filp->private_data; 1516 1517 m->private = inode; 1518 } 1519 return ret; 1520 } 1521 1522 static const struct file_operations proc_pid_sched_autogroup_operations = { 1523 .open = sched_autogroup_open, 1524 .read = seq_read, 1525 .write = sched_autogroup_write, 1526 .llseek = seq_lseek, 1527 .release = single_release, 1528 }; 1529 1530 #endif /* CONFIG_SCHED_AUTOGROUP */ 1531 1532 static ssize_t comm_write(struct file *file, const char __user *buf, 1533 size_t count, loff_t *offset) 1534 { 1535 struct inode *inode = file_inode(file); 1536 struct task_struct *p; 1537 char buffer[TASK_COMM_LEN]; 1538 const size_t maxlen = sizeof(buffer) - 1; 1539 1540 memset(buffer, 0, sizeof(buffer)); 1541 if (copy_from_user(buffer, buf, count > maxlen ? 
maxlen : count)) 1542 return -EFAULT; 1543 1544 p = get_proc_task(inode); 1545 if (!p) 1546 return -ESRCH; 1547 1548 if (same_thread_group(current, p)) 1549 set_task_comm(p, buffer); 1550 else 1551 count = -EINVAL; 1552 1553 put_task_struct(p); 1554 1555 return count; 1556 } 1557 1558 static int comm_show(struct seq_file *m, void *v) 1559 { 1560 struct inode *inode = m->private; 1561 struct task_struct *p; 1562 1563 p = get_proc_task(inode); 1564 if (!p) 1565 return -ESRCH; 1566 1567 task_lock(p); 1568 seq_printf(m, "%s\n", p->comm); 1569 task_unlock(p); 1570 1571 put_task_struct(p); 1572 1573 return 0; 1574 } 1575 1576 static int comm_open(struct inode *inode, struct file *filp) 1577 { 1578 return single_open(filp, comm_show, inode); 1579 } 1580 1581 static const struct file_operations proc_pid_set_comm_operations = { 1582 .open = comm_open, 1583 .read = seq_read, 1584 .write = comm_write, 1585 .llseek = seq_lseek, 1586 .release = single_release, 1587 }; 1588 1589 static int proc_exe_link(struct dentry *dentry, struct path *exe_path) 1590 { 1591 struct task_struct *task; 1592 struct file *exe_file; 1593 1594 task = get_proc_task(d_inode(dentry)); 1595 if (!task) 1596 return -ENOENT; 1597 exe_file = get_task_exe_file(task); 1598 put_task_struct(task); 1599 if (exe_file) { 1600 *exe_path = exe_file->f_path; 1601 path_get(&exe_file->f_path); 1602 fput(exe_file); 1603 return 0; 1604 } else 1605 return -ENOENT; 1606 } 1607 1608 static const char *proc_pid_get_link(struct dentry *dentry, 1609 struct inode *inode, 1610 struct delayed_call *done) 1611 { 1612 struct path path; 1613 int error = -EACCES; 1614 1615 if (!dentry) 1616 return ERR_PTR(-ECHILD); 1617 1618 /* Are we allowed to snoop on the tasks file descriptors? 
*/ 1619 if (!proc_fd_access_allowed(inode)) 1620 goto out; 1621 1622 error = PROC_I(inode)->op.proc_get_link(dentry, &path); 1623 if (error) 1624 goto out; 1625 1626 nd_jump_link(&path); 1627 return NULL; 1628 out: 1629 return ERR_PTR(error); 1630 } 1631 1632 static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) 1633 { 1634 char *tmp = (char *)__get_free_page(GFP_KERNEL); 1635 char *pathname; 1636 int len; 1637 1638 if (!tmp) 1639 return -ENOMEM; 1640 1641 pathname = d_path(path, tmp, PAGE_SIZE); 1642 len = PTR_ERR(pathname); 1643 if (IS_ERR(pathname)) 1644 goto out; 1645 len = tmp + PAGE_SIZE - 1 - pathname; 1646 1647 if (len > buflen) 1648 len = buflen; 1649 if (copy_to_user(buffer, pathname, len)) 1650 len = -EFAULT; 1651 out: 1652 free_page((unsigned long)tmp); 1653 return len; 1654 } 1655 1656 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1657 { 1658 int error = -EACCES; 1659 struct inode *inode = d_inode(dentry); 1660 struct path path; 1661 1662 /* Are we allowed to snoop on the tasks file descriptors? */ 1663 if (!proc_fd_access_allowed(inode)) 1664 goto out; 1665 1666 error = PROC_I(inode)->op.proc_get_link(dentry, &path); 1667 if (error) 1668 goto out; 1669 1670 error = do_proc_readlink(&path, buffer, buflen); 1671 path_put(&path); 1672 out: 1673 return error; 1674 } 1675 1676 const struct inode_operations proc_pid_link_inode_operations = { 1677 .readlink = proc_pid_readlink, 1678 .get_link = proc_pid_get_link, 1679 .setattr = proc_setattr, 1680 }; 1681 1682 1683 /* building an inode */ 1684 1685 void task_dump_owner(struct task_struct *task, mode_t mode, 1686 kuid_t *ruid, kgid_t *rgid) 1687 { 1688 /* Depending on the state of dumpable compute who should own a 1689 * proc file for a task. 
1690 */ 1691 const struct cred *cred; 1692 kuid_t uid; 1693 kgid_t gid; 1694 1695 /* Default to the tasks effective ownership */ 1696 rcu_read_lock(); 1697 cred = __task_cred(task); 1698 uid = cred->euid; 1699 gid = cred->egid; 1700 rcu_read_unlock(); 1701 1702 /* 1703 * Before the /proc/pid/status file was created the only way to read 1704 * the effective uid of a /process was to stat /proc/pid. Reading 1705 * /proc/pid/status is slow enough that procps and other packages 1706 * kept stating /proc/pid. To keep the rules in /proc simple I have 1707 * made this apply to all per process world readable and executable 1708 * directories. 1709 */ 1710 if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) { 1711 struct mm_struct *mm; 1712 task_lock(task); 1713 mm = task->mm; 1714 /* Make non-dumpable tasks owned by some root */ 1715 if (mm) { 1716 if (get_dumpable(mm) != SUID_DUMP_USER) { 1717 struct user_namespace *user_ns = mm->user_ns; 1718 1719 uid = make_kuid(user_ns, 0); 1720 if (!uid_valid(uid)) 1721 uid = GLOBAL_ROOT_UID; 1722 1723 gid = make_kgid(user_ns, 0); 1724 if (!gid_valid(gid)) 1725 gid = GLOBAL_ROOT_GID; 1726 } 1727 } else { 1728 uid = GLOBAL_ROOT_UID; 1729 gid = GLOBAL_ROOT_GID; 1730 } 1731 task_unlock(task); 1732 } 1733 *ruid = uid; 1734 *rgid = gid; 1735 } 1736 1737 struct inode *proc_pid_make_inode(struct super_block * sb, 1738 struct task_struct *task, umode_t mode) 1739 { 1740 struct inode * inode; 1741 struct proc_inode *ei; 1742 1743 /* We need a new inode */ 1744 1745 inode = new_inode(sb); 1746 if (!inode) 1747 goto out; 1748 1749 /* Common stuff */ 1750 ei = PROC_I(inode); 1751 inode->i_mode = mode; 1752 inode->i_ino = get_next_ino(); 1753 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); 1754 inode->i_op = &proc_def_inode_operations; 1755 1756 /* 1757 * grab the reference to task. 
1758 */ 1759 ei->pid = get_task_pid(task, PIDTYPE_PID); 1760 if (!ei->pid) 1761 goto out_unlock; 1762 1763 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); 1764 security_task_to_inode(task, inode); 1765 1766 out: 1767 return inode; 1768 1769 out_unlock: 1770 iput(inode); 1771 return NULL; 1772 } 1773 1774 int pid_getattr(const struct path *path, struct kstat *stat, 1775 u32 request_mask, unsigned int query_flags) 1776 { 1777 struct inode *inode = d_inode(path->dentry); 1778 struct task_struct *task; 1779 struct pid_namespace *pid = path->dentry->d_sb->s_fs_info; 1780 1781 generic_fillattr(inode, stat); 1782 1783 rcu_read_lock(); 1784 stat->uid = GLOBAL_ROOT_UID; 1785 stat->gid = GLOBAL_ROOT_GID; 1786 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1787 if (task) { 1788 if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { 1789 rcu_read_unlock(); 1790 /* 1791 * This doesn't prevent learning whether PID exists, 1792 * it only makes getattr() consistent with readdir(). 1793 */ 1794 return -ENOENT; 1795 } 1796 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); 1797 } 1798 rcu_read_unlock(); 1799 return 0; 1800 } 1801 1802 /* dentry stuff */ 1803 1804 /* 1805 * Exceptional case: normally we are not allowed to unhash a busy 1806 * directory. In this case, however, we can do it - no aliasing problems 1807 * due to the way we treat inodes. 1808 * 1809 * Rewrite the inode's ownerships here because the owning task may have 1810 * performed a setuid(), etc. 
1811 * 1812 */ 1813 int pid_revalidate(struct dentry *dentry, unsigned int flags) 1814 { 1815 struct inode *inode; 1816 struct task_struct *task; 1817 1818 if (flags & LOOKUP_RCU) 1819 return -ECHILD; 1820 1821 inode = d_inode(dentry); 1822 task = get_proc_task(inode); 1823 1824 if (task) { 1825 task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); 1826 1827 inode->i_mode &= ~(S_ISUID | S_ISGID); 1828 security_task_to_inode(task, inode); 1829 put_task_struct(task); 1830 return 1; 1831 } 1832 return 0; 1833 } 1834 1835 static inline bool proc_inode_is_dead(struct inode *inode) 1836 { 1837 return !proc_pid(inode)->tasks[PIDTYPE_PID].first; 1838 } 1839 1840 int pid_delete_dentry(const struct dentry *dentry) 1841 { 1842 /* Is the task we represent dead? 1843 * If so, then don't put the dentry on the lru list, 1844 * kill it immediately. 1845 */ 1846 return proc_inode_is_dead(d_inode(dentry)); 1847 } 1848 1849 const struct dentry_operations pid_dentry_operations = 1850 { 1851 .d_revalidate = pid_revalidate, 1852 .d_delete = pid_delete_dentry, 1853 }; 1854 1855 /* Lookups */ 1856 1857 /* 1858 * Fill a directory entry. 1859 * 1860 * If possible create the dcache entry and derive our inode number and 1861 * file type from dcache entry. 1862 * 1863 * Since all of the proc inode numbers are dynamically generated, the inode 1864 * numbers do not exist until the inode is cache. This means creating the 1865 * the dcache entry in readdir is necessary to keep the inode numbers 1866 * reported by readdir in sync with the inode numbers reported 1867 * by stat. 
1868 */ 1869 bool proc_fill_cache(struct file *file, struct dir_context *ctx, 1870 const char *name, int len, 1871 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1872 { 1873 struct dentry *child, *dir = file->f_path.dentry; 1874 struct qstr qname = QSTR_INIT(name, len); 1875 struct inode *inode; 1876 unsigned type; 1877 ino_t ino; 1878 1879 child = d_hash_and_lookup(dir, &qname); 1880 if (!child) { 1881 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); 1882 child = d_alloc_parallel(dir, &qname, &wq); 1883 if (IS_ERR(child)) 1884 goto end_instantiate; 1885 if (d_in_lookup(child)) { 1886 int err = instantiate(d_inode(dir), child, task, ptr); 1887 d_lookup_done(child); 1888 if (err < 0) { 1889 dput(child); 1890 goto end_instantiate; 1891 } 1892 } 1893 } 1894 inode = d_inode(child); 1895 ino = inode->i_ino; 1896 type = inode->i_mode >> 12; 1897 dput(child); 1898 return dir_emit(ctx, name, len, ino, type); 1899 1900 end_instantiate: 1901 return dir_emit(ctx, name, len, 1, DT_UNKNOWN); 1902 } 1903 1904 /* 1905 * dname_to_vma_addr - maps a dentry name into two unsigned longs 1906 * which represent vma start and end addresses. 
1907 */ 1908 static int dname_to_vma_addr(struct dentry *dentry, 1909 unsigned long *start, unsigned long *end) 1910 { 1911 if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) 1912 return -EINVAL; 1913 1914 return 0; 1915 } 1916 1917 static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) 1918 { 1919 unsigned long vm_start, vm_end; 1920 bool exact_vma_exists = false; 1921 struct mm_struct *mm = NULL; 1922 struct task_struct *task; 1923 struct inode *inode; 1924 int status = 0; 1925 1926 if (flags & LOOKUP_RCU) 1927 return -ECHILD; 1928 1929 inode = d_inode(dentry); 1930 task = get_proc_task(inode); 1931 if (!task) 1932 goto out_notask; 1933 1934 mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); 1935 if (IS_ERR_OR_NULL(mm)) 1936 goto out; 1937 1938 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { 1939 down_read(&mm->mmap_sem); 1940 exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); 1941 up_read(&mm->mmap_sem); 1942 } 1943 1944 mmput(mm); 1945 1946 if (exact_vma_exists) { 1947 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); 1948 1949 security_task_to_inode(task, inode); 1950 status = 1; 1951 } 1952 1953 out: 1954 put_task_struct(task); 1955 1956 out_notask: 1957 return status; 1958 } 1959 1960 static const struct dentry_operations tid_map_files_dentry_operations = { 1961 .d_revalidate = map_files_d_revalidate, 1962 .d_delete = pid_delete_dentry, 1963 }; 1964 1965 static int map_files_get_link(struct dentry *dentry, struct path *path) 1966 { 1967 unsigned long vm_start, vm_end; 1968 struct vm_area_struct *vma; 1969 struct task_struct *task; 1970 struct mm_struct *mm; 1971 int rc; 1972 1973 rc = -ENOENT; 1974 task = get_proc_task(d_inode(dentry)); 1975 if (!task) 1976 goto out; 1977 1978 mm = get_task_mm(task); 1979 put_task_struct(task); 1980 if (!mm) 1981 goto out; 1982 1983 rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); 1984 if (rc) 1985 goto out_mmput; 1986 1987 rc = -ENOENT; 1988 down_read(&mm->mmap_sem); 1989 
vma = find_exact_vma(mm, vm_start, vm_end); 1990 if (vma && vma->vm_file) { 1991 *path = vma->vm_file->f_path; 1992 path_get(path); 1993 rc = 0; 1994 } 1995 up_read(&mm->mmap_sem); 1996 1997 out_mmput: 1998 mmput(mm); 1999 out: 2000 return rc; 2001 } 2002 2003 struct map_files_info { 2004 fmode_t mode; 2005 unsigned int len; 2006 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ 2007 }; 2008 2009 /* 2010 * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the 2011 * symlinks may be used to bypass permissions on ancestor directories in the 2012 * path to the file in question. 2013 */ 2014 static const char * 2015 proc_map_files_get_link(struct dentry *dentry, 2016 struct inode *inode, 2017 struct delayed_call *done) 2018 { 2019 if (!capable(CAP_SYS_ADMIN)) 2020 return ERR_PTR(-EPERM); 2021 2022 return proc_pid_get_link(dentry, inode, done); 2023 } 2024 2025 /* 2026 * Identical to proc_pid_link_inode_operations except for get_link() 2027 */ 2028 static const struct inode_operations proc_map_files_link_inode_operations = { 2029 .readlink = proc_pid_readlink, 2030 .get_link = proc_map_files_get_link, 2031 .setattr = proc_setattr, 2032 }; 2033 2034 static int 2035 proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, 2036 struct task_struct *task, const void *ptr) 2037 { 2038 fmode_t mode = (fmode_t)(unsigned long)ptr; 2039 struct proc_inode *ei; 2040 struct inode *inode; 2041 2042 inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | 2043 ((mode & FMODE_READ ) ? S_IRUSR : 0) | 2044 ((mode & FMODE_WRITE) ? 
S_IWUSR : 0)); 2045 if (!inode) 2046 return -ENOENT; 2047 2048 ei = PROC_I(inode); 2049 ei->op.proc_get_link = map_files_get_link; 2050 2051 inode->i_op = &proc_map_files_link_inode_operations; 2052 inode->i_size = 64; 2053 2054 d_set_d_op(dentry, &tid_map_files_dentry_operations); 2055 d_add(dentry, inode); 2056 2057 return 0; 2058 } 2059 2060 static struct dentry *proc_map_files_lookup(struct inode *dir, 2061 struct dentry *dentry, unsigned int flags) 2062 { 2063 unsigned long vm_start, vm_end; 2064 struct vm_area_struct *vma; 2065 struct task_struct *task; 2066 int result; 2067 struct mm_struct *mm; 2068 2069 result = -ENOENT; 2070 task = get_proc_task(dir); 2071 if (!task) 2072 goto out; 2073 2074 result = -EACCES; 2075 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) 2076 goto out_put_task; 2077 2078 result = -ENOENT; 2079 if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) 2080 goto out_put_task; 2081 2082 mm = get_task_mm(task); 2083 if (!mm) 2084 goto out_put_task; 2085 2086 down_read(&mm->mmap_sem); 2087 vma = find_exact_vma(mm, vm_start, vm_end); 2088 if (!vma) 2089 goto out_no_vma; 2090 2091 if (vma->vm_file) 2092 result = proc_map_files_instantiate(dir, dentry, task, 2093 (void *)(unsigned long)vma->vm_file->f_mode); 2094 2095 out_no_vma: 2096 up_read(&mm->mmap_sem); 2097 mmput(mm); 2098 out_put_task: 2099 put_task_struct(task); 2100 out: 2101 return ERR_PTR(result); 2102 } 2103 2104 static const struct inode_operations proc_map_files_inode_operations = { 2105 .lookup = proc_map_files_lookup, 2106 .permission = proc_fd_permission, 2107 .setattr = proc_setattr, 2108 }; 2109 2110 static int 2111 proc_map_files_readdir(struct file *file, struct dir_context *ctx) 2112 { 2113 struct vm_area_struct *vma; 2114 struct task_struct *task; 2115 struct mm_struct *mm; 2116 unsigned long nr_files, pos, i; 2117 struct flex_array *fa = NULL; 2118 struct map_files_info info; 2119 struct map_files_info *p; 2120 int ret; 2121 2122 ret = -ENOENT; 2123 task = 
get_proc_task(file_inode(file)); 2124 if (!task) 2125 goto out; 2126 2127 ret = -EACCES; 2128 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) 2129 goto out_put_task; 2130 2131 ret = 0; 2132 if (!dir_emit_dots(file, ctx)) 2133 goto out_put_task; 2134 2135 mm = get_task_mm(task); 2136 if (!mm) 2137 goto out_put_task; 2138 down_read(&mm->mmap_sem); 2139 2140 nr_files = 0; 2141 2142 /* 2143 * We need two passes here: 2144 * 2145 * 1) Collect vmas of mapped files with mmap_sem taken 2146 * 2) Release mmap_sem and instantiate entries 2147 * 2148 * otherwise we get lockdep complained, since filldir() 2149 * routine might require mmap_sem taken in might_fault(). 2150 */ 2151 2152 for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { 2153 if (vma->vm_file && ++pos > ctx->pos) 2154 nr_files++; 2155 } 2156 2157 if (nr_files) { 2158 fa = flex_array_alloc(sizeof(info), nr_files, 2159 GFP_KERNEL); 2160 if (!fa || flex_array_prealloc(fa, 0, nr_files, 2161 GFP_KERNEL)) { 2162 ret = -ENOMEM; 2163 if (fa) 2164 flex_array_free(fa); 2165 up_read(&mm->mmap_sem); 2166 mmput(mm); 2167 goto out_put_task; 2168 } 2169 for (i = 0, vma = mm->mmap, pos = 2; vma; 2170 vma = vma->vm_next) { 2171 if (!vma->vm_file) 2172 continue; 2173 if (++pos <= ctx->pos) 2174 continue; 2175 2176 info.mode = vma->vm_file->f_mode; 2177 info.len = snprintf(info.name, 2178 sizeof(info.name), "%lx-%lx", 2179 vma->vm_start, vma->vm_end); 2180 if (flex_array_put(fa, i++, &info, GFP_KERNEL)) 2181 BUG(); 2182 } 2183 } 2184 up_read(&mm->mmap_sem); 2185 2186 for (i = 0; i < nr_files; i++) { 2187 p = flex_array_get(fa, i); 2188 if (!proc_fill_cache(file, ctx, 2189 p->name, p->len, 2190 proc_map_files_instantiate, 2191 task, 2192 (void *)(unsigned long)p->mode)) 2193 break; 2194 ctx->pos++; 2195 } 2196 if (fa) 2197 flex_array_free(fa); 2198 mmput(mm); 2199 2200 out_put_task: 2201 put_task_struct(task); 2202 out: 2203 return ret; 2204 } 2205 2206 static const struct file_operations proc_map_files_operations = { 
2207 .read = generic_read_dir, 2208 .iterate_shared = proc_map_files_readdir, 2209 .llseek = generic_file_llseek, 2210 }; 2211 2212 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) 2213 struct timers_private { 2214 struct pid *pid; 2215 struct task_struct *task; 2216 struct sighand_struct *sighand; 2217 struct pid_namespace *ns; 2218 unsigned long flags; 2219 }; 2220 2221 static void *timers_start(struct seq_file *m, loff_t *pos) 2222 { 2223 struct timers_private *tp = m->private; 2224 2225 tp->task = get_pid_task(tp->pid, PIDTYPE_PID); 2226 if (!tp->task) 2227 return ERR_PTR(-ESRCH); 2228 2229 tp->sighand = lock_task_sighand(tp->task, &tp->flags); 2230 if (!tp->sighand) 2231 return ERR_PTR(-ESRCH); 2232 2233 return seq_list_start(&tp->task->signal->posix_timers, *pos); 2234 } 2235 2236 static void *timers_next(struct seq_file *m, void *v, loff_t *pos) 2237 { 2238 struct timers_private *tp = m->private; 2239 return seq_list_next(v, &tp->task->signal->posix_timers, pos); 2240 } 2241 2242 static void timers_stop(struct seq_file *m, void *v) 2243 { 2244 struct timers_private *tp = m->private; 2245 2246 if (tp->sighand) { 2247 unlock_task_sighand(tp->task, &tp->flags); 2248 tp->sighand = NULL; 2249 } 2250 2251 if (tp->task) { 2252 put_task_struct(tp->task); 2253 tp->task = NULL; 2254 } 2255 } 2256 2257 static int show_timer(struct seq_file *m, void *v) 2258 { 2259 struct k_itimer *timer; 2260 struct timers_private *tp = m->private; 2261 int notify; 2262 static const char * const nstr[] = { 2263 [SIGEV_SIGNAL] = "signal", 2264 [SIGEV_NONE] = "none", 2265 [SIGEV_THREAD] = "thread", 2266 }; 2267 2268 timer = list_entry((struct list_head *)v, struct k_itimer, list); 2269 notify = timer->it_sigev_notify; 2270 2271 seq_printf(m, "ID: %d\n", timer->it_id); 2272 seq_printf(m, "signal: %d/%p\n", 2273 timer->sigq->info.si_signo, 2274 timer->sigq->info.si_value.sival_ptr); 2275 seq_printf(m, "notify: %s/%s.%d\n", 2276 nstr[notify & ~SIGEV_THREAD_ID], 2277 
(notify & SIGEV_THREAD_ID) ? "tid" : "pid", 2278 pid_nr_ns(timer->it_pid, tp->ns)); 2279 seq_printf(m, "ClockID: %d\n", timer->it_clock); 2280 2281 return 0; 2282 } 2283 2284 static const struct seq_operations proc_timers_seq_ops = { 2285 .start = timers_start, 2286 .next = timers_next, 2287 .stop = timers_stop, 2288 .show = show_timer, 2289 }; 2290 2291 static int proc_timers_open(struct inode *inode, struct file *file) 2292 { 2293 struct timers_private *tp; 2294 2295 tp = __seq_open_private(file, &proc_timers_seq_ops, 2296 sizeof(struct timers_private)); 2297 if (!tp) 2298 return -ENOMEM; 2299 2300 tp->pid = proc_pid(inode); 2301 tp->ns = inode->i_sb->s_fs_info; 2302 return 0; 2303 } 2304 2305 static const struct file_operations proc_timers_operations = { 2306 .open = proc_timers_open, 2307 .read = seq_read, 2308 .llseek = seq_lseek, 2309 .release = seq_release_private, 2310 }; 2311 #endif 2312 2313 static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, 2314 size_t count, loff_t *offset) 2315 { 2316 struct inode *inode = file_inode(file); 2317 struct task_struct *p; 2318 u64 slack_ns; 2319 int err; 2320 2321 err = kstrtoull_from_user(buf, count, 10, &slack_ns); 2322 if (err < 0) 2323 return err; 2324 2325 p = get_proc_task(inode); 2326 if (!p) 2327 return -ESRCH; 2328 2329 if (p != current) { 2330 if (!capable(CAP_SYS_NICE)) { 2331 count = -EPERM; 2332 goto out; 2333 } 2334 2335 err = security_task_setscheduler(p); 2336 if (err) { 2337 count = err; 2338 goto out; 2339 } 2340 } 2341 2342 task_lock(p); 2343 if (slack_ns == 0) 2344 p->timer_slack_ns = p->default_timer_slack_ns; 2345 else 2346 p->timer_slack_ns = slack_ns; 2347 task_unlock(p); 2348 2349 out: 2350 put_task_struct(p); 2351 2352 return count; 2353 } 2354 2355 static int timerslack_ns_show(struct seq_file *m, void *v) 2356 { 2357 struct inode *inode = m->private; 2358 struct task_struct *p; 2359 int err = 0; 2360 2361 p = get_proc_task(inode); 2362 if (!p) 2363 return -ESRCH; 2364 
2365 if (p != current) { 2366 2367 if (!capable(CAP_SYS_NICE)) { 2368 err = -EPERM; 2369 goto out; 2370 } 2371 err = security_task_getscheduler(p); 2372 if (err) 2373 goto out; 2374 } 2375 2376 task_lock(p); 2377 seq_printf(m, "%llu\n", p->timer_slack_ns); 2378 task_unlock(p); 2379 2380 out: 2381 put_task_struct(p); 2382 2383 return err; 2384 } 2385 2386 static int timerslack_ns_open(struct inode *inode, struct file *filp) 2387 { 2388 return single_open(filp, timerslack_ns_show, inode); 2389 } 2390 2391 static const struct file_operations proc_pid_set_timerslack_ns_operations = { 2392 .open = timerslack_ns_open, 2393 .read = seq_read, 2394 .write = timerslack_ns_write, 2395 .llseek = seq_lseek, 2396 .release = single_release, 2397 }; 2398 2399 static int proc_pident_instantiate(struct inode *dir, 2400 struct dentry *dentry, struct task_struct *task, const void *ptr) 2401 { 2402 const struct pid_entry *p = ptr; 2403 struct inode *inode; 2404 struct proc_inode *ei; 2405 2406 inode = proc_pid_make_inode(dir->i_sb, task, p->mode); 2407 if (!inode) 2408 goto out; 2409 2410 ei = PROC_I(inode); 2411 if (S_ISDIR(inode->i_mode)) 2412 set_nlink(inode, 2); /* Use getattr to fix if necessary */ 2413 if (p->iop) 2414 inode->i_op = p->iop; 2415 if (p->fop) 2416 inode->i_fop = p->fop; 2417 ei->op = p->op; 2418 d_set_d_op(dentry, &pid_dentry_operations); 2419 d_add(dentry, inode); 2420 /* Close the race of the process dying before we return the dentry */ 2421 if (pid_revalidate(dentry, 0)) 2422 return 0; 2423 out: 2424 return -ENOENT; 2425 } 2426 2427 static struct dentry *proc_pident_lookup(struct inode *dir, 2428 struct dentry *dentry, 2429 const struct pid_entry *ents, 2430 unsigned int nents) 2431 { 2432 int error; 2433 struct task_struct *task = get_proc_task(dir); 2434 const struct pid_entry *p, *last; 2435 2436 error = -ENOENT; 2437 2438 if (!task) 2439 goto out_no_task; 2440 2441 /* 2442 * Yes, it does not scale. And it should not. 
Don't add 2443 * new entries into /proc/<tgid>/ without very good reasons. 2444 */ 2445 last = &ents[nents]; 2446 for (p = ents; p < last; p++) { 2447 if (p->len != dentry->d_name.len) 2448 continue; 2449 if (!memcmp(dentry->d_name.name, p->name, p->len)) 2450 break; 2451 } 2452 if (p >= last) 2453 goto out; 2454 2455 error = proc_pident_instantiate(dir, dentry, task, p); 2456 out: 2457 put_task_struct(task); 2458 out_no_task: 2459 return ERR_PTR(error); 2460 } 2461 2462 static int proc_pident_readdir(struct file *file, struct dir_context *ctx, 2463 const struct pid_entry *ents, unsigned int nents) 2464 { 2465 struct task_struct *task = get_proc_task(file_inode(file)); 2466 const struct pid_entry *p; 2467 2468 if (!task) 2469 return -ENOENT; 2470 2471 if (!dir_emit_dots(file, ctx)) 2472 goto out; 2473 2474 if (ctx->pos >= nents + 2) 2475 goto out; 2476 2477 for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { 2478 if (!proc_fill_cache(file, ctx, p->name, p->len, 2479 proc_pident_instantiate, task, p)) 2480 break; 2481 ctx->pos++; 2482 } 2483 out: 2484 put_task_struct(task); 2485 return 0; 2486 } 2487 2488 #ifdef CONFIG_SECURITY 2489 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 2490 size_t count, loff_t *ppos) 2491 { 2492 struct inode * inode = file_inode(file); 2493 char *p = NULL; 2494 ssize_t length; 2495 struct task_struct *task = get_proc_task(inode); 2496 2497 if (!task) 2498 return -ESRCH; 2499 2500 length = security_getprocattr(task, 2501 (char*)file->f_path.dentry->d_name.name, 2502 &p); 2503 put_task_struct(task); 2504 if (length > 0) 2505 length = simple_read_from_buffer(buf, count, ppos, p, length); 2506 kfree(p); 2507 return length; 2508 } 2509 2510 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 2511 size_t count, loff_t *ppos) 2512 { 2513 struct inode * inode = file_inode(file); 2514 void *page; 2515 ssize_t length; 2516 struct task_struct *task = get_proc_task(inode); 2517 2518 length = 
-ESRCH; 2519 if (!task) 2520 goto out_no_task; 2521 2522 /* A task may only write its own attributes. */ 2523 length = -EACCES; 2524 if (current != task) 2525 goto out; 2526 2527 if (count > PAGE_SIZE) 2528 count = PAGE_SIZE; 2529 2530 /* No partial writes. */ 2531 length = -EINVAL; 2532 if (*ppos != 0) 2533 goto out; 2534 2535 page = memdup_user(buf, count); 2536 if (IS_ERR(page)) { 2537 length = PTR_ERR(page); 2538 goto out; 2539 } 2540 2541 /* Guard against adverse ptrace interaction */ 2542 length = mutex_lock_interruptible(¤t->signal->cred_guard_mutex); 2543 if (length < 0) 2544 goto out_free; 2545 2546 length = security_setprocattr(file->f_path.dentry->d_name.name, 2547 page, count); 2548 mutex_unlock(¤t->signal->cred_guard_mutex); 2549 out_free: 2550 kfree(page); 2551 out: 2552 put_task_struct(task); 2553 out_no_task: 2554 return length; 2555 } 2556 2557 static const struct file_operations proc_pid_attr_operations = { 2558 .read = proc_pid_attr_read, 2559 .write = proc_pid_attr_write, 2560 .llseek = generic_file_llseek, 2561 }; 2562 2563 static const struct pid_entry attr_dir_stuff[] = { 2564 REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2565 REG("prev", S_IRUGO, proc_pid_attr_operations), 2566 REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2567 REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2568 REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2569 REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2570 }; 2571 2572 static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) 2573 { 2574 return proc_pident_readdir(file, ctx, 2575 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2576 } 2577 2578 static const struct file_operations proc_attr_dir_operations = { 2579 .read = generic_read_dir, 2580 .iterate_shared = proc_attr_dir_readdir, 2581 .llseek = generic_file_llseek, 2582 }; 2583 2584 static struct dentry *proc_attr_dir_lookup(struct inode *dir, 2585 struct dentry *dentry, unsigned int 
flags) 2586 { 2587 return proc_pident_lookup(dir, dentry, 2588 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2589 } 2590 2591 static const struct inode_operations proc_attr_dir_inode_operations = { 2592 .lookup = proc_attr_dir_lookup, 2593 .getattr = pid_getattr, 2594 .setattr = proc_setattr, 2595 }; 2596 2597 #endif 2598 2599 #ifdef CONFIG_ELF_CORE 2600 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2601 size_t count, loff_t *ppos) 2602 { 2603 struct task_struct *task = get_proc_task(file_inode(file)); 2604 struct mm_struct *mm; 2605 char buffer[PROC_NUMBUF]; 2606 size_t len; 2607 int ret; 2608 2609 if (!task) 2610 return -ESRCH; 2611 2612 ret = 0; 2613 mm = get_task_mm(task); 2614 if (mm) { 2615 len = snprintf(buffer, sizeof(buffer), "%08lx\n", 2616 ((mm->flags & MMF_DUMP_FILTER_MASK) >> 2617 MMF_DUMP_FILTER_SHIFT)); 2618 mmput(mm); 2619 ret = simple_read_from_buffer(buf, count, ppos, buffer, len); 2620 } 2621 2622 put_task_struct(task); 2623 2624 return ret; 2625 } 2626 2627 static ssize_t proc_coredump_filter_write(struct file *file, 2628 const char __user *buf, 2629 size_t count, 2630 loff_t *ppos) 2631 { 2632 struct task_struct *task; 2633 struct mm_struct *mm; 2634 unsigned int val; 2635 int ret; 2636 int i; 2637 unsigned long mask; 2638 2639 ret = kstrtouint_from_user(buf, count, 0, &val); 2640 if (ret < 0) 2641 return ret; 2642 2643 ret = -ESRCH; 2644 task = get_proc_task(file_inode(file)); 2645 if (!task) 2646 goto out_no_task; 2647 2648 mm = get_task_mm(task); 2649 if (!mm) 2650 goto out_no_mm; 2651 ret = 0; 2652 2653 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { 2654 if (val & mask) 2655 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2656 else 2657 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2658 } 2659 2660 mmput(mm); 2661 out_no_mm: 2662 put_task_struct(task); 2663 out_no_task: 2664 if (ret < 0) 2665 return ret; 2666 return count; 2667 } 2668 2669 static const struct file_operations 
proc_coredump_filter_operations = { 2670 .read = proc_coredump_filter_read, 2671 .write = proc_coredump_filter_write, 2672 .llseek = generic_file_llseek, 2673 }; 2674 #endif 2675 2676 #ifdef CONFIG_TASK_IO_ACCOUNTING 2677 static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole) 2678 { 2679 struct task_io_accounting acct = task->ioac; 2680 unsigned long flags; 2681 int result; 2682 2683 result = mutex_lock_killable(&task->signal->cred_guard_mutex); 2684 if (result) 2685 return result; 2686 2687 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { 2688 result = -EACCES; 2689 goto out_unlock; 2690 } 2691 2692 if (whole && lock_task_sighand(task, &flags)) { 2693 struct task_struct *t = task; 2694 2695 task_io_accounting_add(&acct, &task->signal->ioac); 2696 while_each_thread(task, t) 2697 task_io_accounting_add(&acct, &t->ioac); 2698 2699 unlock_task_sighand(task, &flags); 2700 } 2701 seq_printf(m, 2702 "rchar: %llu\n" 2703 "wchar: %llu\n" 2704 "syscr: %llu\n" 2705 "syscw: %llu\n" 2706 "read_bytes: %llu\n" 2707 "write_bytes: %llu\n" 2708 "cancelled_write_bytes: %llu\n", 2709 (unsigned long long)acct.rchar, 2710 (unsigned long long)acct.wchar, 2711 (unsigned long long)acct.syscr, 2712 (unsigned long long)acct.syscw, 2713 (unsigned long long)acct.read_bytes, 2714 (unsigned long long)acct.write_bytes, 2715 (unsigned long long)acct.cancelled_write_bytes); 2716 result = 0; 2717 2718 out_unlock: 2719 mutex_unlock(&task->signal->cred_guard_mutex); 2720 return result; 2721 } 2722 2723 static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns, 2724 struct pid *pid, struct task_struct *task) 2725 { 2726 return do_io_accounting(task, m, 0); 2727 } 2728 2729 static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns, 2730 struct pid *pid, struct task_struct *task) 2731 { 2732 return do_io_accounting(task, m, 1); 2733 } 2734 #endif /* CONFIG_TASK_IO_ACCOUNTING */ 2735 2736 #ifdef CONFIG_USER_NS 2737 static 
int proc_id_map_open(struct inode *inode, struct file *file, 2738 const struct seq_operations *seq_ops) 2739 { 2740 struct user_namespace *ns = NULL; 2741 struct task_struct *task; 2742 struct seq_file *seq; 2743 int ret = -EINVAL; 2744 2745 task = get_proc_task(inode); 2746 if (task) { 2747 rcu_read_lock(); 2748 ns = get_user_ns(task_cred_xxx(task, user_ns)); 2749 rcu_read_unlock(); 2750 put_task_struct(task); 2751 } 2752 if (!ns) 2753 goto err; 2754 2755 ret = seq_open(file, seq_ops); 2756 if (ret) 2757 goto err_put_ns; 2758 2759 seq = file->private_data; 2760 seq->private = ns; 2761 2762 return 0; 2763 err_put_ns: 2764 put_user_ns(ns); 2765 err: 2766 return ret; 2767 } 2768 2769 static int proc_id_map_release(struct inode *inode, struct file *file) 2770 { 2771 struct seq_file *seq = file->private_data; 2772 struct user_namespace *ns = seq->private; 2773 put_user_ns(ns); 2774 return seq_release(inode, file); 2775 } 2776 2777 static int proc_uid_map_open(struct inode *inode, struct file *file) 2778 { 2779 return proc_id_map_open(inode, file, &proc_uid_seq_operations); 2780 } 2781 2782 static int proc_gid_map_open(struct inode *inode, struct file *file) 2783 { 2784 return proc_id_map_open(inode, file, &proc_gid_seq_operations); 2785 } 2786 2787 static int proc_projid_map_open(struct inode *inode, struct file *file) 2788 { 2789 return proc_id_map_open(inode, file, &proc_projid_seq_operations); 2790 } 2791 2792 static const struct file_operations proc_uid_map_operations = { 2793 .open = proc_uid_map_open, 2794 .write = proc_uid_map_write, 2795 .read = seq_read, 2796 .llseek = seq_lseek, 2797 .release = proc_id_map_release, 2798 }; 2799 2800 static const struct file_operations proc_gid_map_operations = { 2801 .open = proc_gid_map_open, 2802 .write = proc_gid_map_write, 2803 .read = seq_read, 2804 .llseek = seq_lseek, 2805 .release = proc_id_map_release, 2806 }; 2807 2808 static const struct file_operations proc_projid_map_operations = { 2809 .open = 
proc_projid_map_open, 2810 .write = proc_projid_map_write, 2811 .read = seq_read, 2812 .llseek = seq_lseek, 2813 .release = proc_id_map_release, 2814 }; 2815 2816 static int proc_setgroups_open(struct inode *inode, struct file *file) 2817 { 2818 struct user_namespace *ns = NULL; 2819 struct task_struct *task; 2820 int ret; 2821 2822 ret = -ESRCH; 2823 task = get_proc_task(inode); 2824 if (task) { 2825 rcu_read_lock(); 2826 ns = get_user_ns(task_cred_xxx(task, user_ns)); 2827 rcu_read_unlock(); 2828 put_task_struct(task); 2829 } 2830 if (!ns) 2831 goto err; 2832 2833 if (file->f_mode & FMODE_WRITE) { 2834 ret = -EACCES; 2835 if (!ns_capable(ns, CAP_SYS_ADMIN)) 2836 goto err_put_ns; 2837 } 2838 2839 ret = single_open(file, &proc_setgroups_show, ns); 2840 if (ret) 2841 goto err_put_ns; 2842 2843 return 0; 2844 err_put_ns: 2845 put_user_ns(ns); 2846 err: 2847 return ret; 2848 } 2849 2850 static int proc_setgroups_release(struct inode *inode, struct file *file) 2851 { 2852 struct seq_file *seq = file->private_data; 2853 struct user_namespace *ns = seq->private; 2854 int ret = single_release(inode, file); 2855 put_user_ns(ns); 2856 return ret; 2857 } 2858 2859 static const struct file_operations proc_setgroups_operations = { 2860 .open = proc_setgroups_open, 2861 .write = proc_setgroups_write, 2862 .read = seq_read, 2863 .llseek = seq_lseek, 2864 .release = proc_setgroups_release, 2865 }; 2866 #endif /* CONFIG_USER_NS */ 2867 2868 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2869 struct pid *pid, struct task_struct *task) 2870 { 2871 int err = lock_trace(task); 2872 if (!err) { 2873 seq_printf(m, "%08x\n", task->personality); 2874 unlock_trace(task); 2875 } 2876 return err; 2877 } 2878 2879 #ifdef CONFIG_LIVEPATCH 2880 static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns, 2881 struct pid *pid, struct task_struct *task) 2882 { 2883 seq_printf(m, "%d\n", task->patch_state); 2884 return 0; 2885 } 2886 #endif /* 
CONFIG_LIVEPATCH */ 2887 2888 /* 2889 * Thread groups 2890 */ 2891 static const struct file_operations proc_task_operations; 2892 static const struct inode_operations proc_task_inode_operations; 2893 2894 static const struct pid_entry tgid_base_stuff[] = { 2895 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2896 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2897 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), 2898 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2899 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 2900 #ifdef CONFIG_NET 2901 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2902 #endif 2903 REG("environ", S_IRUSR, proc_environ_operations), 2904 REG("auxv", S_IRUSR, proc_auxv_operations), 2905 ONE("status", S_IRUGO, proc_pid_status), 2906 ONE("personality", S_IRUSR, proc_pid_personality), 2907 ONE("limits", S_IRUGO, proc_pid_limits), 2908 #ifdef CONFIG_SCHED_DEBUG 2909 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2910 #endif 2911 #ifdef CONFIG_SCHED_AUTOGROUP 2912 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), 2913 #endif 2914 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2915 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2916 ONE("syscall", S_IRUSR, proc_pid_syscall), 2917 #endif 2918 REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), 2919 ONE("stat", S_IRUGO, proc_tgid_stat), 2920 ONE("statm", S_IRUGO, proc_pid_statm), 2921 REG("maps", S_IRUGO, proc_pid_maps_operations), 2922 #ifdef CONFIG_NUMA 2923 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), 2924 #endif 2925 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2926 LNK("cwd", proc_cwd_link), 2927 LNK("root", proc_root_link), 2928 LNK("exe", proc_exe_link), 2929 REG("mounts", S_IRUGO, proc_mounts_operations), 2930 REG("mountinfo", S_IRUGO, 
proc_mountinfo_operations), 2931 REG("mountstats", S_IRUSR, proc_mountstats_operations), 2932 #ifdef CONFIG_PROC_PAGE_MONITOR 2933 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2934 REG("smaps", S_IRUGO, proc_pid_smaps_operations), 2935 REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), 2936 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2937 #endif 2938 #ifdef CONFIG_SECURITY 2939 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2940 #endif 2941 #ifdef CONFIG_KALLSYMS 2942 ONE("wchan", S_IRUGO, proc_pid_wchan), 2943 #endif 2944 #ifdef CONFIG_STACKTRACE 2945 ONE("stack", S_IRUSR, proc_pid_stack), 2946 #endif 2947 #ifdef CONFIG_SCHED_INFO 2948 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 2949 #endif 2950 #ifdef CONFIG_LATENCYTOP 2951 REG("latency", S_IRUGO, proc_lstats_operations), 2952 #endif 2953 #ifdef CONFIG_PROC_PID_CPUSET 2954 ONE("cpuset", S_IRUGO, proc_cpuset_show), 2955 #endif 2956 #ifdef CONFIG_CGROUPS 2957 ONE("cgroup", S_IRUGO, proc_cgroup_show), 2958 #endif 2959 ONE("oom_score", S_IRUGO, proc_oom_score), 2960 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 2961 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2962 #ifdef CONFIG_AUDITSYSCALL 2963 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2964 REG("sessionid", S_IRUGO, proc_sessionid_operations), 2965 #endif 2966 #ifdef CONFIG_FAULT_INJECTION 2967 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2968 REG("fail-nth", 0644, proc_fail_nth_operations), 2969 #endif 2970 #ifdef CONFIG_ELF_CORE 2971 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2972 #endif 2973 #ifdef CONFIG_TASK_IO_ACCOUNTING 2974 ONE("io", S_IRUSR, proc_tgid_io_accounting), 2975 #endif 2976 #ifdef CONFIG_HARDWALL 2977 ONE("hardwall", S_IRUGO, proc_pid_hardwall), 2978 #endif 2979 #ifdef CONFIG_USER_NS 2980 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2981 REG("gid_map", 
S_IRUGO|S_IWUSR, proc_gid_map_operations), 2982 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 2983 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), 2984 #endif 2985 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) 2986 REG("timers", S_IRUGO, proc_timers_operations), 2987 #endif 2988 REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), 2989 #ifdef CONFIG_LIVEPATCH 2990 ONE("patch_state", S_IRUSR, proc_pid_patch_state), 2991 #endif 2992 }; 2993 2994 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) 2995 { 2996 return proc_pident_readdir(file, ctx, 2997 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 2998 } 2999 3000 static const struct file_operations proc_tgid_base_operations = { 3001 .read = generic_read_dir, 3002 .iterate_shared = proc_tgid_base_readdir, 3003 .llseek = generic_file_llseek, 3004 }; 3005 3006 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 3007 { 3008 return proc_pident_lookup(dir, dentry, 3009 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3010 } 3011 3012 static const struct inode_operations proc_tgid_base_inode_operations = { 3013 .lookup = proc_tgid_base_lookup, 3014 .getattr = pid_getattr, 3015 .setattr = proc_setattr, 3016 .permission = proc_pid_permission, 3017 }; 3018 3019 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 3020 { 3021 struct dentry *dentry, *leader, *dir; 3022 char buf[PROC_NUMBUF]; 3023 struct qstr name; 3024 3025 name.name = buf; 3026 name.len = snprintf(buf, sizeof(buf), "%d", pid); 3027 /* no ->d_hash() rejects on procfs */ 3028 dentry = d_hash_and_lookup(mnt->mnt_root, &name); 3029 if (dentry) { 3030 d_invalidate(dentry); 3031 dput(dentry); 3032 } 3033 3034 if (pid == tgid) 3035 return; 3036 3037 name.name = buf; 3038 name.len = snprintf(buf, sizeof(buf), "%d", tgid); 3039 leader = d_hash_and_lookup(mnt->mnt_root, &name); 3040 if 
(!leader) 3041 goto out; 3042 3043 name.name = "task"; 3044 name.len = strlen(name.name); 3045 dir = d_hash_and_lookup(leader, &name); 3046 if (!dir) 3047 goto out_put_leader; 3048 3049 name.name = buf; 3050 name.len = snprintf(buf, sizeof(buf), "%d", pid); 3051 dentry = d_hash_and_lookup(dir, &name); 3052 if (dentry) { 3053 d_invalidate(dentry); 3054 dput(dentry); 3055 } 3056 3057 dput(dir); 3058 out_put_leader: 3059 dput(leader); 3060 out: 3061 return; 3062 } 3063 3064 /** 3065 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 3066 * @task: task that should be flushed. 3067 * 3068 * When flushing dentries from proc, one needs to flush them from global 3069 * proc (proc_mnt) and from all the namespaces' procs this task was seen 3070 * in. This call is supposed to do all of this job. 3071 * 3072 * Looks in the dcache for 3073 * /proc/@pid 3074 * /proc/@tgid/task/@pid 3075 * if either directory is present flushes it and all of it'ts children 3076 * from the dcache. 3077 * 3078 * It is safe and reasonable to cache /proc entries for a task until 3079 * that task exits. After that they just clog up the dcache with 3080 * useless entries, possibly causing useful dcache entries to be 3081 * flushed instead. This routine is proved to flush those useless 3082 * dcache entries at process exit time. 3083 * 3084 * NOTE: This routine is just an optimization so it does not guarantee 3085 * that no dcache entries will exist at process exit time it 3086 * just makes it very unlikely that any will persist. 
3087 */ 3088 3089 void proc_flush_task(struct task_struct *task) 3090 { 3091 int i; 3092 struct pid *pid, *tgid; 3093 struct upid *upid; 3094 3095 pid = task_pid(task); 3096 tgid = task_tgid(task); 3097 3098 for (i = 0; i <= pid->level; i++) { 3099 upid = &pid->numbers[i]; 3100 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 3101 tgid->numbers[i].nr); 3102 } 3103 } 3104 3105 static int proc_pid_instantiate(struct inode *dir, 3106 struct dentry * dentry, 3107 struct task_struct *task, const void *ptr) 3108 { 3109 struct inode *inode; 3110 3111 inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); 3112 if (!inode) 3113 goto out; 3114 3115 inode->i_op = &proc_tgid_base_inode_operations; 3116 inode->i_fop = &proc_tgid_base_operations; 3117 inode->i_flags|=S_IMMUTABLE; 3118 3119 set_nlink(inode, nlink_tgid); 3120 3121 d_set_d_op(dentry, &pid_dentry_operations); 3122 3123 d_add(dentry, inode); 3124 /* Close the race of the process dying before we return the dentry */ 3125 if (pid_revalidate(dentry, 0)) 3126 return 0; 3127 out: 3128 return -ENOENT; 3129 } 3130 3131 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 3132 { 3133 int result = -ENOENT; 3134 struct task_struct *task; 3135 unsigned tgid; 3136 struct pid_namespace *ns; 3137 3138 tgid = name_to_int(&dentry->d_name); 3139 if (tgid == ~0U) 3140 goto out; 3141 3142 ns = dentry->d_sb->s_fs_info; 3143 rcu_read_lock(); 3144 task = find_task_by_pid_ns(tgid, ns); 3145 if (task) 3146 get_task_struct(task); 3147 rcu_read_unlock(); 3148 if (!task) 3149 goto out; 3150 3151 result = proc_pid_instantiate(dir, dentry, task, NULL); 3152 put_task_struct(task); 3153 out: 3154 return ERR_PTR(result); 3155 } 3156 3157 /* 3158 * Find the first task with tgid >= tgid 3159 * 3160 */ 3161 struct tgid_iter { 3162 unsigned int tgid; 3163 struct task_struct *task; 3164 }; 3165 static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) 3166 { 3167 
struct pid *pid; 3168 3169 if (iter.task) 3170 put_task_struct(iter.task); 3171 rcu_read_lock(); 3172 retry: 3173 iter.task = NULL; 3174 pid = find_ge_pid(iter.tgid, ns); 3175 if (pid) { 3176 iter.tgid = pid_nr_ns(pid, ns); 3177 iter.task = pid_task(pid, PIDTYPE_PID); 3178 /* What we to know is if the pid we have find is the 3179 * pid of a thread_group_leader. Testing for task 3180 * being a thread_group_leader is the obvious thing 3181 * todo but there is a window when it fails, due to 3182 * the pid transfer logic in de_thread. 3183 * 3184 * So we perform the straight forward test of seeing 3185 * if the pid we have found is the pid of a thread 3186 * group leader, and don't worry if the task we have 3187 * found doesn't happen to be a thread group leader. 3188 * As we don't care in the case of readdir. 3189 */ 3190 if (!iter.task || !has_group_leader_pid(iter.task)) { 3191 iter.tgid += 1; 3192 goto retry; 3193 } 3194 get_task_struct(iter.task); 3195 } 3196 rcu_read_unlock(); 3197 return iter; 3198 } 3199 3200 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) 3201 3202 /* for the /proc/ directory itself, after non-process stuff has been done */ 3203 int proc_pid_readdir(struct file *file, struct dir_context *ctx) 3204 { 3205 struct tgid_iter iter; 3206 struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info; 3207 loff_t pos = ctx->pos; 3208 3209 if (pos >= PID_MAX_LIMIT + TGID_OFFSET) 3210 return 0; 3211 3212 if (pos == TGID_OFFSET - 2) { 3213 struct inode *inode = d_inode(ns->proc_self); 3214 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) 3215 return 0; 3216 ctx->pos = pos = pos + 1; 3217 } 3218 if (pos == TGID_OFFSET - 1) { 3219 struct inode *inode = d_inode(ns->proc_thread_self); 3220 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) 3221 return 0; 3222 ctx->pos = pos = pos + 1; 3223 } 3224 iter.tgid = pos - TGID_OFFSET; 3225 iter.task = NULL; 3226 for (iter = next_tgid(ns, iter); 3227 iter.task; 3228 iter.tgid += 1, iter = next_tgid(ns, 
iter)) { 3229 char name[PROC_NUMBUF]; 3230 int len; 3231 3232 cond_resched(); 3233 if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) 3234 continue; 3235 3236 len = snprintf(name, sizeof(name), "%d", iter.tgid); 3237 ctx->pos = iter.tgid + TGID_OFFSET; 3238 if (!proc_fill_cache(file, ctx, name, len, 3239 proc_pid_instantiate, iter.task, NULL)) { 3240 put_task_struct(iter.task); 3241 return 0; 3242 } 3243 } 3244 ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; 3245 return 0; 3246 } 3247 3248 /* 3249 * proc_tid_comm_permission is a special permission function exclusively 3250 * used for the node /proc/<pid>/task/<tid>/comm. 3251 * It bypasses generic permission checks in the case where a task of the same 3252 * task group attempts to access the node. 3253 * The rationale behind this is that glibc and bionic access this node for 3254 * cross thread naming (pthread_set/getname_np(!self)). However, if 3255 * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0, 3256 * which locks out the cross thread naming implementation. 3257 * This function makes sure that the node is always accessible for members of 3258 * same thread group. 3259 */ 3260 static int proc_tid_comm_permission(struct inode *inode, int mask) 3261 { 3262 bool is_same_tgroup; 3263 struct task_struct *task; 3264 3265 task = get_proc_task(inode); 3266 if (!task) 3267 return -ESRCH; 3268 is_same_tgroup = same_thread_group(current, task); 3269 put_task_struct(task); 3270 3271 if (likely(is_same_tgroup && !(mask & MAY_EXEC))) { 3272 /* This file (/proc/<pid>/task/<tid>/comm) can always be 3273 * read or written by the members of the corresponding 3274 * thread group. 
3275 */ 3276 return 0; 3277 } 3278 3279 return generic_permission(inode, mask); 3280 } 3281 3282 static const struct inode_operations proc_tid_comm_inode_operations = { 3283 .permission = proc_tid_comm_permission, 3284 }; 3285 3286 /* 3287 * Tasks 3288 */ 3289 static const struct pid_entry tid_base_stuff[] = { 3290 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3291 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3292 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3293 #ifdef CONFIG_NET 3294 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 3295 #endif 3296 REG("environ", S_IRUSR, proc_environ_operations), 3297 REG("auxv", S_IRUSR, proc_auxv_operations), 3298 ONE("status", S_IRUGO, proc_pid_status), 3299 ONE("personality", S_IRUSR, proc_pid_personality), 3300 ONE("limits", S_IRUGO, proc_pid_limits), 3301 #ifdef CONFIG_SCHED_DEBUG 3302 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3303 #endif 3304 NOD("comm", S_IFREG|S_IRUGO|S_IWUSR, 3305 &proc_tid_comm_inode_operations, 3306 &proc_pid_set_comm_operations, {}), 3307 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3308 ONE("syscall", S_IRUSR, proc_pid_syscall), 3309 #endif 3310 REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), 3311 ONE("stat", S_IRUGO, proc_tid_stat), 3312 ONE("statm", S_IRUGO, proc_pid_statm), 3313 REG("maps", S_IRUGO, proc_tid_maps_operations), 3314 #ifdef CONFIG_PROC_CHILDREN 3315 REG("children", S_IRUGO, proc_tid_children_operations), 3316 #endif 3317 #ifdef CONFIG_NUMA 3318 REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), 3319 #endif 3320 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3321 LNK("cwd", proc_cwd_link), 3322 LNK("root", proc_root_link), 3323 LNK("exe", proc_exe_link), 3324 REG("mounts", S_IRUGO, proc_mounts_operations), 3325 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3326 #ifdef CONFIG_PROC_PAGE_MONITOR 3327 REG("clear_refs", S_IWUSR, 
proc_clear_refs_operations), 3328 REG("smaps", S_IRUGO, proc_tid_smaps_operations), 3329 REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), 3330 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3331 #endif 3332 #ifdef CONFIG_SECURITY 3333 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3334 #endif 3335 #ifdef CONFIG_KALLSYMS 3336 ONE("wchan", S_IRUGO, proc_pid_wchan), 3337 #endif 3338 #ifdef CONFIG_STACKTRACE 3339 ONE("stack", S_IRUSR, proc_pid_stack), 3340 #endif 3341 #ifdef CONFIG_SCHED_INFO 3342 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 3343 #endif 3344 #ifdef CONFIG_LATENCYTOP 3345 REG("latency", S_IRUGO, proc_lstats_operations), 3346 #endif 3347 #ifdef CONFIG_PROC_PID_CPUSET 3348 ONE("cpuset", S_IRUGO, proc_cpuset_show), 3349 #endif 3350 #ifdef CONFIG_CGROUPS 3351 ONE("cgroup", S_IRUGO, proc_cgroup_show), 3352 #endif 3353 ONE("oom_score", S_IRUGO, proc_oom_score), 3354 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 3355 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3356 #ifdef CONFIG_AUDITSYSCALL 3357 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3358 REG("sessionid", S_IRUGO, proc_sessionid_operations), 3359 #endif 3360 #ifdef CONFIG_FAULT_INJECTION 3361 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3362 REG("fail-nth", 0644, proc_fail_nth_operations), 3363 #endif 3364 #ifdef CONFIG_TASK_IO_ACCOUNTING 3365 ONE("io", S_IRUSR, proc_tid_io_accounting), 3366 #endif 3367 #ifdef CONFIG_HARDWALL 3368 ONE("hardwall", S_IRUGO, proc_pid_hardwall), 3369 #endif 3370 #ifdef CONFIG_USER_NS 3371 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 3372 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 3373 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 3374 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), 3375 #endif 3376 #ifdef CONFIG_LIVEPATCH 3377 ONE("patch_state", S_IRUSR, proc_pid_patch_state), 
3378 #endif 3379 }; 3380 3381 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) 3382 { 3383 return proc_pident_readdir(file, ctx, 3384 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3385 } 3386 3387 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 3388 { 3389 return proc_pident_lookup(dir, dentry, 3390 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3391 } 3392 3393 static const struct file_operations proc_tid_base_operations = { 3394 .read = generic_read_dir, 3395 .iterate_shared = proc_tid_base_readdir, 3396 .llseek = generic_file_llseek, 3397 }; 3398 3399 static const struct inode_operations proc_tid_base_inode_operations = { 3400 .lookup = proc_tid_base_lookup, 3401 .getattr = pid_getattr, 3402 .setattr = proc_setattr, 3403 }; 3404 3405 static int proc_task_instantiate(struct inode *dir, 3406 struct dentry *dentry, struct task_struct *task, const void *ptr) 3407 { 3408 struct inode *inode; 3409 inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); 3410 3411 if (!inode) 3412 goto out; 3413 inode->i_op = &proc_tid_base_inode_operations; 3414 inode->i_fop = &proc_tid_base_operations; 3415 inode->i_flags|=S_IMMUTABLE; 3416 3417 set_nlink(inode, nlink_tid); 3418 3419 d_set_d_op(dentry, &pid_dentry_operations); 3420 3421 d_add(dentry, inode); 3422 /* Close the race of the process dying before we return the dentry */ 3423 if (pid_revalidate(dentry, 0)) 3424 return 0; 3425 out: 3426 return -ENOENT; 3427 } 3428 3429 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 3430 { 3431 int result = -ENOENT; 3432 struct task_struct *task; 3433 struct task_struct *leader = get_proc_task(dir); 3434 unsigned tid; 3435 struct pid_namespace *ns; 3436 3437 if (!leader) 3438 goto out_no_task; 3439 3440 tid = name_to_int(&dentry->d_name); 3441 if (tid == ~0U) 3442 goto out; 3443 3444 ns = dentry->d_sb->s_fs_info; 3445 rcu_read_lock(); 3446 
task = find_task_by_pid_ns(tid, ns); 3447 if (task) 3448 get_task_struct(task); 3449 rcu_read_unlock(); 3450 if (!task) 3451 goto out; 3452 if (!same_thread_group(leader, task)) 3453 goto out_drop_task; 3454 3455 result = proc_task_instantiate(dir, dentry, task, NULL); 3456 out_drop_task: 3457 put_task_struct(task); 3458 out: 3459 put_task_struct(leader); 3460 out_no_task: 3461 return ERR_PTR(result); 3462 } 3463 3464 /* 3465 * Find the first tid of a thread group to return to user space. 3466 * 3467 * Usually this is just the thread group leader, but if the users 3468 * buffer was too small or there was a seek into the middle of the 3469 * directory we have more work todo. 3470 * 3471 * In the case of a short read we start with find_task_by_pid. 3472 * 3473 * In the case of a seek we start with the leader and walk nr 3474 * threads past it. 3475 */ 3476 static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, 3477 struct pid_namespace *ns) 3478 { 3479 struct task_struct *pos, *task; 3480 unsigned long nr = f_pos; 3481 3482 if (nr != f_pos) /* 32bit overflow? */ 3483 return NULL; 3484 3485 rcu_read_lock(); 3486 task = pid_task(pid, PIDTYPE_PID); 3487 if (!task) 3488 goto fail; 3489 3490 /* Attempt to start with the tid of a thread */ 3491 if (tid && nr) { 3492 pos = find_task_by_pid_ns(tid, ns); 3493 if (pos && same_thread_group(pos, task)) 3494 goto found; 3495 } 3496 3497 /* If nr exceeds the number of threads there is nothing todo */ 3498 if (nr >= get_nr_threads(task)) 3499 goto fail; 3500 3501 /* If we haven't found our starting place yet start 3502 * with the leader and walk nr threads forward. 3503 */ 3504 pos = task = task->group_leader; 3505 do { 3506 if (!nr--) 3507 goto found; 3508 } while_each_thread(task, pos); 3509 fail: 3510 pos = NULL; 3511 goto out; 3512 found: 3513 get_task_struct(pos); 3514 out: 3515 rcu_read_unlock(); 3516 return pos; 3517 } 3518 3519 /* 3520 * Find the next thread in the thread list. 
3521 * Return NULL if there is an error or no next thread. 3522 * 3523 * The reference to the input task_struct is released. 3524 */ 3525 static struct task_struct *next_tid(struct task_struct *start) 3526 { 3527 struct task_struct *pos = NULL; 3528 rcu_read_lock(); 3529 if (pid_alive(start)) { 3530 pos = next_thread(start); 3531 if (thread_group_leader(pos)) 3532 pos = NULL; 3533 else 3534 get_task_struct(pos); 3535 } 3536 rcu_read_unlock(); 3537 put_task_struct(start); 3538 return pos; 3539 } 3540 3541 /* for the /proc/TGID/task/ directories */ 3542 static int proc_task_readdir(struct file *file, struct dir_context *ctx) 3543 { 3544 struct inode *inode = file_inode(file); 3545 struct task_struct *task; 3546 struct pid_namespace *ns; 3547 int tid; 3548 3549 if (proc_inode_is_dead(inode)) 3550 return -ENOENT; 3551 3552 if (!dir_emit_dots(file, ctx)) 3553 return 0; 3554 3555 /* f_version caches the tgid value that the last readdir call couldn't 3556 * return. lseek aka telldir automagically resets f_version to 0. 
3557 */ 3558 ns = inode->i_sb->s_fs_info; 3559 tid = (int)file->f_version; 3560 file->f_version = 0; 3561 for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); 3562 task; 3563 task = next_tid(task), ctx->pos++) { 3564 char name[PROC_NUMBUF]; 3565 int len; 3566 tid = task_pid_nr_ns(task, ns); 3567 len = snprintf(name, sizeof(name), "%d", tid); 3568 if (!proc_fill_cache(file, ctx, name, len, 3569 proc_task_instantiate, task, NULL)) { 3570 /* returning this tgid failed, save it as the first 3571 * pid for the next readir call */ 3572 file->f_version = (u64)tid; 3573 put_task_struct(task); 3574 break; 3575 } 3576 } 3577 3578 return 0; 3579 } 3580 3581 static int proc_task_getattr(const struct path *path, struct kstat *stat, 3582 u32 request_mask, unsigned int query_flags) 3583 { 3584 struct inode *inode = d_inode(path->dentry); 3585 struct task_struct *p = get_proc_task(inode); 3586 generic_fillattr(inode, stat); 3587 3588 if (p) { 3589 stat->nlink += get_nr_threads(p); 3590 put_task_struct(p); 3591 } 3592 3593 return 0; 3594 } 3595 3596 static const struct inode_operations proc_task_inode_operations = { 3597 .lookup = proc_task_lookup, 3598 .getattr = proc_task_getattr, 3599 .setattr = proc_setattr, 3600 .permission = proc_pid_permission, 3601 }; 3602 3603 static const struct file_operations proc_task_operations = { 3604 .read = generic_read_dir, 3605 .iterate_shared = proc_task_readdir, 3606 .llseek = generic_file_llseek, 3607 }; 3608 3609 void __init set_proc_pid_nlink(void) 3610 { 3611 nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3612 nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3613 } 3614