// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't use
 *  the rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
49 */ 50 51 #include <linux/uaccess.h> 52 53 #include <linux/errno.h> 54 #include <linux/time.h> 55 #include <linux/proc_fs.h> 56 #include <linux/stat.h> 57 #include <linux/task_io_accounting_ops.h> 58 #include <linux/init.h> 59 #include <linux/capability.h> 60 #include <linux/file.h> 61 #include <linux/fdtable.h> 62 #include <linux/string.h> 63 #include <linux/seq_file.h> 64 #include <linux/namei.h> 65 #include <linux/mnt_namespace.h> 66 #include <linux/mm.h> 67 #include <linux/swap.h> 68 #include <linux/rcupdate.h> 69 #include <linux/kallsyms.h> 70 #include <linux/stacktrace.h> 71 #include <linux/resource.h> 72 #include <linux/module.h> 73 #include <linux/mount.h> 74 #include <linux/security.h> 75 #include <linux/ptrace.h> 76 #include <linux/tracehook.h> 77 #include <linux/printk.h> 78 #include <linux/cache.h> 79 #include <linux/cgroup.h> 80 #include <linux/cpuset.h> 81 #include <linux/audit.h> 82 #include <linux/poll.h> 83 #include <linux/nsproxy.h> 84 #include <linux/oom.h> 85 #include <linux/elf.h> 86 #include <linux/pid_namespace.h> 87 #include <linux/user_namespace.h> 88 #include <linux/fs_struct.h> 89 #include <linux/slab.h> 90 #include <linux/sched/autogroup.h> 91 #include <linux/sched/mm.h> 92 #include <linux/sched/coredump.h> 93 #include <linux/sched/debug.h> 94 #include <linux/sched/stat.h> 95 #include <linux/flex_array.h> 96 #include <linux/posix-timers.h> 97 #include <trace/events/oom.h> 98 #include "internal.h" 99 #include "fd.h" 100 101 #include "../../lib/kstrtox.h" 102 103 /* NOTE: 104 * Implementing inode permission operations in /proc is almost 105 * certainly an error. Permission checks need to happen during 106 * each system call not at open time. The reason is that most of 107 * what we wish to check for permissions in /proc varies at runtime. 108 * 109 * The classic example of a problem is opening file descriptors 110 * in /proc for a task before it execs a suid executable. 
111 */ 112 113 static u8 nlink_tid __ro_after_init; 114 static u8 nlink_tgid __ro_after_init; 115 116 struct pid_entry { 117 const char *name; 118 unsigned int len; 119 umode_t mode; 120 const struct inode_operations *iop; 121 const struct file_operations *fop; 122 union proc_op op; 123 }; 124 125 #define NOD(NAME, MODE, IOP, FOP, OP) { \ 126 .name = (NAME), \ 127 .len = sizeof(NAME) - 1, \ 128 .mode = MODE, \ 129 .iop = IOP, \ 130 .fop = FOP, \ 131 .op = OP, \ 132 } 133 134 #define DIR(NAME, MODE, iops, fops) \ 135 NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) 136 #define LNK(NAME, get_link) \ 137 NOD(NAME, (S_IFLNK|S_IRWXUGO), \ 138 &proc_pid_link_inode_operations, NULL, \ 139 { .proc_get_link = get_link } ) 140 #define REG(NAME, MODE, fops) \ 141 NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) 142 #define ONE(NAME, MODE, show) \ 143 NOD(NAME, (S_IFREG|(MODE)), \ 144 NULL, &proc_single_file_operations, \ 145 { .proc_show = show } ) 146 147 /* 148 * Count the number of hardlinks for the pid_entry table, excluding the . 149 * and .. links. 
150 */ 151 static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, 152 unsigned int n) 153 { 154 unsigned int i; 155 unsigned int count; 156 157 count = 2; 158 for (i = 0; i < n; ++i) { 159 if (S_ISDIR(entries[i].mode)) 160 ++count; 161 } 162 163 return count; 164 } 165 166 static int get_task_root(struct task_struct *task, struct path *root) 167 { 168 int result = -ENOENT; 169 170 task_lock(task); 171 if (task->fs) { 172 get_fs_root(task->fs, root); 173 result = 0; 174 } 175 task_unlock(task); 176 return result; 177 } 178 179 static int proc_cwd_link(struct dentry *dentry, struct path *path) 180 { 181 struct task_struct *task = get_proc_task(d_inode(dentry)); 182 int result = -ENOENT; 183 184 if (task) { 185 task_lock(task); 186 if (task->fs) { 187 get_fs_pwd(task->fs, path); 188 result = 0; 189 } 190 task_unlock(task); 191 put_task_struct(task); 192 } 193 return result; 194 } 195 196 static int proc_root_link(struct dentry *dentry, struct path *path) 197 { 198 struct task_struct *task = get_proc_task(d_inode(dentry)); 199 int result = -ENOENT; 200 201 if (task) { 202 result = get_task_root(task, path); 203 put_task_struct(task); 204 } 205 return result; 206 } 207 208 static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, 209 size_t _count, loff_t *pos) 210 { 211 char *page; 212 unsigned long count = _count; 213 unsigned long arg_start, arg_end, env_start, env_end; 214 unsigned long len1, len2, len; 215 unsigned long p; 216 char c; 217 ssize_t rv; 218 219 /* Check if process spawned far enough to have cmdline. 
*/ 220 if (!mm->env_end) 221 return 0; 222 223 page = (char *)__get_free_page(GFP_KERNEL); 224 if (!page) 225 return -ENOMEM; 226 227 down_read(&mm->mmap_sem); 228 arg_start = mm->arg_start; 229 arg_end = mm->arg_end; 230 env_start = mm->env_start; 231 env_end = mm->env_end; 232 up_read(&mm->mmap_sem); 233 234 BUG_ON(arg_start > arg_end); 235 BUG_ON(env_start > env_end); 236 237 len1 = arg_end - arg_start; 238 len2 = env_end - env_start; 239 240 /* Empty ARGV. */ 241 if (len1 == 0) { 242 rv = 0; 243 goto out_free_page; 244 } 245 /* 246 * Inherently racy -- command line shares address space 247 * with code and data. 248 */ 249 rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON); 250 if (rv <= 0) 251 goto out_free_page; 252 253 rv = 0; 254 255 if (c == '\0') { 256 /* Command line (set of strings) occupies whole ARGV. */ 257 if (len1 <= *pos) 258 goto out_free_page; 259 260 p = arg_start + *pos; 261 len = len1 - *pos; 262 while (count > 0 && len > 0) { 263 unsigned int _count; 264 int nr_read; 265 266 _count = min3(count, len, PAGE_SIZE); 267 nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); 268 if (nr_read < 0) 269 rv = nr_read; 270 if (nr_read <= 0) 271 goto out_free_page; 272 273 if (copy_to_user(buf, page, nr_read)) { 274 rv = -EFAULT; 275 goto out_free_page; 276 } 277 278 p += nr_read; 279 len -= nr_read; 280 buf += nr_read; 281 count -= nr_read; 282 rv += nr_read; 283 } 284 } else { 285 /* 286 * Command line (1 string) occupies ARGV and 287 * extends into ENVP. 
288 */ 289 struct { 290 unsigned long p; 291 unsigned long len; 292 } cmdline[2] = { 293 { .p = arg_start, .len = len1 }, 294 { .p = env_start, .len = len2 }, 295 }; 296 loff_t pos1 = *pos; 297 unsigned int i; 298 299 i = 0; 300 while (i < 2 && pos1 >= cmdline[i].len) { 301 pos1 -= cmdline[i].len; 302 i++; 303 } 304 while (i < 2) { 305 p = cmdline[i].p + pos1; 306 len = cmdline[i].len - pos1; 307 while (count > 0 && len > 0) { 308 unsigned int _count, l; 309 int nr_read; 310 bool final; 311 312 _count = min3(count, len, PAGE_SIZE); 313 nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); 314 if (nr_read < 0) 315 rv = nr_read; 316 if (nr_read <= 0) 317 goto out_free_page; 318 319 /* 320 * Command line can be shorter than whole ARGV 321 * even if last "marker" byte says it is not. 322 */ 323 final = false; 324 l = strnlen(page, nr_read); 325 if (l < nr_read) { 326 nr_read = l; 327 final = true; 328 } 329 330 if (copy_to_user(buf, page, nr_read)) { 331 rv = -EFAULT; 332 goto out_free_page; 333 } 334 335 p += nr_read; 336 len -= nr_read; 337 buf += nr_read; 338 count -= nr_read; 339 rv += nr_read; 340 341 if (final) 342 goto out_free_page; 343 } 344 345 /* Only first chunk can be read partially. 
*/ 346 pos1 = 0; 347 i++; 348 } 349 } 350 351 out_free_page: 352 free_page((unsigned long)page); 353 return rv; 354 } 355 356 static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf, 357 size_t count, loff_t *pos) 358 { 359 struct mm_struct *mm; 360 ssize_t ret; 361 362 mm = get_task_mm(tsk); 363 if (!mm) 364 return 0; 365 366 ret = get_mm_cmdline(mm, buf, count, pos); 367 mmput(mm); 368 return ret; 369 } 370 371 static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, 372 size_t count, loff_t *pos) 373 { 374 struct task_struct *tsk; 375 ssize_t ret; 376 377 BUG_ON(*pos < 0); 378 379 tsk = get_proc_task(file_inode(file)); 380 if (!tsk) 381 return -ESRCH; 382 ret = get_task_cmdline(tsk, buf, count, pos); 383 put_task_struct(tsk); 384 if (ret > 0) 385 *pos += ret; 386 return ret; 387 } 388 389 static const struct file_operations proc_pid_cmdline_ops = { 390 .read = proc_pid_cmdline_read, 391 .llseek = generic_file_llseek, 392 }; 393 394 #ifdef CONFIG_KALLSYMS 395 /* 396 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 397 * Returns the resolved symbol. If that fails, simply return the address. 
398 */ 399 static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, 400 struct pid *pid, struct task_struct *task) 401 { 402 unsigned long wchan; 403 char symname[KSYM_NAME_LEN]; 404 405 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) 406 goto print0; 407 408 wchan = get_wchan(task); 409 if (wchan && !lookup_symbol_name(wchan, symname)) { 410 seq_puts(m, symname); 411 return 0; 412 } 413 414 print0: 415 seq_putc(m, '0'); 416 return 0; 417 } 418 #endif /* CONFIG_KALLSYMS */ 419 420 static int lock_trace(struct task_struct *task) 421 { 422 int err = mutex_lock_killable(&task->signal->cred_guard_mutex); 423 if (err) 424 return err; 425 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { 426 mutex_unlock(&task->signal->cred_guard_mutex); 427 return -EPERM; 428 } 429 return 0; 430 } 431 432 static void unlock_trace(struct task_struct *task) 433 { 434 mutex_unlock(&task->signal->cred_guard_mutex); 435 } 436 437 #ifdef CONFIG_STACKTRACE 438 439 #define MAX_STACK_TRACE_DEPTH 64 440 441 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, 442 struct pid *pid, struct task_struct *task) 443 { 444 struct stack_trace trace; 445 unsigned long *entries; 446 int err; 447 int i; 448 449 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 450 if (!entries) 451 return -ENOMEM; 452 453 trace.nr_entries = 0; 454 trace.max_entries = MAX_STACK_TRACE_DEPTH; 455 trace.entries = entries; 456 trace.skip = 0; 457 458 err = lock_trace(task); 459 if (!err) { 460 save_stack_trace_tsk(task, &trace); 461 462 for (i = 0; i < trace.nr_entries; i++) { 463 seq_printf(m, "[<0>] %pB\n", (void *)entries[i]); 464 } 465 unlock_trace(task); 466 } 467 kfree(entries); 468 469 return err; 470 } 471 #endif 472 473 #ifdef CONFIG_SCHED_INFO 474 /* 475 * Provides /proc/PID/schedstat 476 */ 477 static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, 478 struct pid *pid, struct task_struct *task) 479 { 480 if 
(unlikely(!sched_info_on())) 481 seq_printf(m, "0 0 0\n"); 482 else 483 seq_printf(m, "%llu %llu %lu\n", 484 (unsigned long long)task->se.sum_exec_runtime, 485 (unsigned long long)task->sched_info.run_delay, 486 task->sched_info.pcount); 487 488 return 0; 489 } 490 #endif 491 492 #ifdef CONFIG_LATENCYTOP 493 static int lstats_show_proc(struct seq_file *m, void *v) 494 { 495 int i; 496 struct inode *inode = m->private; 497 struct task_struct *task = get_proc_task(inode); 498 499 if (!task) 500 return -ESRCH; 501 seq_puts(m, "Latency Top version : v0.1\n"); 502 for (i = 0; i < 32; i++) { 503 struct latency_record *lr = &task->latency_record[i]; 504 if (lr->backtrace[0]) { 505 int q; 506 seq_printf(m, "%i %li %li", 507 lr->count, lr->time, lr->max); 508 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 509 unsigned long bt = lr->backtrace[q]; 510 if (!bt) 511 break; 512 if (bt == ULONG_MAX) 513 break; 514 seq_printf(m, " %ps", (void *)bt); 515 } 516 seq_putc(m, '\n'); 517 } 518 519 } 520 put_task_struct(task); 521 return 0; 522 } 523 524 static int lstats_open(struct inode *inode, struct file *file) 525 { 526 return single_open(file, lstats_show_proc, inode); 527 } 528 529 static ssize_t lstats_write(struct file *file, const char __user *buf, 530 size_t count, loff_t *offs) 531 { 532 struct task_struct *task = get_proc_task(file_inode(file)); 533 534 if (!task) 535 return -ESRCH; 536 clear_all_latency_tracing(task); 537 put_task_struct(task); 538 539 return count; 540 } 541 542 static const struct file_operations proc_lstats_operations = { 543 .open = lstats_open, 544 .read = seq_read, 545 .write = lstats_write, 546 .llseek = seq_lseek, 547 .release = single_release, 548 }; 549 550 #endif 551 552 static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, 553 struct pid *pid, struct task_struct *task) 554 { 555 unsigned long totalpages = totalram_pages + total_swap_pages; 556 unsigned long points = 0; 557 558 points = oom_badness(task, NULL, NULL, totalpages) * 
559 1000 / totalpages; 560 seq_printf(m, "%lu\n", points); 561 562 return 0; 563 } 564 565 struct limit_names { 566 const char *name; 567 const char *unit; 568 }; 569 570 static const struct limit_names lnames[RLIM_NLIMITS] = { 571 [RLIMIT_CPU] = {"Max cpu time", "seconds"}, 572 [RLIMIT_FSIZE] = {"Max file size", "bytes"}, 573 [RLIMIT_DATA] = {"Max data size", "bytes"}, 574 [RLIMIT_STACK] = {"Max stack size", "bytes"}, 575 [RLIMIT_CORE] = {"Max core file size", "bytes"}, 576 [RLIMIT_RSS] = {"Max resident set", "bytes"}, 577 [RLIMIT_NPROC] = {"Max processes", "processes"}, 578 [RLIMIT_NOFILE] = {"Max open files", "files"}, 579 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, 580 [RLIMIT_AS] = {"Max address space", "bytes"}, 581 [RLIMIT_LOCKS] = {"Max file locks", "locks"}, 582 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, 583 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, 584 [RLIMIT_NICE] = {"Max nice priority", NULL}, 585 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, 586 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, 587 }; 588 589 /* Display limits for a process */ 590 static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, 591 struct pid *pid, struct task_struct *task) 592 { 593 unsigned int i; 594 unsigned long flags; 595 596 struct rlimit rlim[RLIM_NLIMITS]; 597 598 if (!lock_task_sighand(task, &flags)) 599 return 0; 600 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); 601 unlock_task_sighand(task, &flags); 602 603 /* 604 * print the file header 605 */ 606 seq_printf(m, "%-25s %-20s %-20s %-10s\n", 607 "Limit", "Soft Limit", "Hard Limit", "Units"); 608 609 for (i = 0; i < RLIM_NLIMITS; i++) { 610 if (rlim[i].rlim_cur == RLIM_INFINITY) 611 seq_printf(m, "%-25s %-20s ", 612 lnames[i].name, "unlimited"); 613 else 614 seq_printf(m, "%-25s %-20lu ", 615 lnames[i].name, rlim[i].rlim_cur); 616 617 if (rlim[i].rlim_max == RLIM_INFINITY) 618 seq_printf(m, "%-20s ", "unlimited"); 619 else 620 
seq_printf(m, "%-20lu ", rlim[i].rlim_max); 621 622 if (lnames[i].unit) 623 seq_printf(m, "%-10s\n", lnames[i].unit); 624 else 625 seq_putc(m, '\n'); 626 } 627 628 return 0; 629 } 630 631 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 632 static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, 633 struct pid *pid, struct task_struct *task) 634 { 635 long nr; 636 unsigned long args[6], sp, pc; 637 int res; 638 639 res = lock_trace(task); 640 if (res) 641 return res; 642 643 if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) 644 seq_puts(m, "running\n"); 645 else if (nr < 0) 646 seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc); 647 else 648 seq_printf(m, 649 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 650 nr, 651 args[0], args[1], args[2], args[3], args[4], args[5], 652 sp, pc); 653 unlock_trace(task); 654 655 return 0; 656 } 657 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 658 659 /************************************************************************/ 660 /* Here the fs part begins */ 661 /************************************************************************/ 662 663 /* permission checks */ 664 static int proc_fd_access_allowed(struct inode *inode) 665 { 666 struct task_struct *task; 667 int allowed = 0; 668 /* Allow access to a task's file descriptors if it is us or we 669 * may use ptrace attach to the process and find out that 670 * information. 
671 */ 672 task = get_proc_task(inode); 673 if (task) { 674 allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); 675 put_task_struct(task); 676 } 677 return allowed; 678 } 679 680 int proc_setattr(struct dentry *dentry, struct iattr *attr) 681 { 682 int error; 683 struct inode *inode = d_inode(dentry); 684 685 if (attr->ia_valid & ATTR_MODE) 686 return -EPERM; 687 688 error = setattr_prepare(dentry, attr); 689 if (error) 690 return error; 691 692 setattr_copy(inode, attr); 693 mark_inode_dirty(inode); 694 return 0; 695 } 696 697 /* 698 * May current process learn task's sched/cmdline info (for hide_pid_min=1) 699 * or euid/egid (for hide_pid_min=2)? 700 */ 701 static bool has_pid_permissions(struct pid_namespace *pid, 702 struct task_struct *task, 703 int hide_pid_min) 704 { 705 if (pid->hide_pid < hide_pid_min) 706 return true; 707 if (in_group_p(pid->pid_gid)) 708 return true; 709 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); 710 } 711 712 713 static int proc_pid_permission(struct inode *inode, int mask) 714 { 715 struct pid_namespace *pid = inode->i_sb->s_fs_info; 716 struct task_struct *task; 717 bool has_perms; 718 719 task = get_proc_task(inode); 720 if (!task) 721 return -ESRCH; 722 has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS); 723 put_task_struct(task); 724 725 if (!has_perms) { 726 if (pid->hide_pid == HIDEPID_INVISIBLE) { 727 /* 728 * Let's make getdents(), stat(), and open() 729 * consistent with each other. If a process 730 * may not stat() a file, it shouldn't be seen 731 * in procfs at all. 
732 */ 733 return -ENOENT; 734 } 735 736 return -EPERM; 737 } 738 return generic_permission(inode, mask); 739 } 740 741 742 743 static const struct inode_operations proc_def_inode_operations = { 744 .setattr = proc_setattr, 745 }; 746 747 static int proc_single_show(struct seq_file *m, void *v) 748 { 749 struct inode *inode = m->private; 750 struct pid_namespace *ns; 751 struct pid *pid; 752 struct task_struct *task; 753 int ret; 754 755 ns = inode->i_sb->s_fs_info; 756 pid = proc_pid(inode); 757 task = get_pid_task(pid, PIDTYPE_PID); 758 if (!task) 759 return -ESRCH; 760 761 ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); 762 763 put_task_struct(task); 764 return ret; 765 } 766 767 static int proc_single_open(struct inode *inode, struct file *filp) 768 { 769 return single_open(filp, proc_single_show, inode); 770 } 771 772 static const struct file_operations proc_single_file_operations = { 773 .open = proc_single_open, 774 .read = seq_read, 775 .llseek = seq_lseek, 776 .release = single_release, 777 }; 778 779 780 struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) 781 { 782 struct task_struct *task = get_proc_task(inode); 783 struct mm_struct *mm = ERR_PTR(-ESRCH); 784 785 if (task) { 786 mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); 787 put_task_struct(task); 788 789 if (!IS_ERR_OR_NULL(mm)) { 790 /* ensure this mm_struct can't be freed */ 791 mmgrab(mm); 792 /* but do not pin its memory */ 793 mmput(mm); 794 } 795 } 796 797 return mm; 798 } 799 800 static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) 801 { 802 struct mm_struct *mm = proc_mem_open(inode, mode); 803 804 if (IS_ERR(mm)) 805 return PTR_ERR(mm); 806 807 file->private_data = mm; 808 return 0; 809 } 810 811 static int mem_open(struct inode *inode, struct file *file) 812 { 813 int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); 814 815 /* OK to pass negative loff_t, we can catch out-of-range */ 816 file->f_mode |= FMODE_UNSIGNED_OFFSET; 817 818 
return ret; 819 } 820 821 static ssize_t mem_rw(struct file *file, char __user *buf, 822 size_t count, loff_t *ppos, int write) 823 { 824 struct mm_struct *mm = file->private_data; 825 unsigned long addr = *ppos; 826 ssize_t copied; 827 char *page; 828 unsigned int flags; 829 830 if (!mm) 831 return 0; 832 833 page = (char *)__get_free_page(GFP_KERNEL); 834 if (!page) 835 return -ENOMEM; 836 837 copied = 0; 838 if (!mmget_not_zero(mm)) 839 goto free; 840 841 flags = FOLL_FORCE | (write ? FOLL_WRITE : 0); 842 843 while (count > 0) { 844 int this_len = min_t(int, count, PAGE_SIZE); 845 846 if (write && copy_from_user(page, buf, this_len)) { 847 copied = -EFAULT; 848 break; 849 } 850 851 this_len = access_remote_vm(mm, addr, page, this_len, flags); 852 if (!this_len) { 853 if (!copied) 854 copied = -EIO; 855 break; 856 } 857 858 if (!write && copy_to_user(buf, page, this_len)) { 859 copied = -EFAULT; 860 break; 861 } 862 863 buf += this_len; 864 addr += this_len; 865 copied += this_len; 866 count -= this_len; 867 } 868 *ppos = addr; 869 870 mmput(mm); 871 free: 872 free_page((unsigned long) page); 873 return copied; 874 } 875 876 static ssize_t mem_read(struct file *file, char __user *buf, 877 size_t count, loff_t *ppos) 878 { 879 return mem_rw(file, buf, count, ppos, 0); 880 } 881 882 static ssize_t mem_write(struct file *file, const char __user *buf, 883 size_t count, loff_t *ppos) 884 { 885 return mem_rw(file, (char __user*)buf, count, ppos, 1); 886 } 887 888 loff_t mem_lseek(struct file *file, loff_t offset, int orig) 889 { 890 switch (orig) { 891 case 0: 892 file->f_pos = offset; 893 break; 894 case 1: 895 file->f_pos += offset; 896 break; 897 default: 898 return -EINVAL; 899 } 900 force_successful_syscall_return(); 901 return file->f_pos; 902 } 903 904 static int mem_release(struct inode *inode, struct file *file) 905 { 906 struct mm_struct *mm = file->private_data; 907 if (mm) 908 mmdrop(mm); 909 return 0; 910 } 911 912 static const struct file_operations 
proc_mem_operations = { 913 .llseek = mem_lseek, 914 .read = mem_read, 915 .write = mem_write, 916 .open = mem_open, 917 .release = mem_release, 918 }; 919 920 static int environ_open(struct inode *inode, struct file *file) 921 { 922 return __mem_open(inode, file, PTRACE_MODE_READ); 923 } 924 925 static ssize_t environ_read(struct file *file, char __user *buf, 926 size_t count, loff_t *ppos) 927 { 928 char *page; 929 unsigned long src = *ppos; 930 int ret = 0; 931 struct mm_struct *mm = file->private_data; 932 unsigned long env_start, env_end; 933 934 /* Ensure the process spawned far enough to have an environment. */ 935 if (!mm || !mm->env_end) 936 return 0; 937 938 page = (char *)__get_free_page(GFP_KERNEL); 939 if (!page) 940 return -ENOMEM; 941 942 ret = 0; 943 if (!mmget_not_zero(mm)) 944 goto free; 945 946 down_read(&mm->mmap_sem); 947 env_start = mm->env_start; 948 env_end = mm->env_end; 949 up_read(&mm->mmap_sem); 950 951 while (count > 0) { 952 size_t this_len, max_len; 953 int retval; 954 955 if (src >= (env_end - env_start)) 956 break; 957 958 this_len = env_end - (env_start + src); 959 960 max_len = min_t(size_t, PAGE_SIZE, count); 961 this_len = min(max_len, this_len); 962 963 retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); 964 965 if (retval <= 0) { 966 ret = retval; 967 break; 968 } 969 970 if (copy_to_user(buf, page, retval)) { 971 ret = -EFAULT; 972 break; 973 } 974 975 ret += retval; 976 src += retval; 977 buf += retval; 978 count -= retval; 979 } 980 *ppos = src; 981 mmput(mm); 982 983 free: 984 free_page((unsigned long) page); 985 return ret; 986 } 987 988 static const struct file_operations proc_environ_operations = { 989 .open = environ_open, 990 .read = environ_read, 991 .llseek = generic_file_llseek, 992 .release = mem_release, 993 }; 994 995 static int auxv_open(struct inode *inode, struct file *file) 996 { 997 return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); 998 } 999 1000 static ssize_t 
auxv_read(struct file *file, char __user *buf, 1001 size_t count, loff_t *ppos) 1002 { 1003 struct mm_struct *mm = file->private_data; 1004 unsigned int nwords = 0; 1005 1006 if (!mm) 1007 return 0; 1008 do { 1009 nwords += 2; 1010 } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 1011 return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv, 1012 nwords * sizeof(mm->saved_auxv[0])); 1013 } 1014 1015 static const struct file_operations proc_auxv_operations = { 1016 .open = auxv_open, 1017 .read = auxv_read, 1018 .llseek = generic_file_llseek, 1019 .release = mem_release, 1020 }; 1021 1022 static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, 1023 loff_t *ppos) 1024 { 1025 struct task_struct *task = get_proc_task(file_inode(file)); 1026 char buffer[PROC_NUMBUF]; 1027 int oom_adj = OOM_ADJUST_MIN; 1028 size_t len; 1029 1030 if (!task) 1031 return -ESRCH; 1032 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) 1033 oom_adj = OOM_ADJUST_MAX; 1034 else 1035 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / 1036 OOM_SCORE_ADJ_MAX; 1037 put_task_struct(task); 1038 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); 1039 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1040 } 1041 1042 static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) 1043 { 1044 static DEFINE_MUTEX(oom_adj_mutex); 1045 struct mm_struct *mm = NULL; 1046 struct task_struct *task; 1047 int err = 0; 1048 1049 task = get_proc_task(file_inode(file)); 1050 if (!task) 1051 return -ESRCH; 1052 1053 mutex_lock(&oom_adj_mutex); 1054 if (legacy) { 1055 if (oom_adj < task->signal->oom_score_adj && 1056 !capable(CAP_SYS_RESOURCE)) { 1057 err = -EACCES; 1058 goto err_unlock; 1059 } 1060 /* 1061 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use 1062 * /proc/pid/oom_score_adj instead. 
1063 */ 1064 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 1065 current->comm, task_pid_nr(current), task_pid_nr(task), 1066 task_pid_nr(task)); 1067 } else { 1068 if ((short)oom_adj < task->signal->oom_score_adj_min && 1069 !capable(CAP_SYS_RESOURCE)) { 1070 err = -EACCES; 1071 goto err_unlock; 1072 } 1073 } 1074 1075 /* 1076 * Make sure we will check other processes sharing the mm if this is 1077 * not vfrok which wants its own oom_score_adj. 1078 * pin the mm so it doesn't go away and get reused after task_unlock 1079 */ 1080 if (!task->vfork_done) { 1081 struct task_struct *p = find_lock_task_mm(task); 1082 1083 if (p) { 1084 if (atomic_read(&p->mm->mm_users) > 1) { 1085 mm = p->mm; 1086 mmgrab(mm); 1087 } 1088 task_unlock(p); 1089 } 1090 } 1091 1092 task->signal->oom_score_adj = oom_adj; 1093 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1094 task->signal->oom_score_adj_min = (short)oom_adj; 1095 trace_oom_score_adj_update(task); 1096 1097 if (mm) { 1098 struct task_struct *p; 1099 1100 rcu_read_lock(); 1101 for_each_process(p) { 1102 if (same_thread_group(task, p)) 1103 continue; 1104 1105 /* do not touch kernel threads or the global init */ 1106 if (p->flags & PF_KTHREAD || is_global_init(p)) 1107 continue; 1108 1109 task_lock(p); 1110 if (!p->vfork_done && process_shares_mm(p, mm)) { 1111 pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). 
Report if this is unexpected.\n", 1112 task_pid_nr(p), p->comm, 1113 p->signal->oom_score_adj, oom_adj, 1114 task_pid_nr(task), task->comm); 1115 p->signal->oom_score_adj = oom_adj; 1116 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1117 p->signal->oom_score_adj_min = (short)oom_adj; 1118 } 1119 task_unlock(p); 1120 } 1121 rcu_read_unlock(); 1122 mmdrop(mm); 1123 } 1124 err_unlock: 1125 mutex_unlock(&oom_adj_mutex); 1126 put_task_struct(task); 1127 return err; 1128 } 1129 1130 /* 1131 * /proc/pid/oom_adj exists solely for backwards compatibility with previous 1132 * kernels. The effective policy is defined by oom_score_adj, which has a 1133 * different scale: oom_adj grew exponentially and oom_score_adj grows linearly. 1134 * Values written to oom_adj are simply mapped linearly to oom_score_adj. 1135 * Processes that become oom disabled via oom_adj will still be oom disabled 1136 * with this implementation. 1137 * 1138 * oom_adj cannot be removed since existing userspace binaries use it. 1139 */ 1140 static ssize_t oom_adj_write(struct file *file, const char __user *buf, 1141 size_t count, loff_t *ppos) 1142 { 1143 char buffer[PROC_NUMBUF]; 1144 int oom_adj; 1145 int err; 1146 1147 memset(buffer, 0, sizeof(buffer)); 1148 if (count > sizeof(buffer) - 1) 1149 count = sizeof(buffer) - 1; 1150 if (copy_from_user(buffer, buf, count)) { 1151 err = -EFAULT; 1152 goto out; 1153 } 1154 1155 err = kstrtoint(strstrip(buffer), 0, &oom_adj); 1156 if (err) 1157 goto out; 1158 if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && 1159 oom_adj != OOM_DISABLE) { 1160 err = -EINVAL; 1161 goto out; 1162 } 1163 1164 /* 1165 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum 1166 * value is always attainable. 1167 */ 1168 if (oom_adj == OOM_ADJUST_MAX) 1169 oom_adj = OOM_SCORE_ADJ_MAX; 1170 else 1171 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; 1172 1173 err = __set_oom_adj(file, oom_adj, true); 1174 out: 1175 return err < 0 ? 
err : count; 1176 } 1177 1178 static const struct file_operations proc_oom_adj_operations = { 1179 .read = oom_adj_read, 1180 .write = oom_adj_write, 1181 .llseek = generic_file_llseek, 1182 }; 1183 1184 static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 1185 size_t count, loff_t *ppos) 1186 { 1187 struct task_struct *task = get_proc_task(file_inode(file)); 1188 char buffer[PROC_NUMBUF]; 1189 short oom_score_adj = OOM_SCORE_ADJ_MIN; 1190 size_t len; 1191 1192 if (!task) 1193 return -ESRCH; 1194 oom_score_adj = task->signal->oom_score_adj; 1195 put_task_struct(task); 1196 len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); 1197 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1198 } 1199 1200 static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, 1201 size_t count, loff_t *ppos) 1202 { 1203 char buffer[PROC_NUMBUF]; 1204 int oom_score_adj; 1205 int err; 1206 1207 memset(buffer, 0, sizeof(buffer)); 1208 if (count > sizeof(buffer) - 1) 1209 count = sizeof(buffer) - 1; 1210 if (copy_from_user(buffer, buf, count)) { 1211 err = -EFAULT; 1212 goto out; 1213 } 1214 1215 err = kstrtoint(strstrip(buffer), 0, &oom_score_adj); 1216 if (err) 1217 goto out; 1218 if (oom_score_adj < OOM_SCORE_ADJ_MIN || 1219 oom_score_adj > OOM_SCORE_ADJ_MAX) { 1220 err = -EINVAL; 1221 goto out; 1222 } 1223 1224 err = __set_oom_adj(file, oom_score_adj, false); 1225 out: 1226 return err < 0 ? 
err : count; 1227 } 1228 1229 static const struct file_operations proc_oom_score_adj_operations = { 1230 .read = oom_score_adj_read, 1231 .write = oom_score_adj_write, 1232 .llseek = default_llseek, 1233 }; 1234 1235 #ifdef CONFIG_AUDITSYSCALL 1236 #define TMPBUFLEN 11 1237 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 1238 size_t count, loff_t *ppos) 1239 { 1240 struct inode * inode = file_inode(file); 1241 struct task_struct *task = get_proc_task(inode); 1242 ssize_t length; 1243 char tmpbuf[TMPBUFLEN]; 1244 1245 if (!task) 1246 return -ESRCH; 1247 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1248 from_kuid(file->f_cred->user_ns, 1249 audit_get_loginuid(task))); 1250 put_task_struct(task); 1251 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1252 } 1253 1254 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 1255 size_t count, loff_t *ppos) 1256 { 1257 struct inode * inode = file_inode(file); 1258 uid_t loginuid; 1259 kuid_t kloginuid; 1260 int rv; 1261 1262 rcu_read_lock(); 1263 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { 1264 rcu_read_unlock(); 1265 return -EPERM; 1266 } 1267 rcu_read_unlock(); 1268 1269 if (*ppos != 0) { 1270 /* No partial writes. */ 1271 return -EINVAL; 1272 } 1273 1274 rv = kstrtou32_from_user(buf, count, 10, &loginuid); 1275 if (rv < 0) 1276 return rv; 1277 1278 /* is userspace tring to explicitly UNSET the loginuid? 
*/ 1279 if (loginuid == AUDIT_UID_UNSET) { 1280 kloginuid = INVALID_UID; 1281 } else { 1282 kloginuid = make_kuid(file->f_cred->user_ns, loginuid); 1283 if (!uid_valid(kloginuid)) 1284 return -EINVAL; 1285 } 1286 1287 rv = audit_set_loginuid(kloginuid); 1288 if (rv < 0) 1289 return rv; 1290 return count; 1291 } 1292 1293 static const struct file_operations proc_loginuid_operations = { 1294 .read = proc_loginuid_read, 1295 .write = proc_loginuid_write, 1296 .llseek = generic_file_llseek, 1297 }; 1298 1299 static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1300 size_t count, loff_t *ppos) 1301 { 1302 struct inode * inode = file_inode(file); 1303 struct task_struct *task = get_proc_task(inode); 1304 ssize_t length; 1305 char tmpbuf[TMPBUFLEN]; 1306 1307 if (!task) 1308 return -ESRCH; 1309 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1310 audit_get_sessionid(task)); 1311 put_task_struct(task); 1312 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1313 } 1314 1315 static const struct file_operations proc_sessionid_operations = { 1316 .read = proc_sessionid_read, 1317 .llseek = generic_file_llseek, 1318 }; 1319 #endif 1320 1321 #ifdef CONFIG_FAULT_INJECTION 1322 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, 1323 size_t count, loff_t *ppos) 1324 { 1325 struct task_struct *task = get_proc_task(file_inode(file)); 1326 char buffer[PROC_NUMBUF]; 1327 size_t len; 1328 int make_it_fail; 1329 1330 if (!task) 1331 return -ESRCH; 1332 make_it_fail = task->make_it_fail; 1333 put_task_struct(task); 1334 1335 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); 1336 1337 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1338 } 1339 1340 static ssize_t proc_fault_inject_write(struct file * file, 1341 const char __user * buf, size_t count, loff_t *ppos) 1342 { 1343 struct task_struct *task; 1344 char buffer[PROC_NUMBUF]; 1345 int make_it_fail; 1346 int rv; 1347 1348 if 
(!capable(CAP_SYS_RESOURCE)) 1349 return -EPERM; 1350 memset(buffer, 0, sizeof(buffer)); 1351 if (count > sizeof(buffer) - 1) 1352 count = sizeof(buffer) - 1; 1353 if (copy_from_user(buffer, buf, count)) 1354 return -EFAULT; 1355 rv = kstrtoint(strstrip(buffer), 0, &make_it_fail); 1356 if (rv < 0) 1357 return rv; 1358 if (make_it_fail < 0 || make_it_fail > 1) 1359 return -EINVAL; 1360 1361 task = get_proc_task(file_inode(file)); 1362 if (!task) 1363 return -ESRCH; 1364 task->make_it_fail = make_it_fail; 1365 put_task_struct(task); 1366 1367 return count; 1368 } 1369 1370 static const struct file_operations proc_fault_inject_operations = { 1371 .read = proc_fault_inject_read, 1372 .write = proc_fault_inject_write, 1373 .llseek = generic_file_llseek, 1374 }; 1375 1376 static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf, 1377 size_t count, loff_t *ppos) 1378 { 1379 struct task_struct *task; 1380 int err; 1381 unsigned int n; 1382 1383 err = kstrtouint_from_user(buf, count, 0, &n); 1384 if (err) 1385 return err; 1386 1387 task = get_proc_task(file_inode(file)); 1388 if (!task) 1389 return -ESRCH; 1390 task->fail_nth = n; 1391 put_task_struct(task); 1392 1393 return count; 1394 } 1395 1396 static ssize_t proc_fail_nth_read(struct file *file, char __user *buf, 1397 size_t count, loff_t *ppos) 1398 { 1399 struct task_struct *task; 1400 char numbuf[PROC_NUMBUF]; 1401 ssize_t len; 1402 1403 task = get_proc_task(file_inode(file)); 1404 if (!task) 1405 return -ESRCH; 1406 len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); 1407 len = simple_read_from_buffer(buf, count, ppos, numbuf, len); 1408 put_task_struct(task); 1409 1410 return len; 1411 } 1412 1413 static const struct file_operations proc_fail_nth_operations = { 1414 .read = proc_fail_nth_read, 1415 .write = proc_fail_nth_write, 1416 }; 1417 #endif 1418 1419 1420 #ifdef CONFIG_SCHED_DEBUG 1421 /* 1422 * Print out various scheduling related per-task fields: 1423 */ 1424 static int 
sched_show(struct seq_file *m, void *v) 1425 { 1426 struct inode *inode = m->private; 1427 struct pid_namespace *ns = inode->i_sb->s_fs_info; 1428 struct task_struct *p; 1429 1430 p = get_proc_task(inode); 1431 if (!p) 1432 return -ESRCH; 1433 proc_sched_show_task(p, ns, m); 1434 1435 put_task_struct(p); 1436 1437 return 0; 1438 } 1439 1440 static ssize_t 1441 sched_write(struct file *file, const char __user *buf, 1442 size_t count, loff_t *offset) 1443 { 1444 struct inode *inode = file_inode(file); 1445 struct task_struct *p; 1446 1447 p = get_proc_task(inode); 1448 if (!p) 1449 return -ESRCH; 1450 proc_sched_set_task(p); 1451 1452 put_task_struct(p); 1453 1454 return count; 1455 } 1456 1457 static int sched_open(struct inode *inode, struct file *filp) 1458 { 1459 return single_open(filp, sched_show, inode); 1460 } 1461 1462 static const struct file_operations proc_pid_sched_operations = { 1463 .open = sched_open, 1464 .read = seq_read, 1465 .write = sched_write, 1466 .llseek = seq_lseek, 1467 .release = single_release, 1468 }; 1469 1470 #endif 1471 1472 #ifdef CONFIG_SCHED_AUTOGROUP 1473 /* 1474 * Print out autogroup related information: 1475 */ 1476 static int sched_autogroup_show(struct seq_file *m, void *v) 1477 { 1478 struct inode *inode = m->private; 1479 struct task_struct *p; 1480 1481 p = get_proc_task(inode); 1482 if (!p) 1483 return -ESRCH; 1484 proc_sched_autogroup_show_task(p, m); 1485 1486 put_task_struct(p); 1487 1488 return 0; 1489 } 1490 1491 static ssize_t 1492 sched_autogroup_write(struct file *file, const char __user *buf, 1493 size_t count, loff_t *offset) 1494 { 1495 struct inode *inode = file_inode(file); 1496 struct task_struct *p; 1497 char buffer[PROC_NUMBUF]; 1498 int nice; 1499 int err; 1500 1501 memset(buffer, 0, sizeof(buffer)); 1502 if (count > sizeof(buffer) - 1) 1503 count = sizeof(buffer) - 1; 1504 if (copy_from_user(buffer, buf, count)) 1505 return -EFAULT; 1506 1507 err = kstrtoint(strstrip(buffer), 0, &nice); 1508 if (err < 0) 
1509 return err; 1510 1511 p = get_proc_task(inode); 1512 if (!p) 1513 return -ESRCH; 1514 1515 err = proc_sched_autogroup_set_nice(p, nice); 1516 if (err) 1517 count = err; 1518 1519 put_task_struct(p); 1520 1521 return count; 1522 } 1523 1524 static int sched_autogroup_open(struct inode *inode, struct file *filp) 1525 { 1526 int ret; 1527 1528 ret = single_open(filp, sched_autogroup_show, NULL); 1529 if (!ret) { 1530 struct seq_file *m = filp->private_data; 1531 1532 m->private = inode; 1533 } 1534 return ret; 1535 } 1536 1537 static const struct file_operations proc_pid_sched_autogroup_operations = { 1538 .open = sched_autogroup_open, 1539 .read = seq_read, 1540 .write = sched_autogroup_write, 1541 .llseek = seq_lseek, 1542 .release = single_release, 1543 }; 1544 1545 #endif /* CONFIG_SCHED_AUTOGROUP */ 1546 1547 static ssize_t comm_write(struct file *file, const char __user *buf, 1548 size_t count, loff_t *offset) 1549 { 1550 struct inode *inode = file_inode(file); 1551 struct task_struct *p; 1552 char buffer[TASK_COMM_LEN]; 1553 const size_t maxlen = sizeof(buffer) - 1; 1554 1555 memset(buffer, 0, sizeof(buffer)); 1556 if (copy_from_user(buffer, buf, count > maxlen ? 
maxlen : count)) 1557 return -EFAULT; 1558 1559 p = get_proc_task(inode); 1560 if (!p) 1561 return -ESRCH; 1562 1563 if (same_thread_group(current, p)) 1564 set_task_comm(p, buffer); 1565 else 1566 count = -EINVAL; 1567 1568 put_task_struct(p); 1569 1570 return count; 1571 } 1572 1573 static int comm_show(struct seq_file *m, void *v) 1574 { 1575 struct inode *inode = m->private; 1576 struct task_struct *p; 1577 1578 p = get_proc_task(inode); 1579 if (!p) 1580 return -ESRCH; 1581 1582 task_lock(p); 1583 seq_printf(m, "%s\n", p->comm); 1584 task_unlock(p); 1585 1586 put_task_struct(p); 1587 1588 return 0; 1589 } 1590 1591 static int comm_open(struct inode *inode, struct file *filp) 1592 { 1593 return single_open(filp, comm_show, inode); 1594 } 1595 1596 static const struct file_operations proc_pid_set_comm_operations = { 1597 .open = comm_open, 1598 .read = seq_read, 1599 .write = comm_write, 1600 .llseek = seq_lseek, 1601 .release = single_release, 1602 }; 1603 1604 static int proc_exe_link(struct dentry *dentry, struct path *exe_path) 1605 { 1606 struct task_struct *task; 1607 struct file *exe_file; 1608 1609 task = get_proc_task(d_inode(dentry)); 1610 if (!task) 1611 return -ENOENT; 1612 exe_file = get_task_exe_file(task); 1613 put_task_struct(task); 1614 if (exe_file) { 1615 *exe_path = exe_file->f_path; 1616 path_get(&exe_file->f_path); 1617 fput(exe_file); 1618 return 0; 1619 } else 1620 return -ENOENT; 1621 } 1622 1623 static const char *proc_pid_get_link(struct dentry *dentry, 1624 struct inode *inode, 1625 struct delayed_call *done) 1626 { 1627 struct path path; 1628 int error = -EACCES; 1629 1630 if (!dentry) 1631 return ERR_PTR(-ECHILD); 1632 1633 /* Are we allowed to snoop on the tasks file descriptors? 
*/ 1634 if (!proc_fd_access_allowed(inode)) 1635 goto out; 1636 1637 error = PROC_I(inode)->op.proc_get_link(dentry, &path); 1638 if (error) 1639 goto out; 1640 1641 nd_jump_link(&path); 1642 return NULL; 1643 out: 1644 return ERR_PTR(error); 1645 } 1646 1647 static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) 1648 { 1649 char *tmp = (char *)__get_free_page(GFP_KERNEL); 1650 char *pathname; 1651 int len; 1652 1653 if (!tmp) 1654 return -ENOMEM; 1655 1656 pathname = d_path(path, tmp, PAGE_SIZE); 1657 len = PTR_ERR(pathname); 1658 if (IS_ERR(pathname)) 1659 goto out; 1660 len = tmp + PAGE_SIZE - 1 - pathname; 1661 1662 if (len > buflen) 1663 len = buflen; 1664 if (copy_to_user(buffer, pathname, len)) 1665 len = -EFAULT; 1666 out: 1667 free_page((unsigned long)tmp); 1668 return len; 1669 } 1670 1671 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1672 { 1673 int error = -EACCES; 1674 struct inode *inode = d_inode(dentry); 1675 struct path path; 1676 1677 /* Are we allowed to snoop on the tasks file descriptors? */ 1678 if (!proc_fd_access_allowed(inode)) 1679 goto out; 1680 1681 error = PROC_I(inode)->op.proc_get_link(dentry, &path); 1682 if (error) 1683 goto out; 1684 1685 error = do_proc_readlink(&path, buffer, buflen); 1686 path_put(&path); 1687 out: 1688 return error; 1689 } 1690 1691 const struct inode_operations proc_pid_link_inode_operations = { 1692 .readlink = proc_pid_readlink, 1693 .get_link = proc_pid_get_link, 1694 .setattr = proc_setattr, 1695 }; 1696 1697 1698 /* building an inode */ 1699 1700 void task_dump_owner(struct task_struct *task, umode_t mode, 1701 kuid_t *ruid, kgid_t *rgid) 1702 { 1703 /* Depending on the state of dumpable compute who should own a 1704 * proc file for a task. 
1705 */ 1706 const struct cred *cred; 1707 kuid_t uid; 1708 kgid_t gid; 1709 1710 if (unlikely(task->flags & PF_KTHREAD)) { 1711 *ruid = GLOBAL_ROOT_UID; 1712 *rgid = GLOBAL_ROOT_GID; 1713 return; 1714 } 1715 1716 /* Default to the tasks effective ownership */ 1717 rcu_read_lock(); 1718 cred = __task_cred(task); 1719 uid = cred->euid; 1720 gid = cred->egid; 1721 rcu_read_unlock(); 1722 1723 /* 1724 * Before the /proc/pid/status file was created the only way to read 1725 * the effective uid of a /process was to stat /proc/pid. Reading 1726 * /proc/pid/status is slow enough that procps and other packages 1727 * kept stating /proc/pid. To keep the rules in /proc simple I have 1728 * made this apply to all per process world readable and executable 1729 * directories. 1730 */ 1731 if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) { 1732 struct mm_struct *mm; 1733 task_lock(task); 1734 mm = task->mm; 1735 /* Make non-dumpable tasks owned by some root */ 1736 if (mm) { 1737 if (get_dumpable(mm) != SUID_DUMP_USER) { 1738 struct user_namespace *user_ns = mm->user_ns; 1739 1740 uid = make_kuid(user_ns, 0); 1741 if (!uid_valid(uid)) 1742 uid = GLOBAL_ROOT_UID; 1743 1744 gid = make_kgid(user_ns, 0); 1745 if (!gid_valid(gid)) 1746 gid = GLOBAL_ROOT_GID; 1747 } 1748 } else { 1749 uid = GLOBAL_ROOT_UID; 1750 gid = GLOBAL_ROOT_GID; 1751 } 1752 task_unlock(task); 1753 } 1754 *ruid = uid; 1755 *rgid = gid; 1756 } 1757 1758 struct inode *proc_pid_make_inode(struct super_block * sb, 1759 struct task_struct *task, umode_t mode) 1760 { 1761 struct inode * inode; 1762 struct proc_inode *ei; 1763 1764 /* We need a new inode */ 1765 1766 inode = new_inode(sb); 1767 if (!inode) 1768 goto out; 1769 1770 /* Common stuff */ 1771 ei = PROC_I(inode); 1772 inode->i_mode = mode; 1773 inode->i_ino = get_next_ino(); 1774 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); 1775 inode->i_op = &proc_def_inode_operations; 1776 1777 /* 1778 * grab the reference to task. 
1779 */ 1780 ei->pid = get_task_pid(task, PIDTYPE_PID); 1781 if (!ei->pid) 1782 goto out_unlock; 1783 1784 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); 1785 security_task_to_inode(task, inode); 1786 1787 out: 1788 return inode; 1789 1790 out_unlock: 1791 iput(inode); 1792 return NULL; 1793 } 1794 1795 int pid_getattr(const struct path *path, struct kstat *stat, 1796 u32 request_mask, unsigned int query_flags) 1797 { 1798 struct inode *inode = d_inode(path->dentry); 1799 struct task_struct *task; 1800 struct pid_namespace *pid = path->dentry->d_sb->s_fs_info; 1801 1802 generic_fillattr(inode, stat); 1803 1804 rcu_read_lock(); 1805 stat->uid = GLOBAL_ROOT_UID; 1806 stat->gid = GLOBAL_ROOT_GID; 1807 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1808 if (task) { 1809 if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { 1810 rcu_read_unlock(); 1811 /* 1812 * This doesn't prevent learning whether PID exists, 1813 * it only makes getattr() consistent with readdir(). 1814 */ 1815 return -ENOENT; 1816 } 1817 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); 1818 } 1819 rcu_read_unlock(); 1820 return 0; 1821 } 1822 1823 /* dentry stuff */ 1824 1825 /* 1826 * Exceptional case: normally we are not allowed to unhash a busy 1827 * directory. In this case, however, we can do it - no aliasing problems 1828 * due to the way we treat inodes. 1829 * 1830 * Rewrite the inode's ownerships here because the owning task may have 1831 * performed a setuid(), etc. 
1832 * 1833 */ 1834 int pid_revalidate(struct dentry *dentry, unsigned int flags) 1835 { 1836 struct inode *inode; 1837 struct task_struct *task; 1838 1839 if (flags & LOOKUP_RCU) 1840 return -ECHILD; 1841 1842 inode = d_inode(dentry); 1843 task = get_proc_task(inode); 1844 1845 if (task) { 1846 task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); 1847 1848 inode->i_mode &= ~(S_ISUID | S_ISGID); 1849 security_task_to_inode(task, inode); 1850 put_task_struct(task); 1851 return 1; 1852 } 1853 return 0; 1854 } 1855 1856 static inline bool proc_inode_is_dead(struct inode *inode) 1857 { 1858 return !proc_pid(inode)->tasks[PIDTYPE_PID].first; 1859 } 1860 1861 int pid_delete_dentry(const struct dentry *dentry) 1862 { 1863 /* Is the task we represent dead? 1864 * If so, then don't put the dentry on the lru list, 1865 * kill it immediately. 1866 */ 1867 return proc_inode_is_dead(d_inode(dentry)); 1868 } 1869 1870 const struct dentry_operations pid_dentry_operations = 1871 { 1872 .d_revalidate = pid_revalidate, 1873 .d_delete = pid_delete_dentry, 1874 }; 1875 1876 /* Lookups */ 1877 1878 /* 1879 * Fill a directory entry. 1880 * 1881 * If possible create the dcache entry and derive our inode number and 1882 * file type from dcache entry. 1883 * 1884 * Since all of the proc inode numbers are dynamically generated, the inode 1885 * numbers do not exist until the inode is cache. This means creating the 1886 * the dcache entry in readdir is necessary to keep the inode numbers 1887 * reported by readdir in sync with the inode numbers reported 1888 * by stat. 
1889 */ 1890 bool proc_fill_cache(struct file *file, struct dir_context *ctx, 1891 const char *name, int len, 1892 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1893 { 1894 struct dentry *child, *dir = file->f_path.dentry; 1895 struct qstr qname = QSTR_INIT(name, len); 1896 struct inode *inode; 1897 unsigned type; 1898 ino_t ino; 1899 1900 child = d_hash_and_lookup(dir, &qname); 1901 if (!child) { 1902 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); 1903 child = d_alloc_parallel(dir, &qname, &wq); 1904 if (IS_ERR(child)) 1905 goto end_instantiate; 1906 if (d_in_lookup(child)) { 1907 int err = instantiate(d_inode(dir), child, task, ptr); 1908 d_lookup_done(child); 1909 if (err < 0) { 1910 dput(child); 1911 goto end_instantiate; 1912 } 1913 } 1914 } 1915 inode = d_inode(child); 1916 ino = inode->i_ino; 1917 type = inode->i_mode >> 12; 1918 dput(child); 1919 return dir_emit(ctx, name, len, ino, type); 1920 1921 end_instantiate: 1922 return dir_emit(ctx, name, len, 1, DT_UNKNOWN); 1923 } 1924 1925 /* 1926 * dname_to_vma_addr - maps a dentry name into two unsigned longs 1927 * which represent vma start and end addresses. 
1928 */ 1929 static int dname_to_vma_addr(struct dentry *dentry, 1930 unsigned long *start, unsigned long *end) 1931 { 1932 const char *str = dentry->d_name.name; 1933 unsigned long long sval, eval; 1934 unsigned int len; 1935 1936 if (str[0] == '0' && str[1] != '-') 1937 return -EINVAL; 1938 len = _parse_integer(str, 16, &sval); 1939 if (len & KSTRTOX_OVERFLOW) 1940 return -EINVAL; 1941 if (sval != (unsigned long)sval) 1942 return -EINVAL; 1943 str += len; 1944 1945 if (*str != '-') 1946 return -EINVAL; 1947 str++; 1948 1949 if (str[0] == '0' && str[1]) 1950 return -EINVAL; 1951 len = _parse_integer(str, 16, &eval); 1952 if (len & KSTRTOX_OVERFLOW) 1953 return -EINVAL; 1954 if (eval != (unsigned long)eval) 1955 return -EINVAL; 1956 str += len; 1957 1958 if (*str != '\0') 1959 return -EINVAL; 1960 1961 *start = sval; 1962 *end = eval; 1963 1964 return 0; 1965 } 1966 1967 static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) 1968 { 1969 unsigned long vm_start, vm_end; 1970 bool exact_vma_exists = false; 1971 struct mm_struct *mm = NULL; 1972 struct task_struct *task; 1973 struct inode *inode; 1974 int status = 0; 1975 1976 if (flags & LOOKUP_RCU) 1977 return -ECHILD; 1978 1979 inode = d_inode(dentry); 1980 task = get_proc_task(inode); 1981 if (!task) 1982 goto out_notask; 1983 1984 mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); 1985 if (IS_ERR_OR_NULL(mm)) 1986 goto out; 1987 1988 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { 1989 down_read(&mm->mmap_sem); 1990 exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); 1991 up_read(&mm->mmap_sem); 1992 } 1993 1994 mmput(mm); 1995 1996 if (exact_vma_exists) { 1997 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); 1998 1999 security_task_to_inode(task, inode); 2000 status = 1; 2001 } 2002 2003 out: 2004 put_task_struct(task); 2005 2006 out_notask: 2007 return status; 2008 } 2009 2010 static const struct dentry_operations tid_map_files_dentry_operations = { 2011 .d_revalidate = 
map_files_d_revalidate, 2012 .d_delete = pid_delete_dentry, 2013 }; 2014 2015 static int map_files_get_link(struct dentry *dentry, struct path *path) 2016 { 2017 unsigned long vm_start, vm_end; 2018 struct vm_area_struct *vma; 2019 struct task_struct *task; 2020 struct mm_struct *mm; 2021 int rc; 2022 2023 rc = -ENOENT; 2024 task = get_proc_task(d_inode(dentry)); 2025 if (!task) 2026 goto out; 2027 2028 mm = get_task_mm(task); 2029 put_task_struct(task); 2030 if (!mm) 2031 goto out; 2032 2033 rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); 2034 if (rc) 2035 goto out_mmput; 2036 2037 rc = -ENOENT; 2038 down_read(&mm->mmap_sem); 2039 vma = find_exact_vma(mm, vm_start, vm_end); 2040 if (vma && vma->vm_file) { 2041 *path = vma->vm_file->f_path; 2042 path_get(path); 2043 rc = 0; 2044 } 2045 up_read(&mm->mmap_sem); 2046 2047 out_mmput: 2048 mmput(mm); 2049 out: 2050 return rc; 2051 } 2052 2053 struct map_files_info { 2054 unsigned long start; 2055 unsigned long end; 2056 fmode_t mode; 2057 }; 2058 2059 /* 2060 * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the 2061 * symlinks may be used to bypass permissions on ancestor directories in the 2062 * path to the file in question. 
2063 */ 2064 static const char * 2065 proc_map_files_get_link(struct dentry *dentry, 2066 struct inode *inode, 2067 struct delayed_call *done) 2068 { 2069 if (!capable(CAP_SYS_ADMIN)) 2070 return ERR_PTR(-EPERM); 2071 2072 return proc_pid_get_link(dentry, inode, done); 2073 } 2074 2075 /* 2076 * Identical to proc_pid_link_inode_operations except for get_link() 2077 */ 2078 static const struct inode_operations proc_map_files_link_inode_operations = { 2079 .readlink = proc_pid_readlink, 2080 .get_link = proc_map_files_get_link, 2081 .setattr = proc_setattr, 2082 }; 2083 2084 static int 2085 proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, 2086 struct task_struct *task, const void *ptr) 2087 { 2088 fmode_t mode = (fmode_t)(unsigned long)ptr; 2089 struct proc_inode *ei; 2090 struct inode *inode; 2091 2092 inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | 2093 ((mode & FMODE_READ ) ? S_IRUSR : 0) | 2094 ((mode & FMODE_WRITE) ? S_IWUSR : 0)); 2095 if (!inode) 2096 return -ENOENT; 2097 2098 ei = PROC_I(inode); 2099 ei->op.proc_get_link = map_files_get_link; 2100 2101 inode->i_op = &proc_map_files_link_inode_operations; 2102 inode->i_size = 64; 2103 2104 d_set_d_op(dentry, &tid_map_files_dentry_operations); 2105 d_add(dentry, inode); 2106 2107 return 0; 2108 } 2109 2110 static struct dentry *proc_map_files_lookup(struct inode *dir, 2111 struct dentry *dentry, unsigned int flags) 2112 { 2113 unsigned long vm_start, vm_end; 2114 struct vm_area_struct *vma; 2115 struct task_struct *task; 2116 int result; 2117 struct mm_struct *mm; 2118 2119 result = -ENOENT; 2120 task = get_proc_task(dir); 2121 if (!task) 2122 goto out; 2123 2124 result = -EACCES; 2125 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) 2126 goto out_put_task; 2127 2128 result = -ENOENT; 2129 if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) 2130 goto out_put_task; 2131 2132 mm = get_task_mm(task); 2133 if (!mm) 2134 goto out_put_task; 2135 2136 down_read(&mm->mmap_sem); 2137 vma 
= find_exact_vma(mm, vm_start, vm_end); 2138 if (!vma) 2139 goto out_no_vma; 2140 2141 if (vma->vm_file) 2142 result = proc_map_files_instantiate(dir, dentry, task, 2143 (void *)(unsigned long)vma->vm_file->f_mode); 2144 2145 out_no_vma: 2146 up_read(&mm->mmap_sem); 2147 mmput(mm); 2148 out_put_task: 2149 put_task_struct(task); 2150 out: 2151 return ERR_PTR(result); 2152 } 2153 2154 static const struct inode_operations proc_map_files_inode_operations = { 2155 .lookup = proc_map_files_lookup, 2156 .permission = proc_fd_permission, 2157 .setattr = proc_setattr, 2158 }; 2159 2160 static int 2161 proc_map_files_readdir(struct file *file, struct dir_context *ctx) 2162 { 2163 struct vm_area_struct *vma; 2164 struct task_struct *task; 2165 struct mm_struct *mm; 2166 unsigned long nr_files, pos, i; 2167 struct flex_array *fa = NULL; 2168 struct map_files_info info; 2169 struct map_files_info *p; 2170 int ret; 2171 2172 ret = -ENOENT; 2173 task = get_proc_task(file_inode(file)); 2174 if (!task) 2175 goto out; 2176 2177 ret = -EACCES; 2178 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) 2179 goto out_put_task; 2180 2181 ret = 0; 2182 if (!dir_emit_dots(file, ctx)) 2183 goto out_put_task; 2184 2185 mm = get_task_mm(task); 2186 if (!mm) 2187 goto out_put_task; 2188 down_read(&mm->mmap_sem); 2189 2190 nr_files = 0; 2191 2192 /* 2193 * We need two passes here: 2194 * 2195 * 1) Collect vmas of mapped files with mmap_sem taken 2196 * 2) Release mmap_sem and instantiate entries 2197 * 2198 * otherwise we get lockdep complained, since filldir() 2199 * routine might require mmap_sem taken in might_fault(). 
2200 */ 2201 2202 for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { 2203 if (vma->vm_file && ++pos > ctx->pos) 2204 nr_files++; 2205 } 2206 2207 if (nr_files) { 2208 fa = flex_array_alloc(sizeof(info), nr_files, 2209 GFP_KERNEL); 2210 if (!fa || flex_array_prealloc(fa, 0, nr_files, 2211 GFP_KERNEL)) { 2212 ret = -ENOMEM; 2213 if (fa) 2214 flex_array_free(fa); 2215 up_read(&mm->mmap_sem); 2216 mmput(mm); 2217 goto out_put_task; 2218 } 2219 for (i = 0, vma = mm->mmap, pos = 2; vma; 2220 vma = vma->vm_next) { 2221 if (!vma->vm_file) 2222 continue; 2223 if (++pos <= ctx->pos) 2224 continue; 2225 2226 info.start = vma->vm_start; 2227 info.end = vma->vm_end; 2228 info.mode = vma->vm_file->f_mode; 2229 if (flex_array_put(fa, i++, &info, GFP_KERNEL)) 2230 BUG(); 2231 } 2232 } 2233 up_read(&mm->mmap_sem); 2234 mmput(mm); 2235 2236 for (i = 0; i < nr_files; i++) { 2237 char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ 2238 unsigned int len; 2239 2240 p = flex_array_get(fa, i); 2241 len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end); 2242 if (!proc_fill_cache(file, ctx, 2243 buf, len, 2244 proc_map_files_instantiate, 2245 task, 2246 (void *)(unsigned long)p->mode)) 2247 break; 2248 ctx->pos++; 2249 } 2250 if (fa) 2251 flex_array_free(fa); 2252 2253 out_put_task: 2254 put_task_struct(task); 2255 out: 2256 return ret; 2257 } 2258 2259 static const struct file_operations proc_map_files_operations = { 2260 .read = generic_read_dir, 2261 .iterate_shared = proc_map_files_readdir, 2262 .llseek = generic_file_llseek, 2263 }; 2264 2265 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) 2266 struct timers_private { 2267 struct pid *pid; 2268 struct task_struct *task; 2269 struct sighand_struct *sighand; 2270 struct pid_namespace *ns; 2271 unsigned long flags; 2272 }; 2273 2274 static void *timers_start(struct seq_file *m, loff_t *pos) 2275 { 2276 struct timers_private *tp = m->private; 2277 2278 tp->task = get_pid_task(tp->pid, PIDTYPE_PID); 
2279 if (!tp->task) 2280 return ERR_PTR(-ESRCH); 2281 2282 tp->sighand = lock_task_sighand(tp->task, &tp->flags); 2283 if (!tp->sighand) 2284 return ERR_PTR(-ESRCH); 2285 2286 return seq_list_start(&tp->task->signal->posix_timers, *pos); 2287 } 2288 2289 static void *timers_next(struct seq_file *m, void *v, loff_t *pos) 2290 { 2291 struct timers_private *tp = m->private; 2292 return seq_list_next(v, &tp->task->signal->posix_timers, pos); 2293 } 2294 2295 static void timers_stop(struct seq_file *m, void *v) 2296 { 2297 struct timers_private *tp = m->private; 2298 2299 if (tp->sighand) { 2300 unlock_task_sighand(tp->task, &tp->flags); 2301 tp->sighand = NULL; 2302 } 2303 2304 if (tp->task) { 2305 put_task_struct(tp->task); 2306 tp->task = NULL; 2307 } 2308 } 2309 2310 static int show_timer(struct seq_file *m, void *v) 2311 { 2312 struct k_itimer *timer; 2313 struct timers_private *tp = m->private; 2314 int notify; 2315 static const char * const nstr[] = { 2316 [SIGEV_SIGNAL] = "signal", 2317 [SIGEV_NONE] = "none", 2318 [SIGEV_THREAD] = "thread", 2319 }; 2320 2321 timer = list_entry((struct list_head *)v, struct k_itimer, list); 2322 notify = timer->it_sigev_notify; 2323 2324 seq_printf(m, "ID: %d\n", timer->it_id); 2325 seq_printf(m, "signal: %d/%px\n", 2326 timer->sigq->info.si_signo, 2327 timer->sigq->info.si_value.sival_ptr); 2328 seq_printf(m, "notify: %s/%s.%d\n", 2329 nstr[notify & ~SIGEV_THREAD_ID], 2330 (notify & SIGEV_THREAD_ID) ? 
"tid" : "pid", 2331 pid_nr_ns(timer->it_pid, tp->ns)); 2332 seq_printf(m, "ClockID: %d\n", timer->it_clock); 2333 2334 return 0; 2335 } 2336 2337 static const struct seq_operations proc_timers_seq_ops = { 2338 .start = timers_start, 2339 .next = timers_next, 2340 .stop = timers_stop, 2341 .show = show_timer, 2342 }; 2343 2344 static int proc_timers_open(struct inode *inode, struct file *file) 2345 { 2346 struct timers_private *tp; 2347 2348 tp = __seq_open_private(file, &proc_timers_seq_ops, 2349 sizeof(struct timers_private)); 2350 if (!tp) 2351 return -ENOMEM; 2352 2353 tp->pid = proc_pid(inode); 2354 tp->ns = inode->i_sb->s_fs_info; 2355 return 0; 2356 } 2357 2358 static const struct file_operations proc_timers_operations = { 2359 .open = proc_timers_open, 2360 .read = seq_read, 2361 .llseek = seq_lseek, 2362 .release = seq_release_private, 2363 }; 2364 #endif 2365 2366 static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, 2367 size_t count, loff_t *offset) 2368 { 2369 struct inode *inode = file_inode(file); 2370 struct task_struct *p; 2371 u64 slack_ns; 2372 int err; 2373 2374 err = kstrtoull_from_user(buf, count, 10, &slack_ns); 2375 if (err < 0) 2376 return err; 2377 2378 p = get_proc_task(inode); 2379 if (!p) 2380 return -ESRCH; 2381 2382 if (p != current) { 2383 if (!capable(CAP_SYS_NICE)) { 2384 count = -EPERM; 2385 goto out; 2386 } 2387 2388 err = security_task_setscheduler(p); 2389 if (err) { 2390 count = err; 2391 goto out; 2392 } 2393 } 2394 2395 task_lock(p); 2396 if (slack_ns == 0) 2397 p->timer_slack_ns = p->default_timer_slack_ns; 2398 else 2399 p->timer_slack_ns = slack_ns; 2400 task_unlock(p); 2401 2402 out: 2403 put_task_struct(p); 2404 2405 return count; 2406 } 2407 2408 static int timerslack_ns_show(struct seq_file *m, void *v) 2409 { 2410 struct inode *inode = m->private; 2411 struct task_struct *p; 2412 int err = 0; 2413 2414 p = get_proc_task(inode); 2415 if (!p) 2416 return -ESRCH; 2417 2418 if (p != current) { 2419 
2420 if (!capable(CAP_SYS_NICE)) { 2421 err = -EPERM; 2422 goto out; 2423 } 2424 err = security_task_getscheduler(p); 2425 if (err) 2426 goto out; 2427 } 2428 2429 task_lock(p); 2430 seq_printf(m, "%llu\n", p->timer_slack_ns); 2431 task_unlock(p); 2432 2433 out: 2434 put_task_struct(p); 2435 2436 return err; 2437 } 2438 2439 static int timerslack_ns_open(struct inode *inode, struct file *filp) 2440 { 2441 return single_open(filp, timerslack_ns_show, inode); 2442 } 2443 2444 static const struct file_operations proc_pid_set_timerslack_ns_operations = { 2445 .open = timerslack_ns_open, 2446 .read = seq_read, 2447 .write = timerslack_ns_write, 2448 .llseek = seq_lseek, 2449 .release = single_release, 2450 }; 2451 2452 static int proc_pident_instantiate(struct inode *dir, 2453 struct dentry *dentry, struct task_struct *task, const void *ptr) 2454 { 2455 const struct pid_entry *p = ptr; 2456 struct inode *inode; 2457 struct proc_inode *ei; 2458 2459 inode = proc_pid_make_inode(dir->i_sb, task, p->mode); 2460 if (!inode) 2461 goto out; 2462 2463 ei = PROC_I(inode); 2464 if (S_ISDIR(inode->i_mode)) 2465 set_nlink(inode, 2); /* Use getattr to fix if necessary */ 2466 if (p->iop) 2467 inode->i_op = p->iop; 2468 if (p->fop) 2469 inode->i_fop = p->fop; 2470 ei->op = p->op; 2471 d_set_d_op(dentry, &pid_dentry_operations); 2472 d_add(dentry, inode); 2473 /* Close the race of the process dying before we return the dentry */ 2474 if (pid_revalidate(dentry, 0)) 2475 return 0; 2476 out: 2477 return -ENOENT; 2478 } 2479 2480 static struct dentry *proc_pident_lookup(struct inode *dir, 2481 struct dentry *dentry, 2482 const struct pid_entry *ents, 2483 unsigned int nents) 2484 { 2485 int error; 2486 struct task_struct *task = get_proc_task(dir); 2487 const struct pid_entry *p, *last; 2488 2489 error = -ENOENT; 2490 2491 if (!task) 2492 goto out_no_task; 2493 2494 /* 2495 * Yes, it does not scale. And it should not. 
Don't add 2496 * new entries into /proc/<tgid>/ without very good reasons. 2497 */ 2498 last = &ents[nents]; 2499 for (p = ents; p < last; p++) { 2500 if (p->len != dentry->d_name.len) 2501 continue; 2502 if (!memcmp(dentry->d_name.name, p->name, p->len)) 2503 break; 2504 } 2505 if (p >= last) 2506 goto out; 2507 2508 error = proc_pident_instantiate(dir, dentry, task, p); 2509 out: 2510 put_task_struct(task); 2511 out_no_task: 2512 return ERR_PTR(error); 2513 } 2514 2515 static int proc_pident_readdir(struct file *file, struct dir_context *ctx, 2516 const struct pid_entry *ents, unsigned int nents) 2517 { 2518 struct task_struct *task = get_proc_task(file_inode(file)); 2519 const struct pid_entry *p; 2520 2521 if (!task) 2522 return -ENOENT; 2523 2524 if (!dir_emit_dots(file, ctx)) 2525 goto out; 2526 2527 if (ctx->pos >= nents + 2) 2528 goto out; 2529 2530 for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { 2531 if (!proc_fill_cache(file, ctx, p->name, p->len, 2532 proc_pident_instantiate, task, p)) 2533 break; 2534 ctx->pos++; 2535 } 2536 out: 2537 put_task_struct(task); 2538 return 0; 2539 } 2540 2541 #ifdef CONFIG_SECURITY 2542 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 2543 size_t count, loff_t *ppos) 2544 { 2545 struct inode * inode = file_inode(file); 2546 char *p = NULL; 2547 ssize_t length; 2548 struct task_struct *task = get_proc_task(inode); 2549 2550 if (!task) 2551 return -ESRCH; 2552 2553 length = security_getprocattr(task, 2554 (char*)file->f_path.dentry->d_name.name, 2555 &p); 2556 put_task_struct(task); 2557 if (length > 0) 2558 length = simple_read_from_buffer(buf, count, ppos, p, length); 2559 kfree(p); 2560 return length; 2561 } 2562 2563 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 2564 size_t count, loff_t *ppos) 2565 { 2566 struct inode * inode = file_inode(file); 2567 void *page; 2568 ssize_t length; 2569 struct task_struct *task = get_proc_task(inode); 2570 2571 length = 
-ESRCH; 2572 if (!task) 2573 goto out_no_task; 2574 2575 /* A task may only write its own attributes. */ 2576 length = -EACCES; 2577 if (current != task) 2578 goto out; 2579 2580 if (count > PAGE_SIZE) 2581 count = PAGE_SIZE; 2582 2583 /* No partial writes. */ 2584 length = -EINVAL; 2585 if (*ppos != 0) 2586 goto out; 2587 2588 page = memdup_user(buf, count); 2589 if (IS_ERR(page)) { 2590 length = PTR_ERR(page); 2591 goto out; 2592 } 2593 2594 /* Guard against adverse ptrace interaction */ 2595 length = mutex_lock_interruptible(¤t->signal->cred_guard_mutex); 2596 if (length < 0) 2597 goto out_free; 2598 2599 length = security_setprocattr(file->f_path.dentry->d_name.name, 2600 page, count); 2601 mutex_unlock(¤t->signal->cred_guard_mutex); 2602 out_free: 2603 kfree(page); 2604 out: 2605 put_task_struct(task); 2606 out_no_task: 2607 return length; 2608 } 2609 2610 static const struct file_operations proc_pid_attr_operations = { 2611 .read = proc_pid_attr_read, 2612 .write = proc_pid_attr_write, 2613 .llseek = generic_file_llseek, 2614 }; 2615 2616 static const struct pid_entry attr_dir_stuff[] = { 2617 REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2618 REG("prev", S_IRUGO, proc_pid_attr_operations), 2619 REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2620 REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2621 REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2622 REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2623 }; 2624 2625 static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) 2626 { 2627 return proc_pident_readdir(file, ctx, 2628 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2629 } 2630 2631 static const struct file_operations proc_attr_dir_operations = { 2632 .read = generic_read_dir, 2633 .iterate_shared = proc_attr_dir_readdir, 2634 .llseek = generic_file_llseek, 2635 }; 2636 2637 static struct dentry *proc_attr_dir_lookup(struct inode *dir, 2638 struct dentry *dentry, unsigned int 
flags) 2639 { 2640 return proc_pident_lookup(dir, dentry, 2641 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2642 } 2643 2644 static const struct inode_operations proc_attr_dir_inode_operations = { 2645 .lookup = proc_attr_dir_lookup, 2646 .getattr = pid_getattr, 2647 .setattr = proc_setattr, 2648 }; 2649 2650 #endif 2651 2652 #ifdef CONFIG_ELF_CORE 2653 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2654 size_t count, loff_t *ppos) 2655 { 2656 struct task_struct *task = get_proc_task(file_inode(file)); 2657 struct mm_struct *mm; 2658 char buffer[PROC_NUMBUF]; 2659 size_t len; 2660 int ret; 2661 2662 if (!task) 2663 return -ESRCH; 2664 2665 ret = 0; 2666 mm = get_task_mm(task); 2667 if (mm) { 2668 len = snprintf(buffer, sizeof(buffer), "%08lx\n", 2669 ((mm->flags & MMF_DUMP_FILTER_MASK) >> 2670 MMF_DUMP_FILTER_SHIFT)); 2671 mmput(mm); 2672 ret = simple_read_from_buffer(buf, count, ppos, buffer, len); 2673 } 2674 2675 put_task_struct(task); 2676 2677 return ret; 2678 } 2679 2680 static ssize_t proc_coredump_filter_write(struct file *file, 2681 const char __user *buf, 2682 size_t count, 2683 loff_t *ppos) 2684 { 2685 struct task_struct *task; 2686 struct mm_struct *mm; 2687 unsigned int val; 2688 int ret; 2689 int i; 2690 unsigned long mask; 2691 2692 ret = kstrtouint_from_user(buf, count, 0, &val); 2693 if (ret < 0) 2694 return ret; 2695 2696 ret = -ESRCH; 2697 task = get_proc_task(file_inode(file)); 2698 if (!task) 2699 goto out_no_task; 2700 2701 mm = get_task_mm(task); 2702 if (!mm) 2703 goto out_no_mm; 2704 ret = 0; 2705 2706 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { 2707 if (val & mask) 2708 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2709 else 2710 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2711 } 2712 2713 mmput(mm); 2714 out_no_mm: 2715 put_task_struct(task); 2716 out_no_task: 2717 if (ret < 0) 2718 return ret; 2719 return count; 2720 } 2721 2722 static const struct file_operations 
proc_coredump_filter_operations = { 2723 .read = proc_coredump_filter_read, 2724 .write = proc_coredump_filter_write, 2725 .llseek = generic_file_llseek, 2726 }; 2727 #endif 2728 2729 #ifdef CONFIG_TASK_IO_ACCOUNTING 2730 static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole) 2731 { 2732 struct task_io_accounting acct = task->ioac; 2733 unsigned long flags; 2734 int result; 2735 2736 result = mutex_lock_killable(&task->signal->cred_guard_mutex); 2737 if (result) 2738 return result; 2739 2740 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { 2741 result = -EACCES; 2742 goto out_unlock; 2743 } 2744 2745 if (whole && lock_task_sighand(task, &flags)) { 2746 struct task_struct *t = task; 2747 2748 task_io_accounting_add(&acct, &task->signal->ioac); 2749 while_each_thread(task, t) 2750 task_io_accounting_add(&acct, &t->ioac); 2751 2752 unlock_task_sighand(task, &flags); 2753 } 2754 seq_printf(m, 2755 "rchar: %llu\n" 2756 "wchar: %llu\n" 2757 "syscr: %llu\n" 2758 "syscw: %llu\n" 2759 "read_bytes: %llu\n" 2760 "write_bytes: %llu\n" 2761 "cancelled_write_bytes: %llu\n", 2762 (unsigned long long)acct.rchar, 2763 (unsigned long long)acct.wchar, 2764 (unsigned long long)acct.syscr, 2765 (unsigned long long)acct.syscw, 2766 (unsigned long long)acct.read_bytes, 2767 (unsigned long long)acct.write_bytes, 2768 (unsigned long long)acct.cancelled_write_bytes); 2769 result = 0; 2770 2771 out_unlock: 2772 mutex_unlock(&task->signal->cred_guard_mutex); 2773 return result; 2774 } 2775 2776 static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns, 2777 struct pid *pid, struct task_struct *task) 2778 { 2779 return do_io_accounting(task, m, 0); 2780 } 2781 2782 static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns, 2783 struct pid *pid, struct task_struct *task) 2784 { 2785 return do_io_accounting(task, m, 1); 2786 } 2787 #endif /* CONFIG_TASK_IO_ACCOUNTING */ 2788 2789 #ifdef CONFIG_USER_NS 2790 static 
int proc_id_map_open(struct inode *inode, struct file *file, 2791 const struct seq_operations *seq_ops) 2792 { 2793 struct user_namespace *ns = NULL; 2794 struct task_struct *task; 2795 struct seq_file *seq; 2796 int ret = -EINVAL; 2797 2798 task = get_proc_task(inode); 2799 if (task) { 2800 rcu_read_lock(); 2801 ns = get_user_ns(task_cred_xxx(task, user_ns)); 2802 rcu_read_unlock(); 2803 put_task_struct(task); 2804 } 2805 if (!ns) 2806 goto err; 2807 2808 ret = seq_open(file, seq_ops); 2809 if (ret) 2810 goto err_put_ns; 2811 2812 seq = file->private_data; 2813 seq->private = ns; 2814 2815 return 0; 2816 err_put_ns: 2817 put_user_ns(ns); 2818 err: 2819 return ret; 2820 } 2821 2822 static int proc_id_map_release(struct inode *inode, struct file *file) 2823 { 2824 struct seq_file *seq = file->private_data; 2825 struct user_namespace *ns = seq->private; 2826 put_user_ns(ns); 2827 return seq_release(inode, file); 2828 } 2829 2830 static int proc_uid_map_open(struct inode *inode, struct file *file) 2831 { 2832 return proc_id_map_open(inode, file, &proc_uid_seq_operations); 2833 } 2834 2835 static int proc_gid_map_open(struct inode *inode, struct file *file) 2836 { 2837 return proc_id_map_open(inode, file, &proc_gid_seq_operations); 2838 } 2839 2840 static int proc_projid_map_open(struct inode *inode, struct file *file) 2841 { 2842 return proc_id_map_open(inode, file, &proc_projid_seq_operations); 2843 } 2844 2845 static const struct file_operations proc_uid_map_operations = { 2846 .open = proc_uid_map_open, 2847 .write = proc_uid_map_write, 2848 .read = seq_read, 2849 .llseek = seq_lseek, 2850 .release = proc_id_map_release, 2851 }; 2852 2853 static const struct file_operations proc_gid_map_operations = { 2854 .open = proc_gid_map_open, 2855 .write = proc_gid_map_write, 2856 .read = seq_read, 2857 .llseek = seq_lseek, 2858 .release = proc_id_map_release, 2859 }; 2860 2861 static const struct file_operations proc_projid_map_operations = { 2862 .open = 
proc_projid_map_open, 2863 .write = proc_projid_map_write, 2864 .read = seq_read, 2865 .llseek = seq_lseek, 2866 .release = proc_id_map_release, 2867 }; 2868 2869 static int proc_setgroups_open(struct inode *inode, struct file *file) 2870 { 2871 struct user_namespace *ns = NULL; 2872 struct task_struct *task; 2873 int ret; 2874 2875 ret = -ESRCH; 2876 task = get_proc_task(inode); 2877 if (task) { 2878 rcu_read_lock(); 2879 ns = get_user_ns(task_cred_xxx(task, user_ns)); 2880 rcu_read_unlock(); 2881 put_task_struct(task); 2882 } 2883 if (!ns) 2884 goto err; 2885 2886 if (file->f_mode & FMODE_WRITE) { 2887 ret = -EACCES; 2888 if (!ns_capable(ns, CAP_SYS_ADMIN)) 2889 goto err_put_ns; 2890 } 2891 2892 ret = single_open(file, &proc_setgroups_show, ns); 2893 if (ret) 2894 goto err_put_ns; 2895 2896 return 0; 2897 err_put_ns: 2898 put_user_ns(ns); 2899 err: 2900 return ret; 2901 } 2902 2903 static int proc_setgroups_release(struct inode *inode, struct file *file) 2904 { 2905 struct seq_file *seq = file->private_data; 2906 struct user_namespace *ns = seq->private; 2907 int ret = single_release(inode, file); 2908 put_user_ns(ns); 2909 return ret; 2910 } 2911 2912 static const struct file_operations proc_setgroups_operations = { 2913 .open = proc_setgroups_open, 2914 .write = proc_setgroups_write, 2915 .read = seq_read, 2916 .llseek = seq_lseek, 2917 .release = proc_setgroups_release, 2918 }; 2919 #endif /* CONFIG_USER_NS */ 2920 2921 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2922 struct pid *pid, struct task_struct *task) 2923 { 2924 int err = lock_trace(task); 2925 if (!err) { 2926 seq_printf(m, "%08x\n", task->personality); 2927 unlock_trace(task); 2928 } 2929 return err; 2930 } 2931 2932 #ifdef CONFIG_LIVEPATCH 2933 static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns, 2934 struct pid *pid, struct task_struct *task) 2935 { 2936 seq_printf(m, "%d\n", task->patch_state); 2937 return 0; 2938 } 2939 #endif /* 
CONFIG_LIVEPATCH */ 2940 2941 /* 2942 * Thread groups 2943 */ 2944 static const struct file_operations proc_task_operations; 2945 static const struct inode_operations proc_task_inode_operations; 2946 2947 static const struct pid_entry tgid_base_stuff[] = { 2948 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2949 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2950 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), 2951 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2952 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 2953 #ifdef CONFIG_NET 2954 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2955 #endif 2956 REG("environ", S_IRUSR, proc_environ_operations), 2957 REG("auxv", S_IRUSR, proc_auxv_operations), 2958 ONE("status", S_IRUGO, proc_pid_status), 2959 ONE("personality", S_IRUSR, proc_pid_personality), 2960 ONE("limits", S_IRUGO, proc_pid_limits), 2961 #ifdef CONFIG_SCHED_DEBUG 2962 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2963 #endif 2964 #ifdef CONFIG_SCHED_AUTOGROUP 2965 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), 2966 #endif 2967 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2968 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2969 ONE("syscall", S_IRUSR, proc_pid_syscall), 2970 #endif 2971 REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), 2972 ONE("stat", S_IRUGO, proc_tgid_stat), 2973 ONE("statm", S_IRUGO, proc_pid_statm), 2974 REG("maps", S_IRUGO, proc_pid_maps_operations), 2975 #ifdef CONFIG_NUMA 2976 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), 2977 #endif 2978 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2979 LNK("cwd", proc_cwd_link), 2980 LNK("root", proc_root_link), 2981 LNK("exe", proc_exe_link), 2982 REG("mounts", S_IRUGO, proc_mounts_operations), 2983 REG("mountinfo", S_IRUGO, 
proc_mountinfo_operations), 2984 REG("mountstats", S_IRUSR, proc_mountstats_operations), 2985 #ifdef CONFIG_PROC_PAGE_MONITOR 2986 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2987 REG("smaps", S_IRUGO, proc_pid_smaps_operations), 2988 REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), 2989 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2990 #endif 2991 #ifdef CONFIG_SECURITY 2992 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2993 #endif 2994 #ifdef CONFIG_KALLSYMS 2995 ONE("wchan", S_IRUGO, proc_pid_wchan), 2996 #endif 2997 #ifdef CONFIG_STACKTRACE 2998 ONE("stack", S_IRUSR, proc_pid_stack), 2999 #endif 3000 #ifdef CONFIG_SCHED_INFO 3001 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 3002 #endif 3003 #ifdef CONFIG_LATENCYTOP 3004 REG("latency", S_IRUGO, proc_lstats_operations), 3005 #endif 3006 #ifdef CONFIG_PROC_PID_CPUSET 3007 ONE("cpuset", S_IRUGO, proc_cpuset_show), 3008 #endif 3009 #ifdef CONFIG_CGROUPS 3010 ONE("cgroup", S_IRUGO, proc_cgroup_show), 3011 #endif 3012 ONE("oom_score", S_IRUGO, proc_oom_score), 3013 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 3014 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3015 #ifdef CONFIG_AUDITSYSCALL 3016 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3017 REG("sessionid", S_IRUGO, proc_sessionid_operations), 3018 #endif 3019 #ifdef CONFIG_FAULT_INJECTION 3020 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3021 REG("fail-nth", 0644, proc_fail_nth_operations), 3022 #endif 3023 #ifdef CONFIG_ELF_CORE 3024 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 3025 #endif 3026 #ifdef CONFIG_TASK_IO_ACCOUNTING 3027 ONE("io", S_IRUSR, proc_tgid_io_accounting), 3028 #endif 3029 #ifdef CONFIG_USER_NS 3030 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 3031 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 3032 REG("projid_map", S_IRUGO|S_IWUSR, 
proc_projid_map_operations), 3033 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), 3034 #endif 3035 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) 3036 REG("timers", S_IRUGO, proc_timers_operations), 3037 #endif 3038 REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), 3039 #ifdef CONFIG_LIVEPATCH 3040 ONE("patch_state", S_IRUSR, proc_pid_patch_state), 3041 #endif 3042 }; 3043 3044 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) 3045 { 3046 return proc_pident_readdir(file, ctx, 3047 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3048 } 3049 3050 static const struct file_operations proc_tgid_base_operations = { 3051 .read = generic_read_dir, 3052 .iterate_shared = proc_tgid_base_readdir, 3053 .llseek = generic_file_llseek, 3054 }; 3055 3056 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 3057 { 3058 return proc_pident_lookup(dir, dentry, 3059 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3060 } 3061 3062 static const struct inode_operations proc_tgid_base_inode_operations = { 3063 .lookup = proc_tgid_base_lookup, 3064 .getattr = pid_getattr, 3065 .setattr = proc_setattr, 3066 .permission = proc_pid_permission, 3067 }; 3068 3069 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 3070 { 3071 struct dentry *dentry, *leader, *dir; 3072 char buf[10 + 1]; 3073 struct qstr name; 3074 3075 name.name = buf; 3076 name.len = snprintf(buf, sizeof(buf), "%u", pid); 3077 /* no ->d_hash() rejects on procfs */ 3078 dentry = d_hash_and_lookup(mnt->mnt_root, &name); 3079 if (dentry) { 3080 d_invalidate(dentry); 3081 dput(dentry); 3082 } 3083 3084 if (pid == tgid) 3085 return; 3086 3087 name.name = buf; 3088 name.len = snprintf(buf, sizeof(buf), "%u", tgid); 3089 leader = d_hash_and_lookup(mnt->mnt_root, &name); 3090 if (!leader) 3091 goto out; 3092 3093 name.name = "task"; 3094 name.len = strlen(name.name); 
3095 dir = d_hash_and_lookup(leader, &name); 3096 if (!dir) 3097 goto out_put_leader; 3098 3099 name.name = buf; 3100 name.len = snprintf(buf, sizeof(buf), "%u", pid); 3101 dentry = d_hash_and_lookup(dir, &name); 3102 if (dentry) { 3103 d_invalidate(dentry); 3104 dput(dentry); 3105 } 3106 3107 dput(dir); 3108 out_put_leader: 3109 dput(leader); 3110 out: 3111 return; 3112 } 3113 3114 /** 3115 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 3116 * @task: task that should be flushed. 3117 * 3118 * When flushing dentries from proc, one needs to flush them from global 3119 * proc (proc_mnt) and from all the namespaces' procs this task was seen 3120 * in. This call is supposed to do all of this job. 3121 * 3122 * Looks in the dcache for 3123 * /proc/@pid 3124 * /proc/@tgid/task/@pid 3125 * if either directory is present flushes it and all of it'ts children 3126 * from the dcache. 3127 * 3128 * It is safe and reasonable to cache /proc entries for a task until 3129 * that task exits. After that they just clog up the dcache with 3130 * useless entries, possibly causing useful dcache entries to be 3131 * flushed instead. This routine is proved to flush those useless 3132 * dcache entries at process exit time. 3133 * 3134 * NOTE: This routine is just an optimization so it does not guarantee 3135 * that no dcache entries will exist at process exit time it 3136 * just makes it very unlikely that any will persist. 
3137 */ 3138 3139 void proc_flush_task(struct task_struct *task) 3140 { 3141 int i; 3142 struct pid *pid, *tgid; 3143 struct upid *upid; 3144 3145 pid = task_pid(task); 3146 tgid = task_tgid(task); 3147 3148 for (i = 0; i <= pid->level; i++) { 3149 upid = &pid->numbers[i]; 3150 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 3151 tgid->numbers[i].nr); 3152 } 3153 } 3154 3155 static int proc_pid_instantiate(struct inode *dir, 3156 struct dentry * dentry, 3157 struct task_struct *task, const void *ptr) 3158 { 3159 struct inode *inode; 3160 3161 inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); 3162 if (!inode) 3163 goto out; 3164 3165 inode->i_op = &proc_tgid_base_inode_operations; 3166 inode->i_fop = &proc_tgid_base_operations; 3167 inode->i_flags|=S_IMMUTABLE; 3168 3169 set_nlink(inode, nlink_tgid); 3170 3171 d_set_d_op(dentry, &pid_dentry_operations); 3172 3173 d_add(dentry, inode); 3174 /* Close the race of the process dying before we return the dentry */ 3175 if (pid_revalidate(dentry, 0)) 3176 return 0; 3177 out: 3178 return -ENOENT; 3179 } 3180 3181 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 3182 { 3183 int result = -ENOENT; 3184 struct task_struct *task; 3185 unsigned tgid; 3186 struct pid_namespace *ns; 3187 3188 tgid = name_to_int(&dentry->d_name); 3189 if (tgid == ~0U) 3190 goto out; 3191 3192 ns = dentry->d_sb->s_fs_info; 3193 rcu_read_lock(); 3194 task = find_task_by_pid_ns(tgid, ns); 3195 if (task) 3196 get_task_struct(task); 3197 rcu_read_unlock(); 3198 if (!task) 3199 goto out; 3200 3201 result = proc_pid_instantiate(dir, dentry, task, NULL); 3202 put_task_struct(task); 3203 out: 3204 return ERR_PTR(result); 3205 } 3206 3207 /* 3208 * Find the first task with tgid >= tgid 3209 * 3210 */ 3211 struct tgid_iter { 3212 unsigned int tgid; 3213 struct task_struct *task; 3214 }; 3215 static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) 3216 { 3217 
struct pid *pid; 3218 3219 if (iter.task) 3220 put_task_struct(iter.task); 3221 rcu_read_lock(); 3222 retry: 3223 iter.task = NULL; 3224 pid = find_ge_pid(iter.tgid, ns); 3225 if (pid) { 3226 iter.tgid = pid_nr_ns(pid, ns); 3227 iter.task = pid_task(pid, PIDTYPE_PID); 3228 /* What we to know is if the pid we have find is the 3229 * pid of a thread_group_leader. Testing for task 3230 * being a thread_group_leader is the obvious thing 3231 * todo but there is a window when it fails, due to 3232 * the pid transfer logic in de_thread. 3233 * 3234 * So we perform the straight forward test of seeing 3235 * if the pid we have found is the pid of a thread 3236 * group leader, and don't worry if the task we have 3237 * found doesn't happen to be a thread group leader. 3238 * As we don't care in the case of readdir. 3239 */ 3240 if (!iter.task || !has_group_leader_pid(iter.task)) { 3241 iter.tgid += 1; 3242 goto retry; 3243 } 3244 get_task_struct(iter.task); 3245 } 3246 rcu_read_unlock(); 3247 return iter; 3248 } 3249 3250 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) 3251 3252 /* for the /proc/ directory itself, after non-process stuff has been done */ 3253 int proc_pid_readdir(struct file *file, struct dir_context *ctx) 3254 { 3255 struct tgid_iter iter; 3256 struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info; 3257 loff_t pos = ctx->pos; 3258 3259 if (pos >= PID_MAX_LIMIT + TGID_OFFSET) 3260 return 0; 3261 3262 if (pos == TGID_OFFSET - 2) { 3263 struct inode *inode = d_inode(ns->proc_self); 3264 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) 3265 return 0; 3266 ctx->pos = pos = pos + 1; 3267 } 3268 if (pos == TGID_OFFSET - 1) { 3269 struct inode *inode = d_inode(ns->proc_thread_self); 3270 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) 3271 return 0; 3272 ctx->pos = pos = pos + 1; 3273 } 3274 iter.tgid = pos - TGID_OFFSET; 3275 iter.task = NULL; 3276 for (iter = next_tgid(ns, iter); 3277 iter.task; 3278 iter.tgid += 1, iter = next_tgid(ns, 
iter)) { 3279 char name[10 + 1]; 3280 int len; 3281 3282 cond_resched(); 3283 if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) 3284 continue; 3285 3286 len = snprintf(name, sizeof(name), "%u", iter.tgid); 3287 ctx->pos = iter.tgid + TGID_OFFSET; 3288 if (!proc_fill_cache(file, ctx, name, len, 3289 proc_pid_instantiate, iter.task, NULL)) { 3290 put_task_struct(iter.task); 3291 return 0; 3292 } 3293 } 3294 ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; 3295 return 0; 3296 } 3297 3298 /* 3299 * proc_tid_comm_permission is a special permission function exclusively 3300 * used for the node /proc/<pid>/task/<tid>/comm. 3301 * It bypasses generic permission checks in the case where a task of the same 3302 * task group attempts to access the node. 3303 * The rationale behind this is that glibc and bionic access this node for 3304 * cross thread naming (pthread_set/getname_np(!self)). However, if 3305 * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0, 3306 * which locks out the cross thread naming implementation. 3307 * This function makes sure that the node is always accessible for members of 3308 * same thread group. 3309 */ 3310 static int proc_tid_comm_permission(struct inode *inode, int mask) 3311 { 3312 bool is_same_tgroup; 3313 struct task_struct *task; 3314 3315 task = get_proc_task(inode); 3316 if (!task) 3317 return -ESRCH; 3318 is_same_tgroup = same_thread_group(current, task); 3319 put_task_struct(task); 3320 3321 if (likely(is_same_tgroup && !(mask & MAY_EXEC))) { 3322 /* This file (/proc/<pid>/task/<tid>/comm) can always be 3323 * read or written by the members of the corresponding 3324 * thread group. 
3325 */ 3326 return 0; 3327 } 3328 3329 return generic_permission(inode, mask); 3330 } 3331 3332 static const struct inode_operations proc_tid_comm_inode_operations = { 3333 .permission = proc_tid_comm_permission, 3334 }; 3335 3336 /* 3337 * Tasks 3338 */ 3339 static const struct pid_entry tid_base_stuff[] = { 3340 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3341 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3342 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3343 #ifdef CONFIG_NET 3344 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 3345 #endif 3346 REG("environ", S_IRUSR, proc_environ_operations), 3347 REG("auxv", S_IRUSR, proc_auxv_operations), 3348 ONE("status", S_IRUGO, proc_pid_status), 3349 ONE("personality", S_IRUSR, proc_pid_personality), 3350 ONE("limits", S_IRUGO, proc_pid_limits), 3351 #ifdef CONFIG_SCHED_DEBUG 3352 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3353 #endif 3354 NOD("comm", S_IFREG|S_IRUGO|S_IWUSR, 3355 &proc_tid_comm_inode_operations, 3356 &proc_pid_set_comm_operations, {}), 3357 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3358 ONE("syscall", S_IRUSR, proc_pid_syscall), 3359 #endif 3360 REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), 3361 ONE("stat", S_IRUGO, proc_tid_stat), 3362 ONE("statm", S_IRUGO, proc_pid_statm), 3363 REG("maps", S_IRUGO, proc_tid_maps_operations), 3364 #ifdef CONFIG_PROC_CHILDREN 3365 REG("children", S_IRUGO, proc_tid_children_operations), 3366 #endif 3367 #ifdef CONFIG_NUMA 3368 REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), 3369 #endif 3370 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3371 LNK("cwd", proc_cwd_link), 3372 LNK("root", proc_root_link), 3373 LNK("exe", proc_exe_link), 3374 REG("mounts", S_IRUGO, proc_mounts_operations), 3375 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3376 #ifdef CONFIG_PROC_PAGE_MONITOR 3377 REG("clear_refs", S_IWUSR, 
proc_clear_refs_operations), 3378 REG("smaps", S_IRUGO, proc_tid_smaps_operations), 3379 REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), 3380 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3381 #endif 3382 #ifdef CONFIG_SECURITY 3383 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3384 #endif 3385 #ifdef CONFIG_KALLSYMS 3386 ONE("wchan", S_IRUGO, proc_pid_wchan), 3387 #endif 3388 #ifdef CONFIG_STACKTRACE 3389 ONE("stack", S_IRUSR, proc_pid_stack), 3390 #endif 3391 #ifdef CONFIG_SCHED_INFO 3392 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 3393 #endif 3394 #ifdef CONFIG_LATENCYTOP 3395 REG("latency", S_IRUGO, proc_lstats_operations), 3396 #endif 3397 #ifdef CONFIG_PROC_PID_CPUSET 3398 ONE("cpuset", S_IRUGO, proc_cpuset_show), 3399 #endif 3400 #ifdef CONFIG_CGROUPS 3401 ONE("cgroup", S_IRUGO, proc_cgroup_show), 3402 #endif 3403 ONE("oom_score", S_IRUGO, proc_oom_score), 3404 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 3405 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3406 #ifdef CONFIG_AUDITSYSCALL 3407 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3408 REG("sessionid", S_IRUGO, proc_sessionid_operations), 3409 #endif 3410 #ifdef CONFIG_FAULT_INJECTION 3411 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3412 REG("fail-nth", 0644, proc_fail_nth_operations), 3413 #endif 3414 #ifdef CONFIG_TASK_IO_ACCOUNTING 3415 ONE("io", S_IRUSR, proc_tid_io_accounting), 3416 #endif 3417 #ifdef CONFIG_USER_NS 3418 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 3419 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 3420 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 3421 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), 3422 #endif 3423 #ifdef CONFIG_LIVEPATCH 3424 ONE("patch_state", S_IRUSR, proc_pid_patch_state), 3425 #endif 3426 }; 3427 3428 static int proc_tid_base_readdir(struct file *file, struct 
dir_context *ctx) 3429 { 3430 return proc_pident_readdir(file, ctx, 3431 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3432 } 3433 3434 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 3435 { 3436 return proc_pident_lookup(dir, dentry, 3437 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3438 } 3439 3440 static const struct file_operations proc_tid_base_operations = { 3441 .read = generic_read_dir, 3442 .iterate_shared = proc_tid_base_readdir, 3443 .llseek = generic_file_llseek, 3444 }; 3445 3446 static const struct inode_operations proc_tid_base_inode_operations = { 3447 .lookup = proc_tid_base_lookup, 3448 .getattr = pid_getattr, 3449 .setattr = proc_setattr, 3450 }; 3451 3452 static int proc_task_instantiate(struct inode *dir, 3453 struct dentry *dentry, struct task_struct *task, const void *ptr) 3454 { 3455 struct inode *inode; 3456 inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); 3457 3458 if (!inode) 3459 goto out; 3460 inode->i_op = &proc_tid_base_inode_operations; 3461 inode->i_fop = &proc_tid_base_operations; 3462 inode->i_flags|=S_IMMUTABLE; 3463 3464 set_nlink(inode, nlink_tid); 3465 3466 d_set_d_op(dentry, &pid_dentry_operations); 3467 3468 d_add(dentry, inode); 3469 /* Close the race of the process dying before we return the dentry */ 3470 if (pid_revalidate(dentry, 0)) 3471 return 0; 3472 out: 3473 return -ENOENT; 3474 } 3475 3476 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 3477 { 3478 int result = -ENOENT; 3479 struct task_struct *task; 3480 struct task_struct *leader = get_proc_task(dir); 3481 unsigned tid; 3482 struct pid_namespace *ns; 3483 3484 if (!leader) 3485 goto out_no_task; 3486 3487 tid = name_to_int(&dentry->d_name); 3488 if (tid == ~0U) 3489 goto out; 3490 3491 ns = dentry->d_sb->s_fs_info; 3492 rcu_read_lock(); 3493 task = find_task_by_pid_ns(tid, ns); 3494 if (task) 3495 get_task_struct(task); 3496 
rcu_read_unlock(); 3497 if (!task) 3498 goto out; 3499 if (!same_thread_group(leader, task)) 3500 goto out_drop_task; 3501 3502 result = proc_task_instantiate(dir, dentry, task, NULL); 3503 out_drop_task: 3504 put_task_struct(task); 3505 out: 3506 put_task_struct(leader); 3507 out_no_task: 3508 return ERR_PTR(result); 3509 } 3510 3511 /* 3512 * Find the first tid of a thread group to return to user space. 3513 * 3514 * Usually this is just the thread group leader, but if the users 3515 * buffer was too small or there was a seek into the middle of the 3516 * directory we have more work todo. 3517 * 3518 * In the case of a short read we start with find_task_by_pid. 3519 * 3520 * In the case of a seek we start with the leader and walk nr 3521 * threads past it. 3522 */ 3523 static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, 3524 struct pid_namespace *ns) 3525 { 3526 struct task_struct *pos, *task; 3527 unsigned long nr = f_pos; 3528 3529 if (nr != f_pos) /* 32bit overflow? */ 3530 return NULL; 3531 3532 rcu_read_lock(); 3533 task = pid_task(pid, PIDTYPE_PID); 3534 if (!task) 3535 goto fail; 3536 3537 /* Attempt to start with the tid of a thread */ 3538 if (tid && nr) { 3539 pos = find_task_by_pid_ns(tid, ns); 3540 if (pos && same_thread_group(pos, task)) 3541 goto found; 3542 } 3543 3544 /* If nr exceeds the number of threads there is nothing todo */ 3545 if (nr >= get_nr_threads(task)) 3546 goto fail; 3547 3548 /* If we haven't found our starting place yet start 3549 * with the leader and walk nr threads forward. 3550 */ 3551 pos = task = task->group_leader; 3552 do { 3553 if (!nr--) 3554 goto found; 3555 } while_each_thread(task, pos); 3556 fail: 3557 pos = NULL; 3558 goto out; 3559 found: 3560 get_task_struct(pos); 3561 out: 3562 rcu_read_unlock(); 3563 return pos; 3564 } 3565 3566 /* 3567 * Find the next thread in the thread list. 3568 * Return NULL if there is an error or no next thread. 
3569 * 3570 * The reference to the input task_struct is released. 3571 */ 3572 static struct task_struct *next_tid(struct task_struct *start) 3573 { 3574 struct task_struct *pos = NULL; 3575 rcu_read_lock(); 3576 if (pid_alive(start)) { 3577 pos = next_thread(start); 3578 if (thread_group_leader(pos)) 3579 pos = NULL; 3580 else 3581 get_task_struct(pos); 3582 } 3583 rcu_read_unlock(); 3584 put_task_struct(start); 3585 return pos; 3586 } 3587 3588 /* for the /proc/TGID/task/ directories */ 3589 static int proc_task_readdir(struct file *file, struct dir_context *ctx) 3590 { 3591 struct inode *inode = file_inode(file); 3592 struct task_struct *task; 3593 struct pid_namespace *ns; 3594 int tid; 3595 3596 if (proc_inode_is_dead(inode)) 3597 return -ENOENT; 3598 3599 if (!dir_emit_dots(file, ctx)) 3600 return 0; 3601 3602 /* f_version caches the tgid value that the last readdir call couldn't 3603 * return. lseek aka telldir automagically resets f_version to 0. 3604 */ 3605 ns = inode->i_sb->s_fs_info; 3606 tid = (int)file->f_version; 3607 file->f_version = 0; 3608 for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); 3609 task; 3610 task = next_tid(task), ctx->pos++) { 3611 char name[10 + 1]; 3612 int len; 3613 tid = task_pid_nr_ns(task, ns); 3614 len = snprintf(name, sizeof(name), "%u", tid); 3615 if (!proc_fill_cache(file, ctx, name, len, 3616 proc_task_instantiate, task, NULL)) { 3617 /* returning this tgid failed, save it as the first 3618 * pid for the next readir call */ 3619 file->f_version = (u64)tid; 3620 put_task_struct(task); 3621 break; 3622 } 3623 } 3624 3625 return 0; 3626 } 3627 3628 static int proc_task_getattr(const struct path *path, struct kstat *stat, 3629 u32 request_mask, unsigned int query_flags) 3630 { 3631 struct inode *inode = d_inode(path->dentry); 3632 struct task_struct *p = get_proc_task(inode); 3633 generic_fillattr(inode, stat); 3634 3635 if (p) { 3636 stat->nlink += get_nr_threads(p); 3637 put_task_struct(p); 3638 } 3639 3640 
return 0; 3641 } 3642 3643 static const struct inode_operations proc_task_inode_operations = { 3644 .lookup = proc_task_lookup, 3645 .getattr = proc_task_getattr, 3646 .setattr = proc_setattr, 3647 .permission = proc_pid_permission, 3648 }; 3649 3650 static const struct file_operations proc_task_operations = { 3651 .read = generic_read_dir, 3652 .iterate_shared = proc_task_readdir, 3653 .llseek = generic_file_llseek, 3654 }; 3655 3656 void __init set_proc_pid_nlink(void) 3657 { 3658 nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3659 nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3660 } 3661