// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't
 *  use the rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
49 */ 50 51 #include <linux/uaccess.h> 52 53 #include <linux/errno.h> 54 #include <linux/time.h> 55 #include <linux/proc_fs.h> 56 #include <linux/stat.h> 57 #include <linux/task_io_accounting_ops.h> 58 #include <linux/init.h> 59 #include <linux/capability.h> 60 #include <linux/file.h> 61 #include <linux/fdtable.h> 62 #include <linux/generic-radix-tree.h> 63 #include <linux/string.h> 64 #include <linux/seq_file.h> 65 #include <linux/namei.h> 66 #include <linux/mnt_namespace.h> 67 #include <linux/mm.h> 68 #include <linux/swap.h> 69 #include <linux/rcupdate.h> 70 #include <linux/kallsyms.h> 71 #include <linux/stacktrace.h> 72 #include <linux/resource.h> 73 #include <linux/module.h> 74 #include <linux/mount.h> 75 #include <linux/security.h> 76 #include <linux/ptrace.h> 77 #include <linux/tracehook.h> 78 #include <linux/printk.h> 79 #include <linux/cache.h> 80 #include <linux/cgroup.h> 81 #include <linux/cpuset.h> 82 #include <linux/audit.h> 83 #include <linux/poll.h> 84 #include <linux/nsproxy.h> 85 #include <linux/oom.h> 86 #include <linux/elf.h> 87 #include <linux/pid_namespace.h> 88 #include <linux/user_namespace.h> 89 #include <linux/fs_struct.h> 90 #include <linux/slab.h> 91 #include <linux/sched/autogroup.h> 92 #include <linux/sched/mm.h> 93 #include <linux/sched/coredump.h> 94 #include <linux/sched/debug.h> 95 #include <linux/sched/stat.h> 96 #include <linux/posix-timers.h> 97 #include <trace/events/oom.h> 98 #include "internal.h" 99 #include "fd.h" 100 101 #include "../../lib/kstrtox.h" 102 103 /* NOTE: 104 * Implementing inode permission operations in /proc is almost 105 * certainly an error. Permission checks need to happen during 106 * each system call not at open time. The reason is that most of 107 * what we wish to check for permissions in /proc varies at runtime. 108 * 109 * The classic example of a problem is opening file descriptors 110 * in /proc for a task before it execs a suid executable. 
 */

/*
 * Hard-link counts for the per-thread (tid) and per-process (tgid)
 * directories. NOTE(review): presumably filled in from pid_entry_nlink()
 * during init; the assignment is not in this part of the file.
 */
static u8 nlink_tid __ro_after_init;
static u8 nlink_tgid __ro_after_init;

/*
 * Static description of one entry in a per-task /proc directory table.
 *
 * @name: entry name
 * @len:  length of @name without the terminating NUL (see NOD())
 * @mode: file type and permission bits
 * @iop:  inode operations, or NULL
 * @fop:  file operations, or NULL
 * @op:   entry-specific callback or data (get_link, show, lsm, ...)
 */
struct pid_entry {
	const char *name;
	unsigned int len;
	umode_t mode;
	const struct inode_operations *iop;
	const struct file_operations *fop;
	union proc_op op;
};

/*
 * Generic pid_entry initializer. NAME must be a string literal (or array)
 * because its length is taken with sizeof().
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {			\
	.name = (NAME),					\
	.len  = sizeof(NAME) - 1,			\
	.mode = MODE,					\
	.iop  = IOP,					\
	.fop  = FOP,					\
	.op   = OP,					\
}

/* Directory entry: takes the inode and file operations objects by name. */
#define DIR(NAME, MODE, iops, fops)	\
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
/* Symlink entry, mode rwx for everyone, resolved through get_link. */
#define LNK(NAME, get_link)					\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
		&proc_pid_link_inode_operations, NULL,		\
		{ .proc_get_link = get_link } )
/* Regular file with its own file_operations. */
#define REG(NAME, MODE, fops)				\
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
/* Regular file served by proc_single_file_operations calling show. */
#define ONE(NAME, MODE, show)				\
	NOD(NAME, (S_IFREG|(MODE)),			\
		NULL, &proc_single_file_operations,	\
		{ .proc_show = show } )
/* Regular file served by proc_pid_attr_operations for the given LSM. */
#define ATTR(LSM, NAME, MODE)				\
	NOD(NAME, (S_IFREG|(MODE)),			\
		NULL, &proc_pid_attr_operations,	\
		{ .lsm = LSM })

/*
 * Count the hardlinks for a pid_entry table: the count starts at two and
 * gains one for every directory entry in the table.
154 */ 155 static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, 156 unsigned int n) 157 { 158 unsigned int i; 159 unsigned int count; 160 161 count = 2; 162 for (i = 0; i < n; ++i) { 163 if (S_ISDIR(entries[i].mode)) 164 ++count; 165 } 166 167 return count; 168 } 169 170 static int get_task_root(struct task_struct *task, struct path *root) 171 { 172 int result = -ENOENT; 173 174 task_lock(task); 175 if (task->fs) { 176 get_fs_root(task->fs, root); 177 result = 0; 178 } 179 task_unlock(task); 180 return result; 181 } 182 183 static int proc_cwd_link(struct dentry *dentry, struct path *path) 184 { 185 struct task_struct *task = get_proc_task(d_inode(dentry)); 186 int result = -ENOENT; 187 188 if (task) { 189 task_lock(task); 190 if (task->fs) { 191 get_fs_pwd(task->fs, path); 192 result = 0; 193 } 194 task_unlock(task); 195 put_task_struct(task); 196 } 197 return result; 198 } 199 200 static int proc_root_link(struct dentry *dentry, struct path *path) 201 { 202 struct task_struct *task = get_proc_task(d_inode(dentry)); 203 int result = -ENOENT; 204 205 if (task) { 206 result = get_task_root(task, path); 207 put_task_struct(task); 208 } 209 return result; 210 } 211 212 /* 213 * If the user used setproctitle(), we just get the string from 214 * user space at arg_start, and limit it to a maximum of one page. 
 */
/*
 * @mm:        address space to read from
 * @buf:       user buffer receiving the title
 * @count:     maximum number of bytes to copy
 * @pos:       offset into the title at which to start
 * @arg_start: remote address of the first byte of the title
 *
 * Returns the number of bytes copied, 0 when @pos is at or beyond the
 * available data (or the remote read yields nothing), -ENOMEM if the
 * bounce page cannot be allocated, or -EFAULT if nothing could be copied
 * to @buf.
 */
static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
				size_t count, unsigned long pos,
				unsigned long arg_start)
{
	char *page;
	int ret, got;

	if (pos >= PAGE_SIZE)
		return 0;

	page = (char *)__get_free_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	ret = 0;
	got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
	if (got > 0) {
		int len = strnlen(page, got);

		/* Include the NUL character if it was found */
		if (len < got)
			len++;

		if (len > pos) {
			len -= pos;
			if (len > count)
				len = count;
			/* copy_to_user() returns the number of bytes NOT copied */
			len -= copy_to_user(buf, page+pos, len);
			if (!len)
				len = -EFAULT;
			ret = len;
		}
	}
	free_page((unsigned long)page);
	return ret;
}

/*
 * Copy up to @count bytes of the command line of @mm, starting at offset
 * *@ppos, into @buf. *@ppos itself is not modified here.
 *
 * Returns the number of bytes copied, 0 when there is nothing (more) to
 * read, -ENOMEM if the bounce page cannot be allocated, or -EFAULT if the
 * very first copy to @buf fails.
 */
static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
			      size_t count, loff_t *ppos)
{
	unsigned long arg_start, arg_end, env_start, env_end;
	unsigned long pos, len;
	char *page, c;

	/* Check if process spawned far enough to have cmdline. */
	if (!mm->env_end)
		return 0;

	/* Take a consistent snapshot of the argument/environment bounds. */
	spin_lock(&mm->arg_lock);
	arg_start = mm->arg_start;
	arg_end = mm->arg_end;
	env_start = mm->env_start;
	env_end = mm->env_end;
	spin_unlock(&mm->arg_lock);

	if (arg_start >= arg_end)
		return 0;

	/*
	 * We allow setproctitle() to overwrite the argument
	 * strings, and overflow past the original end. But
	 * only when it overflows into the environment area.
	 */
	if (env_start != arg_end || env_end < env_start)
		env_start = env_end = arg_end;
	len = env_end - arg_start;

	/* We're not going to care if "*ppos" has high bits set */
	pos = *ppos;
	if (pos >= len)
		return 0;
	if (count > len - pos)
		count = len - pos;
	if (!count)
		return 0;

	/*
	 * Magical special case: if the argv[] end byte is not
	 * zero, the user has overwritten it with setproctitle(3).
	 *
	 * Possible future enhancement: do this only once when
	 * pos is 0, and set a flag in the 'struct file'.
	 */
	if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
		return get_mm_proctitle(mm, buf, count, pos, arg_start);

	/*
	 * For the non-setproctitle() case we limit things strictly
	 * to the [arg_start, arg_end[ range.
	 */
	pos += arg_start;
	/* "pos < arg_start" catches wrap-around of the addition above */
	if (pos < arg_start || pos >= arg_end)
		return 0;
	if (count > arg_end - pos)
		count = arg_end - pos;

	page = (char *)__get_free_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* From here on "len" counts the bytes delivered to the caller. */
	len = 0;
	while (count) {
		int got;
		size_t size = min_t(size_t, PAGE_SIZE, count);

		got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
		if (got <= 0)
			break;
		got -= copy_to_user(buf, page, got);
		if (unlikely(!got)) {
			/* Report the fault only if nothing was delivered yet */
			if (!len)
				len = -EFAULT;
			break;
		}
		pos += got;
		buf += got;
		len += got;
		count -= got;
	}

	free_page((unsigned long)page);
	return len;
}

/*
 * Read the command line of @tsk through its mm.
 * Returns 0 when the task has no mm, otherwise whatever get_mm_cmdline()
 * returns.
 */
static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf,
				size_t count, loff_t *pos)
{
	struct mm_struct *mm;
	ssize_t ret;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	ret = get_mm_cmdline(mm, buf, count, pos);
	mmput(mm);
	return ret;
}

/*
 * read() handler for the cmdline file. Advances *@pos by the number of
 * bytes returned. Returns -ESRCH if the task no longer exists.
 */
static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
				     size_t count, loff_t *pos)
{
	struct task_struct *tsk;
	ssize_t ret;

	BUG_ON(*pos < 0);

	tsk = get_proc_task(file_inode(file));
	if (!tsk)
		return -ESRCH;
	ret = get_task_cmdline(tsk, buf, count, pos);
	put_task_struct(tsk);
	if (ret > 0)
		*pos += ret;
	return ret;
}

static const struct file_operations proc_pid_cmdline_ops = {
	.read	= proc_pid_cmdline_read,
	.llseek	= generic_file_llseek,
};

#ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper
one-value-per-file format. 381 * Returns the resolved symbol. If that fails, simply return the address. 382 */ 383 static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, 384 struct pid *pid, struct task_struct *task) 385 { 386 unsigned long wchan; 387 char symname[KSYM_NAME_LEN]; 388 389 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) 390 goto print0; 391 392 wchan = get_wchan(task); 393 if (wchan && !lookup_symbol_name(wchan, symname)) { 394 seq_puts(m, symname); 395 return 0; 396 } 397 398 print0: 399 seq_putc(m, '0'); 400 return 0; 401 } 402 #endif /* CONFIG_KALLSYMS */ 403 404 static int lock_trace(struct task_struct *task) 405 { 406 int err = mutex_lock_killable(&task->signal->cred_guard_mutex); 407 if (err) 408 return err; 409 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { 410 mutex_unlock(&task->signal->cred_guard_mutex); 411 return -EPERM; 412 } 413 return 0; 414 } 415 416 static void unlock_trace(struct task_struct *task) 417 { 418 mutex_unlock(&task->signal->cred_guard_mutex); 419 } 420 421 #ifdef CONFIG_STACKTRACE 422 423 #define MAX_STACK_TRACE_DEPTH 64 424 425 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, 426 struct pid *pid, struct task_struct *task) 427 { 428 unsigned long *entries; 429 int err; 430 431 /* 432 * The ability to racily run the kernel stack unwinder on a running task 433 * and then observe the unwinder output is scary; while it is useful for 434 * debugging kernel issues, it can also allow an attacker to leak kernel 435 * stack contents. 436 * Doing this in a manner that is at least safe from races would require 437 * some work to ensure that the remote task can not be scheduled; and 438 * even then, this would still expose the unwinder as local attack 439 * surface. 440 * Therefore, this interface is restricted to root. 
441 */ 442 if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) 443 return -EACCES; 444 445 entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries), 446 GFP_KERNEL); 447 if (!entries) 448 return -ENOMEM; 449 450 err = lock_trace(task); 451 if (!err) { 452 unsigned int i, nr_entries; 453 454 nr_entries = stack_trace_save_tsk(task, entries, 455 MAX_STACK_TRACE_DEPTH, 0); 456 457 for (i = 0; i < nr_entries; i++) { 458 seq_printf(m, "[<0>] %pB\n", (void *)entries[i]); 459 } 460 461 unlock_trace(task); 462 } 463 kfree(entries); 464 465 return err; 466 } 467 #endif 468 469 #ifdef CONFIG_SCHED_INFO 470 /* 471 * Provides /proc/PID/schedstat 472 */ 473 static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, 474 struct pid *pid, struct task_struct *task) 475 { 476 if (unlikely(!sched_info_on())) 477 seq_puts(m, "0 0 0\n"); 478 else 479 seq_printf(m, "%llu %llu %lu\n", 480 (unsigned long long)task->se.sum_exec_runtime, 481 (unsigned long long)task->sched_info.run_delay, 482 task->sched_info.pcount); 483 484 return 0; 485 } 486 #endif 487 488 #ifdef CONFIG_LATENCYTOP 489 static int lstats_show_proc(struct seq_file *m, void *v) 490 { 491 int i; 492 struct inode *inode = m->private; 493 struct task_struct *task = get_proc_task(inode); 494 495 if (!task) 496 return -ESRCH; 497 seq_puts(m, "Latency Top version : v0.1\n"); 498 for (i = 0; i < LT_SAVECOUNT; i++) { 499 struct latency_record *lr = &task->latency_record[i]; 500 if (lr->backtrace[0]) { 501 int q; 502 seq_printf(m, "%i %li %li", 503 lr->count, lr->time, lr->max); 504 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 505 unsigned long bt = lr->backtrace[q]; 506 507 if (!bt) 508 break; 509 seq_printf(m, " %ps", (void *)bt); 510 } 511 seq_putc(m, '\n'); 512 } 513 514 } 515 put_task_struct(task); 516 return 0; 517 } 518 519 static int lstats_open(struct inode *inode, struct file *file) 520 { 521 return single_open(file, lstats_show_proc, inode); 522 } 523 524 static ssize_t lstats_write(struct 
file *file, const char __user *buf, 525 size_t count, loff_t *offs) 526 { 527 struct task_struct *task = get_proc_task(file_inode(file)); 528 529 if (!task) 530 return -ESRCH; 531 clear_tsk_latency_tracing(task); 532 put_task_struct(task); 533 534 return count; 535 } 536 537 static const struct file_operations proc_lstats_operations = { 538 .open = lstats_open, 539 .read = seq_read, 540 .write = lstats_write, 541 .llseek = seq_lseek, 542 .release = single_release, 543 }; 544 545 #endif 546 547 static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, 548 struct pid *pid, struct task_struct *task) 549 { 550 unsigned long totalpages = totalram_pages() + total_swap_pages; 551 unsigned long points = 0; 552 553 points = oom_badness(task, totalpages) * 1000 / totalpages; 554 seq_printf(m, "%lu\n", points); 555 556 return 0; 557 } 558 559 struct limit_names { 560 const char *name; 561 const char *unit; 562 }; 563 564 static const struct limit_names lnames[RLIM_NLIMITS] = { 565 [RLIMIT_CPU] = {"Max cpu time", "seconds"}, 566 [RLIMIT_FSIZE] = {"Max file size", "bytes"}, 567 [RLIMIT_DATA] = {"Max data size", "bytes"}, 568 [RLIMIT_STACK] = {"Max stack size", "bytes"}, 569 [RLIMIT_CORE] = {"Max core file size", "bytes"}, 570 [RLIMIT_RSS] = {"Max resident set", "bytes"}, 571 [RLIMIT_NPROC] = {"Max processes", "processes"}, 572 [RLIMIT_NOFILE] = {"Max open files", "files"}, 573 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, 574 [RLIMIT_AS] = {"Max address space", "bytes"}, 575 [RLIMIT_LOCKS] = {"Max file locks", "locks"}, 576 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, 577 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, 578 [RLIMIT_NICE] = {"Max nice priority", NULL}, 579 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, 580 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, 581 }; 582 583 /* Display limits for a process */ 584 static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, 585 struct pid *pid, struct task_struct 
*task) 586 { 587 unsigned int i; 588 unsigned long flags; 589 590 struct rlimit rlim[RLIM_NLIMITS]; 591 592 if (!lock_task_sighand(task, &flags)) 593 return 0; 594 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); 595 unlock_task_sighand(task, &flags); 596 597 /* 598 * print the file header 599 */ 600 seq_puts(m, "Limit " 601 "Soft Limit " 602 "Hard Limit " 603 "Units \n"); 604 605 for (i = 0; i < RLIM_NLIMITS; i++) { 606 if (rlim[i].rlim_cur == RLIM_INFINITY) 607 seq_printf(m, "%-25s %-20s ", 608 lnames[i].name, "unlimited"); 609 else 610 seq_printf(m, "%-25s %-20lu ", 611 lnames[i].name, rlim[i].rlim_cur); 612 613 if (rlim[i].rlim_max == RLIM_INFINITY) 614 seq_printf(m, "%-20s ", "unlimited"); 615 else 616 seq_printf(m, "%-20lu ", rlim[i].rlim_max); 617 618 if (lnames[i].unit) 619 seq_printf(m, "%-10s\n", lnames[i].unit); 620 else 621 seq_putc(m, '\n'); 622 } 623 624 return 0; 625 } 626 627 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 628 static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, 629 struct pid *pid, struct task_struct *task) 630 { 631 struct syscall_info info; 632 u64 *args = &info.data.args[0]; 633 int res; 634 635 res = lock_trace(task); 636 if (res) 637 return res; 638 639 if (task_current_syscall(task, &info)) 640 seq_puts(m, "running\n"); 641 else if (info.data.nr < 0) 642 seq_printf(m, "%d 0x%llx 0x%llx\n", 643 info.data.nr, info.sp, info.data.instruction_pointer); 644 else 645 seq_printf(m, 646 "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n", 647 info.data.nr, 648 args[0], args[1], args[2], args[3], args[4], args[5], 649 info.sp, info.data.instruction_pointer); 650 unlock_trace(task); 651 652 return 0; 653 } 654 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 655 656 /************************************************************************/ 657 /* Here the fs part begins */ 658 /************************************************************************/ 659 660 /* permission checks */ 661 static int 
proc_fd_access_allowed(struct inode *inode) 662 { 663 struct task_struct *task; 664 int allowed = 0; 665 /* Allow access to a task's file descriptors if it is us or we 666 * may use ptrace attach to the process and find out that 667 * information. 668 */ 669 task = get_proc_task(inode); 670 if (task) { 671 allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); 672 put_task_struct(task); 673 } 674 return allowed; 675 } 676 677 int proc_setattr(struct dentry *dentry, struct iattr *attr) 678 { 679 int error; 680 struct inode *inode = d_inode(dentry); 681 682 if (attr->ia_valid & ATTR_MODE) 683 return -EPERM; 684 685 error = setattr_prepare(dentry, attr); 686 if (error) 687 return error; 688 689 setattr_copy(inode, attr); 690 mark_inode_dirty(inode); 691 return 0; 692 } 693 694 /* 695 * May current process learn task's sched/cmdline info (for hide_pid_min=1) 696 * or euid/egid (for hide_pid_min=2)? 697 */ 698 static bool has_pid_permissions(struct pid_namespace *pid, 699 struct task_struct *task, 700 int hide_pid_min) 701 { 702 if (pid->hide_pid < hide_pid_min) 703 return true; 704 if (in_group_p(pid->pid_gid)) 705 return true; 706 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); 707 } 708 709 710 static int proc_pid_permission(struct inode *inode, int mask) 711 { 712 struct pid_namespace *pid = proc_pid_ns(inode); 713 struct task_struct *task; 714 bool has_perms; 715 716 task = get_proc_task(inode); 717 if (!task) 718 return -ESRCH; 719 has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS); 720 put_task_struct(task); 721 722 if (!has_perms) { 723 if (pid->hide_pid == HIDEPID_INVISIBLE) { 724 /* 725 * Let's make getdents(), stat(), and open() 726 * consistent with each other. If a process 727 * may not stat() a file, it shouldn't be seen 728 * in procfs at all. 
729 */ 730 return -ENOENT; 731 } 732 733 return -EPERM; 734 } 735 return generic_permission(inode, mask); 736 } 737 738 739 740 static const struct inode_operations proc_def_inode_operations = { 741 .setattr = proc_setattr, 742 }; 743 744 static int proc_single_show(struct seq_file *m, void *v) 745 { 746 struct inode *inode = m->private; 747 struct pid_namespace *ns = proc_pid_ns(inode); 748 struct pid *pid = proc_pid(inode); 749 struct task_struct *task; 750 int ret; 751 752 task = get_pid_task(pid, PIDTYPE_PID); 753 if (!task) 754 return -ESRCH; 755 756 ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); 757 758 put_task_struct(task); 759 return ret; 760 } 761 762 static int proc_single_open(struct inode *inode, struct file *filp) 763 { 764 return single_open(filp, proc_single_show, inode); 765 } 766 767 static const struct file_operations proc_single_file_operations = { 768 .open = proc_single_open, 769 .read = seq_read, 770 .llseek = seq_lseek, 771 .release = single_release, 772 }; 773 774 775 struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) 776 { 777 struct task_struct *task = get_proc_task(inode); 778 struct mm_struct *mm = ERR_PTR(-ESRCH); 779 780 if (task) { 781 mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); 782 put_task_struct(task); 783 784 if (!IS_ERR_OR_NULL(mm)) { 785 /* ensure this mm_struct can't be freed */ 786 mmgrab(mm); 787 /* but do not pin its memory */ 788 mmput(mm); 789 } 790 } 791 792 return mm; 793 } 794 795 static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) 796 { 797 struct mm_struct *mm = proc_mem_open(inode, mode); 798 799 if (IS_ERR(mm)) 800 return PTR_ERR(mm); 801 802 file->private_data = mm; 803 return 0; 804 } 805 806 static int mem_open(struct inode *inode, struct file *file) 807 { 808 int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); 809 810 /* OK to pass negative loff_t, we can catch out-of-range */ 811 file->f_mode |= FMODE_UNSIGNED_OFFSET; 812 813 return ret; 814 } 
815 816 static ssize_t mem_rw(struct file *file, char __user *buf, 817 size_t count, loff_t *ppos, int write) 818 { 819 struct mm_struct *mm = file->private_data; 820 unsigned long addr = *ppos; 821 ssize_t copied; 822 char *page; 823 unsigned int flags; 824 825 if (!mm) 826 return 0; 827 828 page = (char *)__get_free_page(GFP_KERNEL); 829 if (!page) 830 return -ENOMEM; 831 832 copied = 0; 833 if (!mmget_not_zero(mm)) 834 goto free; 835 836 flags = FOLL_FORCE | (write ? FOLL_WRITE : 0); 837 838 while (count > 0) { 839 int this_len = min_t(int, count, PAGE_SIZE); 840 841 if (write && copy_from_user(page, buf, this_len)) { 842 copied = -EFAULT; 843 break; 844 } 845 846 this_len = access_remote_vm(mm, addr, page, this_len, flags); 847 if (!this_len) { 848 if (!copied) 849 copied = -EIO; 850 break; 851 } 852 853 if (!write && copy_to_user(buf, page, this_len)) { 854 copied = -EFAULT; 855 break; 856 } 857 858 buf += this_len; 859 addr += this_len; 860 copied += this_len; 861 count -= this_len; 862 } 863 *ppos = addr; 864 865 mmput(mm); 866 free: 867 free_page((unsigned long) page); 868 return copied; 869 } 870 871 static ssize_t mem_read(struct file *file, char __user *buf, 872 size_t count, loff_t *ppos) 873 { 874 return mem_rw(file, buf, count, ppos, 0); 875 } 876 877 static ssize_t mem_write(struct file *file, const char __user *buf, 878 size_t count, loff_t *ppos) 879 { 880 return mem_rw(file, (char __user*)buf, count, ppos, 1); 881 } 882 883 loff_t mem_lseek(struct file *file, loff_t offset, int orig) 884 { 885 switch (orig) { 886 case 0: 887 file->f_pos = offset; 888 break; 889 case 1: 890 file->f_pos += offset; 891 break; 892 default: 893 return -EINVAL; 894 } 895 force_successful_syscall_return(); 896 return file->f_pos; 897 } 898 899 static int mem_release(struct inode *inode, struct file *file) 900 { 901 struct mm_struct *mm = file->private_data; 902 if (mm) 903 mmdrop(mm); 904 return 0; 905 } 906 907 static const struct file_operations proc_mem_operations = { 
908 .llseek = mem_lseek, 909 .read = mem_read, 910 .write = mem_write, 911 .open = mem_open, 912 .release = mem_release, 913 }; 914 915 static int environ_open(struct inode *inode, struct file *file) 916 { 917 return __mem_open(inode, file, PTRACE_MODE_READ); 918 } 919 920 static ssize_t environ_read(struct file *file, char __user *buf, 921 size_t count, loff_t *ppos) 922 { 923 char *page; 924 unsigned long src = *ppos; 925 int ret = 0; 926 struct mm_struct *mm = file->private_data; 927 unsigned long env_start, env_end; 928 929 /* Ensure the process spawned far enough to have an environment. */ 930 if (!mm || !mm->env_end) 931 return 0; 932 933 page = (char *)__get_free_page(GFP_KERNEL); 934 if (!page) 935 return -ENOMEM; 936 937 ret = 0; 938 if (!mmget_not_zero(mm)) 939 goto free; 940 941 spin_lock(&mm->arg_lock); 942 env_start = mm->env_start; 943 env_end = mm->env_end; 944 spin_unlock(&mm->arg_lock); 945 946 while (count > 0) { 947 size_t this_len, max_len; 948 int retval; 949 950 if (src >= (env_end - env_start)) 951 break; 952 953 this_len = env_end - (env_start + src); 954 955 max_len = min_t(size_t, PAGE_SIZE, count); 956 this_len = min(max_len, this_len); 957 958 retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); 959 960 if (retval <= 0) { 961 ret = retval; 962 break; 963 } 964 965 if (copy_to_user(buf, page, retval)) { 966 ret = -EFAULT; 967 break; 968 } 969 970 ret += retval; 971 src += retval; 972 buf += retval; 973 count -= retval; 974 } 975 *ppos = src; 976 mmput(mm); 977 978 free: 979 free_page((unsigned long) page); 980 return ret; 981 } 982 983 static const struct file_operations proc_environ_operations = { 984 .open = environ_open, 985 .read = environ_read, 986 .llseek = generic_file_llseek, 987 .release = mem_release, 988 }; 989 990 static int auxv_open(struct inode *inode, struct file *file) 991 { 992 return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); 993 } 994 995 static ssize_t auxv_read(struct file *file, char 
__user *buf, 996 size_t count, loff_t *ppos) 997 { 998 struct mm_struct *mm = file->private_data; 999 unsigned int nwords = 0; 1000 1001 if (!mm) 1002 return 0; 1003 do { 1004 nwords += 2; 1005 } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 1006 return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv, 1007 nwords * sizeof(mm->saved_auxv[0])); 1008 } 1009 1010 static const struct file_operations proc_auxv_operations = { 1011 .open = auxv_open, 1012 .read = auxv_read, 1013 .llseek = generic_file_llseek, 1014 .release = mem_release, 1015 }; 1016 1017 static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, 1018 loff_t *ppos) 1019 { 1020 struct task_struct *task = get_proc_task(file_inode(file)); 1021 char buffer[PROC_NUMBUF]; 1022 int oom_adj = OOM_ADJUST_MIN; 1023 size_t len; 1024 1025 if (!task) 1026 return -ESRCH; 1027 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) 1028 oom_adj = OOM_ADJUST_MAX; 1029 else 1030 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / 1031 OOM_SCORE_ADJ_MAX; 1032 put_task_struct(task); 1033 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); 1034 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1035 } 1036 1037 static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) 1038 { 1039 static DEFINE_MUTEX(oom_adj_mutex); 1040 struct mm_struct *mm = NULL; 1041 struct task_struct *task; 1042 int err = 0; 1043 1044 task = get_proc_task(file_inode(file)); 1045 if (!task) 1046 return -ESRCH; 1047 1048 mutex_lock(&oom_adj_mutex); 1049 if (legacy) { 1050 if (oom_adj < task->signal->oom_score_adj && 1051 !capable(CAP_SYS_RESOURCE)) { 1052 err = -EACCES; 1053 goto err_unlock; 1054 } 1055 /* 1056 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use 1057 * /proc/pid/oom_score_adj instead. 
1058 */ 1059 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 1060 current->comm, task_pid_nr(current), task_pid_nr(task), 1061 task_pid_nr(task)); 1062 } else { 1063 if ((short)oom_adj < task->signal->oom_score_adj_min && 1064 !capable(CAP_SYS_RESOURCE)) { 1065 err = -EACCES; 1066 goto err_unlock; 1067 } 1068 } 1069 1070 /* 1071 * Make sure we will check other processes sharing the mm if this is 1072 * not vfrok which wants its own oom_score_adj. 1073 * pin the mm so it doesn't go away and get reused after task_unlock 1074 */ 1075 if (!task->vfork_done) { 1076 struct task_struct *p = find_lock_task_mm(task); 1077 1078 if (p) { 1079 if (atomic_read(&p->mm->mm_users) > 1) { 1080 mm = p->mm; 1081 mmgrab(mm); 1082 } 1083 task_unlock(p); 1084 } 1085 } 1086 1087 task->signal->oom_score_adj = oom_adj; 1088 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1089 task->signal->oom_score_adj_min = (short)oom_adj; 1090 trace_oom_score_adj_update(task); 1091 1092 if (mm) { 1093 struct task_struct *p; 1094 1095 rcu_read_lock(); 1096 for_each_process(p) { 1097 if (same_thread_group(task, p)) 1098 continue; 1099 1100 /* do not touch kernel threads or the global init */ 1101 if (p->flags & PF_KTHREAD || is_global_init(p)) 1102 continue; 1103 1104 task_lock(p); 1105 if (!p->vfork_done && process_shares_mm(p, mm)) { 1106 p->signal->oom_score_adj = oom_adj; 1107 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1108 p->signal->oom_score_adj_min = (short)oom_adj; 1109 } 1110 task_unlock(p); 1111 } 1112 rcu_read_unlock(); 1113 mmdrop(mm); 1114 } 1115 err_unlock: 1116 mutex_unlock(&oom_adj_mutex); 1117 put_task_struct(task); 1118 return err; 1119 } 1120 1121 /* 1122 * /proc/pid/oom_adj exists solely for backwards compatibility with previous 1123 * kernels. The effective policy is defined by oom_score_adj, which has a 1124 * different scale: oom_adj grew exponentially and oom_score_adj grows linearly. 
 * Values written to oom_adj are simply mapped linearly to oom_score_adj.
 * Processes that become oom disabled via oom_adj will still be oom disabled
 * with this implementation.
 *
 * oom_adj cannot be removed since existing userspace binaries use it.
 *
 * Returns the number of bytes consumed (at most PROC_NUMBUF - 1), -EFAULT
 * if @buf cannot be read, the kstrtoint() error for malformed input,
 * -EINVAL for a value outside the oom_adj range, or the error from
 * __set_oom_adj().
 */
static ssize_t oom_adj_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	char buffer[PROC_NUMBUF];
	int oom_adj;
	int err;

	/* zero-fill so the copied text is always NUL-terminated */
	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count)) {
		err = -EFAULT;
		goto out;
	}

	err = kstrtoint(strstrip(buffer), 0, &oom_adj);
	if (err)
		goto out;
	if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
	     oom_adj != OOM_DISABLE) {
		err = -EINVAL;
		goto out;
	}

	/*
	 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
	 * value is always attainable.
	 */
	if (oom_adj == OOM_ADJUST_MAX)
		oom_adj = OOM_SCORE_ADJ_MAX;
	else
		oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;

	err = __set_oom_adj(file, oom_adj, true);
out:
	return err < 0 ? err : count;
}

static const struct file_operations proc_oom_adj_operations = {
	.read		= oom_adj_read,
	.write		= oom_adj_write,
	.llseek		= generic_file_llseek,
};

/*
 * read() handler for oom_score_adj: prints the task's current value as a
 * decimal line. Returns -ESRCH if the task is gone.
 */
static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	char buffer[PROC_NUMBUF];
	short oom_score_adj = OOM_SCORE_ADJ_MIN;
	size_t len;

	if (!task)
		return -ESRCH;
	oom_score_adj = task->signal->oom_score_adj;
	put_task_struct(task);
	len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
	return simple_read_from_buffer(buf, count, ppos, buffer, len);
}

/*
 * write() handler for oom_score_adj: parses an integer in the range
 * OOM_SCORE_ADJ_MIN..OOM_SCORE_ADJ_MAX and applies it through
 * __set_oom_adj().
 *
 * Returns the number of bytes consumed (at most PROC_NUMBUF - 1), -EFAULT
 * if @buf cannot be read, the kstrtoint() error for malformed input,
 * -EINVAL for an out-of-range value, or the error from __set_oom_adj().
 */
static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
					size_t count, loff_t *ppos)
{
	char buffer[PROC_NUMBUF];
	int oom_score_adj;
	int err;

	/* zero-fill so the copied text is always NUL-terminated */
	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count)) {
		err = -EFAULT;
		goto out;
	}

	err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
	if (err)
		goto out;
	if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
			oom_score_adj > OOM_SCORE_ADJ_MAX) {
		err = -EINVAL;
		goto out;
	}

	err = __set_oom_adj(file, oom_score_adj, false);
out:
	return err < 0 ? err : count;
}

static const struct file_operations proc_oom_score_adj_operations = {
	.read		= oom_score_adj_read,
	.write		= oom_score_adj_write,
	.llseek		= default_llseek,
};

#ifdef CONFIG_AUDIT
/* Room for a 32-bit unsigned value in decimal plus the terminating NUL. */
#define TMPBUFLEN 11
/*
 * read() handler for loginuid: prints the task's audit login uid as seen
 * from the opener's user namespace. Returns -ESRCH if the task is gone.
 */
static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
				  size_t count, loff_t *ppos)
{
	struct inode * inode = file_inode(file);
	struct task_struct *task = get_proc_task(inode);
	ssize_t length;
	char tmpbuf[TMPBUFLEN];

	if (!task)
		return -ESRCH;
	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
			   from_kuid(file->f_cred->user_ns,
				     audit_get_loginuid(task)));
	put_task_struct(task);
	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}

/*
 * write() handler for loginuid. A task may only set its own login uid,
 * and only with a single write at offset 0.
 *
 * Returns @count on success, -EPERM when writing to another task's file,
 * -EINVAL for a non-zero offset or a uid that does not map into the
 * opener's user namespace, or the error from parsing or from
 * audit_set_loginuid().
 */
static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
				   size_t count, loff_t *ppos)
{
	struct inode * inode = file_inode(file);
	uid_t loginuid;
	kuid_t kloginuid;
	int rv;

	rcu_read_lock();
	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
		rcu_read_unlock();
		return -EPERM;
	}
	rcu_read_unlock();

	if (*ppos != 0) {
		/* No partial writes. */
		return -EINVAL;
	}

	rv = kstrtou32_from_user(buf, count, 10, &loginuid);
	if (rv < 0)
		return rv;

	/* is userspace trying to explicitly UNSET the loginuid? */
	if (loginuid == AUDIT_UID_UNSET) {
		kloginuid = INVALID_UID;
	} else {
		kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
		if (!uid_valid(kloginuid))
			return -EINVAL;
	}

	rv = audit_set_loginuid(kloginuid);
	if (rv < 0)
		return rv;
	return count;
}

static const struct file_operations proc_loginuid_operations = {
	.read		= proc_loginuid_read,
	.write		= proc_loginuid_write,
	.llseek		= generic_file_llseek,
};

/*
 * read() handler for sessionid: prints the task's audit session id.
 * Returns -ESRCH if the task is gone.
 */
static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
				  size_t count, loff_t *ppos)
{
	struct inode * inode = file_inode(file);
	struct task_struct *task = get_proc_task(inode);
	ssize_t length;
	char tmpbuf[TMPBUFLEN];

	if (!task)
		return -ESRCH;
	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
				audit_get_sessionid(task));
	put_task_struct(task);
	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}

static const struct file_operations proc_sessionid_operations = {
	.read		= proc_sessionid_read,
	.llseek		= generic_file_llseek,
};
#endif

#ifdef CONFIG_FAULT_INJECTION
/*
 * read() handler for the fault-injection file: prints the task's
 * make_it_fail value as a decimal line. Returns -ESRCH if the task is
 * gone.
 */
static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
				      size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	char buffer[PROC_NUMBUF];
	size_t len;
	int make_it_fail;

	if (!task)
		return -ESRCH;
	make_it_fail = task->make_it_fail;
	put_task_struct(task);

	len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);

	return simple_read_from_buffer(buf, count, ppos, buffer, len);
}

static ssize_t proc_fault_inject_write(struct file * file,
			const char __user * buf, size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF];
	int make_it_fail;
	int rv;

	if
(!capable(CAP_SYS_RESOURCE)) 1340 return -EPERM; 1341 memset(buffer, 0, sizeof(buffer)); 1342 if (count > sizeof(buffer) - 1) 1343 count = sizeof(buffer) - 1; 1344 if (copy_from_user(buffer, buf, count)) 1345 return -EFAULT; 1346 rv = kstrtoint(strstrip(buffer), 0, &make_it_fail); 1347 if (rv < 0) 1348 return rv; 1349 if (make_it_fail < 0 || make_it_fail > 1) 1350 return -EINVAL; 1351 1352 task = get_proc_task(file_inode(file)); 1353 if (!task) 1354 return -ESRCH; 1355 task->make_it_fail = make_it_fail; 1356 put_task_struct(task); 1357 1358 return count; 1359 } 1360 1361 static const struct file_operations proc_fault_inject_operations = { 1362 .read = proc_fault_inject_read, 1363 .write = proc_fault_inject_write, 1364 .llseek = generic_file_llseek, 1365 }; 1366 1367 static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf, 1368 size_t count, loff_t *ppos) 1369 { 1370 struct task_struct *task; 1371 int err; 1372 unsigned int n; 1373 1374 err = kstrtouint_from_user(buf, count, 0, &n); 1375 if (err) 1376 return err; 1377 1378 task = get_proc_task(file_inode(file)); 1379 if (!task) 1380 return -ESRCH; 1381 task->fail_nth = n; 1382 put_task_struct(task); 1383 1384 return count; 1385 } 1386 1387 static ssize_t proc_fail_nth_read(struct file *file, char __user *buf, 1388 size_t count, loff_t *ppos) 1389 { 1390 struct task_struct *task; 1391 char numbuf[PROC_NUMBUF]; 1392 ssize_t len; 1393 1394 task = get_proc_task(file_inode(file)); 1395 if (!task) 1396 return -ESRCH; 1397 len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); 1398 put_task_struct(task); 1399 return simple_read_from_buffer(buf, count, ppos, numbuf, len); 1400 } 1401 1402 static const struct file_operations proc_fail_nth_operations = { 1403 .read = proc_fail_nth_read, 1404 .write = proc_fail_nth_write, 1405 }; 1406 #endif 1407 1408 1409 #ifdef CONFIG_SCHED_DEBUG 1410 /* 1411 * Print out various scheduling related per-task fields: 1412 */ 1413 static int sched_show(struct 
seq_file *m, void *v) 1414 { 1415 struct inode *inode = m->private; 1416 struct pid_namespace *ns = proc_pid_ns(inode); 1417 struct task_struct *p; 1418 1419 p = get_proc_task(inode); 1420 if (!p) 1421 return -ESRCH; 1422 proc_sched_show_task(p, ns, m); 1423 1424 put_task_struct(p); 1425 1426 return 0; 1427 } 1428 1429 static ssize_t 1430 sched_write(struct file *file, const char __user *buf, 1431 size_t count, loff_t *offset) 1432 { 1433 struct inode *inode = file_inode(file); 1434 struct task_struct *p; 1435 1436 p = get_proc_task(inode); 1437 if (!p) 1438 return -ESRCH; 1439 proc_sched_set_task(p); 1440 1441 put_task_struct(p); 1442 1443 return count; 1444 } 1445 1446 static int sched_open(struct inode *inode, struct file *filp) 1447 { 1448 return single_open(filp, sched_show, inode); 1449 } 1450 1451 static const struct file_operations proc_pid_sched_operations = { 1452 .open = sched_open, 1453 .read = seq_read, 1454 .write = sched_write, 1455 .llseek = seq_lseek, 1456 .release = single_release, 1457 }; 1458 1459 #endif 1460 1461 #ifdef CONFIG_SCHED_AUTOGROUP 1462 /* 1463 * Print out autogroup related information: 1464 */ 1465 static int sched_autogroup_show(struct seq_file *m, void *v) 1466 { 1467 struct inode *inode = m->private; 1468 struct task_struct *p; 1469 1470 p = get_proc_task(inode); 1471 if (!p) 1472 return -ESRCH; 1473 proc_sched_autogroup_show_task(p, m); 1474 1475 put_task_struct(p); 1476 1477 return 0; 1478 } 1479 1480 static ssize_t 1481 sched_autogroup_write(struct file *file, const char __user *buf, 1482 size_t count, loff_t *offset) 1483 { 1484 struct inode *inode = file_inode(file); 1485 struct task_struct *p; 1486 char buffer[PROC_NUMBUF]; 1487 int nice; 1488 int err; 1489 1490 memset(buffer, 0, sizeof(buffer)); 1491 if (count > sizeof(buffer) - 1) 1492 count = sizeof(buffer) - 1; 1493 if (copy_from_user(buffer, buf, count)) 1494 return -EFAULT; 1495 1496 err = kstrtoint(strstrip(buffer), 0, &nice); 1497 if (err < 0) 1498 return err; 1499 
1500 p = get_proc_task(inode); 1501 if (!p) 1502 return -ESRCH; 1503 1504 err = proc_sched_autogroup_set_nice(p, nice); 1505 if (err) 1506 count = err; 1507 1508 put_task_struct(p); 1509 1510 return count; 1511 } 1512 1513 static int sched_autogroup_open(struct inode *inode, struct file *filp) 1514 { 1515 int ret; 1516 1517 ret = single_open(filp, sched_autogroup_show, NULL); 1518 if (!ret) { 1519 struct seq_file *m = filp->private_data; 1520 1521 m->private = inode; 1522 } 1523 return ret; 1524 } 1525 1526 static const struct file_operations proc_pid_sched_autogroup_operations = { 1527 .open = sched_autogroup_open, 1528 .read = seq_read, 1529 .write = sched_autogroup_write, 1530 .llseek = seq_lseek, 1531 .release = single_release, 1532 }; 1533 1534 #endif /* CONFIG_SCHED_AUTOGROUP */ 1535 1536 static ssize_t comm_write(struct file *file, const char __user *buf, 1537 size_t count, loff_t *offset) 1538 { 1539 struct inode *inode = file_inode(file); 1540 struct task_struct *p; 1541 char buffer[TASK_COMM_LEN]; 1542 const size_t maxlen = sizeof(buffer) - 1; 1543 1544 memset(buffer, 0, sizeof(buffer)); 1545 if (copy_from_user(buffer, buf, count > maxlen ? 
maxlen : count)) 1546 return -EFAULT; 1547 1548 p = get_proc_task(inode); 1549 if (!p) 1550 return -ESRCH; 1551 1552 if (same_thread_group(current, p)) 1553 set_task_comm(p, buffer); 1554 else 1555 count = -EINVAL; 1556 1557 put_task_struct(p); 1558 1559 return count; 1560 } 1561 1562 static int comm_show(struct seq_file *m, void *v) 1563 { 1564 struct inode *inode = m->private; 1565 struct task_struct *p; 1566 1567 p = get_proc_task(inode); 1568 if (!p) 1569 return -ESRCH; 1570 1571 proc_task_name(m, p, false); 1572 seq_putc(m, '\n'); 1573 1574 put_task_struct(p); 1575 1576 return 0; 1577 } 1578 1579 static int comm_open(struct inode *inode, struct file *filp) 1580 { 1581 return single_open(filp, comm_show, inode); 1582 } 1583 1584 static const struct file_operations proc_pid_set_comm_operations = { 1585 .open = comm_open, 1586 .read = seq_read, 1587 .write = comm_write, 1588 .llseek = seq_lseek, 1589 .release = single_release, 1590 }; 1591 1592 static int proc_exe_link(struct dentry *dentry, struct path *exe_path) 1593 { 1594 struct task_struct *task; 1595 struct file *exe_file; 1596 1597 task = get_proc_task(d_inode(dentry)); 1598 if (!task) 1599 return -ENOENT; 1600 exe_file = get_task_exe_file(task); 1601 put_task_struct(task); 1602 if (exe_file) { 1603 *exe_path = exe_file->f_path; 1604 path_get(&exe_file->f_path); 1605 fput(exe_file); 1606 return 0; 1607 } else 1608 return -ENOENT; 1609 } 1610 1611 static const char *proc_pid_get_link(struct dentry *dentry, 1612 struct inode *inode, 1613 struct delayed_call *done) 1614 { 1615 struct path path; 1616 int error = -EACCES; 1617 1618 if (!dentry) 1619 return ERR_PTR(-ECHILD); 1620 1621 /* Are we allowed to snoop on the tasks file descriptors? 
*/ 1622 if (!proc_fd_access_allowed(inode)) 1623 goto out; 1624 1625 error = PROC_I(inode)->op.proc_get_link(dentry, &path); 1626 if (error) 1627 goto out; 1628 1629 nd_jump_link(&path); 1630 return NULL; 1631 out: 1632 return ERR_PTR(error); 1633 } 1634 1635 static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) 1636 { 1637 char *tmp = (char *)__get_free_page(GFP_KERNEL); 1638 char *pathname; 1639 int len; 1640 1641 if (!tmp) 1642 return -ENOMEM; 1643 1644 pathname = d_path(path, tmp, PAGE_SIZE); 1645 len = PTR_ERR(pathname); 1646 if (IS_ERR(pathname)) 1647 goto out; 1648 len = tmp + PAGE_SIZE - 1 - pathname; 1649 1650 if (len > buflen) 1651 len = buflen; 1652 if (copy_to_user(buffer, pathname, len)) 1653 len = -EFAULT; 1654 out: 1655 free_page((unsigned long)tmp); 1656 return len; 1657 } 1658 1659 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1660 { 1661 int error = -EACCES; 1662 struct inode *inode = d_inode(dentry); 1663 struct path path; 1664 1665 /* Are we allowed to snoop on the tasks file descriptors? */ 1666 if (!proc_fd_access_allowed(inode)) 1667 goto out; 1668 1669 error = PROC_I(inode)->op.proc_get_link(dentry, &path); 1670 if (error) 1671 goto out; 1672 1673 error = do_proc_readlink(&path, buffer, buflen); 1674 path_put(&path); 1675 out: 1676 return error; 1677 } 1678 1679 const struct inode_operations proc_pid_link_inode_operations = { 1680 .readlink = proc_pid_readlink, 1681 .get_link = proc_pid_get_link, 1682 .setattr = proc_setattr, 1683 }; 1684 1685 1686 /* building an inode */ 1687 1688 void task_dump_owner(struct task_struct *task, umode_t mode, 1689 kuid_t *ruid, kgid_t *rgid) 1690 { 1691 /* Depending on the state of dumpable compute who should own a 1692 * proc file for a task. 
1693 */ 1694 const struct cred *cred; 1695 kuid_t uid; 1696 kgid_t gid; 1697 1698 if (unlikely(task->flags & PF_KTHREAD)) { 1699 *ruid = GLOBAL_ROOT_UID; 1700 *rgid = GLOBAL_ROOT_GID; 1701 return; 1702 } 1703 1704 /* Default to the tasks effective ownership */ 1705 rcu_read_lock(); 1706 cred = __task_cred(task); 1707 uid = cred->euid; 1708 gid = cred->egid; 1709 rcu_read_unlock(); 1710 1711 /* 1712 * Before the /proc/pid/status file was created the only way to read 1713 * the effective uid of a /process was to stat /proc/pid. Reading 1714 * /proc/pid/status is slow enough that procps and other packages 1715 * kept stating /proc/pid. To keep the rules in /proc simple I have 1716 * made this apply to all per process world readable and executable 1717 * directories. 1718 */ 1719 if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) { 1720 struct mm_struct *mm; 1721 task_lock(task); 1722 mm = task->mm; 1723 /* Make non-dumpable tasks owned by some root */ 1724 if (mm) { 1725 if (get_dumpable(mm) != SUID_DUMP_USER) { 1726 struct user_namespace *user_ns = mm->user_ns; 1727 1728 uid = make_kuid(user_ns, 0); 1729 if (!uid_valid(uid)) 1730 uid = GLOBAL_ROOT_UID; 1731 1732 gid = make_kgid(user_ns, 0); 1733 if (!gid_valid(gid)) 1734 gid = GLOBAL_ROOT_GID; 1735 } 1736 } else { 1737 uid = GLOBAL_ROOT_UID; 1738 gid = GLOBAL_ROOT_GID; 1739 } 1740 task_unlock(task); 1741 } 1742 *ruid = uid; 1743 *rgid = gid; 1744 } 1745 1746 struct inode *proc_pid_make_inode(struct super_block * sb, 1747 struct task_struct *task, umode_t mode) 1748 { 1749 struct inode * inode; 1750 struct proc_inode *ei; 1751 1752 /* We need a new inode */ 1753 1754 inode = new_inode(sb); 1755 if (!inode) 1756 goto out; 1757 1758 /* Common stuff */ 1759 ei = PROC_I(inode); 1760 inode->i_mode = mode; 1761 inode->i_ino = get_next_ino(); 1762 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); 1763 inode->i_op = &proc_def_inode_operations; 1764 1765 /* 1766 * grab the reference to task. 
1767 */ 1768 ei->pid = get_task_pid(task, PIDTYPE_PID); 1769 if (!ei->pid) 1770 goto out_unlock; 1771 1772 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); 1773 security_task_to_inode(task, inode); 1774 1775 out: 1776 return inode; 1777 1778 out_unlock: 1779 iput(inode); 1780 return NULL; 1781 } 1782 1783 int pid_getattr(const struct path *path, struct kstat *stat, 1784 u32 request_mask, unsigned int query_flags) 1785 { 1786 struct inode *inode = d_inode(path->dentry); 1787 struct pid_namespace *pid = proc_pid_ns(inode); 1788 struct task_struct *task; 1789 1790 generic_fillattr(inode, stat); 1791 1792 stat->uid = GLOBAL_ROOT_UID; 1793 stat->gid = GLOBAL_ROOT_GID; 1794 rcu_read_lock(); 1795 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1796 if (task) { 1797 if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { 1798 rcu_read_unlock(); 1799 /* 1800 * This doesn't prevent learning whether PID exists, 1801 * it only makes getattr() consistent with readdir(). 1802 */ 1803 return -ENOENT; 1804 } 1805 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); 1806 } 1807 rcu_read_unlock(); 1808 return 0; 1809 } 1810 1811 /* dentry stuff */ 1812 1813 /* 1814 * Set <pid>/... inode ownership (can change due to setuid(), etc.) 1815 */ 1816 void pid_update_inode(struct task_struct *task, struct inode *inode) 1817 { 1818 task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); 1819 1820 inode->i_mode &= ~(S_ISUID | S_ISGID); 1821 security_task_to_inode(task, inode); 1822 } 1823 1824 /* 1825 * Rewrite the inode's ownerships here because the owning task may have 1826 * performed a setuid(), etc. 
1827 * 1828 */ 1829 static int pid_revalidate(struct dentry *dentry, unsigned int flags) 1830 { 1831 struct inode *inode; 1832 struct task_struct *task; 1833 1834 if (flags & LOOKUP_RCU) 1835 return -ECHILD; 1836 1837 inode = d_inode(dentry); 1838 task = get_proc_task(inode); 1839 1840 if (task) { 1841 pid_update_inode(task, inode); 1842 put_task_struct(task); 1843 return 1; 1844 } 1845 return 0; 1846 } 1847 1848 static inline bool proc_inode_is_dead(struct inode *inode) 1849 { 1850 return !proc_pid(inode)->tasks[PIDTYPE_PID].first; 1851 } 1852 1853 int pid_delete_dentry(const struct dentry *dentry) 1854 { 1855 /* Is the task we represent dead? 1856 * If so, then don't put the dentry on the lru list, 1857 * kill it immediately. 1858 */ 1859 return proc_inode_is_dead(d_inode(dentry)); 1860 } 1861 1862 const struct dentry_operations pid_dentry_operations = 1863 { 1864 .d_revalidate = pid_revalidate, 1865 .d_delete = pid_delete_dentry, 1866 }; 1867 1868 /* Lookups */ 1869 1870 /* 1871 * Fill a directory entry. 1872 * 1873 * If possible create the dcache entry and derive our inode number and 1874 * file type from dcache entry. 1875 * 1876 * Since all of the proc inode numbers are dynamically generated, the inode 1877 * numbers do not exist until the inode is cache. This means creating the 1878 * the dcache entry in readdir is necessary to keep the inode numbers 1879 * reported by readdir in sync with the inode numbers reported 1880 * by stat. 
1881 */ 1882 bool proc_fill_cache(struct file *file, struct dir_context *ctx, 1883 const char *name, unsigned int len, 1884 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1885 { 1886 struct dentry *child, *dir = file->f_path.dentry; 1887 struct qstr qname = QSTR_INIT(name, len); 1888 struct inode *inode; 1889 unsigned type = DT_UNKNOWN; 1890 ino_t ino = 1; 1891 1892 child = d_hash_and_lookup(dir, &qname); 1893 if (!child) { 1894 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); 1895 child = d_alloc_parallel(dir, &qname, &wq); 1896 if (IS_ERR(child)) 1897 goto end_instantiate; 1898 if (d_in_lookup(child)) { 1899 struct dentry *res; 1900 res = instantiate(child, task, ptr); 1901 d_lookup_done(child); 1902 if (unlikely(res)) { 1903 dput(child); 1904 child = res; 1905 if (IS_ERR(child)) 1906 goto end_instantiate; 1907 } 1908 } 1909 } 1910 inode = d_inode(child); 1911 ino = inode->i_ino; 1912 type = inode->i_mode >> 12; 1913 dput(child); 1914 end_instantiate: 1915 return dir_emit(ctx, name, len, ino, type); 1916 } 1917 1918 /* 1919 * dname_to_vma_addr - maps a dentry name into two unsigned longs 1920 * which represent vma start and end addresses. 
1921 */ 1922 static int dname_to_vma_addr(struct dentry *dentry, 1923 unsigned long *start, unsigned long *end) 1924 { 1925 const char *str = dentry->d_name.name; 1926 unsigned long long sval, eval; 1927 unsigned int len; 1928 1929 if (str[0] == '0' && str[1] != '-') 1930 return -EINVAL; 1931 len = _parse_integer(str, 16, &sval); 1932 if (len & KSTRTOX_OVERFLOW) 1933 return -EINVAL; 1934 if (sval != (unsigned long)sval) 1935 return -EINVAL; 1936 str += len; 1937 1938 if (*str != '-') 1939 return -EINVAL; 1940 str++; 1941 1942 if (str[0] == '0' && str[1]) 1943 return -EINVAL; 1944 len = _parse_integer(str, 16, &eval); 1945 if (len & KSTRTOX_OVERFLOW) 1946 return -EINVAL; 1947 if (eval != (unsigned long)eval) 1948 return -EINVAL; 1949 str += len; 1950 1951 if (*str != '\0') 1952 return -EINVAL; 1953 1954 *start = sval; 1955 *end = eval; 1956 1957 return 0; 1958 } 1959 1960 static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) 1961 { 1962 unsigned long vm_start, vm_end; 1963 bool exact_vma_exists = false; 1964 struct mm_struct *mm = NULL; 1965 struct task_struct *task; 1966 struct inode *inode; 1967 int status = 0; 1968 1969 if (flags & LOOKUP_RCU) 1970 return -ECHILD; 1971 1972 inode = d_inode(dentry); 1973 task = get_proc_task(inode); 1974 if (!task) 1975 goto out_notask; 1976 1977 mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); 1978 if (IS_ERR_OR_NULL(mm)) 1979 goto out; 1980 1981 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { 1982 status = down_read_killable(&mm->mmap_sem); 1983 if (!status) { 1984 exact_vma_exists = !!find_exact_vma(mm, vm_start, 1985 vm_end); 1986 up_read(&mm->mmap_sem); 1987 } 1988 } 1989 1990 mmput(mm); 1991 1992 if (exact_vma_exists) { 1993 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); 1994 1995 security_task_to_inode(task, inode); 1996 status = 1; 1997 } 1998 1999 out: 2000 put_task_struct(task); 2001 2002 out_notask: 2003 return status; 2004 } 2005 2006 static const struct dentry_operations 
tid_map_files_dentry_operations = { 2007 .d_revalidate = map_files_d_revalidate, 2008 .d_delete = pid_delete_dentry, 2009 }; 2010 2011 static int map_files_get_link(struct dentry *dentry, struct path *path) 2012 { 2013 unsigned long vm_start, vm_end; 2014 struct vm_area_struct *vma; 2015 struct task_struct *task; 2016 struct mm_struct *mm; 2017 int rc; 2018 2019 rc = -ENOENT; 2020 task = get_proc_task(d_inode(dentry)); 2021 if (!task) 2022 goto out; 2023 2024 mm = get_task_mm(task); 2025 put_task_struct(task); 2026 if (!mm) 2027 goto out; 2028 2029 rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); 2030 if (rc) 2031 goto out_mmput; 2032 2033 rc = down_read_killable(&mm->mmap_sem); 2034 if (rc) 2035 goto out_mmput; 2036 2037 rc = -ENOENT; 2038 vma = find_exact_vma(mm, vm_start, vm_end); 2039 if (vma && vma->vm_file) { 2040 *path = vma->vm_file->f_path; 2041 path_get(path); 2042 rc = 0; 2043 } 2044 up_read(&mm->mmap_sem); 2045 2046 out_mmput: 2047 mmput(mm); 2048 out: 2049 return rc; 2050 } 2051 2052 struct map_files_info { 2053 unsigned long start; 2054 unsigned long end; 2055 fmode_t mode; 2056 }; 2057 2058 /* 2059 * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the 2060 * symlinks may be used to bypass permissions on ancestor directories in the 2061 * path to the file in question. 
 */
static const char *
proc_map_files_get_link(struct dentry *dentry,
			struct inode *inode,
		        struct delayed_call *done)
{
	if (!capable(CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	return proc_pid_get_link(dentry, inode, done);
}

/*
 * Identical to proc_pid_link_inode_operations except for get_link()
 */
static const struct inode_operations proc_map_files_link_inode_operations = {
	.readlink	= proc_pid_readlink,
	.get_link	= proc_map_files_get_link,
	.setattr	= proc_setattr,
};

/*
 * Instantiate one map_files symlink.
 *
 * @ptr carries the mapped file's f_mode cast to a pointer; the link gets
 * owner read/write permission bits according to FMODE_READ/FMODE_WRITE.
 * Returns the result of d_splice_alias(), or ERR_PTR(-ENOENT) when no inode
 * could be created.
 */
static struct dentry *
proc_map_files_instantiate(struct dentry *dentry,
			   struct task_struct *task, const void *ptr)
{
	fmode_t mode = (fmode_t)(unsigned long)ptr;
	struct proc_inode *ei;
	struct inode *inode;

	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK |
				    ((mode & FMODE_READ ) ? S_IRUSR : 0) |
				    ((mode & FMODE_WRITE) ? S_IWUSR : 0));
	if (!inode)
		return ERR_PTR(-ENOENT);

	ei = PROC_I(inode);
	ei->op.proc_get_link = map_files_get_link;

	inode->i_op = &proc_map_files_link_inode_operations;
	inode->i_size = 64;

	d_set_d_op(dentry, &tid_map_files_dentry_operations);
	return d_splice_alias(inode, dentry);
}

/*
 * ->lookup() for the map_files directory.  The name must parse as
 * "<start>-<end>" and match a file-backed vma of the task exactly.
 *
 * Returns the instantiated dentry, or an ERR_PTR: -ENOENT (task, mm or
 * mapping missing, or malformed name), -EACCES (ptrace access denied) or
 * -EINTR (interrupted while taking mmap_sem).
 */
static struct dentry *proc_map_files_lookup(struct inode *dir,
		struct dentry *dentry, unsigned int flags)
{
	unsigned long vm_start, vm_end;
	struct vm_area_struct *vma;
	struct task_struct *task;
	struct dentry *result;
	struct mm_struct *mm;

	/* result is preset before each step to that step's failure code. */
	result = ERR_PTR(-ENOENT);
	task = get_proc_task(dir);
	if (!task)
		goto out;

	result = ERR_PTR(-EACCES);
	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		goto out_put_task;

	result = ERR_PTR(-ENOENT);
	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
		goto out_put_task;

	mm = get_task_mm(task);
	if (!mm)
		goto out_put_task;

	result = ERR_PTR(-EINTR);
	if (down_read_killable(&mm->mmap_sem))
		goto out_put_mm;

	result = ERR_PTR(-ENOENT);
	vma = find_exact_vma(mm, vm_start, vm_end);
	if (!vma)
		goto out_no_vma;

	/* Mappings without a backing file keep the -ENOENT result. */
	if (vma->vm_file)
		result = proc_map_files_instantiate(dentry, task,
				(void *)(unsigned long)vma->vm_file->f_mode);

out_no_vma:
	up_read(&mm->mmap_sem);
out_put_mm:
	mmput(mm);
out_put_task:
	put_task_struct(task);
out:
	return result;
}

/* Inode operations of the map_files directory itself. */
static const struct inode_operations proc_map_files_inode_operations = {
	.lookup		= proc_map_files_lookup,
	.permission	= proc_fd_permission,
	.setattr	= proc_setattr,
};

/*
 * ->iterate_shared() for the map_files directory: emit one
 * "<start>-<end>" entry for every file-backed vma of the task.
 *
 * Returns 0 on success (also when the task has no mm) or a negative errno.
 */
static int
proc_map_files_readdir(struct file *file, struct dir_context *ctx)
{
	struct vm_area_struct *vma;
	struct task_struct *task;
	struct mm_struct *mm;
	unsigned long nr_files, pos, i;
	GENRADIX(struct map_files_info) fa;
	struct map_files_info *p;
	int ret;

	genradix_init(&fa);

	ret = -ENOENT;
	task = get_proc_task(file_inode(file));
	if (!task)
		goto out;

	ret = -EACCES;
	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		goto out_put_task;

	ret = 0;
	if (!dir_emit_dots(file, ctx))
		goto out_put_task;

	mm = get_task_mm(task);
	if (!mm)
		goto out_put_task;

	ret = down_read_killable(&mm->mmap_sem);
	if (ret) {
		mmput(mm);
		goto out_put_task;
	}

	nr_files = 0;

	/*
	 * We need two passes here:
	 *
	 *  1) Collect vmas of mapped files with mmap_sem taken
	 *  2) Release mmap_sem and instantiate entries
	 *
	 * otherwise lockdep complains, since the filldir()
	 * routine might require mmap_sem taken in might_fault().
	 */

	/* pos starts at 2; the dot entries were emitted above. */
	for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
		if (!vma->vm_file)
			continue;
		/* Skip entries below the position the caller resumes from. */
		if (++pos <= ctx->pos)
			continue;

		p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
		if (!p) {
			ret = -ENOMEM;
			up_read(&mm->mmap_sem);
			mmput(mm);
			goto out_put_task;
		}

		p->start = vma->vm_start;
		p->end = vma->vm_end;
		p->mode = vma->vm_file->f_mode;
	}
	up_read(&mm->mmap_sem);
	mmput(mm);

	/* Second pass: emit the collected entries without mmap_sem held. */
	for (i = 0; i < nr_files; i++) {
		char buf[4 * sizeof(long) + 2];	/* max: %lx-%lx\0 */
		unsigned int len;

		p = genradix_ptr(&fa, i);
		len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
		if (!proc_fill_cache(file, ctx,
				      buf, len,
				      proc_map_files_instantiate,
				      task,
				      (void *)(unsigned long)p->mode))
			break;
		ctx->pos++;
	}

out_put_task:
	put_task_struct(task);
out:
	/* The collected array is freed on every exit path. */
	genradix_free(&fa);
	return ret;
}

/* File operations of the map_files directory. */
static const struct file_operations proc_map_files_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= proc_map_files_readdir,
	.llseek		= generic_file_llseek,
};

#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
/* Per-open state of the timers seq_file. */
struct timers_private {
	struct pid *pid;		/* set at open time from the inode */
	struct task_struct *task;	/* referenced between start and stop */
	struct sighand_struct *sighand;	/* locked between start and stop */
	struct pid_namespace *ns;	/* namespace used to print pids */
	unsigned long flags;		/* flags for lock/unlock_task_sighand() */
};

/*
 * seq_file ->start(): take a task reference and the sighand lock, then
 * position on the task's posix timer list.  Both are released again in
 * timers_stop().
 */
static void *timers_start(struct seq_file *m, loff_t *pos)
{
	struct timers_private *tp = m->private;

	tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
	if (!tp->task)
		return ERR_PTR(-ESRCH);

	/* On failure the task reference is dropped later by timers_stop(). */
	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
	if (!tp->sighand)
		return ERR_PTR(-ESRCH);

	return seq_list_start(&tp->task->signal->posix_timers, *pos);
}

/* seq_file ->next(): advance to the next timer on the list. */
static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct timers_private *tp = m->private;
	return seq_list_next(v, &tp->task->signal->posix_timers, pos);
}

/* seq_file ->stop(): undo whatever timers_start() managed to acquire. */
static void timers_stop(struct seq_file *m, void *v)
{
	struct timers_private *tp = m->private;

	if (tp->sighand) {
		unlock_task_sighand(tp->task, &tp->flags);
		tp->sighand = NULL;
	}

	if (tp->task) {
		put_task_struct(tp->task);
		tp->task = NULL;
	}
}

/* seq_file ->show(): print the ID, signal, notify and ClockID lines of one timer. */
static int show_timer(struct seq_file *m, void *v)
{
	struct k_itimer *timer;
	struct timers_private *tp = m->private;
	int notify;
	static const char * const nstr[] = {
		[SIGEV_SIGNAL] = "signal",
		[SIGEV_NONE] = "none",
		[SIGEV_THREAD] = "thread",
	};

	timer = list_entry((struct list_head *)v, struct k_itimer, list);
	notify = timer->it_sigev_notify;

	seq_printf(m, "ID: %d\n", timer->it_id);
	seq_printf(m, "signal: %d/%px\n",
		   timer->sigq->info.si_signo,
		   timer->sigq->info.si_value.sival_ptr);
	/* SIGEV_THREAD_ID is masked off for the nstr lookup and selects "tid". */
	seq_printf(m, "notify: %s/%s.%d\n",
		   nstr[notify & ~SIGEV_THREAD_ID],
		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
		   pid_nr_ns(timer->it_pid, tp->ns));
	seq_printf(m, "ClockID: %d\n", timer->it_clock);

	return 0;
}

/* Iterator callbacks of the timers seq_file. */
static const struct seq_operations proc_timers_seq_ops = {
	.start	= timers_start,
	.next	= timers_next,
	.stop	= timers_stop,
	.show	= show_timer,
};

/* Open handler: allocate the private state and record pid and namespace. */
static int proc_timers_open(struct inode *inode, struct file *file)
{
	struct timers_private *tp;

	tp = __seq_open_private(file, &proc_timers_seq_ops,
			sizeof(struct timers_private));
	if (!tp)
		return -ENOMEM;

	tp->pid = proc_pid(inode);
	tp->ns = proc_pid_ns(inode);
	return 0;
}

/* File operations for the per-task timers file. */
static const struct file_operations proc_timers_operations = {
	.open		= proc_timers_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};
#endif

/*
 * Set the task's timer slack in nanoseconds; writing 0 restores the task's
 * default slack.  Changing another task requires CAP_SYS_NICE in that
 * task's user namespace and must pass security_task_setscheduler().
 *
 * Returns @count on success or a negative errno.
 */
static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
					size_t count, loff_t *offset)
{
	struct inode *inode = file_inode(file);
	struct task_struct *p;
	u64 slack_ns;
	int err;

	err = kstrtoull_from_user(buf, count, 10, &slack_ns);
	if (err < 0)
		return err;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	if (p != current) {
		rcu_read_lock();
		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
			rcu_read_unlock();
			count = -EPERM;
			goto out;
		}
		rcu_read_unlock();

		err = security_task_setscheduler(p);
		if (err) {
			count = err;
			goto out;
		}
	}

	task_lock(p);
	if (slack_ns == 0)
		p->timer_slack_ns = p->default_timer_slack_ns;
	else
		p->timer_slack_ns = slack_ns;
	task_unlock(p);

out:
	put_task_struct(p);

	return count;
}

/*
 * Show the task's timer slack in nanoseconds.  Reading another task's value
 * requires CAP_SYS_NICE in that task's user namespace and must pass
 * security_task_getscheduler().
 */
static int timerslack_ns_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct task_struct *p;
	int err = 0;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	if (p != current) {
		rcu_read_lock();
		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
			rcu_read_unlock();
			err = -EPERM;
			goto out;
		}
		rcu_read_unlock();

		err = security_task_getscheduler(p);
		if (err)
			goto out;
	}

	task_lock(p);
	seq_printf(m, "%llu\n", p->timer_slack_ns);
	task_unlock(p);

out:
	put_task_struct(p);

	return err;
}

/* Open handler: single-record seq_file with the inode as private data. */
static int timerslack_ns_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, timerslack_ns_show, inode);
}

/* File operations for the per-task timerslack_ns file. */
static const struct file_operations proc_pid_set_timerslack_ns_operations = {
	.open		= timerslack_ns_open,
	.read		= seq_read,
	.write		= timerslack_ns_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/*
 * Instantiate the dentry for one struct pid_entry (passed through @ptr):
 * create the inode with the entry's mode and install the entry's inode
 * operations, file operations and op union.
 */
static struct dentry *proc_pident_instantiate(struct dentry *dentry,
	struct task_struct *task, const void *ptr)
{
	const struct pid_entry *p = ptr;
	struct inode *inode;
	struct proc_inode *ei;

	inode = proc_pid_make_inode(dentry->d_sb, task, p->mode);
	if (!inode)
		return ERR_PTR(-ENOENT);

	ei = PROC_I(inode);
	if (S_ISDIR(inode->i_mode))
		set_nlink(inode, 2);	/* Use getattr to fix if necessary */
	/* Entries without their own operations keep the inode's defaults. */
	if (p->iop)
		inode->i_op = p->iop;
	if (p->fop)
		inode->i_fop = p->fop;
	ei->op = p->op;
	pid_update_inode(task, inode);
	d_set_d_op(dentry, &pid_dentry_operations);
	return d_splice_alias(inode, dentry);
}

/*
 * Look up @dentry's name in the entry table [@p, @end) and instantiate the
 * matching entry.  Returns ERR_PTR(-ENOENT) when the task is gone or no
 * entry matches.
 */
static struct dentry *proc_pident_lookup(struct inode *dir, 
					 struct dentry *dentry,
					 const struct pid_entry *p,
					 const struct pid_entry *end)
{
	struct task_struct *task = get_proc_task(dir);
	struct dentry *res = ERR_PTR(-ENOENT);

	if (!task)
		goto out_no_task;

	/*
	 * Yes, it does not scale. And it should not. Don't add
	 * new entries into /proc/<tgid>/ without very good reasons.
	 */
	for (; p < end; p++) {
		/* Compare lengths first so memcmp() only sees equal-length names. */
		if (p->len != dentry->d_name.len)
			continue;
		if (!memcmp(dentry->d_name.name, p->name, p->len)) {
			res = proc_pident_instantiate(dentry, task, p);
			break;
		}
	}
	put_task_struct(task);
out_no_task:
	return res;
}

/*
 * Emit the dot entries followed by the @nents entries of @ents, resuming at
 * ctx->pos.  Returns -ENOENT when the task is gone, 0 otherwise.
 */
static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
		const struct pid_entry *ents, unsigned int nents)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	const struct pid_entry *p;

	if (!task)
		return -ENOENT;

	if (!dir_emit_dots(file, ctx))
		goto out;

	/* Table index is ctx->pos - 2 (dot entries emitted above); stop past the end. */
	if (ctx->pos >= nents + 2)
		goto out;

	for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
		if (!proc_fill_cache(file, ctx, p->name, p->len,
				proc_pident_instantiate, task, p))
			break;
		ctx->pos++;
	}
out:
	put_task_struct(task);
	return 0;
}

#ifdef CONFIG_SECURITY
/*
 * Read a security attribute of the task.  The attribute is selected by the
 * file's own name and the LSM recorded in the proc inode; the buffer filled
 * in by security_getprocattr() is freed here.
 */
static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
				  size_t count, loff_t *ppos)
{
	struct inode * inode = file_inode(file);
	char *p = NULL;
	ssize_t length;
	struct task_struct *task = get_proc_task(inode);

	if (!task)
		return -ESRCH;

	length = security_getprocattr(task, PROC_I(inode)->op.lsm,
				      (char*)file->f_path.dentry->d_name.name,
				      &p);
	put_task_struct(task);
	/* Zero or negative lengths are returned unchanged. */
	if (length > 0)
		length = simple_read_from_buffer(buf, count, ppos, p, length);
	kfree(p);
	return length;
}

static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
				   size_t count, loff_t *ppos)
{
	struct inode * inode = file_inode(file);
	struct task_struct *task;
	void *page;
	int rv;

	rcu_read_lock();
	task = pid_task(proc_pid(inode), PIDTYPE_PID);
	if (!task) {
		rcu_read_unlock();
2564 return -ESRCH; 2565 } 2566 /* A task may only write its own attributes. */ 2567 if (current != task) { 2568 rcu_read_unlock(); 2569 return -EACCES; 2570 } 2571 /* Prevent changes to overridden credentials. */ 2572 if (current_cred() != current_real_cred()) { 2573 rcu_read_unlock(); 2574 return -EBUSY; 2575 } 2576 rcu_read_unlock(); 2577 2578 if (count > PAGE_SIZE) 2579 count = PAGE_SIZE; 2580 2581 /* No partial writes. */ 2582 if (*ppos != 0) 2583 return -EINVAL; 2584 2585 page = memdup_user(buf, count); 2586 if (IS_ERR(page)) { 2587 rv = PTR_ERR(page); 2588 goto out; 2589 } 2590 2591 /* Guard against adverse ptrace interaction */ 2592 rv = mutex_lock_interruptible(¤t->signal->cred_guard_mutex); 2593 if (rv < 0) 2594 goto out_free; 2595 2596 rv = security_setprocattr(PROC_I(inode)->op.lsm, 2597 file->f_path.dentry->d_name.name, page, 2598 count); 2599 mutex_unlock(¤t->signal->cred_guard_mutex); 2600 out_free: 2601 kfree(page); 2602 out: 2603 return rv; 2604 } 2605 2606 static const struct file_operations proc_pid_attr_operations = { 2607 .read = proc_pid_attr_read, 2608 .write = proc_pid_attr_write, 2609 .llseek = generic_file_llseek, 2610 }; 2611 2612 #define LSM_DIR_OPS(LSM) \ 2613 static int proc_##LSM##_attr_dir_iterate(struct file *filp, \ 2614 struct dir_context *ctx) \ 2615 { \ 2616 return proc_pident_readdir(filp, ctx, \ 2617 LSM##_attr_dir_stuff, \ 2618 ARRAY_SIZE(LSM##_attr_dir_stuff)); \ 2619 } \ 2620 \ 2621 static const struct file_operations proc_##LSM##_attr_dir_ops = { \ 2622 .read = generic_read_dir, \ 2623 .iterate = proc_##LSM##_attr_dir_iterate, \ 2624 .llseek = default_llseek, \ 2625 }; \ 2626 \ 2627 static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \ 2628 struct dentry *dentry, unsigned int flags) \ 2629 { \ 2630 return proc_pident_lookup(dir, dentry, \ 2631 LSM##_attr_dir_stuff, \ 2632 LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \ 2633 } \ 2634 \ 2635 static const struct inode_operations 
proc_##LSM##_attr_dir_inode_ops = { \ 2636 .lookup = proc_##LSM##_attr_dir_lookup, \ 2637 .getattr = pid_getattr, \ 2638 .setattr = proc_setattr, \ 2639 } 2640 2641 #ifdef CONFIG_SECURITY_SMACK 2642 static const struct pid_entry smack_attr_dir_stuff[] = { 2643 ATTR("smack", "current", 0666), 2644 }; 2645 LSM_DIR_OPS(smack); 2646 #endif 2647 2648 static const struct pid_entry attr_dir_stuff[] = { 2649 ATTR(NULL, "current", 0666), 2650 ATTR(NULL, "prev", 0444), 2651 ATTR(NULL, "exec", 0666), 2652 ATTR(NULL, "fscreate", 0666), 2653 ATTR(NULL, "keycreate", 0666), 2654 ATTR(NULL, "sockcreate", 0666), 2655 #ifdef CONFIG_SECURITY_SMACK 2656 DIR("smack", 0555, 2657 proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops), 2658 #endif 2659 }; 2660 2661 static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) 2662 { 2663 return proc_pident_readdir(file, ctx, 2664 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2665 } 2666 2667 static const struct file_operations proc_attr_dir_operations = { 2668 .read = generic_read_dir, 2669 .iterate_shared = proc_attr_dir_readdir, 2670 .llseek = generic_file_llseek, 2671 }; 2672 2673 static struct dentry *proc_attr_dir_lookup(struct inode *dir, 2674 struct dentry *dentry, unsigned int flags) 2675 { 2676 return proc_pident_lookup(dir, dentry, 2677 attr_dir_stuff, 2678 attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff)); 2679 } 2680 2681 static const struct inode_operations proc_attr_dir_inode_operations = { 2682 .lookup = proc_attr_dir_lookup, 2683 .getattr = pid_getattr, 2684 .setattr = proc_setattr, 2685 }; 2686 2687 #endif 2688 2689 #ifdef CONFIG_ELF_CORE 2690 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2691 size_t count, loff_t *ppos) 2692 { 2693 struct task_struct *task = get_proc_task(file_inode(file)); 2694 struct mm_struct *mm; 2695 char buffer[PROC_NUMBUF]; 2696 size_t len; 2697 int ret; 2698 2699 if (!task) 2700 return -ESRCH; 2701 2702 ret = 0; 2703 mm = get_task_mm(task); 2704 if (mm) { 
2705 len = snprintf(buffer, sizeof(buffer), "%08lx\n", 2706 ((mm->flags & MMF_DUMP_FILTER_MASK) >> 2707 MMF_DUMP_FILTER_SHIFT)); 2708 mmput(mm); 2709 ret = simple_read_from_buffer(buf, count, ppos, buffer, len); 2710 } 2711 2712 put_task_struct(task); 2713 2714 return ret; 2715 } 2716 2717 static ssize_t proc_coredump_filter_write(struct file *file, 2718 const char __user *buf, 2719 size_t count, 2720 loff_t *ppos) 2721 { 2722 struct task_struct *task; 2723 struct mm_struct *mm; 2724 unsigned int val; 2725 int ret; 2726 int i; 2727 unsigned long mask; 2728 2729 ret = kstrtouint_from_user(buf, count, 0, &val); 2730 if (ret < 0) 2731 return ret; 2732 2733 ret = -ESRCH; 2734 task = get_proc_task(file_inode(file)); 2735 if (!task) 2736 goto out_no_task; 2737 2738 mm = get_task_mm(task); 2739 if (!mm) 2740 goto out_no_mm; 2741 ret = 0; 2742 2743 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { 2744 if (val & mask) 2745 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2746 else 2747 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2748 } 2749 2750 mmput(mm); 2751 out_no_mm: 2752 put_task_struct(task); 2753 out_no_task: 2754 if (ret < 0) 2755 return ret; 2756 return count; 2757 } 2758 2759 static const struct file_operations proc_coredump_filter_operations = { 2760 .read = proc_coredump_filter_read, 2761 .write = proc_coredump_filter_write, 2762 .llseek = generic_file_llseek, 2763 }; 2764 #endif 2765 2766 #ifdef CONFIG_TASK_IO_ACCOUNTING 2767 static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole) 2768 { 2769 struct task_io_accounting acct = task->ioac; 2770 unsigned long flags; 2771 int result; 2772 2773 result = mutex_lock_killable(&task->signal->cred_guard_mutex); 2774 if (result) 2775 return result; 2776 2777 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { 2778 result = -EACCES; 2779 goto out_unlock; 2780 } 2781 2782 if (whole && lock_task_sighand(task, &flags)) { 2783 struct task_struct *t = task; 2784 2785 
task_io_accounting_add(&acct, &task->signal->ioac); 2786 while_each_thread(task, t) 2787 task_io_accounting_add(&acct, &t->ioac); 2788 2789 unlock_task_sighand(task, &flags); 2790 } 2791 seq_printf(m, 2792 "rchar: %llu\n" 2793 "wchar: %llu\n" 2794 "syscr: %llu\n" 2795 "syscw: %llu\n" 2796 "read_bytes: %llu\n" 2797 "write_bytes: %llu\n" 2798 "cancelled_write_bytes: %llu\n", 2799 (unsigned long long)acct.rchar, 2800 (unsigned long long)acct.wchar, 2801 (unsigned long long)acct.syscr, 2802 (unsigned long long)acct.syscw, 2803 (unsigned long long)acct.read_bytes, 2804 (unsigned long long)acct.write_bytes, 2805 (unsigned long long)acct.cancelled_write_bytes); 2806 result = 0; 2807 2808 out_unlock: 2809 mutex_unlock(&task->signal->cred_guard_mutex); 2810 return result; 2811 } 2812 2813 static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns, 2814 struct pid *pid, struct task_struct *task) 2815 { 2816 return do_io_accounting(task, m, 0); 2817 } 2818 2819 static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns, 2820 struct pid *pid, struct task_struct *task) 2821 { 2822 return do_io_accounting(task, m, 1); 2823 } 2824 #endif /* CONFIG_TASK_IO_ACCOUNTING */ 2825 2826 #ifdef CONFIG_USER_NS 2827 static int proc_id_map_open(struct inode *inode, struct file *file, 2828 const struct seq_operations *seq_ops) 2829 { 2830 struct user_namespace *ns = NULL; 2831 struct task_struct *task; 2832 struct seq_file *seq; 2833 int ret = -EINVAL; 2834 2835 task = get_proc_task(inode); 2836 if (task) { 2837 rcu_read_lock(); 2838 ns = get_user_ns(task_cred_xxx(task, user_ns)); 2839 rcu_read_unlock(); 2840 put_task_struct(task); 2841 } 2842 if (!ns) 2843 goto err; 2844 2845 ret = seq_open(file, seq_ops); 2846 if (ret) 2847 goto err_put_ns; 2848 2849 seq = file->private_data; 2850 seq->private = ns; 2851 2852 return 0; 2853 err_put_ns: 2854 put_user_ns(ns); 2855 err: 2856 return ret; 2857 } 2858 2859 static int proc_id_map_release(struct inode 
*inode, struct file *file) 2860 { 2861 struct seq_file *seq = file->private_data; 2862 struct user_namespace *ns = seq->private; 2863 put_user_ns(ns); 2864 return seq_release(inode, file); 2865 } 2866 2867 static int proc_uid_map_open(struct inode *inode, struct file *file) 2868 { 2869 return proc_id_map_open(inode, file, &proc_uid_seq_operations); 2870 } 2871 2872 static int proc_gid_map_open(struct inode *inode, struct file *file) 2873 { 2874 return proc_id_map_open(inode, file, &proc_gid_seq_operations); 2875 } 2876 2877 static int proc_projid_map_open(struct inode *inode, struct file *file) 2878 { 2879 return proc_id_map_open(inode, file, &proc_projid_seq_operations); 2880 } 2881 2882 static const struct file_operations proc_uid_map_operations = { 2883 .open = proc_uid_map_open, 2884 .write = proc_uid_map_write, 2885 .read = seq_read, 2886 .llseek = seq_lseek, 2887 .release = proc_id_map_release, 2888 }; 2889 2890 static const struct file_operations proc_gid_map_operations = { 2891 .open = proc_gid_map_open, 2892 .write = proc_gid_map_write, 2893 .read = seq_read, 2894 .llseek = seq_lseek, 2895 .release = proc_id_map_release, 2896 }; 2897 2898 static const struct file_operations proc_projid_map_operations = { 2899 .open = proc_projid_map_open, 2900 .write = proc_projid_map_write, 2901 .read = seq_read, 2902 .llseek = seq_lseek, 2903 .release = proc_id_map_release, 2904 }; 2905 2906 static int proc_setgroups_open(struct inode *inode, struct file *file) 2907 { 2908 struct user_namespace *ns = NULL; 2909 struct task_struct *task; 2910 int ret; 2911 2912 ret = -ESRCH; 2913 task = get_proc_task(inode); 2914 if (task) { 2915 rcu_read_lock(); 2916 ns = get_user_ns(task_cred_xxx(task, user_ns)); 2917 rcu_read_unlock(); 2918 put_task_struct(task); 2919 } 2920 if (!ns) 2921 goto err; 2922 2923 if (file->f_mode & FMODE_WRITE) { 2924 ret = -EACCES; 2925 if (!ns_capable(ns, CAP_SYS_ADMIN)) 2926 goto err_put_ns; 2927 } 2928 2929 ret = single_open(file, &proc_setgroups_show, 
ns); 2930 if (ret) 2931 goto err_put_ns; 2932 2933 return 0; 2934 err_put_ns: 2935 put_user_ns(ns); 2936 err: 2937 return ret; 2938 } 2939 2940 static int proc_setgroups_release(struct inode *inode, struct file *file) 2941 { 2942 struct seq_file *seq = file->private_data; 2943 struct user_namespace *ns = seq->private; 2944 int ret = single_release(inode, file); 2945 put_user_ns(ns); 2946 return ret; 2947 } 2948 2949 static const struct file_operations proc_setgroups_operations = { 2950 .open = proc_setgroups_open, 2951 .write = proc_setgroups_write, 2952 .read = seq_read, 2953 .llseek = seq_lseek, 2954 .release = proc_setgroups_release, 2955 }; 2956 #endif /* CONFIG_USER_NS */ 2957 2958 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2959 struct pid *pid, struct task_struct *task) 2960 { 2961 int err = lock_trace(task); 2962 if (!err) { 2963 seq_printf(m, "%08x\n", task->personality); 2964 unlock_trace(task); 2965 } 2966 return err; 2967 } 2968 2969 #ifdef CONFIG_LIVEPATCH 2970 static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns, 2971 struct pid *pid, struct task_struct *task) 2972 { 2973 seq_printf(m, "%d\n", task->patch_state); 2974 return 0; 2975 } 2976 #endif /* CONFIG_LIVEPATCH */ 2977 2978 #ifdef CONFIG_STACKLEAK_METRICS 2979 static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, 2980 struct pid *pid, struct task_struct *task) 2981 { 2982 unsigned long prev_depth = THREAD_SIZE - 2983 (task->prev_lowest_stack & (THREAD_SIZE - 1)); 2984 unsigned long depth = THREAD_SIZE - 2985 (task->lowest_stack & (THREAD_SIZE - 1)); 2986 2987 seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n", 2988 prev_depth, depth); 2989 return 0; 2990 } 2991 #endif /* CONFIG_STACKLEAK_METRICS */ 2992 2993 /* 2994 * Thread groups 2995 */ 2996 static const struct file_operations proc_task_operations; 2997 static const struct inode_operations proc_task_inode_operations; 2998 2999 static const struct pid_entry 
tgid_base_stuff[] = { 3000 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 3001 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3002 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), 3003 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3004 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3005 #ifdef CONFIG_NET 3006 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 3007 #endif 3008 REG("environ", S_IRUSR, proc_environ_operations), 3009 REG("auxv", S_IRUSR, proc_auxv_operations), 3010 ONE("status", S_IRUGO, proc_pid_status), 3011 ONE("personality", S_IRUSR, proc_pid_personality), 3012 ONE("limits", S_IRUGO, proc_pid_limits), 3013 #ifdef CONFIG_SCHED_DEBUG 3014 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3015 #endif 3016 #ifdef CONFIG_SCHED_AUTOGROUP 3017 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), 3018 #endif 3019 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 3020 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3021 ONE("syscall", S_IRUSR, proc_pid_syscall), 3022 #endif 3023 REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), 3024 ONE("stat", S_IRUGO, proc_tgid_stat), 3025 ONE("statm", S_IRUGO, proc_pid_statm), 3026 REG("maps", S_IRUGO, proc_pid_maps_operations), 3027 #ifdef CONFIG_NUMA 3028 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), 3029 #endif 3030 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3031 LNK("cwd", proc_cwd_link), 3032 LNK("root", proc_root_link), 3033 LNK("exe", proc_exe_link), 3034 REG("mounts", S_IRUGO, proc_mounts_operations), 3035 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3036 REG("mountstats", S_IRUSR, proc_mountstats_operations), 3037 #ifdef CONFIG_PROC_PAGE_MONITOR 3038 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3039 REG("smaps", S_IRUGO, proc_pid_smaps_operations), 
3040 REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), 3041 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3042 #endif 3043 #ifdef CONFIG_SECURITY 3044 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3045 #endif 3046 #ifdef CONFIG_KALLSYMS 3047 ONE("wchan", S_IRUGO, proc_pid_wchan), 3048 #endif 3049 #ifdef CONFIG_STACKTRACE 3050 ONE("stack", S_IRUSR, proc_pid_stack), 3051 #endif 3052 #ifdef CONFIG_SCHED_INFO 3053 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 3054 #endif 3055 #ifdef CONFIG_LATENCYTOP 3056 REG("latency", S_IRUGO, proc_lstats_operations), 3057 #endif 3058 #ifdef CONFIG_PROC_PID_CPUSET 3059 ONE("cpuset", S_IRUGO, proc_cpuset_show), 3060 #endif 3061 #ifdef CONFIG_CGROUPS 3062 ONE("cgroup", S_IRUGO, proc_cgroup_show), 3063 #endif 3064 ONE("oom_score", S_IRUGO, proc_oom_score), 3065 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 3066 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3067 #ifdef CONFIG_AUDIT 3068 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3069 REG("sessionid", S_IRUGO, proc_sessionid_operations), 3070 #endif 3071 #ifdef CONFIG_FAULT_INJECTION 3072 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3073 REG("fail-nth", 0644, proc_fail_nth_operations), 3074 #endif 3075 #ifdef CONFIG_ELF_CORE 3076 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 3077 #endif 3078 #ifdef CONFIG_TASK_IO_ACCOUNTING 3079 ONE("io", S_IRUSR, proc_tgid_io_accounting), 3080 #endif 3081 #ifdef CONFIG_USER_NS 3082 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 3083 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 3084 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 3085 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), 3086 #endif 3087 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) 3088 REG("timers", S_IRUGO, proc_timers_operations), 3089 #endif 3090 
REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), 3091 #ifdef CONFIG_LIVEPATCH 3092 ONE("patch_state", S_IRUSR, proc_pid_patch_state), 3093 #endif 3094 #ifdef CONFIG_STACKLEAK_METRICS 3095 ONE("stack_depth", S_IRUGO, proc_stack_depth), 3096 #endif 3097 #ifdef CONFIG_PROC_PID_ARCH_STATUS 3098 ONE("arch_status", S_IRUGO, proc_pid_arch_status), 3099 #endif 3100 }; 3101 3102 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) 3103 { 3104 return proc_pident_readdir(file, ctx, 3105 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3106 } 3107 3108 static const struct file_operations proc_tgid_base_operations = { 3109 .read = generic_read_dir, 3110 .iterate_shared = proc_tgid_base_readdir, 3111 .llseek = generic_file_llseek, 3112 }; 3113 3114 struct pid *tgid_pidfd_to_pid(const struct file *file) 3115 { 3116 if (file->f_op != &proc_tgid_base_operations) 3117 return ERR_PTR(-EBADF); 3118 3119 return proc_pid(file_inode(file)); 3120 } 3121 3122 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 3123 { 3124 return proc_pident_lookup(dir, dentry, 3125 tgid_base_stuff, 3126 tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff)); 3127 } 3128 3129 static const struct inode_operations proc_tgid_base_inode_operations = { 3130 .lookup = proc_tgid_base_lookup, 3131 .getattr = pid_getattr, 3132 .setattr = proc_setattr, 3133 .permission = proc_pid_permission, 3134 }; 3135 3136 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 3137 { 3138 struct dentry *dentry, *leader, *dir; 3139 char buf[10 + 1]; 3140 struct qstr name; 3141 3142 name.name = buf; 3143 name.len = snprintf(buf, sizeof(buf), "%u", pid); 3144 /* no ->d_hash() rejects on procfs */ 3145 dentry = d_hash_and_lookup(mnt->mnt_root, &name); 3146 if (dentry) { 3147 d_invalidate(dentry); 3148 dput(dentry); 3149 } 3150 3151 if (pid == tgid) 3152 return; 3153 3154 name.name = buf; 3155 name.len = snprintf(buf, 
sizeof(buf), "%u", tgid); 3156 leader = d_hash_and_lookup(mnt->mnt_root, &name); 3157 if (!leader) 3158 goto out; 3159 3160 name.name = "task"; 3161 name.len = strlen(name.name); 3162 dir = d_hash_and_lookup(leader, &name); 3163 if (!dir) 3164 goto out_put_leader; 3165 3166 name.name = buf; 3167 name.len = snprintf(buf, sizeof(buf), "%u", pid); 3168 dentry = d_hash_and_lookup(dir, &name); 3169 if (dentry) { 3170 d_invalidate(dentry); 3171 dput(dentry); 3172 } 3173 3174 dput(dir); 3175 out_put_leader: 3176 dput(leader); 3177 out: 3178 return; 3179 } 3180 3181 /** 3182 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 3183 * @task: task that should be flushed. 3184 * 3185 * When flushing dentries from proc, one needs to flush them from global 3186 * proc (proc_mnt) and from all the namespaces' procs this task was seen 3187 * in. This call is supposed to do all of this job. 3188 * 3189 * Looks in the dcache for 3190 * /proc/@pid 3191 * /proc/@tgid/task/@pid 3192 * if either directory is present flushes it and all of it'ts children 3193 * from the dcache. 3194 * 3195 * It is safe and reasonable to cache /proc entries for a task until 3196 * that task exits. After that they just clog up the dcache with 3197 * useless entries, possibly causing useful dcache entries to be 3198 * flushed instead. This routine is proved to flush those useless 3199 * dcache entries at process exit time. 3200 * 3201 * NOTE: This routine is just an optimization so it does not guarantee 3202 * that no dcache entries will exist at process exit time it 3203 * just makes it very unlikely that any will persist. 
3204 */ 3205 3206 void proc_flush_task(struct task_struct *task) 3207 { 3208 int i; 3209 struct pid *pid, *tgid; 3210 struct upid *upid; 3211 3212 pid = task_pid(task); 3213 tgid = task_tgid(task); 3214 3215 for (i = 0; i <= pid->level; i++) { 3216 upid = &pid->numbers[i]; 3217 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 3218 tgid->numbers[i].nr); 3219 } 3220 } 3221 3222 static struct dentry *proc_pid_instantiate(struct dentry * dentry, 3223 struct task_struct *task, const void *ptr) 3224 { 3225 struct inode *inode; 3226 3227 inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); 3228 if (!inode) 3229 return ERR_PTR(-ENOENT); 3230 3231 inode->i_op = &proc_tgid_base_inode_operations; 3232 inode->i_fop = &proc_tgid_base_operations; 3233 inode->i_flags|=S_IMMUTABLE; 3234 3235 set_nlink(inode, nlink_tgid); 3236 pid_update_inode(task, inode); 3237 3238 d_set_d_op(dentry, &pid_dentry_operations); 3239 return d_splice_alias(inode, dentry); 3240 } 3241 3242 struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags) 3243 { 3244 struct task_struct *task; 3245 unsigned tgid; 3246 struct pid_namespace *ns; 3247 struct dentry *result = ERR_PTR(-ENOENT); 3248 3249 tgid = name_to_int(&dentry->d_name); 3250 if (tgid == ~0U) 3251 goto out; 3252 3253 ns = dentry->d_sb->s_fs_info; 3254 rcu_read_lock(); 3255 task = find_task_by_pid_ns(tgid, ns); 3256 if (task) 3257 get_task_struct(task); 3258 rcu_read_unlock(); 3259 if (!task) 3260 goto out; 3261 3262 result = proc_pid_instantiate(dentry, task, NULL); 3263 put_task_struct(task); 3264 out: 3265 return result; 3266 } 3267 3268 /* 3269 * Find the first task with tgid >= tgid 3270 * 3271 */ 3272 struct tgid_iter { 3273 unsigned int tgid; 3274 struct task_struct *task; 3275 }; 3276 static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) 3277 { 3278 struct pid *pid; 3279 3280 if (iter.task) 3281 put_task_struct(iter.task); 3282 rcu_read_lock(); 3283 retry: 3284 iter.task 
= NULL; 3285 pid = find_ge_pid(iter.tgid, ns); 3286 if (pid) { 3287 iter.tgid = pid_nr_ns(pid, ns); 3288 iter.task = pid_task(pid, PIDTYPE_PID); 3289 /* What we to know is if the pid we have find is the 3290 * pid of a thread_group_leader. Testing for task 3291 * being a thread_group_leader is the obvious thing 3292 * todo but there is a window when it fails, due to 3293 * the pid transfer logic in de_thread. 3294 * 3295 * So we perform the straight forward test of seeing 3296 * if the pid we have found is the pid of a thread 3297 * group leader, and don't worry if the task we have 3298 * found doesn't happen to be a thread group leader. 3299 * As we don't care in the case of readdir. 3300 */ 3301 if (!iter.task || !has_group_leader_pid(iter.task)) { 3302 iter.tgid += 1; 3303 goto retry; 3304 } 3305 get_task_struct(iter.task); 3306 } 3307 rcu_read_unlock(); 3308 return iter; 3309 } 3310 3311 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) 3312 3313 /* for the /proc/ directory itself, after non-process stuff has been done */ 3314 int proc_pid_readdir(struct file *file, struct dir_context *ctx) 3315 { 3316 struct tgid_iter iter; 3317 struct pid_namespace *ns = proc_pid_ns(file_inode(file)); 3318 loff_t pos = ctx->pos; 3319 3320 if (pos >= PID_MAX_LIMIT + TGID_OFFSET) 3321 return 0; 3322 3323 if (pos == TGID_OFFSET - 2) { 3324 struct inode *inode = d_inode(ns->proc_self); 3325 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) 3326 return 0; 3327 ctx->pos = pos = pos + 1; 3328 } 3329 if (pos == TGID_OFFSET - 1) { 3330 struct inode *inode = d_inode(ns->proc_thread_self); 3331 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) 3332 return 0; 3333 ctx->pos = pos = pos + 1; 3334 } 3335 iter.tgid = pos - TGID_OFFSET; 3336 iter.task = NULL; 3337 for (iter = next_tgid(ns, iter); 3338 iter.task; 3339 iter.tgid += 1, iter = next_tgid(ns, iter)) { 3340 char name[10 + 1]; 3341 unsigned int len; 3342 3343 cond_resched(); 3344 if (!has_pid_permissions(ns, iter.task, 
HIDEPID_INVISIBLE)) 3345 continue; 3346 3347 len = snprintf(name, sizeof(name), "%u", iter.tgid); 3348 ctx->pos = iter.tgid + TGID_OFFSET; 3349 if (!proc_fill_cache(file, ctx, name, len, 3350 proc_pid_instantiate, iter.task, NULL)) { 3351 put_task_struct(iter.task); 3352 return 0; 3353 } 3354 } 3355 ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; 3356 return 0; 3357 } 3358 3359 /* 3360 * proc_tid_comm_permission is a special permission function exclusively 3361 * used for the node /proc/<pid>/task/<tid>/comm. 3362 * It bypasses generic permission checks in the case where a task of the same 3363 * task group attempts to access the node. 3364 * The rationale behind this is that glibc and bionic access this node for 3365 * cross thread naming (pthread_set/getname_np(!self)). However, if 3366 * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0, 3367 * which locks out the cross thread naming implementation. 3368 * This function makes sure that the node is always accessible for members of 3369 * same thread group. 3370 */ 3371 static int proc_tid_comm_permission(struct inode *inode, int mask) 3372 { 3373 bool is_same_tgroup; 3374 struct task_struct *task; 3375 3376 task = get_proc_task(inode); 3377 if (!task) 3378 return -ESRCH; 3379 is_same_tgroup = same_thread_group(current, task); 3380 put_task_struct(task); 3381 3382 if (likely(is_same_tgroup && !(mask & MAY_EXEC))) { 3383 /* This file (/proc/<pid>/task/<tid>/comm) can always be 3384 * read or written by the members of the corresponding 3385 * thread group. 
3386 */ 3387 return 0; 3388 } 3389 3390 return generic_permission(inode, mask); 3391 } 3392 3393 static const struct inode_operations proc_tid_comm_inode_operations = { 3394 .permission = proc_tid_comm_permission, 3395 }; 3396 3397 /* 3398 * Tasks 3399 */ 3400 static const struct pid_entry tid_base_stuff[] = { 3401 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3402 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3403 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3404 #ifdef CONFIG_NET 3405 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 3406 #endif 3407 REG("environ", S_IRUSR, proc_environ_operations), 3408 REG("auxv", S_IRUSR, proc_auxv_operations), 3409 ONE("status", S_IRUGO, proc_pid_status), 3410 ONE("personality", S_IRUSR, proc_pid_personality), 3411 ONE("limits", S_IRUGO, proc_pid_limits), 3412 #ifdef CONFIG_SCHED_DEBUG 3413 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3414 #endif 3415 NOD("comm", S_IFREG|S_IRUGO|S_IWUSR, 3416 &proc_tid_comm_inode_operations, 3417 &proc_pid_set_comm_operations, {}), 3418 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3419 ONE("syscall", S_IRUSR, proc_pid_syscall), 3420 #endif 3421 REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), 3422 ONE("stat", S_IRUGO, proc_tid_stat), 3423 ONE("statm", S_IRUGO, proc_pid_statm), 3424 REG("maps", S_IRUGO, proc_pid_maps_operations), 3425 #ifdef CONFIG_PROC_CHILDREN 3426 REG("children", S_IRUGO, proc_tid_children_operations), 3427 #endif 3428 #ifdef CONFIG_NUMA 3429 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), 3430 #endif 3431 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3432 LNK("cwd", proc_cwd_link), 3433 LNK("root", proc_root_link), 3434 LNK("exe", proc_exe_link), 3435 REG("mounts", S_IRUGO, proc_mounts_operations), 3436 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3437 #ifdef CONFIG_PROC_PAGE_MONITOR 3438 REG("clear_refs", S_IWUSR, 
proc_clear_refs_operations), 3439 REG("smaps", S_IRUGO, proc_pid_smaps_operations), 3440 REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), 3441 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3442 #endif 3443 #ifdef CONFIG_SECURITY 3444 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3445 #endif 3446 #ifdef CONFIG_KALLSYMS 3447 ONE("wchan", S_IRUGO, proc_pid_wchan), 3448 #endif 3449 #ifdef CONFIG_STACKTRACE 3450 ONE("stack", S_IRUSR, proc_pid_stack), 3451 #endif 3452 #ifdef CONFIG_SCHED_INFO 3453 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 3454 #endif 3455 #ifdef CONFIG_LATENCYTOP 3456 REG("latency", S_IRUGO, proc_lstats_operations), 3457 #endif 3458 #ifdef CONFIG_PROC_PID_CPUSET 3459 ONE("cpuset", S_IRUGO, proc_cpuset_show), 3460 #endif 3461 #ifdef CONFIG_CGROUPS 3462 ONE("cgroup", S_IRUGO, proc_cgroup_show), 3463 #endif 3464 ONE("oom_score", S_IRUGO, proc_oom_score), 3465 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 3466 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3467 #ifdef CONFIG_AUDIT 3468 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3469 REG("sessionid", S_IRUGO, proc_sessionid_operations), 3470 #endif 3471 #ifdef CONFIG_FAULT_INJECTION 3472 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3473 REG("fail-nth", 0644, proc_fail_nth_operations), 3474 #endif 3475 #ifdef CONFIG_TASK_IO_ACCOUNTING 3476 ONE("io", S_IRUSR, proc_tid_io_accounting), 3477 #endif 3478 #ifdef CONFIG_USER_NS 3479 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 3480 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 3481 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 3482 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), 3483 #endif 3484 #ifdef CONFIG_LIVEPATCH 3485 ONE("patch_state", S_IRUSR, proc_pid_patch_state), 3486 #endif 3487 #ifdef CONFIG_PROC_PID_ARCH_STATUS 3488 ONE("arch_status", S_IRUGO, 
proc_pid_arch_status), 3489 #endif 3490 }; 3491 3492 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) 3493 { 3494 return proc_pident_readdir(file, ctx, 3495 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3496 } 3497 3498 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 3499 { 3500 return proc_pident_lookup(dir, dentry, 3501 tid_base_stuff, 3502 tid_base_stuff + ARRAY_SIZE(tid_base_stuff)); 3503 } 3504 3505 static const struct file_operations proc_tid_base_operations = { 3506 .read = generic_read_dir, 3507 .iterate_shared = proc_tid_base_readdir, 3508 .llseek = generic_file_llseek, 3509 }; 3510 3511 static const struct inode_operations proc_tid_base_inode_operations = { 3512 .lookup = proc_tid_base_lookup, 3513 .getattr = pid_getattr, 3514 .setattr = proc_setattr, 3515 }; 3516 3517 static struct dentry *proc_task_instantiate(struct dentry *dentry, 3518 struct task_struct *task, const void *ptr) 3519 { 3520 struct inode *inode; 3521 inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); 3522 if (!inode) 3523 return ERR_PTR(-ENOENT); 3524 3525 inode->i_op = &proc_tid_base_inode_operations; 3526 inode->i_fop = &proc_tid_base_operations; 3527 inode->i_flags |= S_IMMUTABLE; 3528 3529 set_nlink(inode, nlink_tid); 3530 pid_update_inode(task, inode); 3531 3532 d_set_d_op(dentry, &pid_dentry_operations); 3533 return d_splice_alias(inode, dentry); 3534 } 3535 3536 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 3537 { 3538 struct task_struct *task; 3539 struct task_struct *leader = get_proc_task(dir); 3540 unsigned tid; 3541 struct pid_namespace *ns; 3542 struct dentry *result = ERR_PTR(-ENOENT); 3543 3544 if (!leader) 3545 goto out_no_task; 3546 3547 tid = name_to_int(&dentry->d_name); 3548 if (tid == ~0U) 3549 goto out; 3550 3551 ns = dentry->d_sb->s_fs_info; 3552 rcu_read_lock(); 3553 task = find_task_by_pid_ns(tid, 
ns); 3554 if (task) 3555 get_task_struct(task); 3556 rcu_read_unlock(); 3557 if (!task) 3558 goto out; 3559 if (!same_thread_group(leader, task)) 3560 goto out_drop_task; 3561 3562 result = proc_task_instantiate(dentry, task, NULL); 3563 out_drop_task: 3564 put_task_struct(task); 3565 out: 3566 put_task_struct(leader); 3567 out_no_task: 3568 return result; 3569 } 3570 3571 /* 3572 * Find the first tid of a thread group to return to user space. 3573 * 3574 * Usually this is just the thread group leader, but if the users 3575 * buffer was too small or there was a seek into the middle of the 3576 * directory we have more work todo. 3577 * 3578 * In the case of a short read we start with find_task_by_pid. 3579 * 3580 * In the case of a seek we start with the leader and walk nr 3581 * threads past it. 3582 */ 3583 static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, 3584 struct pid_namespace *ns) 3585 { 3586 struct task_struct *pos, *task; 3587 unsigned long nr = f_pos; 3588 3589 if (nr != f_pos) /* 32bit overflow? */ 3590 return NULL; 3591 3592 rcu_read_lock(); 3593 task = pid_task(pid, PIDTYPE_PID); 3594 if (!task) 3595 goto fail; 3596 3597 /* Attempt to start with the tid of a thread */ 3598 if (tid && nr) { 3599 pos = find_task_by_pid_ns(tid, ns); 3600 if (pos && same_thread_group(pos, task)) 3601 goto found; 3602 } 3603 3604 /* If nr exceeds the number of threads there is nothing todo */ 3605 if (nr >= get_nr_threads(task)) 3606 goto fail; 3607 3608 /* If we haven't found our starting place yet start 3609 * with the leader and walk nr threads forward. 3610 */ 3611 pos = task = task->group_leader; 3612 do { 3613 if (!nr--) 3614 goto found; 3615 } while_each_thread(task, pos); 3616 fail: 3617 pos = NULL; 3618 goto out; 3619 found: 3620 get_task_struct(pos); 3621 out: 3622 rcu_read_unlock(); 3623 return pos; 3624 } 3625 3626 /* 3627 * Find the next thread in the thread list. 3628 * Return NULL if there is an error or no next thread. 
3629 * 3630 * The reference to the input task_struct is released. 3631 */ 3632 static struct task_struct *next_tid(struct task_struct *start) 3633 { 3634 struct task_struct *pos = NULL; 3635 rcu_read_lock(); 3636 if (pid_alive(start)) { 3637 pos = next_thread(start); 3638 if (thread_group_leader(pos)) 3639 pos = NULL; 3640 else 3641 get_task_struct(pos); 3642 } 3643 rcu_read_unlock(); 3644 put_task_struct(start); 3645 return pos; 3646 } 3647 3648 /* for the /proc/TGID/task/ directories */ 3649 static int proc_task_readdir(struct file *file, struct dir_context *ctx) 3650 { 3651 struct inode *inode = file_inode(file); 3652 struct task_struct *task; 3653 struct pid_namespace *ns; 3654 int tid; 3655 3656 if (proc_inode_is_dead(inode)) 3657 return -ENOENT; 3658 3659 if (!dir_emit_dots(file, ctx)) 3660 return 0; 3661 3662 /* f_version caches the tgid value that the last readdir call couldn't 3663 * return. lseek aka telldir automagically resets f_version to 0. 3664 */ 3665 ns = proc_pid_ns(inode); 3666 tid = (int)file->f_version; 3667 file->f_version = 0; 3668 for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); 3669 task; 3670 task = next_tid(task), ctx->pos++) { 3671 char name[10 + 1]; 3672 unsigned int len; 3673 tid = task_pid_nr_ns(task, ns); 3674 len = snprintf(name, sizeof(name), "%u", tid); 3675 if (!proc_fill_cache(file, ctx, name, len, 3676 proc_task_instantiate, task, NULL)) { 3677 /* returning this tgid failed, save it as the first 3678 * pid for the next readir call */ 3679 file->f_version = (u64)tid; 3680 put_task_struct(task); 3681 break; 3682 } 3683 } 3684 3685 return 0; 3686 } 3687 3688 static int proc_task_getattr(const struct path *path, struct kstat *stat, 3689 u32 request_mask, unsigned int query_flags) 3690 { 3691 struct inode *inode = d_inode(path->dentry); 3692 struct task_struct *p = get_proc_task(inode); 3693 generic_fillattr(inode, stat); 3694 3695 if (p) { 3696 stat->nlink += get_nr_threads(p); 3697 put_task_struct(p); 3698 } 3699 
3700 return 0; 3701 } 3702 3703 static const struct inode_operations proc_task_inode_operations = { 3704 .lookup = proc_task_lookup, 3705 .getattr = proc_task_getattr, 3706 .setattr = proc_setattr, 3707 .permission = proc_pid_permission, 3708 }; 3709 3710 static const struct file_operations proc_task_operations = { 3711 .read = generic_read_dir, 3712 .iterate_shared = proc_task_readdir, 3713 .llseek = generic_file_llseek, 3714 }; 3715 3716 void __init set_proc_pid_nlink(void) 3717 { 3718 nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3719 nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3720 } 3721