/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't use
 *  the rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
48 */ 49 50 #include <asm/uaccess.h> 51 52 #include <linux/errno.h> 53 #include <linux/time.h> 54 #include <linux/proc_fs.h> 55 #include <linux/stat.h> 56 #include <linux/task_io_accounting_ops.h> 57 #include <linux/init.h> 58 #include <linux/capability.h> 59 #include <linux/file.h> 60 #include <linux/fdtable.h> 61 #include <linux/string.h> 62 #include <linux/seq_file.h> 63 #include <linux/namei.h> 64 #include <linux/mnt_namespace.h> 65 #include <linux/mm.h> 66 #include <linux/rcupdate.h> 67 #include <linux/kallsyms.h> 68 #include <linux/resource.h> 69 #include <linux/module.h> 70 #include <linux/mount.h> 71 #include <linux/security.h> 72 #include <linux/ptrace.h> 73 #include <linux/tracehook.h> 74 #include <linux/cgroup.h> 75 #include <linux/cpuset.h> 76 #include <linux/audit.h> 77 #include <linux/poll.h> 78 #include <linux/nsproxy.h> 79 #include <linux/oom.h> 80 #include <linux/elf.h> 81 #include <linux/pid_namespace.h> 82 #include "internal.h" 83 84 /* NOTE: 85 * Implementing inode permission operations in /proc is almost 86 * certainly an error. Permission checks need to happen during 87 * each system call not at open time. The reason is that most of 88 * what we wish to check for permissions in /proc varies at runtime. 89 * 90 * The classic example of a problem is opening file descriptors 91 * in /proc for a task before it execs a suid executable. 
*/

/*
 * One row of a per-process /proc directory table.  Rows are normally
 * written with the NOD() family of initializer macros below.
 */
struct pid_entry {
	char *name;	/* entry name */
	int len;	/* name length; NOD() sets it to sizeof(NAME) - 1 */
	mode_t mode;	/* file type bits OR'ed with the permission bits */
	const struct inode_operations *iop;	/* NULL for REG/INF/ONE entries */
	const struct file_operations *fop;	/* NULL for LNK entries */
	union proc_op op;	/* proc_get_link, proc_read or proc_show callback */
};

/*
 * Generic initializer for a struct pid_entry.  NAME must be a string
 * literal (or array) because the length is taken with sizeof.
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {			\
	.name = (NAME),					\
	.len  = sizeof(NAME) - 1,			\
	.mode = MODE,					\
	.iop  = IOP,					\
	.fop  = FOP,					\
	.op   = OP,					\
}

/* Directory entry: S_IFDIR plus MODE, with explicit inode and file ops. */
#define DIR(NAME, MODE, iops, fops)	\
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
/* Symlink entry: mode rwx for all, target resolved by get_link. */
#define LNK(NAME, get_link)					\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
		&proc_pid_link_inode_operations, NULL,		\
		{ .proc_get_link = get_link } )
/* Regular file entry with its own file_operations. */
#define REG(NAME, MODE, fops)				\
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
/* Regular file served by proc_info_file_operations through a proc_read hook. */
#define INF(NAME, MODE, read)				\
	NOD(NAME, (S_IFREG|(MODE)),			\
		NULL, &proc_info_file_operations,	\
		{ .proc_read = read } )
/* Regular file served by proc_single_file_operations through a proc_show hook. */
#define ONE(NAME, MODE, show)				\
	NOD(NAME, (S_IFREG|(MODE)),			\
		NULL, &proc_single_file_operations,	\
		{ .proc_show = show } )

/*
 * Count the number of hardlinks for the pid_entry table, excluding the .
 * and .. links.
*/
/* Returns how many of the first @n rows of @entries have a directory mode. */
static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
	unsigned int n)
{
	unsigned int i;
	unsigned int count;

	count = 0;
	for (i = 0; i < n; ++i) {
		if (S_ISDIR(entries[i].mode))
			++count;
	}

	return count;
}

/*
 * Return @task's fs_struct with its count incremented, or NULL when the
 * task has none.  task_lock() is held while task->fs is read and pinned.
 * The callers in this file drop the reference with put_fs_struct().
 */
static struct fs_struct *get_fs_struct(struct task_struct *task)
{
	struct fs_struct *fs;
	task_lock(task);
	fs = task->fs;
	if(fs)
		atomic_inc(&fs->count);
	task_unlock(task);
	return fs;
}

/*
 * Read tsk->signal->count under the sighand lock.
 * Returns 0 when lock_task_sighand() fails.
 */
static int get_nr_threads(struct task_struct *tsk)
{
	unsigned long flags;
	int count = 0;

	if (lock_task_sighand(tsk, &flags)) {
		count = atomic_read(&tsk->signal->count);
		unlock_task_sighand(tsk, &flags);
	}
	return count;
}

/*
 * proc_get_link hook: copy the task's current working directory into
 * *@path and take a reference on it.
 * Returns 0 on success, -ENOENT when the task or its fs_struct is gone.
 */
static int proc_cwd_link(struct inode *inode, struct path *path)
{
	struct task_struct *task = get_proc_task(inode);
	struct fs_struct *fs = NULL;
	int result = -ENOENT;

	if (task) {
		fs = get_fs_struct(task);
		put_task_struct(task);
	}
	if (fs) {
		/* fs->lock keeps pwd stable while it is copied and pinned */
		read_lock(&fs->lock);
		*path = fs->pwd;
		path_get(&fs->pwd);
		read_unlock(&fs->lock);
		result = 0;
		put_fs_struct(fs);
	}
	return result;
}

/*
 * proc_get_link hook: same as proc_cwd_link() but for the task's root
 * directory (fs->root).
 */
static int proc_root_link(struct inode *inode, struct path *path)
{
	struct task_struct *task = get_proc_task(inode);
	struct fs_struct *fs = NULL;
	int result = -ENOENT;

	if (task) {
		fs = get_fs_struct(task);
		put_task_struct(task);
	}
	if (fs) {
		/* fs->lock keeps root stable while it is copied and pinned */
		read_lock(&fs->lock);
		*path = fs->root;
		path_get(&fs->root);
		read_unlock(&fs->lock);
		result = 0;
		put_fs_struct(fs);
	}
	return result;
}

/*
 * Return zero if current may access user memory in @task, -error if not.
 */
static int check_mem_permission(struct task_struct *task)
{
	/*
	 * A task can always look at itself, in case it chooses
	 * to use system calls instead of load instructions.
*/
	if (task == current)
		return 0;

	/*
	 * If current is actively ptrace'ing, and would also be
	 * permitted to freshly attach with ptrace now, permit it.
	 */
	if (task_is_stopped_or_traced(task)) {
		int match;
		/* the tracer pointer is only compared, under RCU */
		rcu_read_lock();
		match = (tracehook_tracer_task(task) == current);
		rcu_read_unlock();
		if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
			return 0;
	}

	/*
	 * No one else is allowed.
	 */
	return -EPERM;
}

/*
 * Pin @task's mm for the maps-style readers.
 *
 * On success the mm is returned with a get_task_mm() reference taken and
 * mmap_sem still held for reading.  NULL is returned, with nothing held,
 * when the task has no mm, when task->mm changed after the reference was
 * taken, or when the mm is not current's own and
 * __ptrace_may_access(PTRACE_MODE_READ) refuses.
 */
struct mm_struct *mm_for_maps(struct task_struct *task)
{
	struct mm_struct *mm = get_task_mm(task);
	if (!mm)
		return NULL;
	down_read(&mm->mmap_sem);
	task_lock(task);
	/* re-check under task_lock that the task still uses this mm */
	if (task->mm != mm)
		goto out;
	if (task->mm != current->mm &&
	    __ptrace_may_access(task, PTRACE_MODE_READ) < 0)
		goto out;
	task_unlock(task);
	return mm;
out:
	task_unlock(task);
	up_read(&mm->mmap_sem);
	mmput(mm);
	return NULL;
}

/*
 * proc_read hook: copy the task's argument area (arg_start..arg_end,
 * capped at PAGE_SIZE) into @buffer.  Returns the number of bytes
 * produced; 0 when the task has no mm or arg_end is not set yet.
 */
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
	int res = 0;
	unsigned int len;
	struct mm_struct *mm = get_task_mm(task);
	if (!mm)
		goto out;
	if (!mm->arg_end)
		goto out_mm;	/* Shh! No looking before we're done */

	len = mm->arg_end - mm->arg_start;

	if (len > PAGE_SIZE)
		len = PAGE_SIZE;

	res = access_process_vm(task, mm->arg_start, buffer, len, 0);

	// If the nul at the end of args has been overwritten, then
	// assume application is using setproctitle(3).
if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
		len = strnlen(buffer, res);
		if (len < res) {
			/* an earlier NUL exists: stop at the first string */
			res = len;
		} else {
			/*
			 * No NUL at all in the argument area: append the
			 * environment area (as far as the page allows) and
			 * cut the result at the first NUL found.
			 */
			len = mm->env_end - mm->env_start;
			if (len > PAGE_SIZE - res)
				len = PAGE_SIZE - res;
			res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
			res = strnlen(buffer, res);
		}
	}
out_mm:
	mmput(mm);
out:
	return res;
}

/*
 * proc_read hook: copy mm->saved_auxv into @buffer.
 *
 * The vector is walked two words at a time until an entry whose first
 * word is 0 (AT_NULL); that terminating pair is included in the copy.
 * The copy is capped at PAGE_SIZE.  Returns the number of bytes copied,
 * or 0 when the task has no mm.
 */
static int proc_pid_auxv(struct task_struct *task, char *buffer)
{
	int res = 0;
	struct mm_struct *mm = get_task_mm(task);
	if (mm) {
		unsigned int nwords = 0;
		/* NOTE(review): the loop relies on saved_auxv containing an AT_NULL entry */
		do
			nwords += 2;
		while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
		res = nwords * sizeof(mm->saved_auxv[0]);
		if (res > PAGE_SIZE)
			res = PAGE_SIZE;
		memcpy(buffer, mm->saved_auxv, res);
		mmput(mm);
	}
	return res;
}


#ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 * Returns the resolved symbol.  If that fails, simply return the address.
325 */ 326 static int proc_pid_wchan(struct task_struct *task, char *buffer) 327 { 328 unsigned long wchan; 329 char symname[KSYM_NAME_LEN]; 330 331 wchan = get_wchan(task); 332 333 if (lookup_symbol_name(wchan, symname) < 0) 334 return sprintf(buffer, "%lu", wchan); 335 else 336 return sprintf(buffer, "%s", symname); 337 } 338 #endif /* CONFIG_KALLSYMS */ 339 340 #ifdef CONFIG_SCHEDSTATS 341 /* 342 * Provides /proc/PID/schedstat 343 */ 344 static int proc_pid_schedstat(struct task_struct *task, char *buffer) 345 { 346 return sprintf(buffer, "%llu %llu %lu\n", 347 (unsigned long long)task->se.sum_exec_runtime, 348 (unsigned long long)task->sched_info.run_delay, 349 task->sched_info.pcount); 350 } 351 #endif 352 353 #ifdef CONFIG_LATENCYTOP 354 static int lstats_show_proc(struct seq_file *m, void *v) 355 { 356 int i; 357 struct inode *inode = m->private; 358 struct task_struct *task = get_proc_task(inode); 359 360 if (!task) 361 return -ESRCH; 362 seq_puts(m, "Latency Top version : v0.1\n"); 363 for (i = 0; i < 32; i++) { 364 if (task->latency_record[i].backtrace[0]) { 365 int q; 366 seq_printf(m, "%i %li %li ", 367 task->latency_record[i].count, 368 task->latency_record[i].time, 369 task->latency_record[i].max); 370 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 371 char sym[KSYM_SYMBOL_LEN]; 372 char *c; 373 if (!task->latency_record[i].backtrace[q]) 374 break; 375 if (task->latency_record[i].backtrace[q] == ULONG_MAX) 376 break; 377 sprint_symbol(sym, task->latency_record[i].backtrace[q]); 378 c = strchr(sym, '+'); 379 if (c) 380 *c = 0; 381 seq_printf(m, "%s ", sym); 382 } 383 seq_printf(m, "\n"); 384 } 385 386 } 387 put_task_struct(task); 388 return 0; 389 } 390 391 static int lstats_open(struct inode *inode, struct file *file) 392 { 393 return single_open(file, lstats_show_proc, inode); 394 } 395 396 static ssize_t lstats_write(struct file *file, const char __user *buf, 397 size_t count, loff_t *offs) 398 { 399 struct task_struct *task = 
get_proc_task(file->f_dentry->d_inode); 400 401 if (!task) 402 return -ESRCH; 403 clear_all_latency_tracing(task); 404 put_task_struct(task); 405 406 return count; 407 } 408 409 static const struct file_operations proc_lstats_operations = { 410 .open = lstats_open, 411 .read = seq_read, 412 .write = lstats_write, 413 .llseek = seq_lseek, 414 .release = single_release, 415 }; 416 417 #endif 418 419 /* The badness from the OOM killer */ 420 unsigned long badness(struct task_struct *p, unsigned long uptime); 421 static int proc_oom_score(struct task_struct *task, char *buffer) 422 { 423 unsigned long points; 424 struct timespec uptime; 425 426 do_posix_clock_monotonic_gettime(&uptime); 427 read_lock(&tasklist_lock); 428 points = badness(task, uptime.tv_sec); 429 read_unlock(&tasklist_lock); 430 return sprintf(buffer, "%lu\n", points); 431 } 432 433 struct limit_names { 434 char *name; 435 char *unit; 436 }; 437 438 static const struct limit_names lnames[RLIM_NLIMITS] = { 439 [RLIMIT_CPU] = {"Max cpu time", "ms"}, 440 [RLIMIT_FSIZE] = {"Max file size", "bytes"}, 441 [RLIMIT_DATA] = {"Max data size", "bytes"}, 442 [RLIMIT_STACK] = {"Max stack size", "bytes"}, 443 [RLIMIT_CORE] = {"Max core file size", "bytes"}, 444 [RLIMIT_RSS] = {"Max resident set", "bytes"}, 445 [RLIMIT_NPROC] = {"Max processes", "processes"}, 446 [RLIMIT_NOFILE] = {"Max open files", "files"}, 447 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, 448 [RLIMIT_AS] = {"Max address space", "bytes"}, 449 [RLIMIT_LOCKS] = {"Max file locks", "locks"}, 450 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, 451 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, 452 [RLIMIT_NICE] = {"Max nice priority", NULL}, 453 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, 454 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, 455 }; 456 457 /* Display limits for a process */ 458 static int proc_pid_limits(struct task_struct *task, char *buffer) 459 { 460 unsigned int i; 461 int count = 0; 462 unsigned long 
flags;
	char *bufptr = buffer;

	struct rlimit rlim[RLIM_NLIMITS];

	/*
	 * Snapshot the limits under the sighand lock so the table is
	 * formatted from a private copy.  Nothing is printed (0 bytes)
	 * when the lock cannot be taken.
	 */
	if (!lock_task_sighand(task, &flags))
		return 0;
	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
	unlock_task_sighand(task, &flags);

	/*
	 * print the file header
	 */
	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
			"Limit", "Soft Limit", "Hard Limit", "Units");

	/* one row per limit: name, soft limit, hard limit, optional unit */
	for (i = 0; i < RLIM_NLIMITS; i++) {
		if (rlim[i].rlim_cur == RLIM_INFINITY)
			count += sprintf(&bufptr[count], "%-25s %-20s ",
					 lnames[i].name, "unlimited");
		else
			count += sprintf(&bufptr[count], "%-25s %-20lu ",
					 lnames[i].name, rlim[i].rlim_cur);

		if (rlim[i].rlim_max == RLIM_INFINITY)
			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
		else
			count += sprintf(&bufptr[count], "%-20lu ",
					 rlim[i].rlim_max);

		if (lnames[i].unit)
			count += sprintf(&bufptr[count], "%-10s\n",
					 lnames[i].unit);
		else
			count += sprintf(&bufptr[count], "\n");
	}

	return count;
}

#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
/*
 * proc_read hook for the task's current system call.
 *
 * Prints "running" when task_current_syscall() returns non-zero.
 * Prints only nr, sp and pc when the reported number is negative.
 * Otherwise prints nr, the six argument words, then sp and pc.
 */
static int proc_pid_syscall(struct task_struct *task, char *buffer)
{
	long nr;
	unsigned long args[6], sp, pc;

	if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
		return sprintf(buffer, "running\n");

	if (nr < 0)
		return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);

	return sprintf(buffer,
		       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
		       nr,
		       args[0], args[1], args[2], args[3], args[4], args[5],
		       sp, pc);
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */

/************************************************************************/
/*                       Here the fs part begins                        */
/************************************************************************/

/* permission checks */
static int proc_fd_access_allowed(struct inode *inode)
{
	struct task_struct *task;
	int allowed = 0;
	/* Allow
access to a task's file descriptors if it is us or we
	 * may use ptrace to inspect the process and find out that
	 * information.  The check made is PTRACE_MODE_READ.
	 * Returns non-zero when access is allowed, 0 otherwise (including
	 * when the task is gone).
	 */
	task = get_proc_task(inode);
	if (task) {
		allowed = ptrace_may_access(task, PTRACE_MODE_READ);
		put_task_struct(task);
	}
	return allowed;
}

/*
 * setattr for /proc/<pid> inodes: mode changes are refused with -EPERM,
 * everything else goes through inode_change_ok() and inode_setattr().
 */
static int proc_setattr(struct dentry *dentry, struct iattr *attr)
{
	int error;
	struct inode *inode = dentry->d_inode;

	if (attr->ia_valid & ATTR_MODE)
		return -EPERM;

	error = inode_change_ok(inode, attr);
	if (!error)
		error = inode_setattr(inode, attr);
	return error;
}

static const struct inode_operations proc_def_inode_operations = {
	.setattr	= proc_setattr,
};

/*
 * Shared open routine for the mounts, mountinfo and mountstats files.
 *
 * Pins the task's mount namespace and root path, allocates a
 * struct proc_mounts and opens a seq_file on it using @op.
 * The references are released by mounts_release().
 *
 * Returns 0 on success, -EINVAL when the task, its mount namespace or
 * its fs_struct is unavailable, -ENOMEM on allocation failure, or the
 * error from seq_open().
 */
static int mounts_open_common(struct inode *inode, struct file *file,
			      const struct seq_operations *op)
{
	struct task_struct *task = get_proc_task(inode);
	struct nsproxy *nsp;
	struct mnt_namespace *ns = NULL;
	struct fs_struct *fs = NULL;
	struct path root;
	struct proc_mounts *p;
	int ret = -EINVAL;

	if (task) {
		/* the nsproxy is looked up and the namespace pinned under RCU */
		rcu_read_lock();
		nsp = task_nsproxy(task);
		if (nsp) {
			ns = nsp->mnt_ns;
			if (ns)
				get_mnt_ns(ns);
		}
		rcu_read_unlock();
		if (ns)
			fs = get_fs_struct(task);
		put_task_struct(task);
	}

	if (!ns)
		goto err;
	if (!fs)
		goto err_put_ns;

	/* copy and pin the root, then the fs_struct is no longer needed */
	read_lock(&fs->lock);
	root = fs->root;
	path_get(&root);
	read_unlock(&fs->lock);
	put_fs_struct(fs);

	ret = -ENOMEM;
	p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
	if (!p)
		goto err_put_path;

	/* private_data is pointed at the embedded seq_file before seq_open() */
	file->private_data = &p->m;
	ret = seq_open(file, op);
	if (ret)
		goto err_free;

	p->m.private = p;
	p->ns = ns;
	p->root = root;
	p->event = ns->event;

	return 0;

 err_free:
	kfree(p);
 err_put_path:
	path_put(&root);
 err_put_ns:
	put_mnt_ns(ns);
 err:
	return ret;
}

static int
mounts_release(struct inode *inode, struct file *file)
{
	/*
	 * Drops the root path and namespace references taken in
	 * mounts_open_common().
	 * NOTE(review): private_data was set to &p->m there, so this cast
	 * presumably relies on 'm' being the first member of proc_mounts.
	 */
	struct proc_mounts *p = file->private_data;
	path_put(&p->root);
	put_mnt_ns(p->ns);
	return seq_release(inode, file);
}

/*
 * poll hook: reports POLLERR once each time the namespace's event
 * counter differs from the value cached in the open file; otherwise 0.
 */
static unsigned mounts_poll(struct file *file, poll_table *wait)
{
	struct proc_mounts *p = file->private_data;
	struct mnt_namespace *ns = p->ns;
	unsigned res = 0;

	poll_wait(file, &ns->poll, wait);

	/* compare and resynchronise the cached counter under vfsmount_lock */
	spin_lock(&vfsmount_lock);
	if (p->event != ns->event) {
		p->event = ns->event;
		res = POLLERR;
	}
	spin_unlock(&vfsmount_lock);

	return res;
}

/* open hook using the mounts_op seq operations */
static int mounts_open(struct inode *inode, struct file *file)
{
	return mounts_open_common(inode, file, &mounts_op);
}

static const struct file_operations proc_mounts_operations = {
	.open		= mounts_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= mounts_release,
	.poll		= mounts_poll,
};

/* open hook using the mountinfo_op seq operations */
static int mountinfo_open(struct inode *inode, struct file *file)
{
	return mounts_open_common(inode, file, &mountinfo_op);
}

static const struct file_operations proc_mountinfo_operations = {
	.open		= mountinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= mounts_release,
	.poll		= mounts_poll,
};

/* open hook using the mountstats_op seq operations */
static int mountstats_open(struct inode *inode, struct file *file)
{
	return mounts_open_common(inode, file, &mountstats_op);
}

/* unlike the two tables above, this one has no .poll hook */
static const struct file_operations proc_mountstats_operations = {
	.open		= mountstats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= mounts_release,
};

#define PROC_BLOCK_SIZE	(3*1024)		/* 4K page size but our output routines use some slack for overruns */

/*
 * read hook behind INF() entries: the entry's proc_read callback formats
 * its text into a freshly allocated page, and the requested window of
 * that text is copied to user space.
 */
static ssize_t proc_info_read(struct file * file, char __user * buf,
			  size_t count, loff_t *ppos)
{
	struct inode * inode = file->f_path.dentry->d_inode;
	unsigned long page;
	ssize_t length;
	struct task_struct
*task = get_proc_task(inode);

	length = -ESRCH;
	if (!task)
		goto out_no_task;

	/* a single read never returns more than PROC_BLOCK_SIZE bytes */
	if (count > PROC_BLOCK_SIZE)
		count = PROC_BLOCK_SIZE;

	length = -ENOMEM;
	if (!(page = __get_free_page(GFP_TEMPORARY)))
		goto out;

	length = PROC_I(inode)->op.proc_read(task, (char*)page);

	/* a negative length from the callback is returned as the error */
	if (length >= 0)
		length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
	free_page(page);
out:
	put_task_struct(task);
out_no_task:
	return length;
}

static const struct file_operations proc_info_file_operations = {
	.read		= proc_info_read,
};

/*
 * seq_file show routine behind ONE() entries: looks up the task from the
 * inode's pid and calls the entry's proc_show callback with the
 * superblock's pid namespace, the pid and the task.
 * Returns the callback's result, or -ESRCH when the task is gone.
 */
static int proc_single_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct pid_namespace *ns;
	struct pid *pid;
	struct task_struct *task;
	int ret;

	ns = inode->i_sb->s_fs_info;
	pid = proc_pid(inode);
	task = get_pid_task(pid, PIDTYPE_PID);
	if (!task)
		return -ESRCH;

	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);

	put_task_struct(task);
	return ret;
}

/* open hook: single-shot seq_file whose private data is set to the inode. */
static int proc_single_open(struct inode *inode, struct file *filp)
{
	int ret;
	ret = single_open(filp, proc_single_show, NULL);
	if (!ret) {
		struct seq_file *m = filp->private_data;

		m->private = inode;
	}
	return ret;
}

static const struct file_operations proc_single_file_operations = {
	.open		= proc_single_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/*
 * open hook for the mem file: remember the opener's self_exec_id so
 * mem_read() can compare it with the reader's.
 */
static int mem_open(struct inode* inode, struct file* file)
{
	file->private_data = (void*)((long)current->self_exec_id);
	return 0;
}

/*
 * read hook for the mem file: copy @count bytes of the task's address
 * space, starting at the file position, to @buf one page at a time.
 *
 * Returns the number of bytes copied, 0 when the task has no mm,
 * -ESRCH when the task is gone or check_mem_permission() refuses,
 * -ENOMEM, -EFAULT, or -EIO (self_exec_id mismatch, or nothing could be
 * read at all).
 */
static ssize_t mem_read(struct file * file, char __user * buf,
			size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
	char *page;
	unsigned long src = *ppos;
	int ret = -ESRCH;
	struct mm_struct *mm;

	if (!task)
goto out_no_task;

	/* ret is still -ESRCH here, so a refusal is reported as -ESRCH */
	if (check_mem_permission(task))
		goto out;

	ret = -ENOMEM;
	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page)
		goto out;

	ret = 0;

	mm = get_task_mm(task);
	if (!mm)
		goto out_free;

	ret = -EIO;

	/* the reader must have the self_exec_id recorded by mem_open() */
	if (file->private_data != (void*)((long)current->self_exec_id))
		goto out_put;

	ret = 0;

	while (count > 0) {
		int this_len, retval;

		this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		retval = access_process_vm(task, src, page, this_len, 0);
		/* permission is re-checked after every chunk */
		if (!retval || check_mem_permission(task)) {
			if (!ret)
				ret = -EIO;
			break;
		}

		if (copy_to_user(buf, page, retval)) {
			ret = -EFAULT;
			break;
		}

		ret += retval;
		src += retval;
		buf += retval;
		count -= retval;
	}
	*ppos = src;

out_put:
	mmput(mm);
out_free:
	free_page((unsigned long) page);
out:
	put_task_struct(task);
out_no_task:
	return ret;
}

/*
 * Writing is disabled: with mem_write defined as NULL, the #ifndef block
 * below is compiled out and the .write slot of proc_mem_operations is NULL.
 */
#define mem_write NULL

#ifndef mem_write
/* This is a security hazard */
static ssize_t mem_write(struct file * file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	int copied;
	char *page;
	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
	unsigned long dst = *ppos;

	copied = -ESRCH;
	if (!task)
		goto out_no_task;

	if (check_mem_permission(task))
		goto out;

	copied = -ENOMEM;
	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page)
		goto out;

	copied = 0;
	while (count > 0) {
		int this_len, retval;

		this_len = (count > PAGE_SIZE) ?
PAGE_SIZE : count;
		if (copy_from_user(page, buf, this_len)) {
			copied = -EFAULT;
			break;
		}
		retval = access_process_vm(task, dst, page, this_len, 1);
		if (!retval) {
			/* report -EIO only when nothing was written at all */
			if (!copied)
				copied = -EIO;
			break;
		}
		copied += retval;
		buf += retval;
		dst += retval;
		count -= retval;
	}
	*ppos = dst;
	free_page((unsigned long) page);
out:
	put_task_struct(task);
out_no_task:
	return copied;
}
#endif

/*
 * llseek hook for the mem file.
 *
 * @orig 0 sets the position to @offset, @orig 1 adds @offset to it;
 * any other value returns -EINVAL.  No range check is applied to the
 * resulting position, and force_successful_syscall_return() is called
 * before it is returned.
 */
loff_t mem_lseek(struct file *file, loff_t offset, int orig)
{
	switch (orig) {
	case 0:
		file->f_pos = offset;
		break;
	case 1:
		file->f_pos += offset;
		break;
	default:
		return -EINVAL;
	}
	force_successful_syscall_return();
	return file->f_pos;
}

static const struct file_operations proc_mem_operations = {
	.llseek		= mem_lseek,
	.read		= mem_read,
	.write		= mem_write,
	.open		= mem_open,
};

/*
 * read hook for the environ file: copy the task's environment area
 * (mm->env_start..mm->env_end), starting at the file position, to @buf
 * one page at a time.
 *
 * Returns the number of bytes copied, 0 at end of the area or when the
 * task has no mm, -ESRCH when the task is gone or
 * ptrace_may_access(PTRACE_MODE_READ) refuses, -ENOMEM or -EFAULT.
 */
static ssize_t environ_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
	char *page;
	unsigned long src = *ppos;
	int ret = -ESRCH;
	struct mm_struct *mm;

	if (!task)
		goto out_no_task;

	/* ret is still -ESRCH here, so a refusal is reported as -ESRCH */
	if (!ptrace_may_access(task, PTRACE_MODE_READ))
		goto out;

	ret = -ENOMEM;
	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page)
		goto out;

	ret = 0;

	mm = get_task_mm(task);
	if (!mm)
		goto out_free;

	while (count > 0) {
		int this_len, retval, max_len;

		/* bytes left between the current offset and env_end */
		this_len = mm->env_end - (mm->env_start + src);

		if (this_len <= 0)
			break;

		max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		this_len = (this_len > max_len) ?
max_len : this_len; 947 948 retval = access_process_vm(task, (mm->env_start + src), 949 page, this_len, 0); 950 951 if (retval <= 0) { 952 ret = retval; 953 break; 954 } 955 956 if (copy_to_user(buf, page, retval)) { 957 ret = -EFAULT; 958 break; 959 } 960 961 ret += retval; 962 src += retval; 963 buf += retval; 964 count -= retval; 965 } 966 *ppos = src; 967 968 mmput(mm); 969 out_free: 970 free_page((unsigned long) page); 971 out: 972 put_task_struct(task); 973 out_no_task: 974 return ret; 975 } 976 977 static const struct file_operations proc_environ_operations = { 978 .read = environ_read, 979 }; 980 981 static ssize_t oom_adjust_read(struct file *file, char __user *buf, 982 size_t count, loff_t *ppos) 983 { 984 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 985 char buffer[PROC_NUMBUF]; 986 size_t len; 987 int oom_adjust; 988 989 if (!task) 990 return -ESRCH; 991 oom_adjust = task->oomkilladj; 992 put_task_struct(task); 993 994 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 995 996 return simple_read_from_buffer(buf, count, ppos, buffer, len); 997 } 998 999 static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 1000 size_t count, loff_t *ppos) 1001 { 1002 struct task_struct *task; 1003 char buffer[PROC_NUMBUF], *end; 1004 int oom_adjust; 1005 1006 memset(buffer, 0, sizeof(buffer)); 1007 if (count > sizeof(buffer) - 1) 1008 count = sizeof(buffer) - 1; 1009 if (copy_from_user(buffer, buf, count)) 1010 return -EFAULT; 1011 oom_adjust = simple_strtol(buffer, &end, 0); 1012 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && 1013 oom_adjust != OOM_DISABLE) 1014 return -EINVAL; 1015 if (*end == '\n') 1016 end++; 1017 task = get_proc_task(file->f_path.dentry->d_inode); 1018 if (!task) 1019 return -ESRCH; 1020 if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { 1021 put_task_struct(task); 1022 return -EACCES; 1023 } 1024 task->oomkilladj = oom_adjust; 1025 put_task_struct(task); 
1026 if (end - buffer == 0) 1027 return -EIO; 1028 return end - buffer; 1029 } 1030 1031 static const struct file_operations proc_oom_adjust_operations = { 1032 .read = oom_adjust_read, 1033 .write = oom_adjust_write, 1034 }; 1035 1036 #ifdef CONFIG_AUDITSYSCALL 1037 #define TMPBUFLEN 21 1038 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 1039 size_t count, loff_t *ppos) 1040 { 1041 struct inode * inode = file->f_path.dentry->d_inode; 1042 struct task_struct *task = get_proc_task(inode); 1043 ssize_t length; 1044 char tmpbuf[TMPBUFLEN]; 1045 1046 if (!task) 1047 return -ESRCH; 1048 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1049 audit_get_loginuid(task)); 1050 put_task_struct(task); 1051 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1052 } 1053 1054 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 1055 size_t count, loff_t *ppos) 1056 { 1057 struct inode * inode = file->f_path.dentry->d_inode; 1058 char *page, *tmp; 1059 ssize_t length; 1060 uid_t loginuid; 1061 1062 if (!capable(CAP_AUDIT_CONTROL)) 1063 return -EPERM; 1064 1065 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) 1066 return -EPERM; 1067 1068 if (count >= PAGE_SIZE) 1069 count = PAGE_SIZE - 1; 1070 1071 if (*ppos != 0) { 1072 /* No partial writes. 
*/ 1073 return -EINVAL; 1074 } 1075 page = (char*)__get_free_page(GFP_TEMPORARY); 1076 if (!page) 1077 return -ENOMEM; 1078 length = -EFAULT; 1079 if (copy_from_user(page, buf, count)) 1080 goto out_free_page; 1081 1082 page[count] = '\0'; 1083 loginuid = simple_strtoul(page, &tmp, 10); 1084 if (tmp == page) { 1085 length = -EINVAL; 1086 goto out_free_page; 1087 1088 } 1089 length = audit_set_loginuid(current, loginuid); 1090 if (likely(length == 0)) 1091 length = count; 1092 1093 out_free_page: 1094 free_page((unsigned long) page); 1095 return length; 1096 } 1097 1098 static const struct file_operations proc_loginuid_operations = { 1099 .read = proc_loginuid_read, 1100 .write = proc_loginuid_write, 1101 }; 1102 1103 static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1104 size_t count, loff_t *ppos) 1105 { 1106 struct inode * inode = file->f_path.dentry->d_inode; 1107 struct task_struct *task = get_proc_task(inode); 1108 ssize_t length; 1109 char tmpbuf[TMPBUFLEN]; 1110 1111 if (!task) 1112 return -ESRCH; 1113 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1114 audit_get_sessionid(task)); 1115 put_task_struct(task); 1116 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1117 } 1118 1119 static const struct file_operations proc_sessionid_operations = { 1120 .read = proc_sessionid_read, 1121 }; 1122 #endif 1123 1124 #ifdef CONFIG_FAULT_INJECTION 1125 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, 1126 size_t count, loff_t *ppos) 1127 { 1128 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 1129 char buffer[PROC_NUMBUF]; 1130 size_t len; 1131 int make_it_fail; 1132 1133 if (!task) 1134 return -ESRCH; 1135 make_it_fail = task->make_it_fail; 1136 put_task_struct(task); 1137 1138 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); 1139 1140 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1141 } 1142 1143 static ssize_t proc_fault_inject_write(struct file * file, 
1144 const char __user * buf, size_t count, loff_t *ppos) 1145 { 1146 struct task_struct *task; 1147 char buffer[PROC_NUMBUF], *end; 1148 int make_it_fail; 1149 1150 if (!capable(CAP_SYS_RESOURCE)) 1151 return -EPERM; 1152 memset(buffer, 0, sizeof(buffer)); 1153 if (count > sizeof(buffer) - 1) 1154 count = sizeof(buffer) - 1; 1155 if (copy_from_user(buffer, buf, count)) 1156 return -EFAULT; 1157 make_it_fail = simple_strtol(buffer, &end, 0); 1158 if (*end == '\n') 1159 end++; 1160 task = get_proc_task(file->f_dentry->d_inode); 1161 if (!task) 1162 return -ESRCH; 1163 task->make_it_fail = make_it_fail; 1164 put_task_struct(task); 1165 if (end - buffer == 0) 1166 return -EIO; 1167 return end - buffer; 1168 } 1169 1170 static const struct file_operations proc_fault_inject_operations = { 1171 .read = proc_fault_inject_read, 1172 .write = proc_fault_inject_write, 1173 }; 1174 #endif 1175 1176 1177 #ifdef CONFIG_SCHED_DEBUG 1178 /* 1179 * Print out various scheduling related per-task fields: 1180 */ 1181 static int sched_show(struct seq_file *m, void *v) 1182 { 1183 struct inode *inode = m->private; 1184 struct task_struct *p; 1185 1186 p = get_proc_task(inode); 1187 if (!p) 1188 return -ESRCH; 1189 proc_sched_show_task(p, m); 1190 1191 put_task_struct(p); 1192 1193 return 0; 1194 } 1195 1196 static ssize_t 1197 sched_write(struct file *file, const char __user *buf, 1198 size_t count, loff_t *offset) 1199 { 1200 struct inode *inode = file->f_path.dentry->d_inode; 1201 struct task_struct *p; 1202 1203 p = get_proc_task(inode); 1204 if (!p) 1205 return -ESRCH; 1206 proc_sched_set_task(p); 1207 1208 put_task_struct(p); 1209 1210 return count; 1211 } 1212 1213 static int sched_open(struct inode *inode, struct file *filp) 1214 { 1215 int ret; 1216 1217 ret = single_open(filp, sched_show, NULL); 1218 if (!ret) { 1219 struct seq_file *m = filp->private_data; 1220 1221 m->private = inode; 1222 } 1223 return ret; 1224 } 1225 1226 static const struct file_operations 
proc_pid_sched_operations = {
	.open		= sched_open,
	.read		= seq_read,
	.write		= sched_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#endif

/*
 * We added or removed a vma mapping the executable. The vmas are only mapped
 * during exec and are not mapped with the mmap system call.
 * Callers must hold down_write() on the mm's mmap_sem for these
 */
void added_exe_file_vma(struct mm_struct *mm)
{
	mm->num_exe_file_vmas++;
}

/*
 * Drop one executable-vma count; when it reaches zero the mm's exe_file
 * reference is released and the pointer cleared.
 */
void removed_exe_file_vma(struct mm_struct *mm)
{
	mm->num_exe_file_vmas--;
	if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
		fput(mm->exe_file);
		mm->exe_file = NULL;
	}

}

/*
 * Replace the mm's exe_file with @new_exe_file (which may be NULL) and
 * reset the executable-vma count to zero.  A reference is taken on the
 * new file before the reference on the old one is dropped.
 */
void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
{
	if (new_exe_file)
		get_file(new_exe_file);
	if (mm->exe_file)
		fput(mm->exe_file);
	mm->exe_file = new_exe_file;
	mm->num_exe_file_vmas = 0;
}

/*
 * Return the mm's exe_file with an extra reference taken, or NULL when
 * there is none.  The caller releases it with fput().
 */
struct file *get_mm_exe_file(struct mm_struct *mm)
{
	struct file *exe_file;

	/* We need mmap_sem to protect against races with removal of
	 * VM_EXECUTABLE vmas */
	down_read(&mm->mmap_sem);
	exe_file = mm->exe_file;
	if (exe_file)
		get_file(exe_file);
	up_read(&mm->mmap_sem);
	return exe_file;
}

/* Give @newmm its own reference to @oldmm's exe_file (possibly NULL). */
void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
{
	/* It's safe to write the exe_file pointer without exe_file_lock because
	 * this is called during fork when the task is not yet in /proc.
	 * NOTE(review): no exe_file_lock is visible in this part of the file;
	 * the other exe_file accessors here rely on mmap_sem. */
	newmm->exe_file = get_mm_exe_file(oldmm);
}

/*
 * proc_get_link hook for the exe symlink: copy the path of the task's
 * executable into *@exe_path and take a reference on it.
 * Returns 0 on success, -ENOENT when the task, its mm or the exe_file
 * is missing.
 */
static int proc_exe_link(struct inode *inode, struct path *exe_path)
{
	struct task_struct *task;
	struct mm_struct *mm;
	struct file *exe_file;

	task = get_proc_task(inode);
	if (!task)
		return -ENOENT;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		return -ENOENT;
exe_file = get_mm_exe_file(mm); 1301 mmput(mm); 1302 if (exe_file) { 1303 *exe_path = exe_file->f_path; 1304 path_get(&exe_file->f_path); 1305 fput(exe_file); 1306 return 0; 1307 } else 1308 return -ENOENT; 1309 } 1310 1311 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1312 { 1313 struct inode *inode = dentry->d_inode; 1314 int error = -EACCES; 1315 1316 /* We don't need a base pointer in the /proc filesystem */ 1317 path_put(&nd->path); 1318 1319 /* Are we allowed to snoop on the tasks file descriptors? */ 1320 if (!proc_fd_access_allowed(inode)) 1321 goto out; 1322 1323 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1324 nd->last_type = LAST_BIND; 1325 out: 1326 return ERR_PTR(error); 1327 } 1328 1329 static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) 1330 { 1331 char *tmp = (char*)__get_free_page(GFP_TEMPORARY); 1332 char *pathname; 1333 int len; 1334 1335 if (!tmp) 1336 return -ENOMEM; 1337 1338 pathname = d_path(path, tmp, PAGE_SIZE); 1339 len = PTR_ERR(pathname); 1340 if (IS_ERR(pathname)) 1341 goto out; 1342 len = tmp + PAGE_SIZE - 1 - pathname; 1343 1344 if (len > buflen) 1345 len = buflen; 1346 if (copy_to_user(buffer, pathname, len)) 1347 len = -EFAULT; 1348 out: 1349 free_page((unsigned long)tmp); 1350 return len; 1351 } 1352 1353 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1354 { 1355 int error = -EACCES; 1356 struct inode *inode = dentry->d_inode; 1357 struct path path; 1358 1359 /* Are we allowed to snoop on the tasks file descriptors? 
*/ 1360 if (!proc_fd_access_allowed(inode)) 1361 goto out; 1362 1363 error = PROC_I(inode)->op.proc_get_link(inode, &path); 1364 if (error) 1365 goto out; 1366 1367 error = do_proc_readlink(&path, buffer, buflen); 1368 path_put(&path); 1369 out: 1370 return error; 1371 } 1372 1373 static const struct inode_operations proc_pid_link_inode_operations = { 1374 .readlink = proc_pid_readlink, 1375 .follow_link = proc_pid_follow_link, 1376 .setattr = proc_setattr, 1377 }; 1378 1379 1380 /* building an inode */ 1381 1382 static int task_dumpable(struct task_struct *task) 1383 { 1384 int dumpable = 0; 1385 struct mm_struct *mm; 1386 1387 task_lock(task); 1388 mm = task->mm; 1389 if (mm) 1390 dumpable = get_dumpable(mm); 1391 task_unlock(task); 1392 if(dumpable == 1) 1393 return 1; 1394 return 0; 1395 } 1396 1397 1398 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) 1399 { 1400 struct inode * inode; 1401 struct proc_inode *ei; 1402 const struct cred *cred; 1403 1404 /* We need a new inode */ 1405 1406 inode = new_inode(sb); 1407 if (!inode) 1408 goto out; 1409 1410 /* Common stuff */ 1411 ei = PROC_I(inode); 1412 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1413 inode->i_op = &proc_def_inode_operations; 1414 1415 /* 1416 * grab the reference to task. 
1417 */ 1418 ei->pid = get_task_pid(task, PIDTYPE_PID); 1419 if (!ei->pid) 1420 goto out_unlock; 1421 1422 inode->i_uid = 0; 1423 inode->i_gid = 0; 1424 if (task_dumpable(task)) { 1425 rcu_read_lock(); 1426 cred = __task_cred(task); 1427 inode->i_uid = cred->euid; 1428 inode->i_gid = cred->egid; 1429 rcu_read_unlock(); 1430 } 1431 security_task_to_inode(task, inode); 1432 1433 out: 1434 return inode; 1435 1436 out_unlock: 1437 iput(inode); 1438 return NULL; 1439 } 1440 1441 static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1442 { 1443 struct inode *inode = dentry->d_inode; 1444 struct task_struct *task; 1445 const struct cred *cred; 1446 1447 generic_fillattr(inode, stat); 1448 1449 rcu_read_lock(); 1450 stat->uid = 0; 1451 stat->gid = 0; 1452 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1453 if (task) { 1454 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1455 task_dumpable(task)) { 1456 cred = __task_cred(task); 1457 stat->uid = cred->euid; 1458 stat->gid = cred->egid; 1459 } 1460 } 1461 rcu_read_unlock(); 1462 return 0; 1463 } 1464 1465 /* dentry stuff */ 1466 1467 /* 1468 * Exceptional case: normally we are not allowed to unhash a busy 1469 * directory. In this case, however, we can do it - no aliasing problems 1470 * due to the way we treat inodes. 1471 * 1472 * Rewrite the inode's ownerships here because the owning task may have 1473 * performed a setuid(), etc. 1474 * 1475 * Before the /proc/pid/status file was created the only way to read 1476 * the effective uid of a /process was to stat /proc/pid. Reading 1477 * /proc/pid/status is slow enough that procps and other packages 1478 * kept stating /proc/pid. To keep the rules in /proc simple I have 1479 * made this apply to all per process world readable and executable 1480 * directories. 
1481 */ 1482 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1483 { 1484 struct inode *inode = dentry->d_inode; 1485 struct task_struct *task = get_proc_task(inode); 1486 const struct cred *cred; 1487 1488 if (task) { 1489 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1490 task_dumpable(task)) { 1491 rcu_read_lock(); 1492 cred = __task_cred(task); 1493 inode->i_uid = cred->euid; 1494 inode->i_gid = cred->egid; 1495 rcu_read_unlock(); 1496 } else { 1497 inode->i_uid = 0; 1498 inode->i_gid = 0; 1499 } 1500 inode->i_mode &= ~(S_ISUID | S_ISGID); 1501 security_task_to_inode(task, inode); 1502 put_task_struct(task); 1503 return 1; 1504 } 1505 d_drop(dentry); 1506 return 0; 1507 } 1508 1509 static int pid_delete_dentry(struct dentry * dentry) 1510 { 1511 /* Is the task we represent dead? 1512 * If so, then don't put the dentry on the lru list, 1513 * kill it immediately. 1514 */ 1515 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1516 } 1517 1518 static struct dentry_operations pid_dentry_operations = 1519 { 1520 .d_revalidate = pid_revalidate, 1521 .d_delete = pid_delete_dentry, 1522 }; 1523 1524 /* Lookups */ 1525 1526 typedef struct dentry *instantiate_t(struct inode *, struct dentry *, 1527 struct task_struct *, const void *); 1528 1529 /* 1530 * Fill a directory entry. 1531 * 1532 * If possible create the dcache entry and derive our inode number and 1533 * file type from dcache entry. 1534 * 1535 * Since all of the proc inode numbers are dynamically generated, the inode 1536 * numbers do not exist until the inode is cache. This means creating the 1537 * the dcache entry in readdir is necessary to keep the inode numbers 1538 * reported by readdir in sync with the inode numbers reported 1539 * by stat. 
1540 */ 1541 static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1542 char *name, int len, 1543 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1544 { 1545 struct dentry *child, *dir = filp->f_path.dentry; 1546 struct inode *inode; 1547 struct qstr qname; 1548 ino_t ino = 0; 1549 unsigned type = DT_UNKNOWN; 1550 1551 qname.name = name; 1552 qname.len = len; 1553 qname.hash = full_name_hash(name, len); 1554 1555 child = d_lookup(dir, &qname); 1556 if (!child) { 1557 struct dentry *new; 1558 new = d_alloc(dir, &qname); 1559 if (new) { 1560 child = instantiate(dir->d_inode, new, task, ptr); 1561 if (child) 1562 dput(new); 1563 else 1564 child = new; 1565 } 1566 } 1567 if (!child || IS_ERR(child) || !child->d_inode) 1568 goto end_instantiate; 1569 inode = child->d_inode; 1570 if (inode) { 1571 ino = inode->i_ino; 1572 type = inode->i_mode >> 12; 1573 } 1574 dput(child); 1575 end_instantiate: 1576 if (!ino) 1577 ino = find_inode_number(dir, &qname); 1578 if (!ino) 1579 ino = 1; 1580 return filldir(dirent, name, len, filp->f_pos, ino, type); 1581 } 1582 1583 static unsigned name_to_int(struct dentry *dentry) 1584 { 1585 const char *name = dentry->d_name.name; 1586 int len = dentry->d_name.len; 1587 unsigned n = 0; 1588 1589 if (len > 1 && *name == '0') 1590 goto out; 1591 while (len-- > 0) { 1592 unsigned c = *name++ - '0'; 1593 if (c > 9) 1594 goto out; 1595 if (n >= (~0U-9)/10) 1596 goto out; 1597 n *= 10; 1598 n += c; 1599 } 1600 return n; 1601 out: 1602 return ~0U; 1603 } 1604 1605 #define PROC_FDINFO_MAX 64 1606 1607 static int proc_fd_info(struct inode *inode, struct path *path, char *info) 1608 { 1609 struct task_struct *task = get_proc_task(inode); 1610 struct files_struct *files = NULL; 1611 struct file *file; 1612 int fd = proc_fd(inode); 1613 1614 if (task) { 1615 files = get_files_struct(task); 1616 put_task_struct(task); 1617 } 1618 if (files) { 1619 /* 1620 * We are not taking a ref to the file structure, so 
we must 1621 * hold ->file_lock. 1622 */ 1623 spin_lock(&files->file_lock); 1624 file = fcheck_files(files, fd); 1625 if (file) { 1626 if (path) { 1627 *path = file->f_path; 1628 path_get(&file->f_path); 1629 } 1630 if (info) 1631 snprintf(info, PROC_FDINFO_MAX, 1632 "pos:\t%lli\n" 1633 "flags:\t0%o\n", 1634 (long long) file->f_pos, 1635 file->f_flags); 1636 spin_unlock(&files->file_lock); 1637 put_files_struct(files); 1638 return 0; 1639 } 1640 spin_unlock(&files->file_lock); 1641 put_files_struct(files); 1642 } 1643 return -ENOENT; 1644 } 1645 1646 static int proc_fd_link(struct inode *inode, struct path *path) 1647 { 1648 return proc_fd_info(inode, path, NULL); 1649 } 1650 1651 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1652 { 1653 struct inode *inode = dentry->d_inode; 1654 struct task_struct *task = get_proc_task(inode); 1655 int fd = proc_fd(inode); 1656 struct files_struct *files; 1657 const struct cred *cred; 1658 1659 if (task) { 1660 files = get_files_struct(task); 1661 if (files) { 1662 rcu_read_lock(); 1663 if (fcheck_files(files, fd)) { 1664 rcu_read_unlock(); 1665 put_files_struct(files); 1666 if (task_dumpable(task)) { 1667 rcu_read_lock(); 1668 cred = __task_cred(task); 1669 inode->i_uid = cred->euid; 1670 inode->i_gid = cred->egid; 1671 rcu_read_unlock(); 1672 } else { 1673 inode->i_uid = 0; 1674 inode->i_gid = 0; 1675 } 1676 inode->i_mode &= ~(S_ISUID | S_ISGID); 1677 security_task_to_inode(task, inode); 1678 put_task_struct(task); 1679 return 1; 1680 } 1681 rcu_read_unlock(); 1682 put_files_struct(files); 1683 } 1684 put_task_struct(task); 1685 } 1686 d_drop(dentry); 1687 return 0; 1688 } 1689 1690 static struct dentry_operations tid_fd_dentry_operations = 1691 { 1692 .d_revalidate = tid_fd_revalidate, 1693 .d_delete = pid_delete_dentry, 1694 }; 1695 1696 static struct dentry *proc_fd_instantiate(struct inode *dir, 1697 struct dentry *dentry, struct task_struct *task, const void *ptr) 1698 { 1699 unsigned fd = 
*(const unsigned *)ptr; 1700 struct file *file; 1701 struct files_struct *files; 1702 struct inode *inode; 1703 struct proc_inode *ei; 1704 struct dentry *error = ERR_PTR(-ENOENT); 1705 1706 inode = proc_pid_make_inode(dir->i_sb, task); 1707 if (!inode) 1708 goto out; 1709 ei = PROC_I(inode); 1710 ei->fd = fd; 1711 files = get_files_struct(task); 1712 if (!files) 1713 goto out_iput; 1714 inode->i_mode = S_IFLNK; 1715 1716 /* 1717 * We are not taking a ref to the file structure, so we must 1718 * hold ->file_lock. 1719 */ 1720 spin_lock(&files->file_lock); 1721 file = fcheck_files(files, fd); 1722 if (!file) 1723 goto out_unlock; 1724 if (file->f_mode & FMODE_READ) 1725 inode->i_mode |= S_IRUSR | S_IXUSR; 1726 if (file->f_mode & FMODE_WRITE) 1727 inode->i_mode |= S_IWUSR | S_IXUSR; 1728 spin_unlock(&files->file_lock); 1729 put_files_struct(files); 1730 1731 inode->i_op = &proc_pid_link_inode_operations; 1732 inode->i_size = 64; 1733 ei->op.proc_get_link = proc_fd_link; 1734 dentry->d_op = &tid_fd_dentry_operations; 1735 d_add(dentry, inode); 1736 /* Close the race of the process dying before we return the dentry */ 1737 if (tid_fd_revalidate(dentry, NULL)) 1738 error = NULL; 1739 1740 out: 1741 return error; 1742 out_unlock: 1743 spin_unlock(&files->file_lock); 1744 put_files_struct(files); 1745 out_iput: 1746 iput(inode); 1747 goto out; 1748 } 1749 1750 static struct dentry *proc_lookupfd_common(struct inode *dir, 1751 struct dentry *dentry, 1752 instantiate_t instantiate) 1753 { 1754 struct task_struct *task = get_proc_task(dir); 1755 unsigned fd = name_to_int(dentry); 1756 struct dentry *result = ERR_PTR(-ENOENT); 1757 1758 if (!task) 1759 goto out_no_task; 1760 if (fd == ~0U) 1761 goto out; 1762 1763 result = instantiate(dir, dentry, task, &fd); 1764 out: 1765 put_task_struct(task); 1766 out_no_task: 1767 return result; 1768 } 1769 1770 static int proc_readfd_common(struct file * filp, void * dirent, 1771 filldir_t filldir, instantiate_t instantiate) 1772 { 1773 
struct dentry *dentry = filp->f_path.dentry; 1774 struct inode *inode = dentry->d_inode; 1775 struct task_struct *p = get_proc_task(inode); 1776 unsigned int fd, ino; 1777 int retval; 1778 struct files_struct * files; 1779 1780 retval = -ENOENT; 1781 if (!p) 1782 goto out_no_task; 1783 retval = 0; 1784 1785 fd = filp->f_pos; 1786 switch (fd) { 1787 case 0: 1788 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 1789 goto out; 1790 filp->f_pos++; 1791 case 1: 1792 ino = parent_ino(dentry); 1793 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1794 goto out; 1795 filp->f_pos++; 1796 default: 1797 files = get_files_struct(p); 1798 if (!files) 1799 goto out; 1800 rcu_read_lock(); 1801 for (fd = filp->f_pos-2; 1802 fd < files_fdtable(files)->max_fds; 1803 fd++, filp->f_pos++) { 1804 char name[PROC_NUMBUF]; 1805 int len; 1806 1807 if (!fcheck_files(files, fd)) 1808 continue; 1809 rcu_read_unlock(); 1810 1811 len = snprintf(name, sizeof(name), "%d", fd); 1812 if (proc_fill_cache(filp, dirent, filldir, 1813 name, len, instantiate, 1814 p, &fd) < 0) { 1815 rcu_read_lock(); 1816 break; 1817 } 1818 rcu_read_lock(); 1819 } 1820 rcu_read_unlock(); 1821 put_files_struct(files); 1822 } 1823 out: 1824 put_task_struct(p); 1825 out_no_task: 1826 return retval; 1827 } 1828 1829 static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, 1830 struct nameidata *nd) 1831 { 1832 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); 1833 } 1834 1835 static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) 1836 { 1837 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); 1838 } 1839 1840 static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, 1841 size_t len, loff_t *ppos) 1842 { 1843 char tmp[PROC_FDINFO_MAX]; 1844 int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); 1845 if (!err) 1846 err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); 1847 return err; 1848 } 1849 1850 static 
const struct file_operations proc_fdinfo_file_operations = { 1851 .open = nonseekable_open, 1852 .read = proc_fdinfo_read, 1853 }; 1854 1855 static const struct file_operations proc_fd_operations = { 1856 .read = generic_read_dir, 1857 .readdir = proc_readfd, 1858 }; 1859 1860 /* 1861 * /proc/pid/fd needs a special permission handler so that a process can still 1862 * access /proc/self/fd after it has executed a setuid(). 1863 */ 1864 static int proc_fd_permission(struct inode *inode, int mask) 1865 { 1866 int rv; 1867 1868 rv = generic_permission(inode, mask, NULL); 1869 if (rv == 0) 1870 return 0; 1871 if (task_pid(current) == proc_pid(inode)) 1872 rv = 0; 1873 return rv; 1874 } 1875 1876 /* 1877 * proc directories can do almost nothing.. 1878 */ 1879 static const struct inode_operations proc_fd_inode_operations = { 1880 .lookup = proc_lookupfd, 1881 .permission = proc_fd_permission, 1882 .setattr = proc_setattr, 1883 }; 1884 1885 static struct dentry *proc_fdinfo_instantiate(struct inode *dir, 1886 struct dentry *dentry, struct task_struct *task, const void *ptr) 1887 { 1888 unsigned fd = *(unsigned *)ptr; 1889 struct inode *inode; 1890 struct proc_inode *ei; 1891 struct dentry *error = ERR_PTR(-ENOENT); 1892 1893 inode = proc_pid_make_inode(dir->i_sb, task); 1894 if (!inode) 1895 goto out; 1896 ei = PROC_I(inode); 1897 ei->fd = fd; 1898 inode->i_mode = S_IFREG | S_IRUSR; 1899 inode->i_fop = &proc_fdinfo_file_operations; 1900 dentry->d_op = &tid_fd_dentry_operations; 1901 d_add(dentry, inode); 1902 /* Close the race of the process dying before we return the dentry */ 1903 if (tid_fd_revalidate(dentry, NULL)) 1904 error = NULL; 1905 1906 out: 1907 return error; 1908 } 1909 1910 static struct dentry *proc_lookupfdinfo(struct inode *dir, 1911 struct dentry *dentry, 1912 struct nameidata *nd) 1913 { 1914 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); 1915 } 1916 1917 static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) 
1918 { 1919 return proc_readfd_common(filp, dirent, filldir, 1920 proc_fdinfo_instantiate); 1921 } 1922 1923 static const struct file_operations proc_fdinfo_operations = { 1924 .read = generic_read_dir, 1925 .readdir = proc_readfdinfo, 1926 }; 1927 1928 /* 1929 * proc directories can do almost nothing.. 1930 */ 1931 static const struct inode_operations proc_fdinfo_inode_operations = { 1932 .lookup = proc_lookupfdinfo, 1933 .setattr = proc_setattr, 1934 }; 1935 1936 1937 static struct dentry *proc_pident_instantiate(struct inode *dir, 1938 struct dentry *dentry, struct task_struct *task, const void *ptr) 1939 { 1940 const struct pid_entry *p = ptr; 1941 struct inode *inode; 1942 struct proc_inode *ei; 1943 struct dentry *error = ERR_PTR(-EINVAL); 1944 1945 inode = proc_pid_make_inode(dir->i_sb, task); 1946 if (!inode) 1947 goto out; 1948 1949 ei = PROC_I(inode); 1950 inode->i_mode = p->mode; 1951 if (S_ISDIR(inode->i_mode)) 1952 inode->i_nlink = 2; /* Use getattr to fix if necessary */ 1953 if (p->iop) 1954 inode->i_op = p->iop; 1955 if (p->fop) 1956 inode->i_fop = p->fop; 1957 ei->op = p->op; 1958 dentry->d_op = &pid_dentry_operations; 1959 d_add(dentry, inode); 1960 /* Close the race of the process dying before we return the dentry */ 1961 if (pid_revalidate(dentry, NULL)) 1962 error = NULL; 1963 out: 1964 return error; 1965 } 1966 1967 static struct dentry *proc_pident_lookup(struct inode *dir, 1968 struct dentry *dentry, 1969 const struct pid_entry *ents, 1970 unsigned int nents) 1971 { 1972 struct inode *inode; 1973 struct dentry *error; 1974 struct task_struct *task = get_proc_task(dir); 1975 const struct pid_entry *p, *last; 1976 1977 error = ERR_PTR(-ENOENT); 1978 inode = NULL; 1979 1980 if (!task) 1981 goto out_no_task; 1982 1983 /* 1984 * Yes, it does not scale. And it should not. Don't add 1985 * new entries into /proc/<tgid>/ without very good reasons. 
1986 */ 1987 last = &ents[nents - 1]; 1988 for (p = ents; p <= last; p++) { 1989 if (p->len != dentry->d_name.len) 1990 continue; 1991 if (!memcmp(dentry->d_name.name, p->name, p->len)) 1992 break; 1993 } 1994 if (p > last) 1995 goto out; 1996 1997 error = proc_pident_instantiate(dir, dentry, task, p); 1998 out: 1999 put_task_struct(task); 2000 out_no_task: 2001 return error; 2002 } 2003 2004 static int proc_pident_fill_cache(struct file *filp, void *dirent, 2005 filldir_t filldir, struct task_struct *task, const struct pid_entry *p) 2006 { 2007 return proc_fill_cache(filp, dirent, filldir, p->name, p->len, 2008 proc_pident_instantiate, task, p); 2009 } 2010 2011 static int proc_pident_readdir(struct file *filp, 2012 void *dirent, filldir_t filldir, 2013 const struct pid_entry *ents, unsigned int nents) 2014 { 2015 int i; 2016 struct dentry *dentry = filp->f_path.dentry; 2017 struct inode *inode = dentry->d_inode; 2018 struct task_struct *task = get_proc_task(inode); 2019 const struct pid_entry *p, *last; 2020 ino_t ino; 2021 int ret; 2022 2023 ret = -ENOENT; 2024 if (!task) 2025 goto out_no_task; 2026 2027 ret = 0; 2028 i = filp->f_pos; 2029 switch (i) { 2030 case 0: 2031 ino = inode->i_ino; 2032 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 2033 goto out; 2034 i++; 2035 filp->f_pos++; 2036 /* fall through */ 2037 case 1: 2038 ino = parent_ino(dentry); 2039 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 2040 goto out; 2041 i++; 2042 filp->f_pos++; 2043 /* fall through */ 2044 default: 2045 i -= 2; 2046 if (i >= nents) { 2047 ret = 1; 2048 goto out; 2049 } 2050 p = ents + i; 2051 last = &ents[nents - 1]; 2052 while (p <= last) { 2053 if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) 2054 goto out; 2055 filp->f_pos++; 2056 p++; 2057 } 2058 } 2059 2060 ret = 1; 2061 out: 2062 put_task_struct(task); 2063 out_no_task: 2064 return ret; 2065 } 2066 2067 #ifdef CONFIG_SECURITY 2068 static ssize_t proc_pid_attr_read(struct file * file, char __user * 
buf, 2069 size_t count, loff_t *ppos) 2070 { 2071 struct inode * inode = file->f_path.dentry->d_inode; 2072 char *p = NULL; 2073 ssize_t length; 2074 struct task_struct *task = get_proc_task(inode); 2075 2076 if (!task) 2077 return -ESRCH; 2078 2079 length = security_getprocattr(task, 2080 (char*)file->f_path.dentry->d_name.name, 2081 &p); 2082 put_task_struct(task); 2083 if (length > 0) 2084 length = simple_read_from_buffer(buf, count, ppos, p, length); 2085 kfree(p); 2086 return length; 2087 } 2088 2089 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 2090 size_t count, loff_t *ppos) 2091 { 2092 struct inode * inode = file->f_path.dentry->d_inode; 2093 char *page; 2094 ssize_t length; 2095 struct task_struct *task = get_proc_task(inode); 2096 2097 length = -ESRCH; 2098 if (!task) 2099 goto out_no_task; 2100 if (count > PAGE_SIZE) 2101 count = PAGE_SIZE; 2102 2103 /* No partial writes. */ 2104 length = -EINVAL; 2105 if (*ppos != 0) 2106 goto out; 2107 2108 length = -ENOMEM; 2109 page = (char*)__get_free_page(GFP_TEMPORARY); 2110 if (!page) 2111 goto out; 2112 2113 length = -EFAULT; 2114 if (copy_from_user(page, buf, count)) 2115 goto out_free; 2116 2117 length = security_setprocattr(task, 2118 (char*)file->f_path.dentry->d_name.name, 2119 (void*)page, count); 2120 out_free: 2121 free_page((unsigned long) page); 2122 out: 2123 put_task_struct(task); 2124 out_no_task: 2125 return length; 2126 } 2127 2128 static const struct file_operations proc_pid_attr_operations = { 2129 .read = proc_pid_attr_read, 2130 .write = proc_pid_attr_write, 2131 }; 2132 2133 static const struct pid_entry attr_dir_stuff[] = { 2134 REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2135 REG("prev", S_IRUGO, proc_pid_attr_operations), 2136 REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2137 REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2138 REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2139 REG("sockcreate", 
S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2140 }; 2141 2142 static int proc_attr_dir_readdir(struct file * filp, 2143 void * dirent, filldir_t filldir) 2144 { 2145 return proc_pident_readdir(filp,dirent,filldir, 2146 attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); 2147 } 2148 2149 static const struct file_operations proc_attr_dir_operations = { 2150 .read = generic_read_dir, 2151 .readdir = proc_attr_dir_readdir, 2152 }; 2153 2154 static struct dentry *proc_attr_dir_lookup(struct inode *dir, 2155 struct dentry *dentry, struct nameidata *nd) 2156 { 2157 return proc_pident_lookup(dir, dentry, 2158 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2159 } 2160 2161 static const struct inode_operations proc_attr_dir_inode_operations = { 2162 .lookup = proc_attr_dir_lookup, 2163 .getattr = pid_getattr, 2164 .setattr = proc_setattr, 2165 }; 2166 2167 #endif 2168 2169 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 2170 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2171 size_t count, loff_t *ppos) 2172 { 2173 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 2174 struct mm_struct *mm; 2175 char buffer[PROC_NUMBUF]; 2176 size_t len; 2177 int ret; 2178 2179 if (!task) 2180 return -ESRCH; 2181 2182 ret = 0; 2183 mm = get_task_mm(task); 2184 if (mm) { 2185 len = snprintf(buffer, sizeof(buffer), "%08lx\n", 2186 ((mm->flags & MMF_DUMP_FILTER_MASK) >> 2187 MMF_DUMP_FILTER_SHIFT)); 2188 mmput(mm); 2189 ret = simple_read_from_buffer(buf, count, ppos, buffer, len); 2190 } 2191 2192 put_task_struct(task); 2193 2194 return ret; 2195 } 2196 2197 static ssize_t proc_coredump_filter_write(struct file *file, 2198 const char __user *buf, 2199 size_t count, 2200 loff_t *ppos) 2201 { 2202 struct task_struct *task; 2203 struct mm_struct *mm; 2204 char buffer[PROC_NUMBUF], *end; 2205 unsigned int val; 2206 int ret; 2207 int i; 2208 unsigned long mask; 2209 2210 ret = -EFAULT; 2211 memset(buffer, 0, sizeof(buffer)); 2212 if (count > 
sizeof(buffer) - 1) 2213 count = sizeof(buffer) - 1; 2214 if (copy_from_user(buffer, buf, count)) 2215 goto out_no_task; 2216 2217 ret = -EINVAL; 2218 val = (unsigned int)simple_strtoul(buffer, &end, 0); 2219 if (*end == '\n') 2220 end++; 2221 if (end - buffer == 0) 2222 goto out_no_task; 2223 2224 ret = -ESRCH; 2225 task = get_proc_task(file->f_dentry->d_inode); 2226 if (!task) 2227 goto out_no_task; 2228 2229 ret = end - buffer; 2230 mm = get_task_mm(task); 2231 if (!mm) 2232 goto out_no_mm; 2233 2234 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { 2235 if (val & mask) 2236 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2237 else 2238 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2239 } 2240 2241 mmput(mm); 2242 out_no_mm: 2243 put_task_struct(task); 2244 out_no_task: 2245 return ret; 2246 } 2247 2248 static const struct file_operations proc_coredump_filter_operations = { 2249 .read = proc_coredump_filter_read, 2250 .write = proc_coredump_filter_write, 2251 }; 2252 #endif 2253 2254 /* 2255 * /proc/self: 2256 */ 2257 static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 2258 int buflen) 2259 { 2260 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 2261 pid_t tgid = task_tgid_nr_ns(current, ns); 2262 char tmp[PROC_NUMBUF]; 2263 if (!tgid) 2264 return -ENOENT; 2265 sprintf(tmp, "%d", tgid); 2266 return vfs_readlink(dentry,buffer,buflen,tmp); 2267 } 2268 2269 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 2270 { 2271 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 2272 pid_t tgid = task_tgid_nr_ns(current, ns); 2273 char tmp[PROC_NUMBUF]; 2274 if (!tgid) 2275 return ERR_PTR(-ENOENT); 2276 sprintf(tmp, "%d", task_tgid_nr_ns(current, ns)); 2277 return ERR_PTR(vfs_follow_link(nd,tmp)); 2278 } 2279 2280 static const struct inode_operations proc_self_inode_operations = { 2281 .readlink = proc_self_readlink, 2282 .follow_link = proc_self_follow_link, 2283 }; 2284 2285 /* 2286 * proc base 
2287 * 2288 * These are the directory entries in the root directory of /proc 2289 * that properly belong to the /proc filesystem, as they describe 2290 * describe something that is process related. 2291 */ 2292 static const struct pid_entry proc_base_stuff[] = { 2293 NOD("self", S_IFLNK|S_IRWXUGO, 2294 &proc_self_inode_operations, NULL, {}), 2295 }; 2296 2297 /* 2298 * Exceptional case: normally we are not allowed to unhash a busy 2299 * directory. In this case, however, we can do it - no aliasing problems 2300 * due to the way we treat inodes. 2301 */ 2302 static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd) 2303 { 2304 struct inode *inode = dentry->d_inode; 2305 struct task_struct *task = get_proc_task(inode); 2306 if (task) { 2307 put_task_struct(task); 2308 return 1; 2309 } 2310 d_drop(dentry); 2311 return 0; 2312 } 2313 2314 static struct dentry_operations proc_base_dentry_operations = 2315 { 2316 .d_revalidate = proc_base_revalidate, 2317 .d_delete = pid_delete_dentry, 2318 }; 2319 2320 static struct dentry *proc_base_instantiate(struct inode *dir, 2321 struct dentry *dentry, struct task_struct *task, const void *ptr) 2322 { 2323 const struct pid_entry *p = ptr; 2324 struct inode *inode; 2325 struct proc_inode *ei; 2326 struct dentry *error = ERR_PTR(-EINVAL); 2327 2328 /* Allocate the inode */ 2329 error = ERR_PTR(-ENOMEM); 2330 inode = new_inode(dir->i_sb); 2331 if (!inode) 2332 goto out; 2333 2334 /* Initialize the inode */ 2335 ei = PROC_I(inode); 2336 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 2337 2338 /* 2339 * grab the reference to the task. 
2340 */ 2341 ei->pid = get_task_pid(task, PIDTYPE_PID); 2342 if (!ei->pid) 2343 goto out_iput; 2344 2345 inode->i_uid = 0; 2346 inode->i_gid = 0; 2347 inode->i_mode = p->mode; 2348 if (S_ISDIR(inode->i_mode)) 2349 inode->i_nlink = 2; 2350 if (S_ISLNK(inode->i_mode)) 2351 inode->i_size = 64; 2352 if (p->iop) 2353 inode->i_op = p->iop; 2354 if (p->fop) 2355 inode->i_fop = p->fop; 2356 ei->op = p->op; 2357 dentry->d_op = &proc_base_dentry_operations; 2358 d_add(dentry, inode); 2359 error = NULL; 2360 out: 2361 return error; 2362 out_iput: 2363 iput(inode); 2364 goto out; 2365 } 2366 2367 static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) 2368 { 2369 struct dentry *error; 2370 struct task_struct *task = get_proc_task(dir); 2371 const struct pid_entry *p, *last; 2372 2373 error = ERR_PTR(-ENOENT); 2374 2375 if (!task) 2376 goto out_no_task; 2377 2378 /* Lookup the directory entry */ 2379 last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; 2380 for (p = proc_base_stuff; p <= last; p++) { 2381 if (p->len != dentry->d_name.len) 2382 continue; 2383 if (!memcmp(dentry->d_name.name, p->name, p->len)) 2384 break; 2385 } 2386 if (p > last) 2387 goto out; 2388 2389 error = proc_base_instantiate(dir, dentry, task, p); 2390 2391 out: 2392 put_task_struct(task); 2393 out_no_task: 2394 return error; 2395 } 2396 2397 static int proc_base_fill_cache(struct file *filp, void *dirent, 2398 filldir_t filldir, struct task_struct *task, const struct pid_entry *p) 2399 { 2400 return proc_fill_cache(filp, dirent, filldir, p->name, p->len, 2401 proc_base_instantiate, task, p); 2402 } 2403 2404 #ifdef CONFIG_TASK_IO_ACCOUNTING 2405 static int do_io_accounting(struct task_struct *task, char *buffer, int whole) 2406 { 2407 struct task_io_accounting acct = task->ioac; 2408 unsigned long flags; 2409 2410 if (whole && lock_task_sighand(task, &flags)) { 2411 struct task_struct *t = task; 2412 2413 task_io_accounting_add(&acct, &task->signal->ioac); 2414 
while_each_thread(task, t) 2415 task_io_accounting_add(&acct, &t->ioac); 2416 2417 unlock_task_sighand(task, &flags); 2418 } 2419 return sprintf(buffer, 2420 "rchar: %llu\n" 2421 "wchar: %llu\n" 2422 "syscr: %llu\n" 2423 "syscw: %llu\n" 2424 "read_bytes: %llu\n" 2425 "write_bytes: %llu\n" 2426 "cancelled_write_bytes: %llu\n", 2427 (unsigned long long)acct.rchar, 2428 (unsigned long long)acct.wchar, 2429 (unsigned long long)acct.syscr, 2430 (unsigned long long)acct.syscw, 2431 (unsigned long long)acct.read_bytes, 2432 (unsigned long long)acct.write_bytes, 2433 (unsigned long long)acct.cancelled_write_bytes); 2434 } 2435 2436 static int proc_tid_io_accounting(struct task_struct *task, char *buffer) 2437 { 2438 return do_io_accounting(task, buffer, 0); 2439 } 2440 2441 static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) 2442 { 2443 return do_io_accounting(task, buffer, 1); 2444 } 2445 #endif /* CONFIG_TASK_IO_ACCOUNTING */ 2446 2447 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2448 struct pid *pid, struct task_struct *task) 2449 { 2450 seq_printf(m, "%08x\n", task->personality); 2451 return 0; 2452 } 2453 2454 /* 2455 * Thread groups 2456 */ 2457 static const struct file_operations proc_task_operations; 2458 static const struct inode_operations proc_task_inode_operations; 2459 2460 static const struct pid_entry tgid_base_stuff[] = { 2461 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2462 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2463 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2464 #ifdef CONFIG_NET 2465 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2466 #endif 2467 REG("environ", S_IRUSR, proc_environ_operations), 2468 INF("auxv", S_IRUSR, proc_pid_auxv), 2469 ONE("status", S_IRUGO, proc_pid_status), 2470 ONE("personality", S_IRUSR, proc_pid_personality), 2471 INF("limits", 
S_IRUSR, proc_pid_limits), 2472 #ifdef CONFIG_SCHED_DEBUG 2473 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2474 #endif 2475 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2476 INF("syscall", S_IRUSR, proc_pid_syscall), 2477 #endif 2478 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2479 ONE("stat", S_IRUGO, proc_tgid_stat), 2480 ONE("statm", S_IRUGO, proc_pid_statm), 2481 REG("maps", S_IRUGO, proc_maps_operations), 2482 #ifdef CONFIG_NUMA 2483 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 2484 #endif 2485 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2486 LNK("cwd", proc_cwd_link), 2487 LNK("root", proc_root_link), 2488 LNK("exe", proc_exe_link), 2489 REG("mounts", S_IRUGO, proc_mounts_operations), 2490 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 2491 REG("mountstats", S_IRUSR, proc_mountstats_operations), 2492 #ifdef CONFIG_PROC_PAGE_MONITOR 2493 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2494 REG("smaps", S_IRUGO, proc_smaps_operations), 2495 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2496 #endif 2497 #ifdef CONFIG_SECURITY 2498 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2499 #endif 2500 #ifdef CONFIG_KALLSYMS 2501 INF("wchan", S_IRUGO, proc_pid_wchan), 2502 #endif 2503 #ifdef CONFIG_SCHEDSTATS 2504 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2505 #endif 2506 #ifdef CONFIG_LATENCYTOP 2507 REG("latency", S_IRUGO, proc_lstats_operations), 2508 #endif 2509 #ifdef CONFIG_PROC_PID_CPUSET 2510 REG("cpuset", S_IRUGO, proc_cpuset_operations), 2511 #endif 2512 #ifdef CONFIG_CGROUPS 2513 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2514 #endif 2515 INF("oom_score", S_IRUGO, proc_oom_score), 2516 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 2517 #ifdef CONFIG_AUDITSYSCALL 2518 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2519 REG("sessionid", S_IRUGO, proc_sessionid_operations), 2520 #endif 2521 #ifdef CONFIG_FAULT_INJECTION 2522 REG("make-it-fail", 
S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2523 #endif 2524 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 2525 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2526 #endif 2527 #ifdef CONFIG_TASK_IO_ACCOUNTING 2528 INF("io", S_IRUGO, proc_tgid_io_accounting), 2529 #endif 2530 }; 2531 2532 static int proc_tgid_base_readdir(struct file * filp, 2533 void * dirent, filldir_t filldir) 2534 { 2535 return proc_pident_readdir(filp,dirent,filldir, 2536 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); 2537 } 2538 2539 static const struct file_operations proc_tgid_base_operations = { 2540 .read = generic_read_dir, 2541 .readdir = proc_tgid_base_readdir, 2542 }; 2543 2544 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 2545 return proc_pident_lookup(dir, dentry, 2546 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 2547 } 2548 2549 static const struct inode_operations proc_tgid_base_inode_operations = { 2550 .lookup = proc_tgid_base_lookup, 2551 .getattr = pid_getattr, 2552 .setattr = proc_setattr, 2553 }; 2554 2555 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 2556 { 2557 struct dentry *dentry, *leader, *dir; 2558 char buf[PROC_NUMBUF]; 2559 struct qstr name; 2560 2561 name.name = buf; 2562 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2563 dentry = d_hash_and_lookup(mnt->mnt_root, &name); 2564 if (dentry) { 2565 if (!(current->flags & PF_EXITING)) 2566 shrink_dcache_parent(dentry); 2567 d_drop(dentry); 2568 dput(dentry); 2569 } 2570 2571 if (tgid == 0) 2572 goto out; 2573 2574 name.name = buf; 2575 name.len = snprintf(buf, sizeof(buf), "%d", tgid); 2576 leader = d_hash_and_lookup(mnt->mnt_root, &name); 2577 if (!leader) 2578 goto out; 2579 2580 name.name = "task"; 2581 name.len = strlen(name.name); 2582 dir = d_hash_and_lookup(leader, &name); 2583 if (!dir) 2584 goto out_put_leader; 2585 2586 name.name = buf; 2587 name.len = snprintf(buf, 
sizeof(buf), "%d", pid); 2588 dentry = d_hash_and_lookup(dir, &name); 2589 if (dentry) { 2590 shrink_dcache_parent(dentry); 2591 d_drop(dentry); 2592 dput(dentry); 2593 } 2594 2595 dput(dir); 2596 out_put_leader: 2597 dput(leader); 2598 out: 2599 return; 2600 } 2601 2602 /** 2603 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 2604 * @task: task that should be flushed. 2605 * 2606 * When flushing dentries from proc, one needs to flush them from global 2607 * proc (proc_mnt) and from all the namespaces' procs this task was seen 2608 * in. This call is supposed to do all of this job. 2609 * 2610 * Looks in the dcache for 2611 * /proc/@pid 2612 * /proc/@tgid/task/@pid 2613 * if either directory is present flushes it and all of it'ts children 2614 * from the dcache. 2615 * 2616 * It is safe and reasonable to cache /proc entries for a task until 2617 * that task exits. After that they just clog up the dcache with 2618 * useless entries, possibly causing useful dcache entries to be 2619 * flushed instead. This routine is proved to flush those useless 2620 * dcache entries at process exit time. 2621 * 2622 * NOTE: This routine is just an optimization so it does not guarantee 2623 * that no dcache entries will exist at process exit time it 2624 * just makes it very unlikely that any will persist. 2625 */ 2626 2627 void proc_flush_task(struct task_struct *task) 2628 { 2629 int i; 2630 struct pid *pid, *tgid = NULL; 2631 struct upid *upid; 2632 2633 pid = task_pid(task); 2634 if (thread_group_leader(task)) 2635 tgid = task_tgid(task); 2636 2637 for (i = 0; i <= pid->level; i++) { 2638 upid = &pid->numbers[i]; 2639 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 2640 tgid ? 
tgid->numbers[i].nr : 0); 2641 } 2642 2643 upid = &pid->numbers[pid->level]; 2644 if (upid->nr == 1) 2645 pid_ns_release_proc(upid->ns); 2646 } 2647 2648 static struct dentry *proc_pid_instantiate(struct inode *dir, 2649 struct dentry * dentry, 2650 struct task_struct *task, const void *ptr) 2651 { 2652 struct dentry *error = ERR_PTR(-ENOENT); 2653 struct inode *inode; 2654 2655 inode = proc_pid_make_inode(dir->i_sb, task); 2656 if (!inode) 2657 goto out; 2658 2659 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2660 inode->i_op = &proc_tgid_base_inode_operations; 2661 inode->i_fop = &proc_tgid_base_operations; 2662 inode->i_flags|=S_IMMUTABLE; 2663 2664 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 2665 ARRAY_SIZE(tgid_base_stuff)); 2666 2667 dentry->d_op = &pid_dentry_operations; 2668 2669 d_add(dentry, inode); 2670 /* Close the race of the process dying before we return the dentry */ 2671 if (pid_revalidate(dentry, NULL)) 2672 error = NULL; 2673 out: 2674 return error; 2675 } 2676 2677 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2678 { 2679 struct dentry *result = ERR_PTR(-ENOENT); 2680 struct task_struct *task; 2681 unsigned tgid; 2682 struct pid_namespace *ns; 2683 2684 result = proc_base_lookup(dir, dentry); 2685 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) 2686 goto out; 2687 2688 tgid = name_to_int(dentry); 2689 if (tgid == ~0U) 2690 goto out; 2691 2692 ns = dentry->d_sb->s_fs_info; 2693 rcu_read_lock(); 2694 task = find_task_by_pid_ns(tgid, ns); 2695 if (task) 2696 get_task_struct(task); 2697 rcu_read_unlock(); 2698 if (!task) 2699 goto out; 2700 2701 result = proc_pid_instantiate(dir, dentry, task, NULL); 2702 put_task_struct(task); 2703 out: 2704 return result; 2705 } 2706 2707 /* 2708 * Find the first task with tgid >= tgid 2709 * 2710 */ 2711 struct tgid_iter { 2712 unsigned int tgid; 2713 struct task_struct *task; 2714 }; 2715 static struct tgid_iter next_tgid(struct pid_namespace *ns, 
struct tgid_iter iter) 2716 { 2717 struct pid *pid; 2718 2719 if (iter.task) 2720 put_task_struct(iter.task); 2721 rcu_read_lock(); 2722 retry: 2723 iter.task = NULL; 2724 pid = find_ge_pid(iter.tgid, ns); 2725 if (pid) { 2726 iter.tgid = pid_nr_ns(pid, ns); 2727 iter.task = pid_task(pid, PIDTYPE_PID); 2728 /* What we to know is if the pid we have find is the 2729 * pid of a thread_group_leader. Testing for task 2730 * being a thread_group_leader is the obvious thing 2731 * todo but there is a window when it fails, due to 2732 * the pid transfer logic in de_thread. 2733 * 2734 * So we perform the straight forward test of seeing 2735 * if the pid we have found is the pid of a thread 2736 * group leader, and don't worry if the task we have 2737 * found doesn't happen to be a thread group leader. 2738 * As we don't care in the case of readdir. 2739 */ 2740 if (!iter.task || !has_group_leader_pid(iter.task)) { 2741 iter.tgid += 1; 2742 goto retry; 2743 } 2744 get_task_struct(iter.task); 2745 } 2746 rcu_read_unlock(); 2747 return iter; 2748 } 2749 2750 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) 2751 2752 static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 2753 struct tgid_iter iter) 2754 { 2755 char name[PROC_NUMBUF]; 2756 int len = snprintf(name, sizeof(name), "%d", iter.tgid); 2757 return proc_fill_cache(filp, dirent, filldir, name, len, 2758 proc_pid_instantiate, iter.task, NULL); 2759 } 2760 2761 /* for the /proc/ directory itself, after non-process stuff has been done */ 2762 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2763 { 2764 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 2765 struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); 2766 struct tgid_iter iter; 2767 struct pid_namespace *ns; 2768 2769 if (!reaper) 2770 goto out_no_task; 2771 2772 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { 2773 const struct pid_entry *p = 
&proc_base_stuff[nr]; 2774 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) 2775 goto out; 2776 } 2777 2778 ns = filp->f_dentry->d_sb->s_fs_info; 2779 iter.task = NULL; 2780 iter.tgid = filp->f_pos - TGID_OFFSET; 2781 for (iter = next_tgid(ns, iter); 2782 iter.task; 2783 iter.tgid += 1, iter = next_tgid(ns, iter)) { 2784 filp->f_pos = iter.tgid + TGID_OFFSET; 2785 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { 2786 put_task_struct(iter.task); 2787 goto out; 2788 } 2789 } 2790 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; 2791 out: 2792 put_task_struct(reaper); 2793 out_no_task: 2794 return 0; 2795 } 2796 2797 /* 2798 * Tasks 2799 */ 2800 static const struct pid_entry tid_base_stuff[] = { 2801 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2802 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations), 2803 REG("environ", S_IRUSR, proc_environ_operations), 2804 INF("auxv", S_IRUSR, proc_pid_auxv), 2805 ONE("status", S_IRUGO, proc_pid_status), 2806 ONE("personality", S_IRUSR, proc_pid_personality), 2807 INF("limits", S_IRUSR, proc_pid_limits), 2808 #ifdef CONFIG_SCHED_DEBUG 2809 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2810 #endif 2811 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2812 INF("syscall", S_IRUSR, proc_pid_syscall), 2813 #endif 2814 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2815 ONE("stat", S_IRUGO, proc_tid_stat), 2816 ONE("statm", S_IRUGO, proc_pid_statm), 2817 REG("maps", S_IRUGO, proc_maps_operations), 2818 #ifdef CONFIG_NUMA 2819 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 2820 #endif 2821 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2822 LNK("cwd", proc_cwd_link), 2823 LNK("root", proc_root_link), 2824 LNK("exe", proc_exe_link), 2825 REG("mounts", S_IRUGO, proc_mounts_operations), 2826 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 2827 #ifdef CONFIG_PROC_PAGE_MONITOR 2828 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2829 REG("smaps", 
S_IRUGO, proc_smaps_operations), 2830 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2831 #endif 2832 #ifdef CONFIG_SECURITY 2833 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2834 #endif 2835 #ifdef CONFIG_KALLSYMS 2836 INF("wchan", S_IRUGO, proc_pid_wchan), 2837 #endif 2838 #ifdef CONFIG_SCHEDSTATS 2839 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2840 #endif 2841 #ifdef CONFIG_LATENCYTOP 2842 REG("latency", S_IRUGO, proc_lstats_operations), 2843 #endif 2844 #ifdef CONFIG_PROC_PID_CPUSET 2845 REG("cpuset", S_IRUGO, proc_cpuset_operations), 2846 #endif 2847 #ifdef CONFIG_CGROUPS 2848 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2849 #endif 2850 INF("oom_score", S_IRUGO, proc_oom_score), 2851 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 2852 #ifdef CONFIG_AUDITSYSCALL 2853 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2854 REG("sessionid", S_IRUSR, proc_sessionid_operations), 2855 #endif 2856 #ifdef CONFIG_FAULT_INJECTION 2857 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2858 #endif 2859 #ifdef CONFIG_TASK_IO_ACCOUNTING 2860 INF("io", S_IRUGO, proc_tid_io_accounting), 2861 #endif 2862 }; 2863 2864 static int proc_tid_base_readdir(struct file * filp, 2865 void * dirent, filldir_t filldir) 2866 { 2867 return proc_pident_readdir(filp,dirent,filldir, 2868 tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); 2869 } 2870 2871 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 2872 return proc_pident_lookup(dir, dentry, 2873 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 2874 } 2875 2876 static const struct file_operations proc_tid_base_operations = { 2877 .read = generic_read_dir, 2878 .readdir = proc_tid_base_readdir, 2879 }; 2880 2881 static const struct inode_operations proc_tid_base_inode_operations = { 2882 .lookup = proc_tid_base_lookup, 2883 .getattr = pid_getattr, 2884 .setattr = proc_setattr, 2885 }; 2886 2887 
static struct dentry *proc_task_instantiate(struct inode *dir, 2888 struct dentry *dentry, struct task_struct *task, const void *ptr) 2889 { 2890 struct dentry *error = ERR_PTR(-ENOENT); 2891 struct inode *inode; 2892 inode = proc_pid_make_inode(dir->i_sb, task); 2893 2894 if (!inode) 2895 goto out; 2896 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2897 inode->i_op = &proc_tid_base_inode_operations; 2898 inode->i_fop = &proc_tid_base_operations; 2899 inode->i_flags|=S_IMMUTABLE; 2900 2901 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 2902 ARRAY_SIZE(tid_base_stuff)); 2903 2904 dentry->d_op = &pid_dentry_operations; 2905 2906 d_add(dentry, inode); 2907 /* Close the race of the process dying before we return the dentry */ 2908 if (pid_revalidate(dentry, NULL)) 2909 error = NULL; 2910 out: 2911 return error; 2912 } 2913 2914 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2915 { 2916 struct dentry *result = ERR_PTR(-ENOENT); 2917 struct task_struct *task; 2918 struct task_struct *leader = get_proc_task(dir); 2919 unsigned tid; 2920 struct pid_namespace *ns; 2921 2922 if (!leader) 2923 goto out_no_task; 2924 2925 tid = name_to_int(dentry); 2926 if (tid == ~0U) 2927 goto out; 2928 2929 ns = dentry->d_sb->s_fs_info; 2930 rcu_read_lock(); 2931 task = find_task_by_pid_ns(tid, ns); 2932 if (task) 2933 get_task_struct(task); 2934 rcu_read_unlock(); 2935 if (!task) 2936 goto out; 2937 if (!same_thread_group(leader, task)) 2938 goto out_drop_task; 2939 2940 result = proc_task_instantiate(dir, dentry, task, NULL); 2941 out_drop_task: 2942 put_task_struct(task); 2943 out: 2944 put_task_struct(leader); 2945 out_no_task: 2946 return result; 2947 } 2948 2949 /* 2950 * Find the first tid of a thread group to return to user space. 2951 * 2952 * Usually this is just the thread group leader, but if the users 2953 * buffer was too small or there was a seek into the middle of the 2954 * directory we have more work todo. 
2955 * 2956 * In the case of a short read we start with find_task_by_pid. 2957 * 2958 * In the case of a seek we start with the leader and walk nr 2959 * threads past it. 2960 */ 2961 static struct task_struct *first_tid(struct task_struct *leader, 2962 int tid, int nr, struct pid_namespace *ns) 2963 { 2964 struct task_struct *pos; 2965 2966 rcu_read_lock(); 2967 /* Attempt to start with the pid of a thread */ 2968 if (tid && (nr > 0)) { 2969 pos = find_task_by_pid_ns(tid, ns); 2970 if (pos && (pos->group_leader == leader)) 2971 goto found; 2972 } 2973 2974 /* If nr exceeds the number of threads there is nothing todo */ 2975 pos = NULL; 2976 if (nr && nr >= get_nr_threads(leader)) 2977 goto out; 2978 2979 /* If we haven't found our starting place yet start 2980 * with the leader and walk nr threads forward. 2981 */ 2982 for (pos = leader; nr > 0; --nr) { 2983 pos = next_thread(pos); 2984 if (pos == leader) { 2985 pos = NULL; 2986 goto out; 2987 } 2988 } 2989 found: 2990 get_task_struct(pos); 2991 out: 2992 rcu_read_unlock(); 2993 return pos; 2994 } 2995 2996 /* 2997 * Find the next thread in the thread list. 2998 * Return NULL if there is an error or no next thread. 2999 * 3000 * The reference to the input task_struct is released. 
3001 */ 3002 static struct task_struct *next_tid(struct task_struct *start) 3003 { 3004 struct task_struct *pos = NULL; 3005 rcu_read_lock(); 3006 if (pid_alive(start)) { 3007 pos = next_thread(start); 3008 if (thread_group_leader(pos)) 3009 pos = NULL; 3010 else 3011 get_task_struct(pos); 3012 } 3013 rcu_read_unlock(); 3014 put_task_struct(start); 3015 return pos; 3016 } 3017 3018 static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 3019 struct task_struct *task, int tid) 3020 { 3021 char name[PROC_NUMBUF]; 3022 int len = snprintf(name, sizeof(name), "%d", tid); 3023 return proc_fill_cache(filp, dirent, filldir, name, len, 3024 proc_task_instantiate, task, NULL); 3025 } 3026 3027 /* for the /proc/TGID/task/ directories */ 3028 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 3029 { 3030 struct dentry *dentry = filp->f_path.dentry; 3031 struct inode *inode = dentry->d_inode; 3032 struct task_struct *leader = NULL; 3033 struct task_struct *task; 3034 int retval = -ENOENT; 3035 ino_t ino; 3036 int tid; 3037 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 3038 struct pid_namespace *ns; 3039 3040 task = get_proc_task(inode); 3041 if (!task) 3042 goto out_no_task; 3043 rcu_read_lock(); 3044 if (pid_alive(task)) { 3045 leader = task->group_leader; 3046 get_task_struct(leader); 3047 } 3048 rcu_read_unlock(); 3049 put_task_struct(task); 3050 if (!leader) 3051 goto out_no_task; 3052 retval = 0; 3053 3054 switch (pos) { 3055 case 0: 3056 ino = inode->i_ino; 3057 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) 3058 goto out; 3059 pos++; 3060 /* fall through */ 3061 case 1: 3062 ino = parent_ino(dentry); 3063 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) 3064 goto out; 3065 pos++; 3066 /* fall through */ 3067 } 3068 3069 /* f_version caches the tgid value that the last readdir call couldn't 3070 * return. lseek aka telldir automagically resets f_version to 0. 
3071 */ 3072 ns = filp->f_dentry->d_sb->s_fs_info; 3073 tid = (int)filp->f_version; 3074 filp->f_version = 0; 3075 for (task = first_tid(leader, tid, pos - 2, ns); 3076 task; 3077 task = next_tid(task), pos++) { 3078 tid = task_pid_nr_ns(task, ns); 3079 if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { 3080 /* returning this tgid failed, save it as the first 3081 * pid for the next readir call */ 3082 filp->f_version = (u64)tid; 3083 put_task_struct(task); 3084 break; 3085 } 3086 } 3087 out: 3088 filp->f_pos = pos; 3089 put_task_struct(leader); 3090 out_no_task: 3091 return retval; 3092 } 3093 3094 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 3095 { 3096 struct inode *inode = dentry->d_inode; 3097 struct task_struct *p = get_proc_task(inode); 3098 generic_fillattr(inode, stat); 3099 3100 if (p) { 3101 stat->nlink += get_nr_threads(p); 3102 put_task_struct(p); 3103 } 3104 3105 return 0; 3106 } 3107 3108 static const struct inode_operations proc_task_inode_operations = { 3109 .lookup = proc_task_lookup, 3110 .getattr = proc_task_getattr, 3111 .setattr = proc_setattr, 3112 }; 3113 3114 static const struct file_operations proc_task_operations = { 3115 .read = generic_read_dir, 3116 .readdir = proc_task_readdir, 3117 }; 3118