/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't use
 *  the rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table, as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
48 */ 49 50 #include <asm/uaccess.h> 51 52 #include <linux/errno.h> 53 #include <linux/time.h> 54 #include <linux/proc_fs.h> 55 #include <linux/stat.h> 56 #include <linux/task_io_accounting_ops.h> 57 #include <linux/init.h> 58 #include <linux/capability.h> 59 #include <linux/file.h> 60 #include <linux/fdtable.h> 61 #include <linux/string.h> 62 #include <linux/seq_file.h> 63 #include <linux/namei.h> 64 #include <linux/mnt_namespace.h> 65 #include <linux/mm.h> 66 #include <linux/rcupdate.h> 67 #include <linux/kallsyms.h> 68 #include <linux/stacktrace.h> 69 #include <linux/resource.h> 70 #include <linux/module.h> 71 #include <linux/mount.h> 72 #include <linux/security.h> 73 #include <linux/ptrace.h> 74 #include <linux/tracehook.h> 75 #include <linux/cgroup.h> 76 #include <linux/cpuset.h> 77 #include <linux/audit.h> 78 #include <linux/poll.h> 79 #include <linux/nsproxy.h> 80 #include <linux/oom.h> 81 #include <linux/elf.h> 82 #include <linux/pid_namespace.h> 83 #include "internal.h" 84 85 /* NOTE: 86 * Implementing inode permission operations in /proc is almost 87 * certainly an error. Permission checks need to happen during 88 * each system call not at open time. The reason is that most of 89 * what we wish to check for permissions in /proc varies at runtime. 90 * 91 * The classic example of a problem is opening file descriptors 92 * in /proc for a task before it execs a suid executable. 
*/

/*
 * One row of a pid_entry table: the entry's name and name length, its mode,
 * the inode/file operations to use for it, and a proc_op callback slot.
 * Rows are normally written with the NOD()-based macros below.
 */
struct pid_entry {
	char *name;
	int len;	/* length of name; NOD() sets it to sizeof(NAME) - 1 */
	mode_t mode;
	const struct inode_operations *iop;
	const struct file_operations *fop;
	union proc_op op;
};

/*
 * Build a struct pid_entry initializer.  NAME has to be a string literal
 * (an array), because .len is derived from sizeof(NAME).
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {			\
	.name = (NAME),					\
	.len  = sizeof(NAME) - 1,			\
	.mode = MODE,					\
	.iop  = IOP,					\
	.fop  = FOP,					\
	.op   = OP,					\
}

/*
 * Shorthands over NOD():
 *   DIR - directory (S_IFDIR) with the given inode and file operations
 *   LNK - symlink (S_IFLNK|S_IRWXUGO) resolved through .proc_get_link
 *   REG - regular file with the given file operations
 *   INF - regular file served by proc_info_file_operations via .proc_read
 *   ONE - regular file served by proc_single_file_operations via .proc_show
 */
#define DIR(NAME, MODE, iops, fops)	\
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
#define LNK(NAME, get_link)					\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
		&proc_pid_link_inode_operations, NULL,		\
		{ .proc_get_link = get_link } )
#define REG(NAME, MODE, fops)				\
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
#define INF(NAME, MODE, read)				\
	NOD(NAME, (S_IFREG|(MODE)), 			\
		NULL, &proc_info_file_operations,	\
		{ .proc_read = read } )
#define ONE(NAME, MODE, show)				\
	NOD(NAME, (S_IFREG|(MODE)), 			\
		NULL, &proc_single_file_operations,	\
		{ .proc_show = show } )

/*
 * Count the number of hardlinks for the pid_entry table, excluding the .
 * and .. links.
 *
 * Concretely: returns how many of the first @n rows of @entries have a
 * directory mode (S_ISDIR).
 */
static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
	unsigned int n)
{
	unsigned int i;
	unsigned int count;

	count = 0;
	for (i = 0; i < n; ++i) {
		if (S_ISDIR(entries[i].mode))
			++count;
	}

	return count;
}

/*
 * Copy @task's root (when @root is true) or current working directory
 * (when false) into *@path and take a reference on it with path_get().
 *
 * Returns 0 on success, or -ENOENT when the task has no fs_struct; in the
 * latter case *@path is not written.
 */
static int get_fs_path(struct task_struct *task, struct path *path, bool root)
{
	struct fs_struct *fs;
	int result = -ENOENT;

	/* task_lock() is held across the task->fs read and the copy below */
	task_lock(task);
	fs = task->fs;
	if (fs) {
		read_lock(&fs->lock);
		*path = root ? fs->root : fs->pwd;
		path_get(path);
		read_unlock(&fs->lock);
		result = 0;
	}
	task_unlock(task);
	return result;
}

/*
 * Return tsk->signal->count, read while holding the task's sighand lock.
 * Returns 0 when lock_task_sighand() fails.
 */
static int get_nr_threads(struct task_struct *tsk)
{
	unsigned long flags;
	int count = 0;

	if (lock_task_sighand(tsk, &flags)) {
		count = atomic_read(&tsk->signal->count);
		unlock_task_sighand(tsk, &flags);
	}
	return count;
}

/*
 * proc_get_link callback: fill *@path with the working directory of the
 * task behind @inode.  Returns 0, or -ENOENT when the task cannot be
 * obtained or has no fs_struct.
 */
static int proc_cwd_link(struct inode *inode, struct path *path)
{
	struct task_struct *task = get_proc_task(inode);
	int result = -ENOENT;

	if (task) {
		result = get_fs_path(task, path, 0);
		put_task_struct(task);
	}
	return result;
}

/*
 * proc_get_link callback: fill *@path with the root directory of the task
 * behind @inode.  Returns 0, or -ENOENT when the task cannot be obtained
 * or has no fs_struct.
 */
static int proc_root_link(struct inode *inode, struct path *path)
{
	struct task_struct *task = get_proc_task(inode);
	int result = -ENOENT;

	if (task) {
		result = get_fs_path(task, path, 1);
		put_task_struct(task);
	}
	return result;
}

/*
 * Return zero if current may access user memory in @task, -error if not.
 */
static int check_mem_permission(struct task_struct *task)
{
	/*
	 * A task can always look at itself, in case it chooses
	 * to use system calls instead of load instructions.
	 */
	if (task == current)
		return 0;

	/*
	 * If current is actively ptrace'ing, and would also be
	 * permitted to freshly attach with ptrace now, permit it.
	 */
	if (task_is_stopped_or_traced(task)) {
		int match;
		rcu_read_lock();
		match = (tracehook_tracer_task(task) == current);
		rcu_read_unlock();
		if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
			return 0;
	}

	/*
	 * No one else is allowed.
	 */
	return -EPERM;
}

/*
 * Get @task's mm for a reader of its mappings.
 *
 * On success the mm is returned with mmap_sem still held for reading and
 * with the get_task_mm() reference still held; neither is dropped here, so
 * releasing both is left to the caller.
 *
 * Returns NULL (with nothing held) when the task has no mm, when task->mm
 * no longer equals the mm that was grabbed, or when the mm is not current's
 * own and __ptrace_may_access(PTRACE_MODE_READ) refuses.
 */
struct mm_struct *mm_for_maps(struct task_struct *task)
{
	struct mm_struct *mm = get_task_mm(task);
	if (!mm)
		return NULL;
	down_read(&mm->mmap_sem);
	task_lock(task);
	/* task->mm is re-checked under task_lock() against the mm we grabbed */
	if (task->mm != mm)
		goto out;
	if (task->mm != current->mm &&
	    __ptrace_may_access(task, PTRACE_MODE_READ) < 0)
		goto out;
	task_unlock(task);
	return mm;
out:
	task_unlock(task);
	up_read(&mm->mmap_sem);
	mmput(mm);
	return NULL;
}

/*
 * proc_read callback: copy @task's argument area (mm->arg_start up to
 * mm->arg_end, capped at PAGE_SIZE bytes) into @buffer.
 *
 * Returns the number of bytes placed in @buffer; 0 when the task has no mm
 * or mm->arg_end is still zero.
 */
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
	int res = 0;
	unsigned int len;
	struct mm_struct *mm = get_task_mm(task);
	if (!mm)
		goto out;
	if (!mm->arg_end)
		goto out_mm;	/* Shh! No looking before we're done */

 	len = mm->arg_end - mm->arg_start;
 
	if (len > PAGE_SIZE)
		len = PAGE_SIZE;
 
	res = access_process_vm(task, mm->arg_start, buffer, len, 0);

	// If the nul at the end of args has been overwritten, then
	// assume application is using setproctitle(3).
	if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
		len = strnlen(buffer, res);
		if (len < res) {
			/* a NUL was found inside the args: cut the result there */
		    res = len;
		} else {
			/*
			 * No NUL anywhere in the args: append the environment
			 * area (up to the PAGE_SIZE total) and cut at the
			 * first NUL found in the combined data.
			 */
			len = mm->env_end - mm->env_start;
			if (len > PAGE_SIZE - res)
				len = PAGE_SIZE - res;
			res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
			res = strnlen(buffer, res);
		}
	}
out_mm:
	mmput(mm);
out:
	return res;
}

/*
 * proc_read callback: copy @task's mm->saved_auxv into @buffer, up to and
 * including the first pair whose first word is 0 (AT_NULL), capped at
 * PAGE_SIZE bytes.
 *
 * Returns the number of bytes copied, or 0 when the task has no mm.
 *
 * NOTE(review): the scan has no upper bound of its own; it relies on
 * saved_auxv always containing an AT_NULL pair - confirm against the code
 * that fills saved_auxv.
 */
static int proc_pid_auxv(struct task_struct *task, char *buffer)
{
	int res = 0;
	struct mm_struct *mm = get_task_mm(task);
	if (mm) {
		unsigned int nwords = 0;
		/* entries are inspected two words at a time */
		do {
			nwords += 2;
		} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
		res = nwords * sizeof(mm->saved_auxv[0]);
		if (res > PAGE_SIZE)
			res = PAGE_SIZE;
		memcpy(buffer, mm->saved_auxv, res);
		mmput(mm);
	}
	return res;
}


#ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 * Returns the resolved symbol.  If that fails, simply return the address.
315 */ 316 static int proc_pid_wchan(struct task_struct *task, char *buffer) 317 { 318 unsigned long wchan; 319 char symname[KSYM_NAME_LEN]; 320 321 wchan = get_wchan(task); 322 323 if (lookup_symbol_name(wchan, symname) < 0) 324 return sprintf(buffer, "%lu", wchan); 325 else 326 return sprintf(buffer, "%s", symname); 327 } 328 #endif /* CONFIG_KALLSYMS */ 329 330 #ifdef CONFIG_STACKTRACE 331 332 #define MAX_STACK_TRACE_DEPTH 64 333 334 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, 335 struct pid *pid, struct task_struct *task) 336 { 337 struct stack_trace trace; 338 unsigned long *entries; 339 int i; 340 341 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 342 if (!entries) 343 return -ENOMEM; 344 345 trace.nr_entries = 0; 346 trace.max_entries = MAX_STACK_TRACE_DEPTH; 347 trace.entries = entries; 348 trace.skip = 0; 349 save_stack_trace_tsk(task, &trace); 350 351 for (i = 0; i < trace.nr_entries; i++) { 352 seq_printf(m, "[<%p>] %pS\n", 353 (void *)entries[i], (void *)entries[i]); 354 } 355 kfree(entries); 356 357 return 0; 358 } 359 #endif 360 361 #ifdef CONFIG_SCHEDSTATS 362 /* 363 * Provides /proc/PID/schedstat 364 */ 365 static int proc_pid_schedstat(struct task_struct *task, char *buffer) 366 { 367 return sprintf(buffer, "%llu %llu %lu\n", 368 (unsigned long long)task->se.sum_exec_runtime, 369 (unsigned long long)task->sched_info.run_delay, 370 task->sched_info.pcount); 371 } 372 #endif 373 374 #ifdef CONFIG_LATENCYTOP 375 static int lstats_show_proc(struct seq_file *m, void *v) 376 { 377 int i; 378 struct inode *inode = m->private; 379 struct task_struct *task = get_proc_task(inode); 380 381 if (!task) 382 return -ESRCH; 383 seq_puts(m, "Latency Top version : v0.1\n"); 384 for (i = 0; i < 32; i++) { 385 if (task->latency_record[i].backtrace[0]) { 386 int q; 387 seq_printf(m, "%i %li %li ", 388 task->latency_record[i].count, 389 task->latency_record[i].time, 390 task->latency_record[i].max); 391 for (q = 0; q 
< LT_BACKTRACEDEPTH; q++) { 392 char sym[KSYM_SYMBOL_LEN]; 393 char *c; 394 if (!task->latency_record[i].backtrace[q]) 395 break; 396 if (task->latency_record[i].backtrace[q] == ULONG_MAX) 397 break; 398 sprint_symbol(sym, task->latency_record[i].backtrace[q]); 399 c = strchr(sym, '+'); 400 if (c) 401 *c = 0; 402 seq_printf(m, "%s ", sym); 403 } 404 seq_printf(m, "\n"); 405 } 406 407 } 408 put_task_struct(task); 409 return 0; 410 } 411 412 static int lstats_open(struct inode *inode, struct file *file) 413 { 414 return single_open(file, lstats_show_proc, inode); 415 } 416 417 static ssize_t lstats_write(struct file *file, const char __user *buf, 418 size_t count, loff_t *offs) 419 { 420 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 421 422 if (!task) 423 return -ESRCH; 424 clear_all_latency_tracing(task); 425 put_task_struct(task); 426 427 return count; 428 } 429 430 static const struct file_operations proc_lstats_operations = { 431 .open = lstats_open, 432 .read = seq_read, 433 .write = lstats_write, 434 .llseek = seq_lseek, 435 .release = single_release, 436 }; 437 438 #endif 439 440 /* The badness from the OOM killer */ 441 unsigned long badness(struct task_struct *p, unsigned long uptime); 442 static int proc_oom_score(struct task_struct *task, char *buffer) 443 { 444 unsigned long points; 445 struct timespec uptime; 446 447 do_posix_clock_monotonic_gettime(&uptime); 448 read_lock(&tasklist_lock); 449 points = badness(task, uptime.tv_sec); 450 read_unlock(&tasklist_lock); 451 return sprintf(buffer, "%lu\n", points); 452 } 453 454 struct limit_names { 455 char *name; 456 char *unit; 457 }; 458 459 static const struct limit_names lnames[RLIM_NLIMITS] = { 460 [RLIMIT_CPU] = {"Max cpu time", "ms"}, 461 [RLIMIT_FSIZE] = {"Max file size", "bytes"}, 462 [RLIMIT_DATA] = {"Max data size", "bytes"}, 463 [RLIMIT_STACK] = {"Max stack size", "bytes"}, 464 [RLIMIT_CORE] = {"Max core file size", "bytes"}, 465 [RLIMIT_RSS] = {"Max resident set", "bytes"}, 
466 [RLIMIT_NPROC] = {"Max processes", "processes"}, 467 [RLIMIT_NOFILE] = {"Max open files", "files"}, 468 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, 469 [RLIMIT_AS] = {"Max address space", "bytes"}, 470 [RLIMIT_LOCKS] = {"Max file locks", "locks"}, 471 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, 472 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, 473 [RLIMIT_NICE] = {"Max nice priority", NULL}, 474 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, 475 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, 476 }; 477 478 /* Display limits for a process */ 479 static int proc_pid_limits(struct task_struct *task, char *buffer) 480 { 481 unsigned int i; 482 int count = 0; 483 unsigned long flags; 484 char *bufptr = buffer; 485 486 struct rlimit rlim[RLIM_NLIMITS]; 487 488 if (!lock_task_sighand(task, &flags)) 489 return 0; 490 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); 491 unlock_task_sighand(task, &flags); 492 493 /* 494 * print the file header 495 */ 496 count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n", 497 "Limit", "Soft Limit", "Hard Limit", "Units"); 498 499 for (i = 0; i < RLIM_NLIMITS; i++) { 500 if (rlim[i].rlim_cur == RLIM_INFINITY) 501 count += sprintf(&bufptr[count], "%-25s %-20s ", 502 lnames[i].name, "unlimited"); 503 else 504 count += sprintf(&bufptr[count], "%-25s %-20lu ", 505 lnames[i].name, rlim[i].rlim_cur); 506 507 if (rlim[i].rlim_max == RLIM_INFINITY) 508 count += sprintf(&bufptr[count], "%-20s ", "unlimited"); 509 else 510 count += sprintf(&bufptr[count], "%-20lu ", 511 rlim[i].rlim_max); 512 513 if (lnames[i].unit) 514 count += sprintf(&bufptr[count], "%-10s\n", 515 lnames[i].unit); 516 else 517 count += sprintf(&bufptr[count], "\n"); 518 } 519 520 return count; 521 } 522 523 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 524 static int proc_pid_syscall(struct task_struct *task, char *buffer) 525 { 526 long nr; 527 unsigned long args[6], sp, pc; 528 529 if (task_current_syscall(task, &nr, args, 
6, &sp, &pc)) 530 return sprintf(buffer, "running\n"); 531 532 if (nr < 0) 533 return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); 534 535 return sprintf(buffer, 536 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 537 nr, 538 args[0], args[1], args[2], args[3], args[4], args[5], 539 sp, pc); 540 } 541 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 542 543 /************************************************************************/ 544 /* Here the fs part begins */ 545 /************************************************************************/ 546 547 /* permission checks */ 548 static int proc_fd_access_allowed(struct inode *inode) 549 { 550 struct task_struct *task; 551 int allowed = 0; 552 /* Allow access to a task's file descriptors if it is us or we 553 * may use ptrace attach to the process and find out that 554 * information. 555 */ 556 task = get_proc_task(inode); 557 if (task) { 558 allowed = ptrace_may_access(task, PTRACE_MODE_READ); 559 put_task_struct(task); 560 } 561 return allowed; 562 } 563 564 static int proc_setattr(struct dentry *dentry, struct iattr *attr) 565 { 566 int error; 567 struct inode *inode = dentry->d_inode; 568 569 if (attr->ia_valid & ATTR_MODE) 570 return -EPERM; 571 572 error = inode_change_ok(inode, attr); 573 if (!error) 574 error = inode_setattr(inode, attr); 575 return error; 576 } 577 578 static const struct inode_operations proc_def_inode_operations = { 579 .setattr = proc_setattr, 580 }; 581 582 static int mounts_open_common(struct inode *inode, struct file *file, 583 const struct seq_operations *op) 584 { 585 struct task_struct *task = get_proc_task(inode); 586 struct nsproxy *nsp; 587 struct mnt_namespace *ns = NULL; 588 struct path root; 589 struct proc_mounts *p; 590 int ret = -EINVAL; 591 592 if (task) { 593 rcu_read_lock(); 594 nsp = task_nsproxy(task); 595 if (nsp) { 596 ns = nsp->mnt_ns; 597 if (ns) 598 get_mnt_ns(ns); 599 } 600 rcu_read_unlock(); 601 if (ns && get_fs_path(task, &root, 1) == 0) 602 ret = 0; 603 
put_task_struct(task); 604 } 605 606 if (!ns) 607 goto err; 608 if (ret) 609 goto err_put_ns; 610 611 ret = -ENOMEM; 612 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); 613 if (!p) 614 goto err_put_path; 615 616 file->private_data = &p->m; 617 ret = seq_open(file, op); 618 if (ret) 619 goto err_free; 620 621 p->m.private = p; 622 p->ns = ns; 623 p->root = root; 624 p->event = ns->event; 625 626 return 0; 627 628 err_free: 629 kfree(p); 630 err_put_path: 631 path_put(&root); 632 err_put_ns: 633 put_mnt_ns(ns); 634 err: 635 return ret; 636 } 637 638 static int mounts_release(struct inode *inode, struct file *file) 639 { 640 struct proc_mounts *p = file->private_data; 641 path_put(&p->root); 642 put_mnt_ns(p->ns); 643 return seq_release(inode, file); 644 } 645 646 static unsigned mounts_poll(struct file *file, poll_table *wait) 647 { 648 struct proc_mounts *p = file->private_data; 649 struct mnt_namespace *ns = p->ns; 650 unsigned res = 0; 651 652 poll_wait(file, &ns->poll, wait); 653 654 spin_lock(&vfsmount_lock); 655 if (p->event != ns->event) { 656 p->event = ns->event; 657 res = POLLERR; 658 } 659 spin_unlock(&vfsmount_lock); 660 661 return res; 662 } 663 664 static int mounts_open(struct inode *inode, struct file *file) 665 { 666 return mounts_open_common(inode, file, &mounts_op); 667 } 668 669 static const struct file_operations proc_mounts_operations = { 670 .open = mounts_open, 671 .read = seq_read, 672 .llseek = seq_lseek, 673 .release = mounts_release, 674 .poll = mounts_poll, 675 }; 676 677 static int mountinfo_open(struct inode *inode, struct file *file) 678 { 679 return mounts_open_common(inode, file, &mountinfo_op); 680 } 681 682 static const struct file_operations proc_mountinfo_operations = { 683 .open = mountinfo_open, 684 .read = seq_read, 685 .llseek = seq_lseek, 686 .release = mounts_release, 687 .poll = mounts_poll, 688 }; 689 690 static int mountstats_open(struct inode *inode, struct file *file) 691 { 692 return mounts_open_common(inode, 
file, &mountstats_op); 693 } 694 695 static const struct file_operations proc_mountstats_operations = { 696 .open = mountstats_open, 697 .read = seq_read, 698 .llseek = seq_lseek, 699 .release = mounts_release, 700 }; 701 702 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 703 704 static ssize_t proc_info_read(struct file * file, char __user * buf, 705 size_t count, loff_t *ppos) 706 { 707 struct inode * inode = file->f_path.dentry->d_inode; 708 unsigned long page; 709 ssize_t length; 710 struct task_struct *task = get_proc_task(inode); 711 712 length = -ESRCH; 713 if (!task) 714 goto out_no_task; 715 716 if (count > PROC_BLOCK_SIZE) 717 count = PROC_BLOCK_SIZE; 718 719 length = -ENOMEM; 720 if (!(page = __get_free_page(GFP_TEMPORARY))) 721 goto out; 722 723 length = PROC_I(inode)->op.proc_read(task, (char*)page); 724 725 if (length >= 0) 726 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 727 free_page(page); 728 out: 729 put_task_struct(task); 730 out_no_task: 731 return length; 732 } 733 734 static const struct file_operations proc_info_file_operations = { 735 .read = proc_info_read, 736 }; 737 738 static int proc_single_show(struct seq_file *m, void *v) 739 { 740 struct inode *inode = m->private; 741 struct pid_namespace *ns; 742 struct pid *pid; 743 struct task_struct *task; 744 int ret; 745 746 ns = inode->i_sb->s_fs_info; 747 pid = proc_pid(inode); 748 task = get_pid_task(pid, PIDTYPE_PID); 749 if (!task) 750 return -ESRCH; 751 752 ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); 753 754 put_task_struct(task); 755 return ret; 756 } 757 758 static int proc_single_open(struct inode *inode, struct file *filp) 759 { 760 int ret; 761 ret = single_open(filp, proc_single_show, NULL); 762 if (!ret) { 763 struct seq_file *m = filp->private_data; 764 765 m->private = inode; 766 } 767 return ret; 768 } 769 770 static const struct file_operations proc_single_file_operations = { 771 
.open = proc_single_open, 772 .read = seq_read, 773 .llseek = seq_lseek, 774 .release = single_release, 775 }; 776 777 static int mem_open(struct inode* inode, struct file* file) 778 { 779 file->private_data = (void*)((long)current->self_exec_id); 780 return 0; 781 } 782 783 static ssize_t mem_read(struct file * file, char __user * buf, 784 size_t count, loff_t *ppos) 785 { 786 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 787 char *page; 788 unsigned long src = *ppos; 789 int ret = -ESRCH; 790 struct mm_struct *mm; 791 792 if (!task) 793 goto out_no_task; 794 795 if (check_mem_permission(task)) 796 goto out; 797 798 ret = -ENOMEM; 799 page = (char *)__get_free_page(GFP_TEMPORARY); 800 if (!page) 801 goto out; 802 803 ret = 0; 804 805 mm = get_task_mm(task); 806 if (!mm) 807 goto out_free; 808 809 ret = -EIO; 810 811 if (file->private_data != (void*)((long)current->self_exec_id)) 812 goto out_put; 813 814 ret = 0; 815 816 while (count > 0) { 817 int this_len, retval; 818 819 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 820 retval = access_process_vm(task, src, page, this_len, 0); 821 if (!retval || check_mem_permission(task)) { 822 if (!ret) 823 ret = -EIO; 824 break; 825 } 826 827 if (copy_to_user(buf, page, retval)) { 828 ret = -EFAULT; 829 break; 830 } 831 832 ret += retval; 833 src += retval; 834 buf += retval; 835 count -= retval; 836 } 837 *ppos = src; 838 839 out_put: 840 mmput(mm); 841 out_free: 842 free_page((unsigned long) page); 843 out: 844 put_task_struct(task); 845 out_no_task: 846 return ret; 847 } 848 849 #define mem_write NULL 850 851 #ifndef mem_write 852 /* This is a security hazard */ 853 static ssize_t mem_write(struct file * file, const char __user *buf, 854 size_t count, loff_t *ppos) 855 { 856 int copied; 857 char *page; 858 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 859 unsigned long dst = *ppos; 860 861 copied = -ESRCH; 862 if (!task) 863 goto out_no_task; 864 865 if (check_mem_permission(task)) 866 goto out; 867 868 copied = -ENOMEM; 869 page = (char *)__get_free_page(GFP_TEMPORARY); 870 if (!page) 871 goto out; 872 873 copied = 0; 874 while (count > 0) { 875 int this_len, retval; 876 877 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 878 if (copy_from_user(page, buf, this_len)) { 879 copied = -EFAULT; 880 break; 881 } 882 retval = access_process_vm(task, dst, page, this_len, 1); 883 if (!retval) { 884 if (!copied) 885 copied = -EIO; 886 break; 887 } 888 copied += retval; 889 buf += retval; 890 dst += retval; 891 count -= retval; 892 } 893 *ppos = dst; 894 free_page((unsigned long) page); 895 out: 896 put_task_struct(task); 897 out_no_task: 898 return copied; 899 } 900 #endif 901 902 loff_t mem_lseek(struct file *file, loff_t offset, int orig) 903 { 904 switch (orig) { 905 case 0: 906 file->f_pos = offset; 907 break; 908 case 1: 909 file->f_pos += offset; 910 break; 911 default: 912 return -EINVAL; 913 } 914 force_successful_syscall_return(); 915 return file->f_pos; 916 } 917 918 static const struct file_operations proc_mem_operations = { 919 .llseek = mem_lseek, 920 .read = mem_read, 921 .write = mem_write, 922 .open = mem_open, 923 }; 924 925 static ssize_t environ_read(struct file *file, char __user *buf, 926 size_t count, loff_t *ppos) 927 { 928 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 929 char *page; 930 unsigned long src = *ppos; 931 int ret = -ESRCH; 932 struct mm_struct *mm; 933 934 if (!task) 935 goto out_no_task; 936 937 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 938 goto out; 939 940 ret = -ENOMEM; 941 page = (char *)__get_free_page(GFP_TEMPORARY); 942 if (!page) 943 goto out; 944 945 ret = 0; 946 947 mm = get_task_mm(task); 948 if (!mm) 949 goto out_free; 950 951 while (count > 0) { 952 int this_len, retval, max_len; 953 954 this_len = mm->env_end - (mm->env_start + src); 955 956 if (this_len <= 0) 957 break; 958 959 max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 960 this_len = (this_len > max_len) ? 
max_len : this_len; 961 962 retval = access_process_vm(task, (mm->env_start + src), 963 page, this_len, 0); 964 965 if (retval <= 0) { 966 ret = retval; 967 break; 968 } 969 970 if (copy_to_user(buf, page, retval)) { 971 ret = -EFAULT; 972 break; 973 } 974 975 ret += retval; 976 src += retval; 977 buf += retval; 978 count -= retval; 979 } 980 *ppos = src; 981 982 mmput(mm); 983 out_free: 984 free_page((unsigned long) page); 985 out: 986 put_task_struct(task); 987 out_no_task: 988 return ret; 989 } 990 991 static const struct file_operations proc_environ_operations = { 992 .read = environ_read, 993 }; 994 995 static ssize_t oom_adjust_read(struct file *file, char __user *buf, 996 size_t count, loff_t *ppos) 997 { 998 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 999 char buffer[PROC_NUMBUF]; 1000 size_t len; 1001 int oom_adjust; 1002 1003 if (!task) 1004 return -ESRCH; 1005 oom_adjust = task->oomkilladj; 1006 put_task_struct(task); 1007 1008 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 1009 1010 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1011 } 1012 1013 static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 1014 size_t count, loff_t *ppos) 1015 { 1016 struct task_struct *task; 1017 char buffer[PROC_NUMBUF], *end; 1018 int oom_adjust; 1019 1020 memset(buffer, 0, sizeof(buffer)); 1021 if (count > sizeof(buffer) - 1) 1022 count = sizeof(buffer) - 1; 1023 if (copy_from_user(buffer, buf, count)) 1024 return -EFAULT; 1025 oom_adjust = simple_strtol(buffer, &end, 0); 1026 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && 1027 oom_adjust != OOM_DISABLE) 1028 return -EINVAL; 1029 if (*end == '\n') 1030 end++; 1031 task = get_proc_task(file->f_path.dentry->d_inode); 1032 if (!task) 1033 return -ESRCH; 1034 if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { 1035 put_task_struct(task); 1036 return -EACCES; 1037 } 1038 task->oomkilladj = oom_adjust; 1039 
put_task_struct(task); 1040 if (end - buffer == 0) 1041 return -EIO; 1042 return end - buffer; 1043 } 1044 1045 static const struct file_operations proc_oom_adjust_operations = { 1046 .read = oom_adjust_read, 1047 .write = oom_adjust_write, 1048 }; 1049 1050 #ifdef CONFIG_AUDITSYSCALL 1051 #define TMPBUFLEN 21 1052 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 1053 size_t count, loff_t *ppos) 1054 { 1055 struct inode * inode = file->f_path.dentry->d_inode; 1056 struct task_struct *task = get_proc_task(inode); 1057 ssize_t length; 1058 char tmpbuf[TMPBUFLEN]; 1059 1060 if (!task) 1061 return -ESRCH; 1062 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1063 audit_get_loginuid(task)); 1064 put_task_struct(task); 1065 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1066 } 1067 1068 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 1069 size_t count, loff_t *ppos) 1070 { 1071 struct inode * inode = file->f_path.dentry->d_inode; 1072 char *page, *tmp; 1073 ssize_t length; 1074 uid_t loginuid; 1075 1076 if (!capable(CAP_AUDIT_CONTROL)) 1077 return -EPERM; 1078 1079 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) 1080 return -EPERM; 1081 1082 if (count >= PAGE_SIZE) 1083 count = PAGE_SIZE - 1; 1084 1085 if (*ppos != 0) { 1086 /* No partial writes. 
*/ 1087 return -EINVAL; 1088 } 1089 page = (char*)__get_free_page(GFP_TEMPORARY); 1090 if (!page) 1091 return -ENOMEM; 1092 length = -EFAULT; 1093 if (copy_from_user(page, buf, count)) 1094 goto out_free_page; 1095 1096 page[count] = '\0'; 1097 loginuid = simple_strtoul(page, &tmp, 10); 1098 if (tmp == page) { 1099 length = -EINVAL; 1100 goto out_free_page; 1101 1102 } 1103 length = audit_set_loginuid(current, loginuid); 1104 if (likely(length == 0)) 1105 length = count; 1106 1107 out_free_page: 1108 free_page((unsigned long) page); 1109 return length; 1110 } 1111 1112 static const struct file_operations proc_loginuid_operations = { 1113 .read = proc_loginuid_read, 1114 .write = proc_loginuid_write, 1115 }; 1116 1117 static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1118 size_t count, loff_t *ppos) 1119 { 1120 struct inode * inode = file->f_path.dentry->d_inode; 1121 struct task_struct *task = get_proc_task(inode); 1122 ssize_t length; 1123 char tmpbuf[TMPBUFLEN]; 1124 1125 if (!task) 1126 return -ESRCH; 1127 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1128 audit_get_sessionid(task)); 1129 put_task_struct(task); 1130 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1131 } 1132 1133 static const struct file_operations proc_sessionid_operations = { 1134 .read = proc_sessionid_read, 1135 }; 1136 #endif 1137 1138 #ifdef CONFIG_FAULT_INJECTION 1139 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, 1140 size_t count, loff_t *ppos) 1141 { 1142 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 1143 char buffer[PROC_NUMBUF]; 1144 size_t len; 1145 int make_it_fail; 1146 1147 if (!task) 1148 return -ESRCH; 1149 make_it_fail = task->make_it_fail; 1150 put_task_struct(task); 1151 1152 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); 1153 1154 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1155 } 1156 1157 static ssize_t proc_fault_inject_write(struct file * file, 
1158 const char __user * buf, size_t count, loff_t *ppos) 1159 { 1160 struct task_struct *task; 1161 char buffer[PROC_NUMBUF], *end; 1162 int make_it_fail; 1163 1164 if (!capable(CAP_SYS_RESOURCE)) 1165 return -EPERM; 1166 memset(buffer, 0, sizeof(buffer)); 1167 if (count > sizeof(buffer) - 1) 1168 count = sizeof(buffer) - 1; 1169 if (copy_from_user(buffer, buf, count)) 1170 return -EFAULT; 1171 make_it_fail = simple_strtol(buffer, &end, 0); 1172 if (*end == '\n') 1173 end++; 1174 task = get_proc_task(file->f_dentry->d_inode); 1175 if (!task) 1176 return -ESRCH; 1177 task->make_it_fail = make_it_fail; 1178 put_task_struct(task); 1179 if (end - buffer == 0) 1180 return -EIO; 1181 return end - buffer; 1182 } 1183 1184 static const struct file_operations proc_fault_inject_operations = { 1185 .read = proc_fault_inject_read, 1186 .write = proc_fault_inject_write, 1187 }; 1188 #endif 1189 1190 1191 #ifdef CONFIG_SCHED_DEBUG 1192 /* 1193 * Print out various scheduling related per-task fields: 1194 */ 1195 static int sched_show(struct seq_file *m, void *v) 1196 { 1197 struct inode *inode = m->private; 1198 struct task_struct *p; 1199 1200 p = get_proc_task(inode); 1201 if (!p) 1202 return -ESRCH; 1203 proc_sched_show_task(p, m); 1204 1205 put_task_struct(p); 1206 1207 return 0; 1208 } 1209 1210 static ssize_t 1211 sched_write(struct file *file, const char __user *buf, 1212 size_t count, loff_t *offset) 1213 { 1214 struct inode *inode = file->f_path.dentry->d_inode; 1215 struct task_struct *p; 1216 1217 p = get_proc_task(inode); 1218 if (!p) 1219 return -ESRCH; 1220 proc_sched_set_task(p); 1221 1222 put_task_struct(p); 1223 1224 return count; 1225 } 1226 1227 static int sched_open(struct inode *inode, struct file *filp) 1228 { 1229 int ret; 1230 1231 ret = single_open(filp, sched_show, NULL); 1232 if (!ret) { 1233 struct seq_file *m = filp->private_data; 1234 1235 m->private = inode; 1236 } 1237 return ret; 1238 } 1239 1240 static const struct file_operations 
proc_pid_sched_operations = { 1241 .open = sched_open, 1242 .read = seq_read, 1243 .write = sched_write, 1244 .llseek = seq_lseek, 1245 .release = single_release, 1246 }; 1247 1248 #endif 1249 1250 /* 1251 * We added or removed a vma mapping the executable. The vmas are only mapped 1252 * during exec and are not mapped with the mmap system call. 1253 * Callers must hold down_write() on the mm's mmap_sem for these 1254 */ 1255 void added_exe_file_vma(struct mm_struct *mm) 1256 { 1257 mm->num_exe_file_vmas++; 1258 } 1259 1260 void removed_exe_file_vma(struct mm_struct *mm) 1261 { 1262 mm->num_exe_file_vmas--; 1263 if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ 1264 fput(mm->exe_file); 1265 mm->exe_file = NULL; 1266 } 1267 1268 } 1269 1270 void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) 1271 { 1272 if (new_exe_file) 1273 get_file(new_exe_file); 1274 if (mm->exe_file) 1275 fput(mm->exe_file); 1276 mm->exe_file = new_exe_file; 1277 mm->num_exe_file_vmas = 0; 1278 } 1279 1280 struct file *get_mm_exe_file(struct mm_struct *mm) 1281 { 1282 struct file *exe_file; 1283 1284 /* We need mmap_sem to protect against races with removal of 1285 * VM_EXECUTABLE vmas */ 1286 down_read(&mm->mmap_sem); 1287 exe_file = mm->exe_file; 1288 if (exe_file) 1289 get_file(exe_file); 1290 up_read(&mm->mmap_sem); 1291 return exe_file; 1292 } 1293 1294 void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) 1295 { 1296 /* It's safe to write the exe_file pointer without exe_file_lock because 1297 * this is called during fork when the task is not yet in /proc */ 1298 newmm->exe_file = get_mm_exe_file(oldmm); 1299 } 1300 1301 static int proc_exe_link(struct inode *inode, struct path *exe_path) 1302 { 1303 struct task_struct *task; 1304 struct mm_struct *mm; 1305 struct file *exe_file; 1306 1307 task = get_proc_task(inode); 1308 if (!task) 1309 return -ENOENT; 1310 mm = get_task_mm(task); 1311 put_task_struct(task); 1312 if (!mm) 1313 return -ENOENT; 1314 
exe_file = get_mm_exe_file(mm); 1315 mmput(mm); 1316 if (exe_file) { 1317 *exe_path = exe_file->f_path; 1318 path_get(&exe_file->f_path); 1319 fput(exe_file); 1320 return 0; 1321 } else 1322 return -ENOENT; 1323 } 1324 1325 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1326 { 1327 struct inode *inode = dentry->d_inode; 1328 int error = -EACCES; 1329 1330 /* We don't need a base pointer in the /proc filesystem */ 1331 path_put(&nd->path); 1332 1333 /* Are we allowed to snoop on the tasks file descriptors? */ 1334 if (!proc_fd_access_allowed(inode)) 1335 goto out; 1336 1337 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1338 nd->last_type = LAST_BIND; 1339 out: 1340 return ERR_PTR(error); 1341 } 1342 1343 static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) 1344 { 1345 char *tmp = (char*)__get_free_page(GFP_TEMPORARY); 1346 char *pathname; 1347 int len; 1348 1349 if (!tmp) 1350 return -ENOMEM; 1351 1352 pathname = d_path(path, tmp, PAGE_SIZE); 1353 len = PTR_ERR(pathname); 1354 if (IS_ERR(pathname)) 1355 goto out; 1356 len = tmp + PAGE_SIZE - 1 - pathname; 1357 1358 if (len > buflen) 1359 len = buflen; 1360 if (copy_to_user(buffer, pathname, len)) 1361 len = -EFAULT; 1362 out: 1363 free_page((unsigned long)tmp); 1364 return len; 1365 } 1366 1367 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1368 { 1369 int error = -EACCES; 1370 struct inode *inode = dentry->d_inode; 1371 struct path path; 1372 1373 /* Are we allowed to snoop on the tasks file descriptors? 
*/ 1374 if (!proc_fd_access_allowed(inode)) 1375 goto out; 1376 1377 error = PROC_I(inode)->op.proc_get_link(inode, &path); 1378 if (error) 1379 goto out; 1380 1381 error = do_proc_readlink(&path, buffer, buflen); 1382 path_put(&path); 1383 out: 1384 return error; 1385 } 1386 1387 static const struct inode_operations proc_pid_link_inode_operations = { 1388 .readlink = proc_pid_readlink, 1389 .follow_link = proc_pid_follow_link, 1390 .setattr = proc_setattr, 1391 }; 1392 1393 1394 /* building an inode */ 1395 1396 static int task_dumpable(struct task_struct *task) 1397 { 1398 int dumpable = 0; 1399 struct mm_struct *mm; 1400 1401 task_lock(task); 1402 mm = task->mm; 1403 if (mm) 1404 dumpable = get_dumpable(mm); 1405 task_unlock(task); 1406 if(dumpable == 1) 1407 return 1; 1408 return 0; 1409 } 1410 1411 1412 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) 1413 { 1414 struct inode * inode; 1415 struct proc_inode *ei; 1416 const struct cred *cred; 1417 1418 /* We need a new inode */ 1419 1420 inode = new_inode(sb); 1421 if (!inode) 1422 goto out; 1423 1424 /* Common stuff */ 1425 ei = PROC_I(inode); 1426 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1427 inode->i_op = &proc_def_inode_operations; 1428 1429 /* 1430 * grab the reference to task. 
1431 */ 1432 ei->pid = get_task_pid(task, PIDTYPE_PID); 1433 if (!ei->pid) 1434 goto out_unlock; 1435 1436 if (task_dumpable(task)) { 1437 rcu_read_lock(); 1438 cred = __task_cred(task); 1439 inode->i_uid = cred->euid; 1440 inode->i_gid = cred->egid; 1441 rcu_read_unlock(); 1442 } 1443 security_task_to_inode(task, inode); 1444 1445 out: 1446 return inode; 1447 1448 out_unlock: 1449 iput(inode); 1450 return NULL; 1451 } 1452 1453 static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1454 { 1455 struct inode *inode = dentry->d_inode; 1456 struct task_struct *task; 1457 const struct cred *cred; 1458 1459 generic_fillattr(inode, stat); 1460 1461 rcu_read_lock(); 1462 stat->uid = 0; 1463 stat->gid = 0; 1464 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1465 if (task) { 1466 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1467 task_dumpable(task)) { 1468 cred = __task_cred(task); 1469 stat->uid = cred->euid; 1470 stat->gid = cred->egid; 1471 } 1472 } 1473 rcu_read_unlock(); 1474 return 0; 1475 } 1476 1477 /* dentry stuff */ 1478 1479 /* 1480 * Exceptional case: normally we are not allowed to unhash a busy 1481 * directory. In this case, however, we can do it - no aliasing problems 1482 * due to the way we treat inodes. 1483 * 1484 * Rewrite the inode's ownerships here because the owning task may have 1485 * performed a setuid(), etc. 1486 * 1487 * Before the /proc/pid/status file was created the only way to read 1488 * the effective uid of a /process was to stat /proc/pid. Reading 1489 * /proc/pid/status is slow enough that procps and other packages 1490 * kept stating /proc/pid. To keep the rules in /proc simple I have 1491 * made this apply to all per process world readable and executable 1492 * directories. 
1493 */ 1494 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1495 { 1496 struct inode *inode = dentry->d_inode; 1497 struct task_struct *task = get_proc_task(inode); 1498 const struct cred *cred; 1499 1500 if (task) { 1501 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1502 task_dumpable(task)) { 1503 rcu_read_lock(); 1504 cred = __task_cred(task); 1505 inode->i_uid = cred->euid; 1506 inode->i_gid = cred->egid; 1507 rcu_read_unlock(); 1508 } else { 1509 inode->i_uid = 0; 1510 inode->i_gid = 0; 1511 } 1512 inode->i_mode &= ~(S_ISUID | S_ISGID); 1513 security_task_to_inode(task, inode); 1514 put_task_struct(task); 1515 return 1; 1516 } 1517 d_drop(dentry); 1518 return 0; 1519 } 1520 1521 static int pid_delete_dentry(struct dentry * dentry) 1522 { 1523 /* Is the task we represent dead? 1524 * If so, then don't put the dentry on the lru list, 1525 * kill it immediately. 1526 */ 1527 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1528 } 1529 1530 static const struct dentry_operations pid_dentry_operations = 1531 { 1532 .d_revalidate = pid_revalidate, 1533 .d_delete = pid_delete_dentry, 1534 }; 1535 1536 /* Lookups */ 1537 1538 typedef struct dentry *instantiate_t(struct inode *, struct dentry *, 1539 struct task_struct *, const void *); 1540 1541 /* 1542 * Fill a directory entry. 1543 * 1544 * If possible create the dcache entry and derive our inode number and 1545 * file type from dcache entry. 1546 * 1547 * Since all of the proc inode numbers are dynamically generated, the inode 1548 * numbers do not exist until the inode is cache. This means creating the 1549 * the dcache entry in readdir is necessary to keep the inode numbers 1550 * reported by readdir in sync with the inode numbers reported 1551 * by stat. 
1552 */ 1553 static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1554 char *name, int len, 1555 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1556 { 1557 struct dentry *child, *dir = filp->f_path.dentry; 1558 struct inode *inode; 1559 struct qstr qname; 1560 ino_t ino = 0; 1561 unsigned type = DT_UNKNOWN; 1562 1563 qname.name = name; 1564 qname.len = len; 1565 qname.hash = full_name_hash(name, len); 1566 1567 child = d_lookup(dir, &qname); 1568 if (!child) { 1569 struct dentry *new; 1570 new = d_alloc(dir, &qname); 1571 if (new) { 1572 child = instantiate(dir->d_inode, new, task, ptr); 1573 if (child) 1574 dput(new); 1575 else 1576 child = new; 1577 } 1578 } 1579 if (!child || IS_ERR(child) || !child->d_inode) 1580 goto end_instantiate; 1581 inode = child->d_inode; 1582 if (inode) { 1583 ino = inode->i_ino; 1584 type = inode->i_mode >> 12; 1585 } 1586 dput(child); 1587 end_instantiate: 1588 if (!ino) 1589 ino = find_inode_number(dir, &qname); 1590 if (!ino) 1591 ino = 1; 1592 return filldir(dirent, name, len, filp->f_pos, ino, type); 1593 } 1594 1595 static unsigned name_to_int(struct dentry *dentry) 1596 { 1597 const char *name = dentry->d_name.name; 1598 int len = dentry->d_name.len; 1599 unsigned n = 0; 1600 1601 if (len > 1 && *name == '0') 1602 goto out; 1603 while (len-- > 0) { 1604 unsigned c = *name++ - '0'; 1605 if (c > 9) 1606 goto out; 1607 if (n >= (~0U-9)/10) 1608 goto out; 1609 n *= 10; 1610 n += c; 1611 } 1612 return n; 1613 out: 1614 return ~0U; 1615 } 1616 1617 #define PROC_FDINFO_MAX 64 1618 1619 static int proc_fd_info(struct inode *inode, struct path *path, char *info) 1620 { 1621 struct task_struct *task = get_proc_task(inode); 1622 struct files_struct *files = NULL; 1623 struct file *file; 1624 int fd = proc_fd(inode); 1625 1626 if (task) { 1627 files = get_files_struct(task); 1628 put_task_struct(task); 1629 } 1630 if (files) { 1631 /* 1632 * We are not taking a ref to the file structure, so 
we must 1633 * hold ->file_lock. 1634 */ 1635 spin_lock(&files->file_lock); 1636 file = fcheck_files(files, fd); 1637 if (file) { 1638 if (path) { 1639 *path = file->f_path; 1640 path_get(&file->f_path); 1641 } 1642 if (info) 1643 snprintf(info, PROC_FDINFO_MAX, 1644 "pos:\t%lli\n" 1645 "flags:\t0%o\n", 1646 (long long) file->f_pos, 1647 file->f_flags); 1648 spin_unlock(&files->file_lock); 1649 put_files_struct(files); 1650 return 0; 1651 } 1652 spin_unlock(&files->file_lock); 1653 put_files_struct(files); 1654 } 1655 return -ENOENT; 1656 } 1657 1658 static int proc_fd_link(struct inode *inode, struct path *path) 1659 { 1660 return proc_fd_info(inode, path, NULL); 1661 } 1662 1663 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1664 { 1665 struct inode *inode = dentry->d_inode; 1666 struct task_struct *task = get_proc_task(inode); 1667 int fd = proc_fd(inode); 1668 struct files_struct *files; 1669 const struct cred *cred; 1670 1671 if (task) { 1672 files = get_files_struct(task); 1673 if (files) { 1674 rcu_read_lock(); 1675 if (fcheck_files(files, fd)) { 1676 rcu_read_unlock(); 1677 put_files_struct(files); 1678 if (task_dumpable(task)) { 1679 rcu_read_lock(); 1680 cred = __task_cred(task); 1681 inode->i_uid = cred->euid; 1682 inode->i_gid = cred->egid; 1683 rcu_read_unlock(); 1684 } else { 1685 inode->i_uid = 0; 1686 inode->i_gid = 0; 1687 } 1688 inode->i_mode &= ~(S_ISUID | S_ISGID); 1689 security_task_to_inode(task, inode); 1690 put_task_struct(task); 1691 return 1; 1692 } 1693 rcu_read_unlock(); 1694 put_files_struct(files); 1695 } 1696 put_task_struct(task); 1697 } 1698 d_drop(dentry); 1699 return 0; 1700 } 1701 1702 static const struct dentry_operations tid_fd_dentry_operations = 1703 { 1704 .d_revalidate = tid_fd_revalidate, 1705 .d_delete = pid_delete_dentry, 1706 }; 1707 1708 static struct dentry *proc_fd_instantiate(struct inode *dir, 1709 struct dentry *dentry, struct task_struct *task, const void *ptr) 1710 { 1711 unsigned fd = 
*(const unsigned *)ptr; 1712 struct file *file; 1713 struct files_struct *files; 1714 struct inode *inode; 1715 struct proc_inode *ei; 1716 struct dentry *error = ERR_PTR(-ENOENT); 1717 1718 inode = proc_pid_make_inode(dir->i_sb, task); 1719 if (!inode) 1720 goto out; 1721 ei = PROC_I(inode); 1722 ei->fd = fd; 1723 files = get_files_struct(task); 1724 if (!files) 1725 goto out_iput; 1726 inode->i_mode = S_IFLNK; 1727 1728 /* 1729 * We are not taking a ref to the file structure, so we must 1730 * hold ->file_lock. 1731 */ 1732 spin_lock(&files->file_lock); 1733 file = fcheck_files(files, fd); 1734 if (!file) 1735 goto out_unlock; 1736 if (file->f_mode & FMODE_READ) 1737 inode->i_mode |= S_IRUSR | S_IXUSR; 1738 if (file->f_mode & FMODE_WRITE) 1739 inode->i_mode |= S_IWUSR | S_IXUSR; 1740 spin_unlock(&files->file_lock); 1741 put_files_struct(files); 1742 1743 inode->i_op = &proc_pid_link_inode_operations; 1744 inode->i_size = 64; 1745 ei->op.proc_get_link = proc_fd_link; 1746 dentry->d_op = &tid_fd_dentry_operations; 1747 d_add(dentry, inode); 1748 /* Close the race of the process dying before we return the dentry */ 1749 if (tid_fd_revalidate(dentry, NULL)) 1750 error = NULL; 1751 1752 out: 1753 return error; 1754 out_unlock: 1755 spin_unlock(&files->file_lock); 1756 put_files_struct(files); 1757 out_iput: 1758 iput(inode); 1759 goto out; 1760 } 1761 1762 static struct dentry *proc_lookupfd_common(struct inode *dir, 1763 struct dentry *dentry, 1764 instantiate_t instantiate) 1765 { 1766 struct task_struct *task = get_proc_task(dir); 1767 unsigned fd = name_to_int(dentry); 1768 struct dentry *result = ERR_PTR(-ENOENT); 1769 1770 if (!task) 1771 goto out_no_task; 1772 if (fd == ~0U) 1773 goto out; 1774 1775 result = instantiate(dir, dentry, task, &fd); 1776 out: 1777 put_task_struct(task); 1778 out_no_task: 1779 return result; 1780 } 1781 1782 static int proc_readfd_common(struct file * filp, void * dirent, 1783 filldir_t filldir, instantiate_t instantiate) 1784 { 1785 
struct dentry *dentry = filp->f_path.dentry; 1786 struct inode *inode = dentry->d_inode; 1787 struct task_struct *p = get_proc_task(inode); 1788 unsigned int fd, ino; 1789 int retval; 1790 struct files_struct * files; 1791 1792 retval = -ENOENT; 1793 if (!p) 1794 goto out_no_task; 1795 retval = 0; 1796 1797 fd = filp->f_pos; 1798 switch (fd) { 1799 case 0: 1800 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 1801 goto out; 1802 filp->f_pos++; 1803 case 1: 1804 ino = parent_ino(dentry); 1805 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1806 goto out; 1807 filp->f_pos++; 1808 default: 1809 files = get_files_struct(p); 1810 if (!files) 1811 goto out; 1812 rcu_read_lock(); 1813 for (fd = filp->f_pos-2; 1814 fd < files_fdtable(files)->max_fds; 1815 fd++, filp->f_pos++) { 1816 char name[PROC_NUMBUF]; 1817 int len; 1818 1819 if (!fcheck_files(files, fd)) 1820 continue; 1821 rcu_read_unlock(); 1822 1823 len = snprintf(name, sizeof(name), "%d", fd); 1824 if (proc_fill_cache(filp, dirent, filldir, 1825 name, len, instantiate, 1826 p, &fd) < 0) { 1827 rcu_read_lock(); 1828 break; 1829 } 1830 rcu_read_lock(); 1831 } 1832 rcu_read_unlock(); 1833 put_files_struct(files); 1834 } 1835 out: 1836 put_task_struct(p); 1837 out_no_task: 1838 return retval; 1839 } 1840 1841 static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, 1842 struct nameidata *nd) 1843 { 1844 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); 1845 } 1846 1847 static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) 1848 { 1849 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); 1850 } 1851 1852 static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, 1853 size_t len, loff_t *ppos) 1854 { 1855 char tmp[PROC_FDINFO_MAX]; 1856 int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); 1857 if (!err) 1858 err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); 1859 return err; 1860 } 1861 1862 static 
const struct file_operations proc_fdinfo_file_operations = { 1863 .open = nonseekable_open, 1864 .read = proc_fdinfo_read, 1865 }; 1866 1867 static const struct file_operations proc_fd_operations = { 1868 .read = generic_read_dir, 1869 .readdir = proc_readfd, 1870 }; 1871 1872 /* 1873 * /proc/pid/fd needs a special permission handler so that a process can still 1874 * access /proc/self/fd after it has executed a setuid(). 1875 */ 1876 static int proc_fd_permission(struct inode *inode, int mask) 1877 { 1878 int rv; 1879 1880 rv = generic_permission(inode, mask, NULL); 1881 if (rv == 0) 1882 return 0; 1883 if (task_pid(current) == proc_pid(inode)) 1884 rv = 0; 1885 return rv; 1886 } 1887 1888 /* 1889 * proc directories can do almost nothing.. 1890 */ 1891 static const struct inode_operations proc_fd_inode_operations = { 1892 .lookup = proc_lookupfd, 1893 .permission = proc_fd_permission, 1894 .setattr = proc_setattr, 1895 }; 1896 1897 static struct dentry *proc_fdinfo_instantiate(struct inode *dir, 1898 struct dentry *dentry, struct task_struct *task, const void *ptr) 1899 { 1900 unsigned fd = *(unsigned *)ptr; 1901 struct inode *inode; 1902 struct proc_inode *ei; 1903 struct dentry *error = ERR_PTR(-ENOENT); 1904 1905 inode = proc_pid_make_inode(dir->i_sb, task); 1906 if (!inode) 1907 goto out; 1908 ei = PROC_I(inode); 1909 ei->fd = fd; 1910 inode->i_mode = S_IFREG | S_IRUSR; 1911 inode->i_fop = &proc_fdinfo_file_operations; 1912 dentry->d_op = &tid_fd_dentry_operations; 1913 d_add(dentry, inode); 1914 /* Close the race of the process dying before we return the dentry */ 1915 if (tid_fd_revalidate(dentry, NULL)) 1916 error = NULL; 1917 1918 out: 1919 return error; 1920 } 1921 1922 static struct dentry *proc_lookupfdinfo(struct inode *dir, 1923 struct dentry *dentry, 1924 struct nameidata *nd) 1925 { 1926 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); 1927 } 1928 1929 static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) 
1930 { 1931 return proc_readfd_common(filp, dirent, filldir, 1932 proc_fdinfo_instantiate); 1933 } 1934 1935 static const struct file_operations proc_fdinfo_operations = { 1936 .read = generic_read_dir, 1937 .readdir = proc_readfdinfo, 1938 }; 1939 1940 /* 1941 * proc directories can do almost nothing.. 1942 */ 1943 static const struct inode_operations proc_fdinfo_inode_operations = { 1944 .lookup = proc_lookupfdinfo, 1945 .setattr = proc_setattr, 1946 }; 1947 1948 1949 static struct dentry *proc_pident_instantiate(struct inode *dir, 1950 struct dentry *dentry, struct task_struct *task, const void *ptr) 1951 { 1952 const struct pid_entry *p = ptr; 1953 struct inode *inode; 1954 struct proc_inode *ei; 1955 struct dentry *error = ERR_PTR(-EINVAL); 1956 1957 inode = proc_pid_make_inode(dir->i_sb, task); 1958 if (!inode) 1959 goto out; 1960 1961 ei = PROC_I(inode); 1962 inode->i_mode = p->mode; 1963 if (S_ISDIR(inode->i_mode)) 1964 inode->i_nlink = 2; /* Use getattr to fix if necessary */ 1965 if (p->iop) 1966 inode->i_op = p->iop; 1967 if (p->fop) 1968 inode->i_fop = p->fop; 1969 ei->op = p->op; 1970 dentry->d_op = &pid_dentry_operations; 1971 d_add(dentry, inode); 1972 /* Close the race of the process dying before we return the dentry */ 1973 if (pid_revalidate(dentry, NULL)) 1974 error = NULL; 1975 out: 1976 return error; 1977 } 1978 1979 static struct dentry *proc_pident_lookup(struct inode *dir, 1980 struct dentry *dentry, 1981 const struct pid_entry *ents, 1982 unsigned int nents) 1983 { 1984 struct dentry *error; 1985 struct task_struct *task = get_proc_task(dir); 1986 const struct pid_entry *p, *last; 1987 1988 error = ERR_PTR(-ENOENT); 1989 1990 if (!task) 1991 goto out_no_task; 1992 1993 /* 1994 * Yes, it does not scale. And it should not. Don't add 1995 * new entries into /proc/<tgid>/ without very good reasons. 
1996 */ 1997 last = &ents[nents - 1]; 1998 for (p = ents; p <= last; p++) { 1999 if (p->len != dentry->d_name.len) 2000 continue; 2001 if (!memcmp(dentry->d_name.name, p->name, p->len)) 2002 break; 2003 } 2004 if (p > last) 2005 goto out; 2006 2007 error = proc_pident_instantiate(dir, dentry, task, p); 2008 out: 2009 put_task_struct(task); 2010 out_no_task: 2011 return error; 2012 } 2013 2014 static int proc_pident_fill_cache(struct file *filp, void *dirent, 2015 filldir_t filldir, struct task_struct *task, const struct pid_entry *p) 2016 { 2017 return proc_fill_cache(filp, dirent, filldir, p->name, p->len, 2018 proc_pident_instantiate, task, p); 2019 } 2020 2021 static int proc_pident_readdir(struct file *filp, 2022 void *dirent, filldir_t filldir, 2023 const struct pid_entry *ents, unsigned int nents) 2024 { 2025 int i; 2026 struct dentry *dentry = filp->f_path.dentry; 2027 struct inode *inode = dentry->d_inode; 2028 struct task_struct *task = get_proc_task(inode); 2029 const struct pid_entry *p, *last; 2030 ino_t ino; 2031 int ret; 2032 2033 ret = -ENOENT; 2034 if (!task) 2035 goto out_no_task; 2036 2037 ret = 0; 2038 i = filp->f_pos; 2039 switch (i) { 2040 case 0: 2041 ino = inode->i_ino; 2042 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 2043 goto out; 2044 i++; 2045 filp->f_pos++; 2046 /* fall through */ 2047 case 1: 2048 ino = parent_ino(dentry); 2049 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 2050 goto out; 2051 i++; 2052 filp->f_pos++; 2053 /* fall through */ 2054 default: 2055 i -= 2; 2056 if (i >= nents) { 2057 ret = 1; 2058 goto out; 2059 } 2060 p = ents + i; 2061 last = &ents[nents - 1]; 2062 while (p <= last) { 2063 if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) 2064 goto out; 2065 filp->f_pos++; 2066 p++; 2067 } 2068 } 2069 2070 ret = 1; 2071 out: 2072 put_task_struct(task); 2073 out_no_task: 2074 return ret; 2075 } 2076 2077 #ifdef CONFIG_SECURITY 2078 static ssize_t proc_pid_attr_read(struct file * file, char __user * 
buf, 2079 size_t count, loff_t *ppos) 2080 { 2081 struct inode * inode = file->f_path.dentry->d_inode; 2082 char *p = NULL; 2083 ssize_t length; 2084 struct task_struct *task = get_proc_task(inode); 2085 2086 if (!task) 2087 return -ESRCH; 2088 2089 length = security_getprocattr(task, 2090 (char*)file->f_path.dentry->d_name.name, 2091 &p); 2092 put_task_struct(task); 2093 if (length > 0) 2094 length = simple_read_from_buffer(buf, count, ppos, p, length); 2095 kfree(p); 2096 return length; 2097 } 2098 2099 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 2100 size_t count, loff_t *ppos) 2101 { 2102 struct inode * inode = file->f_path.dentry->d_inode; 2103 char *page; 2104 ssize_t length; 2105 struct task_struct *task = get_proc_task(inode); 2106 2107 length = -ESRCH; 2108 if (!task) 2109 goto out_no_task; 2110 if (count > PAGE_SIZE) 2111 count = PAGE_SIZE; 2112 2113 /* No partial writes. */ 2114 length = -EINVAL; 2115 if (*ppos != 0) 2116 goto out; 2117 2118 length = -ENOMEM; 2119 page = (char*)__get_free_page(GFP_TEMPORARY); 2120 if (!page) 2121 goto out; 2122 2123 length = -EFAULT; 2124 if (copy_from_user(page, buf, count)) 2125 goto out_free; 2126 2127 length = security_setprocattr(task, 2128 (char*)file->f_path.dentry->d_name.name, 2129 (void*)page, count); 2130 out_free: 2131 free_page((unsigned long) page); 2132 out: 2133 put_task_struct(task); 2134 out_no_task: 2135 return length; 2136 } 2137 2138 static const struct file_operations proc_pid_attr_operations = { 2139 .read = proc_pid_attr_read, 2140 .write = proc_pid_attr_write, 2141 }; 2142 2143 static const struct pid_entry attr_dir_stuff[] = { 2144 REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2145 REG("prev", S_IRUGO, proc_pid_attr_operations), 2146 REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2147 REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2148 REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2149 REG("sockcreate", 
S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2150 }; 2151 2152 static int proc_attr_dir_readdir(struct file * filp, 2153 void * dirent, filldir_t filldir) 2154 { 2155 return proc_pident_readdir(filp,dirent,filldir, 2156 attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); 2157 } 2158 2159 static const struct file_operations proc_attr_dir_operations = { 2160 .read = generic_read_dir, 2161 .readdir = proc_attr_dir_readdir, 2162 }; 2163 2164 static struct dentry *proc_attr_dir_lookup(struct inode *dir, 2165 struct dentry *dentry, struct nameidata *nd) 2166 { 2167 return proc_pident_lookup(dir, dentry, 2168 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2169 } 2170 2171 static const struct inode_operations proc_attr_dir_inode_operations = { 2172 .lookup = proc_attr_dir_lookup, 2173 .getattr = pid_getattr, 2174 .setattr = proc_setattr, 2175 }; 2176 2177 #endif 2178 2179 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 2180 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2181 size_t count, loff_t *ppos) 2182 { 2183 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 2184 struct mm_struct *mm; 2185 char buffer[PROC_NUMBUF]; 2186 size_t len; 2187 int ret; 2188 2189 if (!task) 2190 return -ESRCH; 2191 2192 ret = 0; 2193 mm = get_task_mm(task); 2194 if (mm) { 2195 len = snprintf(buffer, sizeof(buffer), "%08lx\n", 2196 ((mm->flags & MMF_DUMP_FILTER_MASK) >> 2197 MMF_DUMP_FILTER_SHIFT)); 2198 mmput(mm); 2199 ret = simple_read_from_buffer(buf, count, ppos, buffer, len); 2200 } 2201 2202 put_task_struct(task); 2203 2204 return ret; 2205 } 2206 2207 static ssize_t proc_coredump_filter_write(struct file *file, 2208 const char __user *buf, 2209 size_t count, 2210 loff_t *ppos) 2211 { 2212 struct task_struct *task; 2213 struct mm_struct *mm; 2214 char buffer[PROC_NUMBUF], *end; 2215 unsigned int val; 2216 int ret; 2217 int i; 2218 unsigned long mask; 2219 2220 ret = -EFAULT; 2221 memset(buffer, 0, sizeof(buffer)); 2222 if (count > 
sizeof(buffer) - 1) 2223 count = sizeof(buffer) - 1; 2224 if (copy_from_user(buffer, buf, count)) 2225 goto out_no_task; 2226 2227 ret = -EINVAL; 2228 val = (unsigned int)simple_strtoul(buffer, &end, 0); 2229 if (*end == '\n') 2230 end++; 2231 if (end - buffer == 0) 2232 goto out_no_task; 2233 2234 ret = -ESRCH; 2235 task = get_proc_task(file->f_dentry->d_inode); 2236 if (!task) 2237 goto out_no_task; 2238 2239 ret = end - buffer; 2240 mm = get_task_mm(task); 2241 if (!mm) 2242 goto out_no_mm; 2243 2244 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { 2245 if (val & mask) 2246 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2247 else 2248 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2249 } 2250 2251 mmput(mm); 2252 out_no_mm: 2253 put_task_struct(task); 2254 out_no_task: 2255 return ret; 2256 } 2257 2258 static const struct file_operations proc_coredump_filter_operations = { 2259 .read = proc_coredump_filter_read, 2260 .write = proc_coredump_filter_write, 2261 }; 2262 #endif 2263 2264 /* 2265 * /proc/self: 2266 */ 2267 static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 2268 int buflen) 2269 { 2270 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 2271 pid_t tgid = task_tgid_nr_ns(current, ns); 2272 char tmp[PROC_NUMBUF]; 2273 if (!tgid) 2274 return -ENOENT; 2275 sprintf(tmp, "%d", tgid); 2276 return vfs_readlink(dentry,buffer,buflen,tmp); 2277 } 2278 2279 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 2280 { 2281 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 2282 pid_t tgid = task_tgid_nr_ns(current, ns); 2283 char tmp[PROC_NUMBUF]; 2284 if (!tgid) 2285 return ERR_PTR(-ENOENT); 2286 sprintf(tmp, "%d", task_tgid_nr_ns(current, ns)); 2287 return ERR_PTR(vfs_follow_link(nd,tmp)); 2288 } 2289 2290 static const struct inode_operations proc_self_inode_operations = { 2291 .readlink = proc_self_readlink, 2292 .follow_link = proc_self_follow_link, 2293 }; 2294 2295 /* 2296 * proc base 
2297 * 2298 * These are the directory entries in the root directory of /proc 2299 * that properly belong to the /proc filesystem, as they describe 2300 * describe something that is process related. 2301 */ 2302 static const struct pid_entry proc_base_stuff[] = { 2303 NOD("self", S_IFLNK|S_IRWXUGO, 2304 &proc_self_inode_operations, NULL, {}), 2305 }; 2306 2307 /* 2308 * Exceptional case: normally we are not allowed to unhash a busy 2309 * directory. In this case, however, we can do it - no aliasing problems 2310 * due to the way we treat inodes. 2311 */ 2312 static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd) 2313 { 2314 struct inode *inode = dentry->d_inode; 2315 struct task_struct *task = get_proc_task(inode); 2316 if (task) { 2317 put_task_struct(task); 2318 return 1; 2319 } 2320 d_drop(dentry); 2321 return 0; 2322 } 2323 2324 static const struct dentry_operations proc_base_dentry_operations = 2325 { 2326 .d_revalidate = proc_base_revalidate, 2327 .d_delete = pid_delete_dentry, 2328 }; 2329 2330 static struct dentry *proc_base_instantiate(struct inode *dir, 2331 struct dentry *dentry, struct task_struct *task, const void *ptr) 2332 { 2333 const struct pid_entry *p = ptr; 2334 struct inode *inode; 2335 struct proc_inode *ei; 2336 struct dentry *error = ERR_PTR(-EINVAL); 2337 2338 /* Allocate the inode */ 2339 error = ERR_PTR(-ENOMEM); 2340 inode = new_inode(dir->i_sb); 2341 if (!inode) 2342 goto out; 2343 2344 /* Initialize the inode */ 2345 ei = PROC_I(inode); 2346 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 2347 2348 /* 2349 * grab the reference to the task. 
2350 */ 2351 ei->pid = get_task_pid(task, PIDTYPE_PID); 2352 if (!ei->pid) 2353 goto out_iput; 2354 2355 inode->i_mode = p->mode; 2356 if (S_ISDIR(inode->i_mode)) 2357 inode->i_nlink = 2; 2358 if (S_ISLNK(inode->i_mode)) 2359 inode->i_size = 64; 2360 if (p->iop) 2361 inode->i_op = p->iop; 2362 if (p->fop) 2363 inode->i_fop = p->fop; 2364 ei->op = p->op; 2365 dentry->d_op = &proc_base_dentry_operations; 2366 d_add(dentry, inode); 2367 error = NULL; 2368 out: 2369 return error; 2370 out_iput: 2371 iput(inode); 2372 goto out; 2373 } 2374 2375 static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) 2376 { 2377 struct dentry *error; 2378 struct task_struct *task = get_proc_task(dir); 2379 const struct pid_entry *p, *last; 2380 2381 error = ERR_PTR(-ENOENT); 2382 2383 if (!task) 2384 goto out_no_task; 2385 2386 /* Lookup the directory entry */ 2387 last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; 2388 for (p = proc_base_stuff; p <= last; p++) { 2389 if (p->len != dentry->d_name.len) 2390 continue; 2391 if (!memcmp(dentry->d_name.name, p->name, p->len)) 2392 break; 2393 } 2394 if (p > last) 2395 goto out; 2396 2397 error = proc_base_instantiate(dir, dentry, task, p); 2398 2399 out: 2400 put_task_struct(task); 2401 out_no_task: 2402 return error; 2403 } 2404 2405 static int proc_base_fill_cache(struct file *filp, void *dirent, 2406 filldir_t filldir, struct task_struct *task, const struct pid_entry *p) 2407 { 2408 return proc_fill_cache(filp, dirent, filldir, p->name, p->len, 2409 proc_base_instantiate, task, p); 2410 } 2411 2412 #ifdef CONFIG_TASK_IO_ACCOUNTING 2413 static int do_io_accounting(struct task_struct *task, char *buffer, int whole) 2414 { 2415 struct task_io_accounting acct = task->ioac; 2416 unsigned long flags; 2417 2418 if (whole && lock_task_sighand(task, &flags)) { 2419 struct task_struct *t = task; 2420 2421 task_io_accounting_add(&acct, &task->signal->ioac); 2422 while_each_thread(task, t) 2423 
task_io_accounting_add(&acct, &t->ioac); 2424 2425 unlock_task_sighand(task, &flags); 2426 } 2427 return sprintf(buffer, 2428 "rchar: %llu\n" 2429 "wchar: %llu\n" 2430 "syscr: %llu\n" 2431 "syscw: %llu\n" 2432 "read_bytes: %llu\n" 2433 "write_bytes: %llu\n" 2434 "cancelled_write_bytes: %llu\n", 2435 (unsigned long long)acct.rchar, 2436 (unsigned long long)acct.wchar, 2437 (unsigned long long)acct.syscr, 2438 (unsigned long long)acct.syscw, 2439 (unsigned long long)acct.read_bytes, 2440 (unsigned long long)acct.write_bytes, 2441 (unsigned long long)acct.cancelled_write_bytes); 2442 } 2443 2444 static int proc_tid_io_accounting(struct task_struct *task, char *buffer) 2445 { 2446 return do_io_accounting(task, buffer, 0); 2447 } 2448 2449 static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) 2450 { 2451 return do_io_accounting(task, buffer, 1); 2452 } 2453 #endif /* CONFIG_TASK_IO_ACCOUNTING */ 2454 2455 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2456 struct pid *pid, struct task_struct *task) 2457 { 2458 seq_printf(m, "%08x\n", task->personality); 2459 return 0; 2460 } 2461 2462 /* 2463 * Thread groups 2464 */ 2465 static const struct file_operations proc_task_operations; 2466 static const struct inode_operations proc_task_inode_operations; 2467 2468 static const struct pid_entry tgid_base_stuff[] = { 2469 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2470 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2471 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2472 #ifdef CONFIG_NET 2473 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2474 #endif 2475 REG("environ", S_IRUSR, proc_environ_operations), 2476 INF("auxv", S_IRUSR, proc_pid_auxv), 2477 ONE("status", S_IRUGO, proc_pid_status), 2478 ONE("personality", S_IRUSR, proc_pid_personality), 2479 INF("limits", S_IRUSR, proc_pid_limits), 2480 
#ifdef CONFIG_SCHED_DEBUG 2481 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2482 #endif 2483 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2484 INF("syscall", S_IRUSR, proc_pid_syscall), 2485 #endif 2486 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2487 ONE("stat", S_IRUGO, proc_tgid_stat), 2488 ONE("statm", S_IRUGO, proc_pid_statm), 2489 REG("maps", S_IRUGO, proc_maps_operations), 2490 #ifdef CONFIG_NUMA 2491 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 2492 #endif 2493 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2494 LNK("cwd", proc_cwd_link), 2495 LNK("root", proc_root_link), 2496 LNK("exe", proc_exe_link), 2497 REG("mounts", S_IRUGO, proc_mounts_operations), 2498 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 2499 REG("mountstats", S_IRUSR, proc_mountstats_operations), 2500 #ifdef CONFIG_PROC_PAGE_MONITOR 2501 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2502 REG("smaps", S_IRUGO, proc_smaps_operations), 2503 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2504 #endif 2505 #ifdef CONFIG_SECURITY 2506 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2507 #endif 2508 #ifdef CONFIG_KALLSYMS 2509 INF("wchan", S_IRUGO, proc_pid_wchan), 2510 #endif 2511 #ifdef CONFIG_STACKTRACE 2512 ONE("stack", S_IRUSR, proc_pid_stack), 2513 #endif 2514 #ifdef CONFIG_SCHEDSTATS 2515 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2516 #endif 2517 #ifdef CONFIG_LATENCYTOP 2518 REG("latency", S_IRUGO, proc_lstats_operations), 2519 #endif 2520 #ifdef CONFIG_PROC_PID_CPUSET 2521 REG("cpuset", S_IRUGO, proc_cpuset_operations), 2522 #endif 2523 #ifdef CONFIG_CGROUPS 2524 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2525 #endif 2526 INF("oom_score", S_IRUGO, proc_oom_score), 2527 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 2528 #ifdef CONFIG_AUDITSYSCALL 2529 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2530 REG("sessionid", S_IRUGO, proc_sessionid_operations), 2531 #endif 2532 #ifdef 
CONFIG_FAULT_INJECTION 2533 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2534 #endif 2535 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 2536 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2537 #endif 2538 #ifdef CONFIG_TASK_IO_ACCOUNTING 2539 INF("io", S_IRUGO, proc_tgid_io_accounting), 2540 #endif 2541 }; 2542 2543 static int proc_tgid_base_readdir(struct file * filp, 2544 void * dirent, filldir_t filldir) 2545 { 2546 return proc_pident_readdir(filp,dirent,filldir, 2547 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); 2548 } 2549 2550 static const struct file_operations proc_tgid_base_operations = { 2551 .read = generic_read_dir, 2552 .readdir = proc_tgid_base_readdir, 2553 }; 2554 2555 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 2556 return proc_pident_lookup(dir, dentry, 2557 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 2558 } 2559 2560 static const struct inode_operations proc_tgid_base_inode_operations = { 2561 .lookup = proc_tgid_base_lookup, 2562 .getattr = pid_getattr, 2563 .setattr = proc_setattr, 2564 }; 2565 2566 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 2567 { 2568 struct dentry *dentry, *leader, *dir; 2569 char buf[PROC_NUMBUF]; 2570 struct qstr name; 2571 2572 name.name = buf; 2573 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2574 dentry = d_hash_and_lookup(mnt->mnt_root, &name); 2575 if (dentry) { 2576 if (!(current->flags & PF_EXITING)) 2577 shrink_dcache_parent(dentry); 2578 d_drop(dentry); 2579 dput(dentry); 2580 } 2581 2582 if (tgid == 0) 2583 goto out; 2584 2585 name.name = buf; 2586 name.len = snprintf(buf, sizeof(buf), "%d", tgid); 2587 leader = d_hash_and_lookup(mnt->mnt_root, &name); 2588 if (!leader) 2589 goto out; 2590 2591 name.name = "task"; 2592 name.len = strlen(name.name); 2593 dir = d_hash_and_lookup(leader, &name); 2594 if (!dir) 2595 goto out_put_leader; 2596 2597 
name.name = buf; 2598 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2599 dentry = d_hash_and_lookup(dir, &name); 2600 if (dentry) { 2601 shrink_dcache_parent(dentry); 2602 d_drop(dentry); 2603 dput(dentry); 2604 } 2605 2606 dput(dir); 2607 out_put_leader: 2608 dput(leader); 2609 out: 2610 return; 2611 } 2612 2613 /** 2614 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 2615 * @task: task that should be flushed. 2616 * 2617 * When flushing dentries from proc, one needs to flush them from global 2618 * proc (proc_mnt) and from all the namespaces' procs this task was seen 2619 * in. This call is supposed to do all of this job. 2620 * 2621 * Looks in the dcache for 2622 * /proc/@pid 2623 * /proc/@tgid/task/@pid 2624 * if either directory is present flushes it and all of it'ts children 2625 * from the dcache. 2626 * 2627 * It is safe and reasonable to cache /proc entries for a task until 2628 * that task exits. After that they just clog up the dcache with 2629 * useless entries, possibly causing useful dcache entries to be 2630 * flushed instead. This routine is proved to flush those useless 2631 * dcache entries at process exit time. 2632 * 2633 * NOTE: This routine is just an optimization so it does not guarantee 2634 * that no dcache entries will exist at process exit time it 2635 * just makes it very unlikely that any will persist. 2636 */ 2637 2638 void proc_flush_task(struct task_struct *task) 2639 { 2640 int i; 2641 struct pid *pid, *tgid = NULL; 2642 struct upid *upid; 2643 2644 pid = task_pid(task); 2645 if (thread_group_leader(task)) 2646 tgid = task_tgid(task); 2647 2648 for (i = 0; i <= pid->level; i++) { 2649 upid = &pid->numbers[i]; 2650 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 2651 tgid ? 
tgid->numbers[i].nr : 0); 2652 } 2653 2654 upid = &pid->numbers[pid->level]; 2655 if (upid->nr == 1) 2656 pid_ns_release_proc(upid->ns); 2657 } 2658 2659 static struct dentry *proc_pid_instantiate(struct inode *dir, 2660 struct dentry * dentry, 2661 struct task_struct *task, const void *ptr) 2662 { 2663 struct dentry *error = ERR_PTR(-ENOENT); 2664 struct inode *inode; 2665 2666 inode = proc_pid_make_inode(dir->i_sb, task); 2667 if (!inode) 2668 goto out; 2669 2670 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2671 inode->i_op = &proc_tgid_base_inode_operations; 2672 inode->i_fop = &proc_tgid_base_operations; 2673 inode->i_flags|=S_IMMUTABLE; 2674 2675 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 2676 ARRAY_SIZE(tgid_base_stuff)); 2677 2678 dentry->d_op = &pid_dentry_operations; 2679 2680 d_add(dentry, inode); 2681 /* Close the race of the process dying before we return the dentry */ 2682 if (pid_revalidate(dentry, NULL)) 2683 error = NULL; 2684 out: 2685 return error; 2686 } 2687 2688 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2689 { 2690 struct dentry *result = ERR_PTR(-ENOENT); 2691 struct task_struct *task; 2692 unsigned tgid; 2693 struct pid_namespace *ns; 2694 2695 result = proc_base_lookup(dir, dentry); 2696 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) 2697 goto out; 2698 2699 tgid = name_to_int(dentry); 2700 if (tgid == ~0U) 2701 goto out; 2702 2703 ns = dentry->d_sb->s_fs_info; 2704 rcu_read_lock(); 2705 task = find_task_by_pid_ns(tgid, ns); 2706 if (task) 2707 get_task_struct(task); 2708 rcu_read_unlock(); 2709 if (!task) 2710 goto out; 2711 2712 result = proc_pid_instantiate(dir, dentry, task, NULL); 2713 put_task_struct(task); 2714 out: 2715 return result; 2716 } 2717 2718 /* 2719 * Find the first task with tgid >= tgid 2720 * 2721 */ 2722 struct tgid_iter { 2723 unsigned int tgid; 2724 struct task_struct *task; 2725 }; 2726 static struct tgid_iter next_tgid(struct pid_namespace *ns, 
struct tgid_iter iter) 2727 { 2728 struct pid *pid; 2729 2730 if (iter.task) 2731 put_task_struct(iter.task); 2732 rcu_read_lock(); 2733 retry: 2734 iter.task = NULL; 2735 pid = find_ge_pid(iter.tgid, ns); 2736 if (pid) { 2737 iter.tgid = pid_nr_ns(pid, ns); 2738 iter.task = pid_task(pid, PIDTYPE_PID); 2739 /* What we to know is if the pid we have find is the 2740 * pid of a thread_group_leader. Testing for task 2741 * being a thread_group_leader is the obvious thing 2742 * todo but there is a window when it fails, due to 2743 * the pid transfer logic in de_thread. 2744 * 2745 * So we perform the straight forward test of seeing 2746 * if the pid we have found is the pid of a thread 2747 * group leader, and don't worry if the task we have 2748 * found doesn't happen to be a thread group leader. 2749 * As we don't care in the case of readdir. 2750 */ 2751 if (!iter.task || !has_group_leader_pid(iter.task)) { 2752 iter.tgid += 1; 2753 goto retry; 2754 } 2755 get_task_struct(iter.task); 2756 } 2757 rcu_read_unlock(); 2758 return iter; 2759 } 2760 2761 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) 2762 2763 static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 2764 struct tgid_iter iter) 2765 { 2766 char name[PROC_NUMBUF]; 2767 int len = snprintf(name, sizeof(name), "%d", iter.tgid); 2768 return proc_fill_cache(filp, dirent, filldir, name, len, 2769 proc_pid_instantiate, iter.task, NULL); 2770 } 2771 2772 /* for the /proc/ directory itself, after non-process stuff has been done */ 2773 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2774 { 2775 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 2776 struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); 2777 struct tgid_iter iter; 2778 struct pid_namespace *ns; 2779 2780 if (!reaper) 2781 goto out_no_task; 2782 2783 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { 2784 const struct pid_entry *p = 
&proc_base_stuff[nr]; 2785 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) 2786 goto out; 2787 } 2788 2789 ns = filp->f_dentry->d_sb->s_fs_info; 2790 iter.task = NULL; 2791 iter.tgid = filp->f_pos - TGID_OFFSET; 2792 for (iter = next_tgid(ns, iter); 2793 iter.task; 2794 iter.tgid += 1, iter = next_tgid(ns, iter)) { 2795 filp->f_pos = iter.tgid + TGID_OFFSET; 2796 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { 2797 put_task_struct(iter.task); 2798 goto out; 2799 } 2800 } 2801 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; 2802 out: 2803 put_task_struct(reaper); 2804 out_no_task: 2805 return 0; 2806 } 2807 2808 /* 2809 * Tasks 2810 */ 2811 static const struct pid_entry tid_base_stuff[] = { 2812 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2813 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations), 2814 REG("environ", S_IRUSR, proc_environ_operations), 2815 INF("auxv", S_IRUSR, proc_pid_auxv), 2816 ONE("status", S_IRUGO, proc_pid_status), 2817 ONE("personality", S_IRUSR, proc_pid_personality), 2818 INF("limits", S_IRUSR, proc_pid_limits), 2819 #ifdef CONFIG_SCHED_DEBUG 2820 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2821 #endif 2822 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2823 INF("syscall", S_IRUSR, proc_pid_syscall), 2824 #endif 2825 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2826 ONE("stat", S_IRUGO, proc_tid_stat), 2827 ONE("statm", S_IRUGO, proc_pid_statm), 2828 REG("maps", S_IRUGO, proc_maps_operations), 2829 #ifdef CONFIG_NUMA 2830 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 2831 #endif 2832 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2833 LNK("cwd", proc_cwd_link), 2834 LNK("root", proc_root_link), 2835 LNK("exe", proc_exe_link), 2836 REG("mounts", S_IRUGO, proc_mounts_operations), 2837 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 2838 #ifdef CONFIG_PROC_PAGE_MONITOR 2839 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2840 REG("smaps", 
S_IRUGO, proc_smaps_operations), 2841 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2842 #endif 2843 #ifdef CONFIG_SECURITY 2844 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2845 #endif 2846 #ifdef CONFIG_KALLSYMS 2847 INF("wchan", S_IRUGO, proc_pid_wchan), 2848 #endif 2849 #ifdef CONFIG_STACKTRACE 2850 ONE("stack", S_IRUSR, proc_pid_stack), 2851 #endif 2852 #ifdef CONFIG_SCHEDSTATS 2853 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2854 #endif 2855 #ifdef CONFIG_LATENCYTOP 2856 REG("latency", S_IRUGO, proc_lstats_operations), 2857 #endif 2858 #ifdef CONFIG_PROC_PID_CPUSET 2859 REG("cpuset", S_IRUGO, proc_cpuset_operations), 2860 #endif 2861 #ifdef CONFIG_CGROUPS 2862 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2863 #endif 2864 INF("oom_score", S_IRUGO, proc_oom_score), 2865 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 2866 #ifdef CONFIG_AUDITSYSCALL 2867 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2868 REG("sessionid", S_IRUSR, proc_sessionid_operations), 2869 #endif 2870 #ifdef CONFIG_FAULT_INJECTION 2871 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2872 #endif 2873 #ifdef CONFIG_TASK_IO_ACCOUNTING 2874 INF("io", S_IRUGO, proc_tid_io_accounting), 2875 #endif 2876 }; 2877 2878 static int proc_tid_base_readdir(struct file * filp, 2879 void * dirent, filldir_t filldir) 2880 { 2881 return proc_pident_readdir(filp,dirent,filldir, 2882 tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); 2883 } 2884 2885 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 2886 return proc_pident_lookup(dir, dentry, 2887 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 2888 } 2889 2890 static const struct file_operations proc_tid_base_operations = { 2891 .read = generic_read_dir, 2892 .readdir = proc_tid_base_readdir, 2893 }; 2894 2895 static const struct inode_operations proc_tid_base_inode_operations = { 2896 .lookup = 
proc_tid_base_lookup, 2897 .getattr = pid_getattr, 2898 .setattr = proc_setattr, 2899 }; 2900 2901 static struct dentry *proc_task_instantiate(struct inode *dir, 2902 struct dentry *dentry, struct task_struct *task, const void *ptr) 2903 { 2904 struct dentry *error = ERR_PTR(-ENOENT); 2905 struct inode *inode; 2906 inode = proc_pid_make_inode(dir->i_sb, task); 2907 2908 if (!inode) 2909 goto out; 2910 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2911 inode->i_op = &proc_tid_base_inode_operations; 2912 inode->i_fop = &proc_tid_base_operations; 2913 inode->i_flags|=S_IMMUTABLE; 2914 2915 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 2916 ARRAY_SIZE(tid_base_stuff)); 2917 2918 dentry->d_op = &pid_dentry_operations; 2919 2920 d_add(dentry, inode); 2921 /* Close the race of the process dying before we return the dentry */ 2922 if (pid_revalidate(dentry, NULL)) 2923 error = NULL; 2924 out: 2925 return error; 2926 } 2927 2928 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2929 { 2930 struct dentry *result = ERR_PTR(-ENOENT); 2931 struct task_struct *task; 2932 struct task_struct *leader = get_proc_task(dir); 2933 unsigned tid; 2934 struct pid_namespace *ns; 2935 2936 if (!leader) 2937 goto out_no_task; 2938 2939 tid = name_to_int(dentry); 2940 if (tid == ~0U) 2941 goto out; 2942 2943 ns = dentry->d_sb->s_fs_info; 2944 rcu_read_lock(); 2945 task = find_task_by_pid_ns(tid, ns); 2946 if (task) 2947 get_task_struct(task); 2948 rcu_read_unlock(); 2949 if (!task) 2950 goto out; 2951 if (!same_thread_group(leader, task)) 2952 goto out_drop_task; 2953 2954 result = proc_task_instantiate(dir, dentry, task, NULL); 2955 out_drop_task: 2956 put_task_struct(task); 2957 out: 2958 put_task_struct(leader); 2959 out_no_task: 2960 return result; 2961 } 2962 2963 /* 2964 * Find the first tid of a thread group to return to user space. 
2965 * 2966 * Usually this is just the thread group leader, but if the users 2967 * buffer was too small or there was a seek into the middle of the 2968 * directory we have more work todo. 2969 * 2970 * In the case of a short read we start with find_task_by_pid. 2971 * 2972 * In the case of a seek we start with the leader and walk nr 2973 * threads past it. 2974 */ 2975 static struct task_struct *first_tid(struct task_struct *leader, 2976 int tid, int nr, struct pid_namespace *ns) 2977 { 2978 struct task_struct *pos; 2979 2980 rcu_read_lock(); 2981 /* Attempt to start with the pid of a thread */ 2982 if (tid && (nr > 0)) { 2983 pos = find_task_by_pid_ns(tid, ns); 2984 if (pos && (pos->group_leader == leader)) 2985 goto found; 2986 } 2987 2988 /* If nr exceeds the number of threads there is nothing todo */ 2989 pos = NULL; 2990 if (nr && nr >= get_nr_threads(leader)) 2991 goto out; 2992 2993 /* If we haven't found our starting place yet start 2994 * with the leader and walk nr threads forward. 2995 */ 2996 for (pos = leader; nr > 0; --nr) { 2997 pos = next_thread(pos); 2998 if (pos == leader) { 2999 pos = NULL; 3000 goto out; 3001 } 3002 } 3003 found: 3004 get_task_struct(pos); 3005 out: 3006 rcu_read_unlock(); 3007 return pos; 3008 } 3009 3010 /* 3011 * Find the next thread in the thread list. 3012 * Return NULL if there is an error or no next thread. 3013 * 3014 * The reference to the input task_struct is released. 
3015 */ 3016 static struct task_struct *next_tid(struct task_struct *start) 3017 { 3018 struct task_struct *pos = NULL; 3019 rcu_read_lock(); 3020 if (pid_alive(start)) { 3021 pos = next_thread(start); 3022 if (thread_group_leader(pos)) 3023 pos = NULL; 3024 else 3025 get_task_struct(pos); 3026 } 3027 rcu_read_unlock(); 3028 put_task_struct(start); 3029 return pos; 3030 } 3031 3032 static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 3033 struct task_struct *task, int tid) 3034 { 3035 char name[PROC_NUMBUF]; 3036 int len = snprintf(name, sizeof(name), "%d", tid); 3037 return proc_fill_cache(filp, dirent, filldir, name, len, 3038 proc_task_instantiate, task, NULL); 3039 } 3040 3041 /* for the /proc/TGID/task/ directories */ 3042 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 3043 { 3044 struct dentry *dentry = filp->f_path.dentry; 3045 struct inode *inode = dentry->d_inode; 3046 struct task_struct *leader = NULL; 3047 struct task_struct *task; 3048 int retval = -ENOENT; 3049 ino_t ino; 3050 int tid; 3051 struct pid_namespace *ns; 3052 3053 task = get_proc_task(inode); 3054 if (!task) 3055 goto out_no_task; 3056 rcu_read_lock(); 3057 if (pid_alive(task)) { 3058 leader = task->group_leader; 3059 get_task_struct(leader); 3060 } 3061 rcu_read_unlock(); 3062 put_task_struct(task); 3063 if (!leader) 3064 goto out_no_task; 3065 retval = 0; 3066 3067 switch ((unsigned long)filp->f_pos) { 3068 case 0: 3069 ino = inode->i_ino; 3070 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) 3071 goto out; 3072 filp->f_pos++; 3073 /* fall through */ 3074 case 1: 3075 ino = parent_ino(dentry); 3076 if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) 3077 goto out; 3078 filp->f_pos++; 3079 /* fall through */ 3080 } 3081 3082 /* f_version caches the tgid value that the last readdir call couldn't 3083 * return. lseek aka telldir automagically resets f_version to 0. 
3084 */ 3085 ns = filp->f_dentry->d_sb->s_fs_info; 3086 tid = (int)filp->f_version; 3087 filp->f_version = 0; 3088 for (task = first_tid(leader, tid, filp->f_pos - 2, ns); 3089 task; 3090 task = next_tid(task), filp->f_pos++) { 3091 tid = task_pid_nr_ns(task, ns); 3092 if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { 3093 /* returning this tgid failed, save it as the first 3094 * pid for the next readir call */ 3095 filp->f_version = (u64)tid; 3096 put_task_struct(task); 3097 break; 3098 } 3099 } 3100 out: 3101 put_task_struct(leader); 3102 out_no_task: 3103 return retval; 3104 } 3105 3106 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 3107 { 3108 struct inode *inode = dentry->d_inode; 3109 struct task_struct *p = get_proc_task(inode); 3110 generic_fillattr(inode, stat); 3111 3112 if (p) { 3113 stat->nlink += get_nr_threads(p); 3114 put_task_struct(p); 3115 } 3116 3117 return 0; 3118 } 3119 3120 static const struct inode_operations proc_task_inode_operations = { 3121 .lookup = proc_task_lookup, 3122 .getattr = proc_task_getattr, 3123 .setattr = proc_setattr, 3124 }; 3125 3126 static const struct file_operations proc_task_operations = { 3127 .read = generic_read_dir, 3128 .readdir = proc_task_readdir, 3129 }; 3130