1 /* 2 * linux/fs/proc/base.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 * 6 * proc base directory handling functions 7 * 8 * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. 9 * Instead of using magical inumbers to determine the kind of object 10 * we allocate and fill in-core inodes upon lookup. They don't even 11 * go into icache. We cache the reference to task_struct upon lookup too. 12 * Eventually it should become a filesystem in its own. We don't use the 13 * rest of procfs anymore. 14 * 15 * 16 * Changelog: 17 * 17-Jan-2005 18 * Allan Bezerra 19 * Bruna Moreira <bruna.moreira@indt.org.br> 20 * Edjard Mota <edjard.mota@indt.org.br> 21 * Ilias Biris <ilias.biris@indt.org.br> 22 * Mauricio Lin <mauricio.lin@indt.org.br> 23 * 24 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT 25 * 26 * A new process specific entry (smaps) included in /proc. It shows the 27 * size of rss for each memory area. The maps entry lacks information 28 * about physical memory size (rss) for each mapped file, i.e., 29 * rss information for executables and library files. 30 * This additional information is useful for any tools that need to know 31 * about physical memory consumption for a process specific library. 32 * 33 * Changelog: 34 * 21-Feb-2005 35 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT 36 * Pud inclusion in the page table walking. 37 * 38 * ChangeLog: 39 * 10-Mar-2005 40 * 10LE Instituto Nokia de Tecnologia - INdT: 41 * A better way to walks through the page table as suggested by Hugh Dickins. 42 * 43 * Simo Piiroinen <simo.piiroinen@nokia.com>: 44 * Smaps information related to shared, private, clean and dirty pages. 45 * 46 * Paul Mundt <paul.mundt@nokia.com>: 47 * Overall revision about smaps. 
48 */ 49 50 #include <asm/uaccess.h> 51 52 #include <linux/config.h> 53 #include <linux/errno.h> 54 #include <linux/time.h> 55 #include <linux/proc_fs.h> 56 #include <linux/stat.h> 57 #include <linux/init.h> 58 #include <linux/file.h> 59 #include <linux/string.h> 60 #include <linux/seq_file.h> 61 #include <linux/namei.h> 62 #include <linux/namespace.h> 63 #include <linux/mm.h> 64 #include <linux/smp_lock.h> 65 #include <linux/rcupdate.h> 66 #include <linux/kallsyms.h> 67 #include <linux/mount.h> 68 #include <linux/security.h> 69 #include <linux/ptrace.h> 70 #include <linux/seccomp.h> 71 #include <linux/cpuset.h> 72 #include <linux/audit.h> 73 #include <linux/poll.h> 74 #include "internal.h" 75 76 /* 77 * For hysterical raisins we keep the same inumbers as in the old procfs. 78 * Feel free to change the macro below - just keep the range distinct from 79 * inumbers of the rest of procfs (currently those are in 0x0000--0xffff). 80 * As soon as we'll get a separate superblock we will be able to forget 81 * about magical ranges too. 
82 */ 83 84 #define fake_ino(pid,ino) (((pid)<<16)|(ino)) 85 86 enum pid_directory_inos { 87 PROC_TGID_INO = 2, 88 PROC_TGID_TASK, 89 PROC_TGID_STATUS, 90 PROC_TGID_MEM, 91 #ifdef CONFIG_SECCOMP 92 PROC_TGID_SECCOMP, 93 #endif 94 PROC_TGID_CWD, 95 PROC_TGID_ROOT, 96 PROC_TGID_EXE, 97 PROC_TGID_FD, 98 PROC_TGID_ENVIRON, 99 PROC_TGID_AUXV, 100 PROC_TGID_CMDLINE, 101 PROC_TGID_STAT, 102 PROC_TGID_STATM, 103 PROC_TGID_MAPS, 104 PROC_TGID_NUMA_MAPS, 105 PROC_TGID_MOUNTS, 106 PROC_TGID_WCHAN, 107 #ifdef CONFIG_MMU 108 PROC_TGID_SMAPS, 109 #endif 110 #ifdef CONFIG_SCHEDSTATS 111 PROC_TGID_SCHEDSTAT, 112 #endif 113 #ifdef CONFIG_CPUSETS 114 PROC_TGID_CPUSET, 115 #endif 116 #ifdef CONFIG_SECURITY 117 PROC_TGID_ATTR, 118 PROC_TGID_ATTR_CURRENT, 119 PROC_TGID_ATTR_PREV, 120 PROC_TGID_ATTR_EXEC, 121 PROC_TGID_ATTR_FSCREATE, 122 #endif 123 #ifdef CONFIG_AUDITSYSCALL 124 PROC_TGID_LOGINUID, 125 #endif 126 PROC_TGID_OOM_SCORE, 127 PROC_TGID_OOM_ADJUST, 128 PROC_TID_INO, 129 PROC_TID_STATUS, 130 PROC_TID_MEM, 131 #ifdef CONFIG_SECCOMP 132 PROC_TID_SECCOMP, 133 #endif 134 PROC_TID_CWD, 135 PROC_TID_ROOT, 136 PROC_TID_EXE, 137 PROC_TID_FD, 138 PROC_TID_ENVIRON, 139 PROC_TID_AUXV, 140 PROC_TID_CMDLINE, 141 PROC_TID_STAT, 142 PROC_TID_STATM, 143 PROC_TID_MAPS, 144 PROC_TID_NUMA_MAPS, 145 PROC_TID_MOUNTS, 146 PROC_TID_WCHAN, 147 #ifdef CONFIG_MMU 148 PROC_TID_SMAPS, 149 #endif 150 #ifdef CONFIG_SCHEDSTATS 151 PROC_TID_SCHEDSTAT, 152 #endif 153 #ifdef CONFIG_CPUSETS 154 PROC_TID_CPUSET, 155 #endif 156 #ifdef CONFIG_SECURITY 157 PROC_TID_ATTR, 158 PROC_TID_ATTR_CURRENT, 159 PROC_TID_ATTR_PREV, 160 PROC_TID_ATTR_EXEC, 161 PROC_TID_ATTR_FSCREATE, 162 #endif 163 #ifdef CONFIG_AUDITSYSCALL 164 PROC_TID_LOGINUID, 165 #endif 166 PROC_TID_OOM_SCORE, 167 PROC_TID_OOM_ADJUST, 168 169 /* Add new entries before this */ 170 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ 171 }; 172 173 struct pid_entry { 174 int type; 175 int len; 176 char *name; 177 mode_t mode; 178 }; 179 180 #define 
E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} 181 182 static struct pid_entry tgid_base_stuff[] = { 183 E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), 184 E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 185 E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR), 186 E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR), 187 E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO), 188 E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), 189 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), 190 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), 191 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), 192 #ifdef CONFIG_NUMA 193 E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), 194 #endif 195 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 196 #ifdef CONFIG_SECCOMP 197 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 198 #endif 199 E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 200 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), 201 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), 202 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 203 #ifdef CONFIG_MMU 204 E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), 205 #endif 206 #ifdef CONFIG_SECURITY 207 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 208 #endif 209 #ifdef CONFIG_KALLSYMS 210 E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), 211 #endif 212 #ifdef CONFIG_SCHEDSTATS 213 E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), 214 #endif 215 #ifdef CONFIG_CPUSETS 216 E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 217 #endif 218 E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 219 E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 220 #ifdef CONFIG_AUDITSYSCALL 221 E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), 222 #endif 223 {0,0,NULL,0} 224 }; 225 static struct pid_entry tid_base_stuff[] = { 226 E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 227 E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR), 228 E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR), 229 E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO), 230 E(PROC_TID_CMDLINE, 
"cmdline", S_IFREG|S_IRUGO), 231 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), 232 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), 233 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), 234 #ifdef CONFIG_NUMA 235 E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), 236 #endif 237 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 238 #ifdef CONFIG_SECCOMP 239 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 240 #endif 241 E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 242 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), 243 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), 244 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 245 #ifdef CONFIG_MMU 246 E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), 247 #endif 248 #ifdef CONFIG_SECURITY 249 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 250 #endif 251 #ifdef CONFIG_KALLSYMS 252 E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), 253 #endif 254 #ifdef CONFIG_SCHEDSTATS 255 E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), 256 #endif 257 #ifdef CONFIG_CPUSETS 258 E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 259 #endif 260 E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 261 E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 262 #ifdef CONFIG_AUDITSYSCALL 263 E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), 264 #endif 265 {0,0,NULL,0} 266 }; 267 268 #ifdef CONFIG_SECURITY 269 static struct pid_entry tgid_attr_stuff[] = { 270 E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 271 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 272 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 273 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 274 {0,0,NULL,0} 275 }; 276 static struct pid_entry tid_attr_stuff[] = { 277 E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 278 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 279 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 280 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 281 {0,0,NULL,0} 282 }; 283 #endif 284 285 
#undef E 286 287 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 288 { 289 struct task_struct *task = proc_task(inode); 290 struct files_struct *files; 291 struct file *file; 292 int fd = proc_type(inode) - PROC_TID_FD_DIR; 293 294 files = get_files_struct(task); 295 if (files) { 296 rcu_read_lock(); 297 file = fcheck_files(files, fd); 298 if (file) { 299 *mnt = mntget(file->f_vfsmnt); 300 *dentry = dget(file->f_dentry); 301 rcu_read_unlock(); 302 put_files_struct(files); 303 return 0; 304 } 305 rcu_read_unlock(); 306 put_files_struct(files); 307 } 308 return -ENOENT; 309 } 310 311 static struct fs_struct *get_fs_struct(struct task_struct *task) 312 { 313 struct fs_struct *fs; 314 task_lock(task); 315 fs = task->fs; 316 if(fs) 317 atomic_inc(&fs->count); 318 task_unlock(task); 319 return fs; 320 } 321 322 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 323 { 324 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 325 int result = -ENOENT; 326 if (fs) { 327 read_lock(&fs->lock); 328 *mnt = mntget(fs->pwdmnt); 329 *dentry = dget(fs->pwd); 330 read_unlock(&fs->lock); 331 result = 0; 332 put_fs_struct(fs); 333 } 334 return result; 335 } 336 337 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 338 { 339 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 340 int result = -ENOENT; 341 if (fs) { 342 read_lock(&fs->lock); 343 *mnt = mntget(fs->rootmnt); 344 *dentry = dget(fs->root); 345 read_unlock(&fs->lock); 346 result = 0; 347 put_fs_struct(fs); 348 } 349 return result; 350 } 351 352 353 /* Same as proc_root_link, but this addionally tries to get fs from other 354 * threads in the group */ 355 static int proc_task_root_link(struct inode *inode, struct dentry **dentry, 356 struct vfsmount **mnt) 357 { 358 struct fs_struct *fs; 359 int result = -ENOENT; 360 struct task_struct *leader = proc_task(inode); 361 362 task_lock(leader); 
363 fs = leader->fs; 364 if (fs) { 365 atomic_inc(&fs->count); 366 task_unlock(leader); 367 } else { 368 /* Try to get fs from other threads */ 369 task_unlock(leader); 370 read_lock(&tasklist_lock); 371 if (pid_alive(leader)) { 372 struct task_struct *task = leader; 373 374 while ((task = next_thread(task)) != leader) { 375 task_lock(task); 376 fs = task->fs; 377 if (fs) { 378 atomic_inc(&fs->count); 379 task_unlock(task); 380 break; 381 } 382 task_unlock(task); 383 } 384 } 385 read_unlock(&tasklist_lock); 386 } 387 388 if (fs) { 389 read_lock(&fs->lock); 390 *mnt = mntget(fs->rootmnt); 391 *dentry = dget(fs->root); 392 read_unlock(&fs->lock); 393 result = 0; 394 put_fs_struct(fs); 395 } 396 return result; 397 } 398 399 400 #define MAY_PTRACE(task) \ 401 (task == current || \ 402 (task->parent == current && \ 403 (task->ptrace & PT_PTRACED) && \ 404 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ 405 security_ptrace(current,task) == 0)) 406 407 static int proc_pid_environ(struct task_struct *task, char * buffer) 408 { 409 int res = 0; 410 struct mm_struct *mm = get_task_mm(task); 411 if (mm) { 412 unsigned int len = mm->env_end - mm->env_start; 413 if (len > PAGE_SIZE) 414 len = PAGE_SIZE; 415 res = access_process_vm(task, mm->env_start, buffer, len, 0); 416 if (!ptrace_may_attach(task)) 417 res = -ESRCH; 418 mmput(mm); 419 } 420 return res; 421 } 422 423 static int proc_pid_cmdline(struct task_struct *task, char * buffer) 424 { 425 int res = 0; 426 unsigned int len; 427 struct mm_struct *mm = get_task_mm(task); 428 if (!mm) 429 goto out; 430 if (!mm->arg_end) 431 goto out_mm; /* Shh! No looking before we're done */ 432 433 len = mm->arg_end - mm->arg_start; 434 435 if (len > PAGE_SIZE) 436 len = PAGE_SIZE; 437 438 res = access_process_vm(task, mm->arg_start, buffer, len, 0); 439 440 // If the nul at the end of args has been overwritten, then 441 // assume application is using setproctitle(3). 
442 if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { 443 len = strnlen(buffer, res); 444 if (len < res) { 445 res = len; 446 } else { 447 len = mm->env_end - mm->env_start; 448 if (len > PAGE_SIZE - res) 449 len = PAGE_SIZE - res; 450 res += access_process_vm(task, mm->env_start, buffer+res, len, 0); 451 res = strnlen(buffer, res); 452 } 453 } 454 out_mm: 455 mmput(mm); 456 out: 457 return res; 458 } 459 460 static int proc_pid_auxv(struct task_struct *task, char *buffer) 461 { 462 int res = 0; 463 struct mm_struct *mm = get_task_mm(task); 464 if (mm) { 465 unsigned int nwords = 0; 466 do 467 nwords += 2; 468 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 469 res = nwords * sizeof(mm->saved_auxv[0]); 470 if (res > PAGE_SIZE) 471 res = PAGE_SIZE; 472 memcpy(buffer, mm->saved_auxv, res); 473 mmput(mm); 474 } 475 return res; 476 } 477 478 479 #ifdef CONFIG_KALLSYMS 480 /* 481 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 482 * Returns the resolved symbol. If that fails, simply return the address. 
483 */ 484 static int proc_pid_wchan(struct task_struct *task, char *buffer) 485 { 486 char *modname; 487 const char *sym_name; 488 unsigned long wchan, size, offset; 489 char namebuf[KSYM_NAME_LEN+1]; 490 491 wchan = get_wchan(task); 492 493 sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); 494 if (sym_name) 495 return sprintf(buffer, "%s", sym_name); 496 return sprintf(buffer, "%lu", wchan); 497 } 498 #endif /* CONFIG_KALLSYMS */ 499 500 #ifdef CONFIG_SCHEDSTATS 501 /* 502 * Provides /proc/PID/schedstat 503 */ 504 static int proc_pid_schedstat(struct task_struct *task, char *buffer) 505 { 506 return sprintf(buffer, "%lu %lu %lu\n", 507 task->sched_info.cpu_time, 508 task->sched_info.run_delay, 509 task->sched_info.pcnt); 510 } 511 #endif 512 513 /* The badness from the OOM killer */ 514 unsigned long badness(struct task_struct *p, unsigned long uptime); 515 static int proc_oom_score(struct task_struct *task, char *buffer) 516 { 517 unsigned long points; 518 struct timespec uptime; 519 520 do_posix_clock_monotonic_gettime(&uptime); 521 points = badness(task, uptime.tv_sec); 522 return sprintf(buffer, "%lu\n", points); 523 } 524 525 /************************************************************************/ 526 /* Here the fs part begins */ 527 /************************************************************************/ 528 529 /* permission checks */ 530 531 /* If the process being read is separated by chroot from the reading process, 532 * don't let the reader access the threads. 
533 */ 534 static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) 535 { 536 struct dentry *de, *base; 537 struct vfsmount *our_vfsmnt, *mnt; 538 int res = 0; 539 read_lock(¤t->fs->lock); 540 our_vfsmnt = mntget(current->fs->rootmnt); 541 base = dget(current->fs->root); 542 read_unlock(¤t->fs->lock); 543 544 spin_lock(&vfsmount_lock); 545 de = root; 546 mnt = vfsmnt; 547 548 while (vfsmnt != our_vfsmnt) { 549 if (vfsmnt == vfsmnt->mnt_parent) 550 goto out; 551 de = vfsmnt->mnt_mountpoint; 552 vfsmnt = vfsmnt->mnt_parent; 553 } 554 555 if (!is_subdir(de, base)) 556 goto out; 557 spin_unlock(&vfsmount_lock); 558 559 exit: 560 dput(base); 561 mntput(our_vfsmnt); 562 dput(root); 563 mntput(mnt); 564 return res; 565 out: 566 spin_unlock(&vfsmount_lock); 567 res = -EACCES; 568 goto exit; 569 } 570 571 static int proc_check_root(struct inode *inode) 572 { 573 struct dentry *root; 574 struct vfsmount *vfsmnt; 575 576 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ 577 return -ENOENT; 578 return proc_check_chroot(root, vfsmnt); 579 } 580 581 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) 582 { 583 if (generic_permission(inode, mask, NULL) != 0) 584 return -EACCES; 585 return proc_check_root(inode); 586 } 587 588 static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) 589 { 590 struct dentry *root; 591 struct vfsmount *vfsmnt; 592 593 if (generic_permission(inode, mask, NULL) != 0) 594 return -EACCES; 595 596 if (proc_task_root_link(inode, &root, &vfsmnt)) 597 return -ENOENT; 598 599 return proc_check_chroot(root, vfsmnt); 600 } 601 602 extern struct seq_operations proc_pid_maps_op; 603 static int maps_open(struct inode *inode, struct file *file) 604 { 605 struct task_struct *task = proc_task(inode); 606 int ret = seq_open(file, &proc_pid_maps_op); 607 if (!ret) { 608 struct seq_file *m = file->private_data; 609 m->private = task; 610 } 611 return ret; 612 } 613 614 static struct 
file_operations proc_maps_operations = { 615 .open = maps_open, 616 .read = seq_read, 617 .llseek = seq_lseek, 618 .release = seq_release, 619 }; 620 621 #ifdef CONFIG_NUMA 622 extern struct seq_operations proc_pid_numa_maps_op; 623 static int numa_maps_open(struct inode *inode, struct file *file) 624 { 625 struct task_struct *task = proc_task(inode); 626 int ret = seq_open(file, &proc_pid_numa_maps_op); 627 if (!ret) { 628 struct seq_file *m = file->private_data; 629 m->private = task; 630 } 631 return ret; 632 } 633 634 static struct file_operations proc_numa_maps_operations = { 635 .open = numa_maps_open, 636 .read = seq_read, 637 .llseek = seq_lseek, 638 .release = seq_release, 639 }; 640 #endif 641 642 #ifdef CONFIG_MMU 643 extern struct seq_operations proc_pid_smaps_op; 644 static int smaps_open(struct inode *inode, struct file *file) 645 { 646 struct task_struct *task = proc_task(inode); 647 int ret = seq_open(file, &proc_pid_smaps_op); 648 if (!ret) { 649 struct seq_file *m = file->private_data; 650 m->private = task; 651 } 652 return ret; 653 } 654 655 static struct file_operations proc_smaps_operations = { 656 .open = smaps_open, 657 .read = seq_read, 658 .llseek = seq_lseek, 659 .release = seq_release, 660 }; 661 #endif 662 663 extern struct seq_operations mounts_op; 664 struct proc_mounts { 665 struct seq_file m; 666 int event; 667 }; 668 669 static int mounts_open(struct inode *inode, struct file *file) 670 { 671 struct task_struct *task = proc_task(inode); 672 struct namespace *namespace; 673 struct proc_mounts *p; 674 int ret = -EINVAL; 675 676 task_lock(task); 677 namespace = task->namespace; 678 if (namespace) 679 get_namespace(namespace); 680 task_unlock(task); 681 682 if (namespace) { 683 ret = -ENOMEM; 684 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); 685 if (p) { 686 file->private_data = &p->m; 687 ret = seq_open(file, &mounts_op); 688 if (!ret) { 689 p->m.private = namespace; 690 p->event = namespace->event; 691 return 0; 692 } 693 
kfree(p); 694 } 695 put_namespace(namespace); 696 } 697 return ret; 698 } 699 700 static int mounts_release(struct inode *inode, struct file *file) 701 { 702 struct seq_file *m = file->private_data; 703 struct namespace *namespace = m->private; 704 put_namespace(namespace); 705 return seq_release(inode, file); 706 } 707 708 static unsigned mounts_poll(struct file *file, poll_table *wait) 709 { 710 struct proc_mounts *p = file->private_data; 711 struct namespace *ns = p->m.private; 712 unsigned res = 0; 713 714 poll_wait(file, &ns->poll, wait); 715 716 spin_lock(&vfsmount_lock); 717 if (p->event != ns->event) { 718 p->event = ns->event; 719 res = POLLERR; 720 } 721 spin_unlock(&vfsmount_lock); 722 723 return res; 724 } 725 726 static struct file_operations proc_mounts_operations = { 727 .open = mounts_open, 728 .read = seq_read, 729 .llseek = seq_lseek, 730 .release = mounts_release, 731 .poll = mounts_poll, 732 }; 733 734 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 735 736 static ssize_t proc_info_read(struct file * file, char __user * buf, 737 size_t count, loff_t *ppos) 738 { 739 struct inode * inode = file->f_dentry->d_inode; 740 unsigned long page; 741 ssize_t length; 742 struct task_struct *task = proc_task(inode); 743 744 if (count > PROC_BLOCK_SIZE) 745 count = PROC_BLOCK_SIZE; 746 if (!(page = __get_free_page(GFP_KERNEL))) 747 return -ENOMEM; 748 749 length = PROC_I(inode)->op.proc_read(task, (char*)page); 750 751 if (length >= 0) 752 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 753 free_page(page); 754 return length; 755 } 756 757 static struct file_operations proc_info_file_operations = { 758 .read = proc_info_read, 759 }; 760 761 static int mem_open(struct inode* inode, struct file* file) 762 { 763 file->private_data = (void*)((long)current->self_exec_id); 764 return 0; 765 } 766 767 static ssize_t mem_read(struct file * file, char __user * buf, 768 size_t count, 
loff_t *ppos) 769 { 770 struct task_struct *task = proc_task(file->f_dentry->d_inode); 771 char *page; 772 unsigned long src = *ppos; 773 int ret = -ESRCH; 774 struct mm_struct *mm; 775 776 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 777 goto out; 778 779 ret = -ENOMEM; 780 page = (char *)__get_free_page(GFP_USER); 781 if (!page) 782 goto out; 783 784 ret = 0; 785 786 mm = get_task_mm(task); 787 if (!mm) 788 goto out_free; 789 790 ret = -EIO; 791 792 if (file->private_data != (void*)((long)current->self_exec_id)) 793 goto out_put; 794 795 ret = 0; 796 797 while (count > 0) { 798 int this_len, retval; 799 800 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 801 retval = access_process_vm(task, src, page, this_len, 0); 802 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { 803 if (!ret) 804 ret = -EIO; 805 break; 806 } 807 808 if (copy_to_user(buf, page, retval)) { 809 ret = -EFAULT; 810 break; 811 } 812 813 ret += retval; 814 src += retval; 815 buf += retval; 816 count -= retval; 817 } 818 *ppos = src; 819 820 out_put: 821 mmput(mm); 822 out_free: 823 free_page((unsigned long) page); 824 out: 825 return ret; 826 } 827 828 #define mem_write NULL 829 830 #ifndef mem_write 831 /* This is a security hazard */ 832 static ssize_t mem_write(struct file * file, const char * buf, 833 size_t count, loff_t *ppos) 834 { 835 int copied = 0; 836 char *page; 837 struct task_struct *task = proc_task(file->f_dentry->d_inode); 838 unsigned long dst = *ppos; 839 840 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 841 return -ESRCH; 842 843 page = (char *)__get_free_page(GFP_USER); 844 if (!page) 845 return -ENOMEM; 846 847 while (count > 0) { 848 int this_len, retval; 849 850 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 851 if (copy_from_user(page, buf, this_len)) { 852 copied = -EFAULT; 853 break; 854 } 855 retval = access_process_vm(task, dst, page, this_len, 1); 856 if (!retval) { 857 if (!copied) 858 copied = -EIO; 859 break; 860 } 861 copied += retval; 862 buf += retval; 863 dst += retval; 864 count -= retval; 865 } 866 *ppos = dst; 867 free_page((unsigned long) page); 868 return copied; 869 } 870 #endif 871 872 static loff_t mem_lseek(struct file * file, loff_t offset, int orig) 873 { 874 switch (orig) { 875 case 0: 876 file->f_pos = offset; 877 break; 878 case 1: 879 file->f_pos += offset; 880 break; 881 default: 882 return -EINVAL; 883 } 884 force_successful_syscall_return(); 885 return file->f_pos; 886 } 887 888 static struct file_operations proc_mem_operations = { 889 .llseek = mem_lseek, 890 .read = mem_read, 891 .write = mem_write, 892 .open = mem_open, 893 }; 894 895 static ssize_t oom_adjust_read(struct file *file, char __user *buf, 896 size_t count, loff_t *ppos) 897 { 898 struct task_struct *task = proc_task(file->f_dentry->d_inode); 899 char buffer[8]; 900 size_t len; 901 int oom_adjust = task->oomkilladj; 902 loff_t __ppos = *ppos; 903 904 len = sprintf(buffer, "%i\n", oom_adjust); 905 if (__ppos >= len) 906 return 0; 907 if (count > len-__ppos) 908 count = len-__ppos; 909 if (copy_to_user(buf, buffer + __ppos, count)) 910 return -EFAULT; 911 *ppos = __ppos + count; 912 return count; 913 } 914 915 static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 916 size_t count, loff_t *ppos) 917 { 918 struct task_struct *task = proc_task(file->f_dentry->d_inode); 919 char buffer[8], *end; 920 int oom_adjust; 921 922 if (!capable(CAP_SYS_RESOURCE)) 923 return -EPERM; 924 memset(buffer, 0, 8); 925 if (count > 6) 926 count = 6; 927 if (copy_from_user(buffer, buf, count)) 928 return -EFAULT; 929 oom_adjust = simple_strtol(buffer, &end, 0); 930 if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE) 931 return -EINVAL; 932 
if (*end == '\n') 933 end++; 934 task->oomkilladj = oom_adjust; 935 if (end - buffer == 0) 936 return -EIO; 937 return end - buffer; 938 } 939 940 static struct file_operations proc_oom_adjust_operations = { 941 .read = oom_adjust_read, 942 .write = oom_adjust_write, 943 }; 944 945 static struct inode_operations proc_mem_inode_operations = { 946 .permission = proc_permission, 947 }; 948 949 #ifdef CONFIG_AUDITSYSCALL 950 #define TMPBUFLEN 21 951 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 952 size_t count, loff_t *ppos) 953 { 954 struct inode * inode = file->f_dentry->d_inode; 955 struct task_struct *task = proc_task(inode); 956 ssize_t length; 957 char tmpbuf[TMPBUFLEN]; 958 959 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 960 audit_get_loginuid(task->audit_context)); 961 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 962 } 963 964 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 965 size_t count, loff_t *ppos) 966 { 967 struct inode * inode = file->f_dentry->d_inode; 968 char *page, *tmp; 969 ssize_t length; 970 struct task_struct *task = proc_task(inode); 971 uid_t loginuid; 972 973 if (!capable(CAP_AUDIT_CONTROL)) 974 return -EPERM; 975 976 if (current != task) 977 return -EPERM; 978 979 if (count > PAGE_SIZE) 980 count = PAGE_SIZE; 981 982 if (*ppos != 0) { 983 /* No partial writes. 
*/ 984 return -EINVAL; 985 } 986 page = (char*)__get_free_page(GFP_USER); 987 if (!page) 988 return -ENOMEM; 989 length = -EFAULT; 990 if (copy_from_user(page, buf, count)) 991 goto out_free_page; 992 993 loginuid = simple_strtoul(page, &tmp, 10); 994 if (tmp == page) { 995 length = -EINVAL; 996 goto out_free_page; 997 998 } 999 length = audit_set_loginuid(task, loginuid); 1000 if (likely(length == 0)) 1001 length = count; 1002 1003 out_free_page: 1004 free_page((unsigned long) page); 1005 return length; 1006 } 1007 1008 static struct file_operations proc_loginuid_operations = { 1009 .read = proc_loginuid_read, 1010 .write = proc_loginuid_write, 1011 }; 1012 #endif 1013 1014 #ifdef CONFIG_SECCOMP 1015 static ssize_t seccomp_read(struct file *file, char __user *buf, 1016 size_t count, loff_t *ppos) 1017 { 1018 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 1019 char __buf[20]; 1020 loff_t __ppos = *ppos; 1021 size_t len; 1022 1023 /* no need to print the trailing zero, so use only len */ 1024 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 1025 if (__ppos >= len) 1026 return 0; 1027 if (count > len - __ppos) 1028 count = len - __ppos; 1029 if (copy_to_user(buf, __buf + __ppos, count)) 1030 return -EFAULT; 1031 *ppos = __ppos + count; 1032 return count; 1033 } 1034 1035 static ssize_t seccomp_write(struct file *file, const char __user *buf, 1036 size_t count, loff_t *ppos) 1037 { 1038 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 1039 char __buf[20], *end; 1040 unsigned int seccomp_mode; 1041 1042 /* can set it only once to be even more secure */ 1043 if (unlikely(tsk->seccomp.mode)) 1044 return -EPERM; 1045 1046 memset(__buf, 0, sizeof(__buf)); 1047 count = min(count, sizeof(__buf) - 1); 1048 if (copy_from_user(__buf, buf, count)) 1049 return -EFAULT; 1050 seccomp_mode = simple_strtoul(__buf, &end, 0); 1051 if (*end == '\n') 1052 end++; 1053 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 1054 tsk->seccomp.mode = seccomp_mode; 
1055 set_tsk_thread_flag(tsk, TIF_SECCOMP); 1056 } else 1057 return -EINVAL; 1058 if (unlikely(!(end - __buf))) 1059 return -EIO; 1060 return end - __buf; 1061 } 1062 1063 static struct file_operations proc_seccomp_operations = { 1064 .read = seccomp_read, 1065 .write = seccomp_write, 1066 }; 1067 #endif /* CONFIG_SECCOMP */ 1068 1069 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1070 { 1071 struct inode *inode = dentry->d_inode; 1072 int error = -EACCES; 1073 1074 /* We don't need a base pointer in the /proc filesystem */ 1075 path_release(nd); 1076 1077 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 1078 goto out; 1079 error = proc_check_root(inode); 1080 if (error) 1081 goto out; 1082 1083 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 1084 nd->last_type = LAST_BIND; 1085 out: 1086 return ERR_PTR(error); 1087 } 1088 1089 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, 1090 char __user *buffer, int buflen) 1091 { 1092 struct inode * inode; 1093 char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; 1094 int len; 1095 1096 if (!tmp) 1097 return -ENOMEM; 1098 1099 inode = dentry->d_inode; 1100 path = d_path(dentry, mnt, tmp, PAGE_SIZE); 1101 len = PTR_ERR(path); 1102 if (IS_ERR(path)) 1103 goto out; 1104 len = tmp + PAGE_SIZE - 1 - path; 1105 1106 if (len > buflen) 1107 len = buflen; 1108 if (copy_to_user(buffer, path, len)) 1109 len = -EFAULT; 1110 out: 1111 free_page((unsigned long)tmp); 1112 return len; 1113 } 1114 1115 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1116 { 1117 int error = -EACCES; 1118 struct inode *inode = dentry->d_inode; 1119 struct dentry *de; 1120 struct vfsmount *mnt = NULL; 1121 1122 lock_kernel(); 1123 1124 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 1125 goto out; 1126 error = proc_check_root(inode); 1127 if (error) 1128 goto out; 1129 1130 error = 
PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); 1131 if (error) 1132 goto out; 1133 1134 error = do_proc_readlink(de, mnt, buffer, buflen); 1135 dput(de); 1136 mntput(mnt); 1137 out: 1138 unlock_kernel(); 1139 return error; 1140 } 1141 1142 static struct inode_operations proc_pid_link_inode_operations = { 1143 .readlink = proc_pid_readlink, 1144 .follow_link = proc_pid_follow_link 1145 }; 1146 1147 #define NUMBUF 10 1148 1149 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) 1150 { 1151 struct inode *inode = filp->f_dentry->d_inode; 1152 struct task_struct *p = proc_task(inode); 1153 unsigned int fd, tid, ino; 1154 int retval; 1155 char buf[NUMBUF]; 1156 struct files_struct * files; 1157 struct fdtable *fdt; 1158 1159 retval = -ENOENT; 1160 if (!pid_alive(p)) 1161 goto out; 1162 retval = 0; 1163 tid = p->pid; 1164 1165 fd = filp->f_pos; 1166 switch (fd) { 1167 case 0: 1168 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 1169 goto out; 1170 filp->f_pos++; 1171 case 1: 1172 ino = fake_ino(tid, PROC_TID_INO); 1173 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1174 goto out; 1175 filp->f_pos++; 1176 default: 1177 files = get_files_struct(p); 1178 if (!files) 1179 goto out; 1180 rcu_read_lock(); 1181 fdt = files_fdtable(files); 1182 for (fd = filp->f_pos-2; 1183 fd < fdt->max_fds; 1184 fd++, filp->f_pos++) { 1185 unsigned int i,j; 1186 1187 if (!fcheck_files(files, fd)) 1188 continue; 1189 rcu_read_unlock(); 1190 1191 j = NUMBUF; 1192 i = fd; 1193 do { 1194 j--; 1195 buf[j] = '0' + (i % 10); 1196 i /= 10; 1197 } while (i); 1198 1199 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1200 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1201 rcu_read_lock(); 1202 break; 1203 } 1204 rcu_read_lock(); 1205 } 1206 rcu_read_unlock(); 1207 put_files_struct(files); 1208 } 1209 out: 1210 return retval; 1211 } 1212 1213 static int proc_pident_readdir(struct file *filp, 1214 void *dirent, filldir_t filldir, 1215 struct pid_entry 
*ents, unsigned int nents) 1216 { 1217 int i; 1218 int pid; 1219 struct dentry *dentry = filp->f_dentry; 1220 struct inode *inode = dentry->d_inode; 1221 struct pid_entry *p; 1222 ino_t ino; 1223 int ret; 1224 1225 ret = -ENOENT; 1226 if (!pid_alive(proc_task(inode))) 1227 goto out; 1228 1229 ret = 0; 1230 pid = proc_task(inode)->pid; 1231 i = filp->f_pos; 1232 switch (i) { 1233 case 0: 1234 ino = inode->i_ino; 1235 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 1236 goto out; 1237 i++; 1238 filp->f_pos++; 1239 /* fall through */ 1240 case 1: 1241 ino = parent_ino(dentry); 1242 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 1243 goto out; 1244 i++; 1245 filp->f_pos++; 1246 /* fall through */ 1247 default: 1248 i -= 2; 1249 if (i >= nents) { 1250 ret = 1; 1251 goto out; 1252 } 1253 p = ents + i; 1254 while (p->name) { 1255 if (filldir(dirent, p->name, p->len, filp->f_pos, 1256 fake_ino(pid, p->type), p->mode >> 12) < 0) 1257 goto out; 1258 filp->f_pos++; 1259 p++; 1260 } 1261 } 1262 1263 ret = 1; 1264 out: 1265 return ret; 1266 } 1267 1268 static int proc_tgid_base_readdir(struct file * filp, 1269 void * dirent, filldir_t filldir) 1270 { 1271 return proc_pident_readdir(filp,dirent,filldir, 1272 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); 1273 } 1274 1275 static int proc_tid_base_readdir(struct file * filp, 1276 void * dirent, filldir_t filldir) 1277 { 1278 return proc_pident_readdir(filp,dirent,filldir, 1279 tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); 1280 } 1281 1282 /* building an inode */ 1283 1284 static int task_dumpable(struct task_struct *task) 1285 { 1286 int dumpable = 0; 1287 struct mm_struct *mm; 1288 1289 task_lock(task); 1290 mm = task->mm; 1291 if (mm) 1292 dumpable = mm->dumpable; 1293 task_unlock(task); 1294 if(dumpable == 1) 1295 return 1; 1296 return 0; 1297 } 1298 1299 1300 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino) 1301 { 1302 struct inode * inode; 1303 struct proc_inode *ei; 1304 
1305 /* We need a new inode */ 1306 1307 inode = new_inode(sb); 1308 if (!inode) 1309 goto out; 1310 1311 /* Common stuff */ 1312 ei = PROC_I(inode); 1313 ei->task = NULL; 1314 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1315 inode->i_ino = fake_ino(task->pid, ino); 1316 1317 if (!pid_alive(task)) 1318 goto out_unlock; 1319 1320 /* 1321 * grab the reference to task. 1322 */ 1323 get_task_struct(task); 1324 ei->task = task; 1325 ei->type = ino; 1326 inode->i_uid = 0; 1327 inode->i_gid = 0; 1328 if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { 1329 inode->i_uid = task->euid; 1330 inode->i_gid = task->egid; 1331 } 1332 security_task_to_inode(task, inode); 1333 1334 out: 1335 return inode; 1336 1337 out_unlock: 1338 ei->pde = NULL; 1339 iput(inode); 1340 return NULL; 1341 } 1342 1343 /* dentry stuff */ 1344 1345 /* 1346 * Exceptional case: normally we are not allowed to unhash a busy 1347 * directory. In this case, however, we can do it - no aliasing problems 1348 * due to the way we treat inodes. 1349 * 1350 * Rewrite the inode's ownerships here because the owning task may have 1351 * performed a setuid(), etc. 
1352 */ 1353 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1354 { 1355 struct inode *inode = dentry->d_inode; 1356 struct task_struct *task = proc_task(inode); 1357 if (pid_alive(task)) { 1358 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { 1359 inode->i_uid = task->euid; 1360 inode->i_gid = task->egid; 1361 } else { 1362 inode->i_uid = 0; 1363 inode->i_gid = 0; 1364 } 1365 security_task_to_inode(task, inode); 1366 return 1; 1367 } 1368 d_drop(dentry); 1369 return 0; 1370 } 1371 1372 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1373 { 1374 struct inode *inode = dentry->d_inode; 1375 struct task_struct *task = proc_task(inode); 1376 int fd = proc_type(inode) - PROC_TID_FD_DIR; 1377 struct files_struct *files; 1378 1379 files = get_files_struct(task); 1380 if (files) { 1381 rcu_read_lock(); 1382 if (fcheck_files(files, fd)) { 1383 rcu_read_unlock(); 1384 put_files_struct(files); 1385 if (task_dumpable(task)) { 1386 inode->i_uid = task->euid; 1387 inode->i_gid = task->egid; 1388 } else { 1389 inode->i_uid = 0; 1390 inode->i_gid = 0; 1391 } 1392 security_task_to_inode(task, inode); 1393 return 1; 1394 } 1395 rcu_read_unlock(); 1396 put_files_struct(files); 1397 } 1398 d_drop(dentry); 1399 return 0; 1400 } 1401 1402 static void pid_base_iput(struct dentry *dentry, struct inode *inode) 1403 { 1404 struct task_struct *task = proc_task(inode); 1405 spin_lock(&task->proc_lock); 1406 if (task->proc_dentry == dentry) 1407 task->proc_dentry = NULL; 1408 spin_unlock(&task->proc_lock); 1409 iput(inode); 1410 } 1411 1412 static int pid_delete_dentry(struct dentry * dentry) 1413 { 1414 /* Is the task we represent dead? 1415 * If so, then don't put the dentry on the lru list, 1416 * kill it immediately. 
1417 */ 1418 return !pid_alive(proc_task(dentry->d_inode)); 1419 } 1420 1421 static struct dentry_operations tid_fd_dentry_operations = 1422 { 1423 .d_revalidate = tid_fd_revalidate, 1424 .d_delete = pid_delete_dentry, 1425 }; 1426 1427 static struct dentry_operations pid_dentry_operations = 1428 { 1429 .d_revalidate = pid_revalidate, 1430 .d_delete = pid_delete_dentry, 1431 }; 1432 1433 static struct dentry_operations pid_base_dentry_operations = 1434 { 1435 .d_revalidate = pid_revalidate, 1436 .d_iput = pid_base_iput, 1437 .d_delete = pid_delete_dentry, 1438 }; 1439 1440 /* Lookups */ 1441 1442 static unsigned name_to_int(struct dentry *dentry) 1443 { 1444 const char *name = dentry->d_name.name; 1445 int len = dentry->d_name.len; 1446 unsigned n = 0; 1447 1448 if (len > 1 && *name == '0') 1449 goto out; 1450 while (len-- > 0) { 1451 unsigned c = *name++ - '0'; 1452 if (c > 9) 1453 goto out; 1454 if (n >= (~0U-9)/10) 1455 goto out; 1456 n *= 10; 1457 n += c; 1458 } 1459 return n; 1460 out: 1461 return ~0U; 1462 } 1463 1464 /* SMP-safe */ 1465 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1466 { 1467 struct task_struct *task = proc_task(dir); 1468 unsigned fd = name_to_int(dentry); 1469 struct file * file; 1470 struct files_struct * files; 1471 struct inode *inode; 1472 struct proc_inode *ei; 1473 1474 if (fd == ~0U) 1475 goto out; 1476 if (!pid_alive(task)) 1477 goto out; 1478 1479 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1480 if (!inode) 1481 goto out; 1482 ei = PROC_I(inode); 1483 files = get_files_struct(task); 1484 if (!files) 1485 goto out_unlock; 1486 inode->i_mode = S_IFLNK; 1487 rcu_read_lock(); 1488 file = fcheck_files(files, fd); 1489 if (!file) 1490 goto out_unlock2; 1491 if (file->f_mode & 1) 1492 inode->i_mode |= S_IRUSR | S_IXUSR; 1493 if (file->f_mode & 2) 1494 inode->i_mode |= S_IWUSR | S_IXUSR; 1495 rcu_read_unlock(); 1496 put_files_struct(files); 1497 inode->i_op = 
&proc_pid_link_inode_operations; 1498 inode->i_size = 64; 1499 ei->op.proc_get_link = proc_fd_link; 1500 dentry->d_op = &tid_fd_dentry_operations; 1501 d_add(dentry, inode); 1502 return NULL; 1503 1504 out_unlock2: 1505 rcu_read_unlock(); 1506 put_files_struct(files); 1507 out_unlock: 1508 iput(inode); 1509 out: 1510 return ERR_PTR(-ENOENT); 1511 } 1512 1513 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1514 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1515 1516 static struct file_operations proc_fd_operations = { 1517 .read = generic_read_dir, 1518 .readdir = proc_readfd, 1519 }; 1520 1521 static struct file_operations proc_task_operations = { 1522 .read = generic_read_dir, 1523 .readdir = proc_task_readdir, 1524 }; 1525 1526 /* 1527 * proc directories can do almost nothing.. 1528 */ 1529 static struct inode_operations proc_fd_inode_operations = { 1530 .lookup = proc_lookupfd, 1531 .permission = proc_permission, 1532 }; 1533 1534 static struct inode_operations proc_task_inode_operations = { 1535 .lookup = proc_task_lookup, 1536 .permission = proc_task_permission, 1537 }; 1538 1539 #ifdef CONFIG_SECURITY 1540 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 1541 size_t count, loff_t *ppos) 1542 { 1543 struct inode * inode = file->f_dentry->d_inode; 1544 unsigned long page; 1545 ssize_t length; 1546 struct task_struct *task = proc_task(inode); 1547 1548 if (count > PAGE_SIZE) 1549 count = PAGE_SIZE; 1550 if (!(page = __get_free_page(GFP_KERNEL))) 1551 return -ENOMEM; 1552 1553 length = security_getprocattr(task, 1554 (char*)file->f_dentry->d_name.name, 1555 (void*)page, count); 1556 if (length >= 0) 1557 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1558 free_page(page); 1559 return length; 1560 } 1561 1562 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 1563 size_t count, loff_t *ppos) 
1564 { 1565 struct inode * inode = file->f_dentry->d_inode; 1566 char *page; 1567 ssize_t length; 1568 struct task_struct *task = proc_task(inode); 1569 1570 if (count > PAGE_SIZE) 1571 count = PAGE_SIZE; 1572 if (*ppos != 0) { 1573 /* No partial writes. */ 1574 return -EINVAL; 1575 } 1576 page = (char*)__get_free_page(GFP_USER); 1577 if (!page) 1578 return -ENOMEM; 1579 length = -EFAULT; 1580 if (copy_from_user(page, buf, count)) 1581 goto out; 1582 1583 length = security_setprocattr(task, 1584 (char*)file->f_dentry->d_name.name, 1585 (void*)page, count); 1586 out: 1587 free_page((unsigned long) page); 1588 return length; 1589 } 1590 1591 static struct file_operations proc_pid_attr_operations = { 1592 .read = proc_pid_attr_read, 1593 .write = proc_pid_attr_write, 1594 }; 1595 1596 static struct file_operations proc_tid_attr_operations; 1597 static struct inode_operations proc_tid_attr_inode_operations; 1598 static struct file_operations proc_tgid_attr_operations; 1599 static struct inode_operations proc_tgid_attr_inode_operations; 1600 #endif 1601 1602 static int get_tid_list(int index, unsigned int *tids, struct inode *dir); 1603 1604 /* SMP-safe */ 1605 static struct dentry *proc_pident_lookup(struct inode *dir, 1606 struct dentry *dentry, 1607 struct pid_entry *ents) 1608 { 1609 struct inode *inode; 1610 int error; 1611 struct task_struct *task = proc_task(dir); 1612 struct pid_entry *p; 1613 struct proc_inode *ei; 1614 1615 error = -ENOENT; 1616 inode = NULL; 1617 1618 if (!pid_alive(task)) 1619 goto out; 1620 1621 for (p = ents; p->name; p++) { 1622 if (p->len != dentry->d_name.len) 1623 continue; 1624 if (!memcmp(dentry->d_name.name, p->name, p->len)) 1625 break; 1626 } 1627 if (!p->name) 1628 goto out; 1629 1630 error = -EINVAL; 1631 inode = proc_pid_make_inode(dir->i_sb, task, p->type); 1632 if (!inode) 1633 goto out; 1634 1635 ei = PROC_I(inode); 1636 inode->i_mode = p->mode; 1637 /* 1638 * Yes, it does not scale. And it should not. 
Don't add 1639 * new entries into /proc/<tgid>/ without very good reasons. 1640 */ 1641 switch(p->type) { 1642 case PROC_TGID_TASK: 1643 inode->i_nlink = 2 + get_tid_list(2, NULL, dir); 1644 inode->i_op = &proc_task_inode_operations; 1645 inode->i_fop = &proc_task_operations; 1646 break; 1647 case PROC_TID_FD: 1648 case PROC_TGID_FD: 1649 inode->i_nlink = 2; 1650 inode->i_op = &proc_fd_inode_operations; 1651 inode->i_fop = &proc_fd_operations; 1652 break; 1653 case PROC_TID_EXE: 1654 case PROC_TGID_EXE: 1655 inode->i_op = &proc_pid_link_inode_operations; 1656 ei->op.proc_get_link = proc_exe_link; 1657 break; 1658 case PROC_TID_CWD: 1659 case PROC_TGID_CWD: 1660 inode->i_op = &proc_pid_link_inode_operations; 1661 ei->op.proc_get_link = proc_cwd_link; 1662 break; 1663 case PROC_TID_ROOT: 1664 case PROC_TGID_ROOT: 1665 inode->i_op = &proc_pid_link_inode_operations; 1666 ei->op.proc_get_link = proc_root_link; 1667 break; 1668 case PROC_TID_ENVIRON: 1669 case PROC_TGID_ENVIRON: 1670 inode->i_fop = &proc_info_file_operations; 1671 ei->op.proc_read = proc_pid_environ; 1672 break; 1673 case PROC_TID_AUXV: 1674 case PROC_TGID_AUXV: 1675 inode->i_fop = &proc_info_file_operations; 1676 ei->op.proc_read = proc_pid_auxv; 1677 break; 1678 case PROC_TID_STATUS: 1679 case PROC_TGID_STATUS: 1680 inode->i_fop = &proc_info_file_operations; 1681 ei->op.proc_read = proc_pid_status; 1682 break; 1683 case PROC_TID_STAT: 1684 inode->i_fop = &proc_info_file_operations; 1685 ei->op.proc_read = proc_tid_stat; 1686 break; 1687 case PROC_TGID_STAT: 1688 inode->i_fop = &proc_info_file_operations; 1689 ei->op.proc_read = proc_tgid_stat; 1690 break; 1691 case PROC_TID_CMDLINE: 1692 case PROC_TGID_CMDLINE: 1693 inode->i_fop = &proc_info_file_operations; 1694 ei->op.proc_read = proc_pid_cmdline; 1695 break; 1696 case PROC_TID_STATM: 1697 case PROC_TGID_STATM: 1698 inode->i_fop = &proc_info_file_operations; 1699 ei->op.proc_read = proc_pid_statm; 1700 break; 1701 case PROC_TID_MAPS: 1702 case 
PROC_TGID_MAPS: 1703 inode->i_fop = &proc_maps_operations; 1704 break; 1705 #ifdef CONFIG_NUMA 1706 case PROC_TID_NUMA_MAPS: 1707 case PROC_TGID_NUMA_MAPS: 1708 inode->i_fop = &proc_numa_maps_operations; 1709 break; 1710 #endif 1711 case PROC_TID_MEM: 1712 case PROC_TGID_MEM: 1713 inode->i_op = &proc_mem_inode_operations; 1714 inode->i_fop = &proc_mem_operations; 1715 break; 1716 #ifdef CONFIG_SECCOMP 1717 case PROC_TID_SECCOMP: 1718 case PROC_TGID_SECCOMP: 1719 inode->i_fop = &proc_seccomp_operations; 1720 break; 1721 #endif /* CONFIG_SECCOMP */ 1722 case PROC_TID_MOUNTS: 1723 case PROC_TGID_MOUNTS: 1724 inode->i_fop = &proc_mounts_operations; 1725 break; 1726 #ifdef CONFIG_MMU 1727 case PROC_TID_SMAPS: 1728 case PROC_TGID_SMAPS: 1729 inode->i_fop = &proc_smaps_operations; 1730 break; 1731 #endif 1732 #ifdef CONFIG_SECURITY 1733 case PROC_TID_ATTR: 1734 inode->i_nlink = 2; 1735 inode->i_op = &proc_tid_attr_inode_operations; 1736 inode->i_fop = &proc_tid_attr_operations; 1737 break; 1738 case PROC_TGID_ATTR: 1739 inode->i_nlink = 2; 1740 inode->i_op = &proc_tgid_attr_inode_operations; 1741 inode->i_fop = &proc_tgid_attr_operations; 1742 break; 1743 case PROC_TID_ATTR_CURRENT: 1744 case PROC_TGID_ATTR_CURRENT: 1745 case PROC_TID_ATTR_PREV: 1746 case PROC_TGID_ATTR_PREV: 1747 case PROC_TID_ATTR_EXEC: 1748 case PROC_TGID_ATTR_EXEC: 1749 case PROC_TID_ATTR_FSCREATE: 1750 case PROC_TGID_ATTR_FSCREATE: 1751 inode->i_fop = &proc_pid_attr_operations; 1752 break; 1753 #endif 1754 #ifdef CONFIG_KALLSYMS 1755 case PROC_TID_WCHAN: 1756 case PROC_TGID_WCHAN: 1757 inode->i_fop = &proc_info_file_operations; 1758 ei->op.proc_read = proc_pid_wchan; 1759 break; 1760 #endif 1761 #ifdef CONFIG_SCHEDSTATS 1762 case PROC_TID_SCHEDSTAT: 1763 case PROC_TGID_SCHEDSTAT: 1764 inode->i_fop = &proc_info_file_operations; 1765 ei->op.proc_read = proc_pid_schedstat; 1766 break; 1767 #endif 1768 #ifdef CONFIG_CPUSETS 1769 case PROC_TID_CPUSET: 1770 case PROC_TGID_CPUSET: 1771 inode->i_fop = 
&proc_cpuset_operations; 1772 break; 1773 #endif 1774 case PROC_TID_OOM_SCORE: 1775 case PROC_TGID_OOM_SCORE: 1776 inode->i_fop = &proc_info_file_operations; 1777 ei->op.proc_read = proc_oom_score; 1778 break; 1779 case PROC_TID_OOM_ADJUST: 1780 case PROC_TGID_OOM_ADJUST: 1781 inode->i_fop = &proc_oom_adjust_operations; 1782 break; 1783 #ifdef CONFIG_AUDITSYSCALL 1784 case PROC_TID_LOGINUID: 1785 case PROC_TGID_LOGINUID: 1786 inode->i_fop = &proc_loginuid_operations; 1787 break; 1788 #endif 1789 default: 1790 printk("procfs: impossible type (%d)",p->type); 1791 iput(inode); 1792 return ERR_PTR(-EINVAL); 1793 } 1794 dentry->d_op = &pid_dentry_operations; 1795 d_add(dentry, inode); 1796 return NULL; 1797 1798 out: 1799 return ERR_PTR(error); 1800 } 1801 1802 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1803 return proc_pident_lookup(dir, dentry, tgid_base_stuff); 1804 } 1805 1806 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1807 return proc_pident_lookup(dir, dentry, tid_base_stuff); 1808 } 1809 1810 static struct file_operations proc_tgid_base_operations = { 1811 .read = generic_read_dir, 1812 .readdir = proc_tgid_base_readdir, 1813 }; 1814 1815 static struct file_operations proc_tid_base_operations = { 1816 .read = generic_read_dir, 1817 .readdir = proc_tid_base_readdir, 1818 }; 1819 1820 static struct inode_operations proc_tgid_base_inode_operations = { 1821 .lookup = proc_tgid_base_lookup, 1822 }; 1823 1824 static struct inode_operations proc_tid_base_inode_operations = { 1825 .lookup = proc_tid_base_lookup, 1826 }; 1827 1828 #ifdef CONFIG_SECURITY 1829 static int proc_tgid_attr_readdir(struct file * filp, 1830 void * dirent, filldir_t filldir) 1831 { 1832 return proc_pident_readdir(filp,dirent,filldir, 1833 tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff)); 1834 } 1835 1836 static int proc_tid_attr_readdir(struct file * filp, 1837 void * 
dirent, filldir_t filldir) 1838 { 1839 return proc_pident_readdir(filp,dirent,filldir, 1840 tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); 1841 } 1842 1843 static struct file_operations proc_tgid_attr_operations = { 1844 .read = generic_read_dir, 1845 .readdir = proc_tgid_attr_readdir, 1846 }; 1847 1848 static struct file_operations proc_tid_attr_operations = { 1849 .read = generic_read_dir, 1850 .readdir = proc_tid_attr_readdir, 1851 }; 1852 1853 static struct dentry *proc_tgid_attr_lookup(struct inode *dir, 1854 struct dentry *dentry, struct nameidata *nd) 1855 { 1856 return proc_pident_lookup(dir, dentry, tgid_attr_stuff); 1857 } 1858 1859 static struct dentry *proc_tid_attr_lookup(struct inode *dir, 1860 struct dentry *dentry, struct nameidata *nd) 1861 { 1862 return proc_pident_lookup(dir, dentry, tid_attr_stuff); 1863 } 1864 1865 static struct inode_operations proc_tgid_attr_inode_operations = { 1866 .lookup = proc_tgid_attr_lookup, 1867 }; 1868 1869 static struct inode_operations proc_tid_attr_inode_operations = { 1870 .lookup = proc_tid_attr_lookup, 1871 }; 1872 #endif 1873 1874 /* 1875 * /proc/self: 1876 */ 1877 static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 1878 int buflen) 1879 { 1880 char tmp[30]; 1881 sprintf(tmp, "%d", current->tgid); 1882 return vfs_readlink(dentry,buffer,buflen,tmp); 1883 } 1884 1885 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1886 { 1887 char tmp[30]; 1888 sprintf(tmp, "%d", current->tgid); 1889 return ERR_PTR(vfs_follow_link(nd,tmp)); 1890 } 1891 1892 static struct inode_operations proc_self_inode_operations = { 1893 .readlink = proc_self_readlink, 1894 .follow_link = proc_self_follow_link, 1895 }; 1896 1897 /** 1898 * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. 1899 * @p: task that should be flushed. 1900 * 1901 * Drops the /proc/@pid dcache entry from the hash chains. 
1902 * 1903 * Dropping /proc/@pid entries and detach_pid must be synchroneous, 1904 * otherwise e.g. /proc/@pid/exe might point to the wrong executable, 1905 * if the pid value is immediately reused. This is enforced by 1906 * - caller must acquire spin_lock(p->proc_lock) 1907 * - must be called before detach_pid() 1908 * - proc_pid_lookup acquires proc_lock, and checks that 1909 * the target is not dead by looking at the attach count 1910 * of PIDTYPE_PID. 1911 */ 1912 1913 struct dentry *proc_pid_unhash(struct task_struct *p) 1914 { 1915 struct dentry *proc_dentry; 1916 1917 proc_dentry = p->proc_dentry; 1918 if (proc_dentry != NULL) { 1919 1920 spin_lock(&dcache_lock); 1921 spin_lock(&proc_dentry->d_lock); 1922 if (!d_unhashed(proc_dentry)) { 1923 dget_locked(proc_dentry); 1924 __d_drop(proc_dentry); 1925 spin_unlock(&proc_dentry->d_lock); 1926 } else { 1927 spin_unlock(&proc_dentry->d_lock); 1928 proc_dentry = NULL; 1929 } 1930 spin_unlock(&dcache_lock); 1931 } 1932 return proc_dentry; 1933 } 1934 1935 /** 1936 * proc_pid_flush - recover memory used by stale /proc/@pid/x entries 1937 * @proc_dentry: directoy to prune. 1938 * 1939 * Shrink the /proc directory that was used by the just killed thread. 
void proc_pid_flush(struct dentry *proc_dentry)
{
	/* the sleep check runs even when there is nothing to flush */
	might_sleep();

	if (!proc_dentry)
		return;

	/* prune the children first, then drop the reference on the directory */
	shrink_dcache_parent(proc_dentry);
	dput(proc_dentry);
}
if (died) { 2018 proc_pid_flush(dentry); 2019 goto out; 2020 } 2021 return NULL; 2022 out: 2023 return ERR_PTR(-ENOENT); 2024 } 2025 2026 /* SMP-safe */ 2027 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2028 { 2029 struct task_struct *task; 2030 struct task_struct *leader = proc_task(dir); 2031 struct inode *inode; 2032 unsigned tid; 2033 2034 tid = name_to_int(dentry); 2035 if (tid == ~0U) 2036 goto out; 2037 2038 read_lock(&tasklist_lock); 2039 task = find_task_by_pid(tid); 2040 if (task) 2041 get_task_struct(task); 2042 read_unlock(&tasklist_lock); 2043 if (!task) 2044 goto out; 2045 if (leader->tgid != task->tgid) 2046 goto out_drop_task; 2047 2048 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); 2049 2050 2051 if (!inode) 2052 goto out_drop_task; 2053 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2054 inode->i_op = &proc_tid_base_inode_operations; 2055 inode->i_fop = &proc_tid_base_operations; 2056 inode->i_flags|=S_IMMUTABLE; 2057 #ifdef CONFIG_SECURITY 2058 inode->i_nlink = 4; 2059 #else 2060 inode->i_nlink = 3; 2061 #endif 2062 2063 dentry->d_op = &pid_base_dentry_operations; 2064 2065 d_add(dentry, inode); 2066 2067 put_task_struct(task); 2068 return NULL; 2069 out_drop_task: 2070 put_task_struct(task); 2071 out: 2072 return ERR_PTR(-ENOENT); 2073 } 2074 2075 #define PROC_NUMBUF 10 2076 #define PROC_MAXPIDS 20 2077 2078 /* 2079 * Get a few tgid's to return for filldir - we need to hold the 2080 * tasklist lock while doing this, and we must release it before 2081 * we actually do the filldir itself, so we use a temp buffer.. 
2082 */ 2083 static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) 2084 { 2085 struct task_struct *p; 2086 int nr_tgids = 0; 2087 2088 index--; 2089 read_lock(&tasklist_lock); 2090 p = NULL; 2091 if (version) { 2092 p = find_task_by_pid(version); 2093 if (p && !thread_group_leader(p)) 2094 p = NULL; 2095 } 2096 2097 if (p) 2098 index = 0; 2099 else 2100 p = next_task(&init_task); 2101 2102 for ( ; p != &init_task; p = next_task(p)) { 2103 int tgid = p->pid; 2104 if (!pid_alive(p)) 2105 continue; 2106 if (--index >= 0) 2107 continue; 2108 tgids[nr_tgids] = tgid; 2109 nr_tgids++; 2110 if (nr_tgids >= PROC_MAXPIDS) 2111 break; 2112 } 2113 read_unlock(&tasklist_lock); 2114 return nr_tgids; 2115 } 2116 2117 /* 2118 * Get a few tid's to return for filldir - we need to hold the 2119 * tasklist lock while doing this, and we must release it before 2120 * we actually do the filldir itself, so we use a temp buffer.. 2121 */ 2122 static int get_tid_list(int index, unsigned int *tids, struct inode *dir) 2123 { 2124 struct task_struct *leader_task = proc_task(dir); 2125 struct task_struct *task = leader_task; 2126 int nr_tids = 0; 2127 2128 index -= 2; 2129 read_lock(&tasklist_lock); 2130 /* 2131 * The starting point task (leader_task) might be an already 2132 * unlinked task, which cannot be used to access the task-list 2133 * via next_thread(). 
2134 */ 2135 if (pid_alive(task)) do { 2136 int tid = task->pid; 2137 2138 if (--index >= 0) 2139 continue; 2140 if (tids != NULL) 2141 tids[nr_tids] = tid; 2142 nr_tids++; 2143 if (nr_tids >= PROC_MAXPIDS) 2144 break; 2145 } while ((task = next_thread(task)) != leader_task); 2146 read_unlock(&tasklist_lock); 2147 return nr_tids; 2148 } 2149 2150 /* for the /proc/ directory itself, after non-process stuff has been done */ 2151 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2152 { 2153 unsigned int tgid_array[PROC_MAXPIDS]; 2154 char buf[PROC_NUMBUF]; 2155 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 2156 unsigned int nr_tgids, i; 2157 int next_tgid; 2158 2159 if (!nr) { 2160 ino_t ino = fake_ino(0,PROC_TGID_INO); 2161 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) 2162 return 0; 2163 filp->f_pos++; 2164 nr++; 2165 } 2166 2167 /* f_version caches the tgid value that the last readdir call couldn't 2168 * return. lseek aka telldir automagically resets f_version to 0. 2169 */ 2170 next_tgid = filp->f_version; 2171 filp->f_version = 0; 2172 for (;;) { 2173 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); 2174 if (!nr_tgids) { 2175 /* no more entries ! 
*/ 2176 break; 2177 } 2178 next_tgid = 0; 2179 2180 /* do not use the last found pid, reserve it for next_tgid */ 2181 if (nr_tgids == PROC_MAXPIDS) { 2182 nr_tgids--; 2183 next_tgid = tgid_array[nr_tgids]; 2184 } 2185 2186 for (i=0;i<nr_tgids;i++) { 2187 int tgid = tgid_array[i]; 2188 ino_t ino = fake_ino(tgid,PROC_TGID_INO); 2189 unsigned long j = PROC_NUMBUF; 2190 2191 do 2192 buf[--j] = '0' + (tgid % 10); 2193 while ((tgid /= 10) != 0); 2194 2195 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { 2196 /* returning this tgid failed, save it as the first 2197 * pid for the next readir call */ 2198 filp->f_version = tgid_array[i]; 2199 goto out; 2200 } 2201 filp->f_pos++; 2202 nr++; 2203 } 2204 } 2205 out: 2206 return 0; 2207 } 2208 2209 /* for the /proc/TGID/task/ directories */ 2210 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 2211 { 2212 unsigned int tid_array[PROC_MAXPIDS]; 2213 char buf[PROC_NUMBUF]; 2214 unsigned int nr_tids, i; 2215 struct dentry *dentry = filp->f_dentry; 2216 struct inode *inode = dentry->d_inode; 2217 int retval = -ENOENT; 2218 ino_t ino; 2219 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2220 2221 if (!pid_alive(proc_task(inode))) 2222 goto out; 2223 retval = 0; 2224 2225 switch (pos) { 2226 case 0: 2227 ino = inode->i_ino; 2228 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) 2229 goto out; 2230 pos++; 2231 /* fall through */ 2232 case 1: 2233 ino = parent_ino(dentry); 2234 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) 2235 goto out; 2236 pos++; 2237 /* fall through */ 2238 } 2239 2240 nr_tids = get_tid_list(pos, tid_array, inode); 2241 inode->i_nlink = pos + nr_tids; 2242 2243 for (i = 0; i < nr_tids; i++) { 2244 unsigned long j = PROC_NUMBUF; 2245 int tid = tid_array[i]; 2246 2247 ino = fake_ino(tid,PROC_TID_INO); 2248 2249 do 2250 buf[--j] = '0' + (tid % 10); 2251 while ((tid /= 10) != 0); 2252 2253 if (filldir(dirent, buf+j, 
PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) 2254 break; 2255 pos++; 2256 } 2257 out: 2258 filp->f_pos = pos; 2259 return retval; 2260 } 2261