/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own. We don't use the
 *  rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
48 */ 49 50 #include <asm/uaccess.h> 51 52 #include <linux/config.h> 53 #include <linux/errno.h> 54 #include <linux/time.h> 55 #include <linux/proc_fs.h> 56 #include <linux/stat.h> 57 #include <linux/init.h> 58 #include <linux/file.h> 59 #include <linux/string.h> 60 #include <linux/seq_file.h> 61 #include <linux/namei.h> 62 #include <linux/namespace.h> 63 #include <linux/mm.h> 64 #include <linux/smp_lock.h> 65 #include <linux/rcupdate.h> 66 #include <linux/kallsyms.h> 67 #include <linux/mount.h> 68 #include <linux/security.h> 69 #include <linux/ptrace.h> 70 #include <linux/seccomp.h> 71 #include <linux/cpuset.h> 72 #include <linux/audit.h> 73 #include "internal.h" 74 75 /* 76 * For hysterical raisins we keep the same inumbers as in the old procfs. 77 * Feel free to change the macro below - just keep the range distinct from 78 * inumbers of the rest of procfs (currently those are in 0x0000--0xffff). 79 * As soon as we'll get a separate superblock we will be able to forget 80 * about magical ranges too. 
81 */ 82 83 #define fake_ino(pid,ino) (((pid)<<16)|(ino)) 84 85 enum pid_directory_inos { 86 PROC_TGID_INO = 2, 87 PROC_TGID_TASK, 88 PROC_TGID_STATUS, 89 PROC_TGID_MEM, 90 #ifdef CONFIG_SECCOMP 91 PROC_TGID_SECCOMP, 92 #endif 93 PROC_TGID_CWD, 94 PROC_TGID_ROOT, 95 PROC_TGID_EXE, 96 PROC_TGID_FD, 97 PROC_TGID_ENVIRON, 98 PROC_TGID_AUXV, 99 PROC_TGID_CMDLINE, 100 PROC_TGID_STAT, 101 PROC_TGID_STATM, 102 PROC_TGID_MAPS, 103 PROC_TGID_NUMA_MAPS, 104 PROC_TGID_MOUNTS, 105 PROC_TGID_WCHAN, 106 #ifdef CONFIG_MMU 107 PROC_TGID_SMAPS, 108 #endif 109 #ifdef CONFIG_SCHEDSTATS 110 PROC_TGID_SCHEDSTAT, 111 #endif 112 #ifdef CONFIG_CPUSETS 113 PROC_TGID_CPUSET, 114 #endif 115 #ifdef CONFIG_SECURITY 116 PROC_TGID_ATTR, 117 PROC_TGID_ATTR_CURRENT, 118 PROC_TGID_ATTR_PREV, 119 PROC_TGID_ATTR_EXEC, 120 PROC_TGID_ATTR_FSCREATE, 121 #endif 122 #ifdef CONFIG_AUDITSYSCALL 123 PROC_TGID_LOGINUID, 124 #endif 125 PROC_TGID_OOM_SCORE, 126 PROC_TGID_OOM_ADJUST, 127 PROC_TID_INO, 128 PROC_TID_STATUS, 129 PROC_TID_MEM, 130 #ifdef CONFIG_SECCOMP 131 PROC_TID_SECCOMP, 132 #endif 133 PROC_TID_CWD, 134 PROC_TID_ROOT, 135 PROC_TID_EXE, 136 PROC_TID_FD, 137 PROC_TID_ENVIRON, 138 PROC_TID_AUXV, 139 PROC_TID_CMDLINE, 140 PROC_TID_STAT, 141 PROC_TID_STATM, 142 PROC_TID_MAPS, 143 PROC_TID_NUMA_MAPS, 144 PROC_TID_MOUNTS, 145 PROC_TID_WCHAN, 146 #ifdef CONFIG_MMU 147 PROC_TID_SMAPS, 148 #endif 149 #ifdef CONFIG_SCHEDSTATS 150 PROC_TID_SCHEDSTAT, 151 #endif 152 #ifdef CONFIG_CPUSETS 153 PROC_TID_CPUSET, 154 #endif 155 #ifdef CONFIG_SECURITY 156 PROC_TID_ATTR, 157 PROC_TID_ATTR_CURRENT, 158 PROC_TID_ATTR_PREV, 159 PROC_TID_ATTR_EXEC, 160 PROC_TID_ATTR_FSCREATE, 161 #endif 162 #ifdef CONFIG_AUDITSYSCALL 163 PROC_TID_LOGINUID, 164 #endif 165 PROC_TID_OOM_SCORE, 166 PROC_TID_OOM_ADJUST, 167 168 /* Add new entries before this */ 169 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ 170 }; 171 172 struct pid_entry { 173 int type; 174 int len; 175 char *name; 176 mode_t mode; 177 }; 178 179 #define 
E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} 180 181 static struct pid_entry tgid_base_stuff[] = { 182 E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), 183 E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 184 E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR), 185 E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR), 186 E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO), 187 E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), 188 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), 189 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), 190 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), 191 #ifdef CONFIG_NUMA 192 E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), 193 #endif 194 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 195 #ifdef CONFIG_SECCOMP 196 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 197 #endif 198 E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 199 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), 200 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), 201 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 202 #ifdef CONFIG_MMU 203 E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), 204 #endif 205 #ifdef CONFIG_SECURITY 206 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 207 #endif 208 #ifdef CONFIG_KALLSYMS 209 E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), 210 #endif 211 #ifdef CONFIG_SCHEDSTATS 212 E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), 213 #endif 214 #ifdef CONFIG_CPUSETS 215 E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 216 #endif 217 E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 218 E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 219 #ifdef CONFIG_AUDITSYSCALL 220 E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), 221 #endif 222 {0,0,NULL,0} 223 }; 224 static struct pid_entry tid_base_stuff[] = { 225 E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 226 E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR), 227 E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR), 228 E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO), 229 E(PROC_TID_CMDLINE, 
"cmdline", S_IFREG|S_IRUGO), 230 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), 231 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), 232 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), 233 #ifdef CONFIG_NUMA 234 E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), 235 #endif 236 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 237 #ifdef CONFIG_SECCOMP 238 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 239 #endif 240 E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 241 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), 242 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), 243 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 244 #ifdef CONFIG_MMU 245 E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), 246 #endif 247 #ifdef CONFIG_SECURITY 248 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 249 #endif 250 #ifdef CONFIG_KALLSYMS 251 E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), 252 #endif 253 #ifdef CONFIG_SCHEDSTATS 254 E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), 255 #endif 256 #ifdef CONFIG_CPUSETS 257 E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 258 #endif 259 E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 260 E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 261 #ifdef CONFIG_AUDITSYSCALL 262 E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), 263 #endif 264 {0,0,NULL,0} 265 }; 266 267 #ifdef CONFIG_SECURITY 268 static struct pid_entry tgid_attr_stuff[] = { 269 E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 270 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 271 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 272 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 273 {0,0,NULL,0} 274 }; 275 static struct pid_entry tid_attr_stuff[] = { 276 E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 277 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 278 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 279 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 280 {0,0,NULL,0} 281 }; 282 #endif 283 284 
#undef E 285 286 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 287 { 288 struct task_struct *task = proc_task(inode); 289 struct files_struct *files; 290 struct file *file; 291 int fd = proc_type(inode) - PROC_TID_FD_DIR; 292 293 files = get_files_struct(task); 294 if (files) { 295 rcu_read_lock(); 296 file = fcheck_files(files, fd); 297 if (file) { 298 *mnt = mntget(file->f_vfsmnt); 299 *dentry = dget(file->f_dentry); 300 rcu_read_unlock(); 301 put_files_struct(files); 302 return 0; 303 } 304 rcu_read_unlock(); 305 put_files_struct(files); 306 } 307 return -ENOENT; 308 } 309 310 static struct fs_struct *get_fs_struct(struct task_struct *task) 311 { 312 struct fs_struct *fs; 313 task_lock(task); 314 fs = task->fs; 315 if(fs) 316 atomic_inc(&fs->count); 317 task_unlock(task); 318 return fs; 319 } 320 321 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 322 { 323 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 324 int result = -ENOENT; 325 if (fs) { 326 read_lock(&fs->lock); 327 *mnt = mntget(fs->pwdmnt); 328 *dentry = dget(fs->pwd); 329 read_unlock(&fs->lock); 330 result = 0; 331 put_fs_struct(fs); 332 } 333 return result; 334 } 335 336 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 337 { 338 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 339 int result = -ENOENT; 340 if (fs) { 341 read_lock(&fs->lock); 342 *mnt = mntget(fs->rootmnt); 343 *dentry = dget(fs->root); 344 read_unlock(&fs->lock); 345 result = 0; 346 put_fs_struct(fs); 347 } 348 return result; 349 } 350 351 352 /* Same as proc_root_link, but this addionally tries to get fs from other 353 * threads in the group */ 354 static int proc_task_root_link(struct inode *inode, struct dentry **dentry, 355 struct vfsmount **mnt) 356 { 357 struct fs_struct *fs; 358 int result = -ENOENT; 359 struct task_struct *leader = proc_task(inode); 360 361 task_lock(leader); 
362 fs = leader->fs; 363 if (fs) { 364 atomic_inc(&fs->count); 365 task_unlock(leader); 366 } else { 367 /* Try to get fs from other threads */ 368 task_unlock(leader); 369 read_lock(&tasklist_lock); 370 if (pid_alive(leader)) { 371 struct task_struct *task = leader; 372 373 while ((task = next_thread(task)) != leader) { 374 task_lock(task); 375 fs = task->fs; 376 if (fs) { 377 atomic_inc(&fs->count); 378 task_unlock(task); 379 break; 380 } 381 task_unlock(task); 382 } 383 } 384 read_unlock(&tasklist_lock); 385 } 386 387 if (fs) { 388 read_lock(&fs->lock); 389 *mnt = mntget(fs->rootmnt); 390 *dentry = dget(fs->root); 391 read_unlock(&fs->lock); 392 result = 0; 393 put_fs_struct(fs); 394 } 395 return result; 396 } 397 398 399 #define MAY_PTRACE(task) \ 400 (task == current || \ 401 (task->parent == current && \ 402 (task->ptrace & PT_PTRACED) && \ 403 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ 404 security_ptrace(current,task) == 0)) 405 406 static int proc_pid_environ(struct task_struct *task, char * buffer) 407 { 408 int res = 0; 409 struct mm_struct *mm = get_task_mm(task); 410 if (mm) { 411 unsigned int len = mm->env_end - mm->env_start; 412 if (len > PAGE_SIZE) 413 len = PAGE_SIZE; 414 res = access_process_vm(task, mm->env_start, buffer, len, 0); 415 if (!ptrace_may_attach(task)) 416 res = -ESRCH; 417 mmput(mm); 418 } 419 return res; 420 } 421 422 static int proc_pid_cmdline(struct task_struct *task, char * buffer) 423 { 424 int res = 0; 425 unsigned int len; 426 struct mm_struct *mm = get_task_mm(task); 427 if (!mm) 428 goto out; 429 if (!mm->arg_end) 430 goto out_mm; /* Shh! No looking before we're done */ 431 432 len = mm->arg_end - mm->arg_start; 433 434 if (len > PAGE_SIZE) 435 len = PAGE_SIZE; 436 437 res = access_process_vm(task, mm->arg_start, buffer, len, 0); 438 439 // If the nul at the end of args has been overwritten, then 440 // assume application is using setproctitle(3). 
441 if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { 442 len = strnlen(buffer, res); 443 if (len < res) { 444 res = len; 445 } else { 446 len = mm->env_end - mm->env_start; 447 if (len > PAGE_SIZE - res) 448 len = PAGE_SIZE - res; 449 res += access_process_vm(task, mm->env_start, buffer+res, len, 0); 450 res = strnlen(buffer, res); 451 } 452 } 453 out_mm: 454 mmput(mm); 455 out: 456 return res; 457 } 458 459 static int proc_pid_auxv(struct task_struct *task, char *buffer) 460 { 461 int res = 0; 462 struct mm_struct *mm = get_task_mm(task); 463 if (mm) { 464 unsigned int nwords = 0; 465 do 466 nwords += 2; 467 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 468 res = nwords * sizeof(mm->saved_auxv[0]); 469 if (res > PAGE_SIZE) 470 res = PAGE_SIZE; 471 memcpy(buffer, mm->saved_auxv, res); 472 mmput(mm); 473 } 474 return res; 475 } 476 477 478 #ifdef CONFIG_KALLSYMS 479 /* 480 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 481 * Returns the resolved symbol. If that fails, simply return the address. 
482 */ 483 static int proc_pid_wchan(struct task_struct *task, char *buffer) 484 { 485 char *modname; 486 const char *sym_name; 487 unsigned long wchan, size, offset; 488 char namebuf[KSYM_NAME_LEN+1]; 489 490 wchan = get_wchan(task); 491 492 sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); 493 if (sym_name) 494 return sprintf(buffer, "%s", sym_name); 495 return sprintf(buffer, "%lu", wchan); 496 } 497 #endif /* CONFIG_KALLSYMS */ 498 499 #ifdef CONFIG_SCHEDSTATS 500 /* 501 * Provides /proc/PID/schedstat 502 */ 503 static int proc_pid_schedstat(struct task_struct *task, char *buffer) 504 { 505 return sprintf(buffer, "%lu %lu %lu\n", 506 task->sched_info.cpu_time, 507 task->sched_info.run_delay, 508 task->sched_info.pcnt); 509 } 510 #endif 511 512 /* The badness from the OOM killer */ 513 unsigned long badness(struct task_struct *p, unsigned long uptime); 514 static int proc_oom_score(struct task_struct *task, char *buffer) 515 { 516 unsigned long points; 517 struct timespec uptime; 518 519 do_posix_clock_monotonic_gettime(&uptime); 520 points = badness(task, uptime.tv_sec); 521 return sprintf(buffer, "%lu\n", points); 522 } 523 524 /************************************************************************/ 525 /* Here the fs part begins */ 526 /************************************************************************/ 527 528 /* permission checks */ 529 530 /* If the process being read is separated by chroot from the reading process, 531 * don't let the reader access the threads. 
532 */ 533 static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) 534 { 535 struct dentry *de, *base; 536 struct vfsmount *our_vfsmnt, *mnt; 537 int res = 0; 538 read_lock(¤t->fs->lock); 539 our_vfsmnt = mntget(current->fs->rootmnt); 540 base = dget(current->fs->root); 541 read_unlock(¤t->fs->lock); 542 543 spin_lock(&vfsmount_lock); 544 de = root; 545 mnt = vfsmnt; 546 547 while (vfsmnt != our_vfsmnt) { 548 if (vfsmnt == vfsmnt->mnt_parent) 549 goto out; 550 de = vfsmnt->mnt_mountpoint; 551 vfsmnt = vfsmnt->mnt_parent; 552 } 553 554 if (!is_subdir(de, base)) 555 goto out; 556 spin_unlock(&vfsmount_lock); 557 558 exit: 559 dput(base); 560 mntput(our_vfsmnt); 561 dput(root); 562 mntput(mnt); 563 return res; 564 out: 565 spin_unlock(&vfsmount_lock); 566 res = -EACCES; 567 goto exit; 568 } 569 570 static int proc_check_root(struct inode *inode) 571 { 572 struct dentry *root; 573 struct vfsmount *vfsmnt; 574 575 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ 576 return -ENOENT; 577 return proc_check_chroot(root, vfsmnt); 578 } 579 580 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) 581 { 582 if (generic_permission(inode, mask, NULL) != 0) 583 return -EACCES; 584 return proc_check_root(inode); 585 } 586 587 static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) 588 { 589 struct dentry *root; 590 struct vfsmount *vfsmnt; 591 592 if (generic_permission(inode, mask, NULL) != 0) 593 return -EACCES; 594 595 if (proc_task_root_link(inode, &root, &vfsmnt)) 596 return -ENOENT; 597 598 return proc_check_chroot(root, vfsmnt); 599 } 600 601 extern struct seq_operations proc_pid_maps_op; 602 static int maps_open(struct inode *inode, struct file *file) 603 { 604 struct task_struct *task = proc_task(inode); 605 int ret = seq_open(file, &proc_pid_maps_op); 606 if (!ret) { 607 struct seq_file *m = file->private_data; 608 m->private = task; 609 } 610 return ret; 611 } 612 613 static struct 
file_operations proc_maps_operations = { 614 .open = maps_open, 615 .read = seq_read, 616 .llseek = seq_lseek, 617 .release = seq_release, 618 }; 619 620 #ifdef CONFIG_NUMA 621 extern struct seq_operations proc_pid_numa_maps_op; 622 static int numa_maps_open(struct inode *inode, struct file *file) 623 { 624 struct task_struct *task = proc_task(inode); 625 int ret = seq_open(file, &proc_pid_numa_maps_op); 626 if (!ret) { 627 struct seq_file *m = file->private_data; 628 m->private = task; 629 } 630 return ret; 631 } 632 633 static struct file_operations proc_numa_maps_operations = { 634 .open = numa_maps_open, 635 .read = seq_read, 636 .llseek = seq_lseek, 637 .release = seq_release, 638 }; 639 #endif 640 641 #ifdef CONFIG_MMU 642 extern struct seq_operations proc_pid_smaps_op; 643 static int smaps_open(struct inode *inode, struct file *file) 644 { 645 struct task_struct *task = proc_task(inode); 646 int ret = seq_open(file, &proc_pid_smaps_op); 647 if (!ret) { 648 struct seq_file *m = file->private_data; 649 m->private = task; 650 } 651 return ret; 652 } 653 654 static struct file_operations proc_smaps_operations = { 655 .open = smaps_open, 656 .read = seq_read, 657 .llseek = seq_lseek, 658 .release = seq_release, 659 }; 660 #endif 661 662 extern struct seq_operations mounts_op; 663 static int mounts_open(struct inode *inode, struct file *file) 664 { 665 struct task_struct *task = proc_task(inode); 666 int ret = seq_open(file, &mounts_op); 667 668 if (!ret) { 669 struct seq_file *m = file->private_data; 670 struct namespace *namespace; 671 task_lock(task); 672 namespace = task->namespace; 673 if (namespace) 674 get_namespace(namespace); 675 task_unlock(task); 676 677 if (namespace) 678 m->private = namespace; 679 else { 680 seq_release(inode, file); 681 ret = -EINVAL; 682 } 683 } 684 return ret; 685 } 686 687 static int mounts_release(struct inode *inode, struct file *file) 688 { 689 struct seq_file *m = file->private_data; 690 struct namespace *namespace = 
m->private; 691 put_namespace(namespace); 692 return seq_release(inode, file); 693 } 694 695 static struct file_operations proc_mounts_operations = { 696 .open = mounts_open, 697 .read = seq_read, 698 .llseek = seq_lseek, 699 .release = mounts_release, 700 }; 701 702 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 703 704 static ssize_t proc_info_read(struct file * file, char __user * buf, 705 size_t count, loff_t *ppos) 706 { 707 struct inode * inode = file->f_dentry->d_inode; 708 unsigned long page; 709 ssize_t length; 710 struct task_struct *task = proc_task(inode); 711 712 if (count > PROC_BLOCK_SIZE) 713 count = PROC_BLOCK_SIZE; 714 if (!(page = __get_free_page(GFP_KERNEL))) 715 return -ENOMEM; 716 717 length = PROC_I(inode)->op.proc_read(task, (char*)page); 718 719 if (length >= 0) 720 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 721 free_page(page); 722 return length; 723 } 724 725 static struct file_operations proc_info_file_operations = { 726 .read = proc_info_read, 727 }; 728 729 static int mem_open(struct inode* inode, struct file* file) 730 { 731 file->private_data = (void*)((long)current->self_exec_id); 732 return 0; 733 } 734 735 static ssize_t mem_read(struct file * file, char __user * buf, 736 size_t count, loff_t *ppos) 737 { 738 struct task_struct *task = proc_task(file->f_dentry->d_inode); 739 char *page; 740 unsigned long src = *ppos; 741 int ret = -ESRCH; 742 struct mm_struct *mm; 743 744 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 745 goto out; 746 747 ret = -ENOMEM; 748 page = (char *)__get_free_page(GFP_USER); 749 if (!page) 750 goto out; 751 752 ret = 0; 753 754 mm = get_task_mm(task); 755 if (!mm) 756 goto out_free; 757 758 ret = -EIO; 759 760 if (file->private_data != (void*)((long)current->self_exec_id)) 761 goto out_put; 762 763 ret = 0; 764 765 while (count > 0) { 766 int this_len, retval; 767 768 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 769 retval = access_process_vm(task, src, page, this_len, 0); 770 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { 771 if (!ret) 772 ret = -EIO; 773 break; 774 } 775 776 if (copy_to_user(buf, page, retval)) { 777 ret = -EFAULT; 778 break; 779 } 780 781 ret += retval; 782 src += retval; 783 buf += retval; 784 count -= retval; 785 } 786 *ppos = src; 787 788 out_put: 789 mmput(mm); 790 out_free: 791 free_page((unsigned long) page); 792 out: 793 return ret; 794 } 795 796 #define mem_write NULL 797 798 #ifndef mem_write 799 /* This is a security hazard */ 800 static ssize_t mem_write(struct file * file, const char * buf, 801 size_t count, loff_t *ppos) 802 { 803 int copied = 0; 804 char *page; 805 struct task_struct *task = proc_task(file->f_dentry->d_inode); 806 unsigned long dst = *ppos; 807 808 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 809 return -ESRCH; 810 811 page = (char *)__get_free_page(GFP_USER); 812 if (!page) 813 return -ENOMEM; 814 815 while (count > 0) { 816 int this_len, retval; 817 818 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 819 if (copy_from_user(page, buf, this_len)) { 820 copied = -EFAULT; 821 break; 822 } 823 retval = access_process_vm(task, dst, page, this_len, 1); 824 if (!retval) { 825 if (!copied) 826 copied = -EIO; 827 break; 828 } 829 copied += retval; 830 buf += retval; 831 dst += retval; 832 count -= retval; 833 } 834 *ppos = dst; 835 free_page((unsigned long) page); 836 return copied; 837 } 838 #endif 839 840 static loff_t mem_lseek(struct file * file, loff_t offset, int orig) 841 { 842 switch (orig) { 843 case 0: 844 file->f_pos = offset; 845 break; 846 case 1: 847 file->f_pos += offset; 848 break; 849 default: 850 return -EINVAL; 851 } 852 force_successful_syscall_return(); 853 return file->f_pos; 854 } 855 856 static struct file_operations proc_mem_operations = { 857 .llseek = mem_lseek, 858 .read = mem_read, 859 .write = mem_write, 860 .open = mem_open, 861 }; 862 863 static ssize_t oom_adjust_read(struct file *file, char __user *buf, 864 size_t count, loff_t *ppos) 865 { 866 struct task_struct *task = proc_task(file->f_dentry->d_inode); 867 char buffer[8]; 868 size_t len; 869 int oom_adjust = task->oomkilladj; 870 loff_t __ppos = *ppos; 871 872 len = sprintf(buffer, "%i\n", oom_adjust); 873 if (__ppos >= len) 874 return 0; 875 if (count > len-__ppos) 876 count = len-__ppos; 877 if (copy_to_user(buf, buffer + __ppos, count)) 878 return -EFAULT; 879 *ppos = __ppos + count; 880 return count; 881 } 882 883 static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 884 size_t count, loff_t *ppos) 885 { 886 struct task_struct *task = proc_task(file->f_dentry->d_inode); 887 char buffer[8], *end; 888 int oom_adjust; 889 890 if (!capable(CAP_SYS_RESOURCE)) 891 return -EPERM; 892 memset(buffer, 0, 8); 893 if (count > 6) 894 count = 6; 895 if (copy_from_user(buffer, buf, count)) 896 return -EFAULT; 897 oom_adjust = simple_strtol(buffer, &end, 0); 898 if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE) 899 return -EINVAL; 900 
if (*end == '\n') 901 end++; 902 task->oomkilladj = oom_adjust; 903 if (end - buffer == 0) 904 return -EIO; 905 return end - buffer; 906 } 907 908 static struct file_operations proc_oom_adjust_operations = { 909 .read = oom_adjust_read, 910 .write = oom_adjust_write, 911 }; 912 913 static struct inode_operations proc_mem_inode_operations = { 914 .permission = proc_permission, 915 }; 916 917 #ifdef CONFIG_AUDITSYSCALL 918 #define TMPBUFLEN 21 919 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 920 size_t count, loff_t *ppos) 921 { 922 struct inode * inode = file->f_dentry->d_inode; 923 struct task_struct *task = proc_task(inode); 924 ssize_t length; 925 char tmpbuf[TMPBUFLEN]; 926 927 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 928 audit_get_loginuid(task->audit_context)); 929 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 930 } 931 932 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 933 size_t count, loff_t *ppos) 934 { 935 struct inode * inode = file->f_dentry->d_inode; 936 char *page, *tmp; 937 ssize_t length; 938 struct task_struct *task = proc_task(inode); 939 uid_t loginuid; 940 941 if (!capable(CAP_AUDIT_CONTROL)) 942 return -EPERM; 943 944 if (current != task) 945 return -EPERM; 946 947 if (count > PAGE_SIZE) 948 count = PAGE_SIZE; 949 950 if (*ppos != 0) { 951 /* No partial writes. 
*/ 952 return -EINVAL; 953 } 954 page = (char*)__get_free_page(GFP_USER); 955 if (!page) 956 return -ENOMEM; 957 length = -EFAULT; 958 if (copy_from_user(page, buf, count)) 959 goto out_free_page; 960 961 loginuid = simple_strtoul(page, &tmp, 10); 962 if (tmp == page) { 963 length = -EINVAL; 964 goto out_free_page; 965 966 } 967 length = audit_set_loginuid(task, loginuid); 968 if (likely(length == 0)) 969 length = count; 970 971 out_free_page: 972 free_page((unsigned long) page); 973 return length; 974 } 975 976 static struct file_operations proc_loginuid_operations = { 977 .read = proc_loginuid_read, 978 .write = proc_loginuid_write, 979 }; 980 #endif 981 982 #ifdef CONFIG_SECCOMP 983 static ssize_t seccomp_read(struct file *file, char __user *buf, 984 size_t count, loff_t *ppos) 985 { 986 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 987 char __buf[20]; 988 loff_t __ppos = *ppos; 989 size_t len; 990 991 /* no need to print the trailing zero, so use only len */ 992 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 993 if (__ppos >= len) 994 return 0; 995 if (count > len - __ppos) 996 count = len - __ppos; 997 if (copy_to_user(buf, __buf + __ppos, count)) 998 return -EFAULT; 999 *ppos = __ppos + count; 1000 return count; 1001 } 1002 1003 static ssize_t seccomp_write(struct file *file, const char __user *buf, 1004 size_t count, loff_t *ppos) 1005 { 1006 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 1007 char __buf[20], *end; 1008 unsigned int seccomp_mode; 1009 1010 /* can set it only once to be even more secure */ 1011 if (unlikely(tsk->seccomp.mode)) 1012 return -EPERM; 1013 1014 memset(__buf, 0, sizeof(__buf)); 1015 count = min(count, sizeof(__buf) - 1); 1016 if (copy_from_user(__buf, buf, count)) 1017 return -EFAULT; 1018 seccomp_mode = simple_strtoul(__buf, &end, 0); 1019 if (*end == '\n') 1020 end++; 1021 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 1022 tsk->seccomp.mode = seccomp_mode; 1023 set_tsk_thread_flag(tsk, 
TIF_SECCOMP); 1024 } else 1025 return -EINVAL; 1026 if (unlikely(!(end - __buf))) 1027 return -EIO; 1028 return end - __buf; 1029 } 1030 1031 static struct file_operations proc_seccomp_operations = { 1032 .read = seccomp_read, 1033 .write = seccomp_write, 1034 }; 1035 #endif /* CONFIG_SECCOMP */ 1036 1037 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1038 { 1039 struct inode *inode = dentry->d_inode; 1040 int error = -EACCES; 1041 1042 /* We don't need a base pointer in the /proc filesystem */ 1043 path_release(nd); 1044 1045 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 1046 goto out; 1047 error = proc_check_root(inode); 1048 if (error) 1049 goto out; 1050 1051 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 1052 nd->last_type = LAST_BIND; 1053 out: 1054 return ERR_PTR(error); 1055 } 1056 1057 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, 1058 char __user *buffer, int buflen) 1059 { 1060 struct inode * inode; 1061 char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; 1062 int len; 1063 1064 if (!tmp) 1065 return -ENOMEM; 1066 1067 inode = dentry->d_inode; 1068 path = d_path(dentry, mnt, tmp, PAGE_SIZE); 1069 len = PTR_ERR(path); 1070 if (IS_ERR(path)) 1071 goto out; 1072 len = tmp + PAGE_SIZE - 1 - path; 1073 1074 if (len > buflen) 1075 len = buflen; 1076 if (copy_to_user(buffer, path, len)) 1077 len = -EFAULT; 1078 out: 1079 free_page((unsigned long)tmp); 1080 return len; 1081 } 1082 1083 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1084 { 1085 int error = -EACCES; 1086 struct inode *inode = dentry->d_inode; 1087 struct dentry *de; 1088 struct vfsmount *mnt = NULL; 1089 1090 lock_kernel(); 1091 1092 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 1093 goto out; 1094 error = proc_check_root(inode); 1095 if (error) 1096 goto out; 1097 1098 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); 1099 
if (error)
		goto out;

	error = do_proc_readlink(de, mnt, buffer, buflen);
	dput(de);
	mntput(mnt);
out:
	unlock_kernel();
	return error;
}

/* Inode operations shared by the per-task symbolic-link entries. */
static struct inode_operations proc_pid_link_inode_operations = {
	.readlink	= proc_pid_readlink,
	.follow_link	= proc_pid_follow_link
};

/* Size of the scratch buffer used to render an fd number in decimal. */
#define NUMBUF 10

/*
 * readdir for a per-task fd directory.
 *
 * Emits "." (f_pos 0), ".." (f_pos 1) and then one DT_LNK entry per
 * descriptor for which fcheck_files() finds a file; entry names are the
 * decimal fd numbers.  f_pos - 2 is the fd at which the scan resumes.
 *
 * Returns -ENOENT if the task is no longer alive, 0 otherwise (also when
 * filldir() refuses an entry or the task has no files_struct).
 */
static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct task_struct *p = proc_task(inode);
	unsigned int fd, tid, ino;
	int retval;
	char buf[NUMBUF];
	struct files_struct * files;
	struct fdtable *fdt;

	retval = -ENOENT;
	if (!pid_alive(p))
		goto out;
	retval = 0;
	tid = p->pid;

	fd = filp->f_pos;
	switch (fd) {
		case 0:
			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
				goto out;
			filp->f_pos++;
			/* fall through */
		case 1:
			ino = fake_ino(tid, PROC_TID_INO);
			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
				goto out;
			filp->f_pos++;
			/* fall through */
		default:
			files = get_files_struct(p);
			if (!files)
				goto out;
			rcu_read_lock();
			fdt = files_fdtable(files);
			for (fd = filp->f_pos-2;
			     fd < fdt->max_fds;
			     fd++, filp->f_pos++) {
				unsigned int i,j;

				/* Skip slots with no file; still inside the RCU section. */
				if (!fcheck_files(files, fd))
					continue;
				/*
				 * The RCU read-side section is left before calling
				 * filldir() and re-entered afterwards (presumably
				 * because filldir() may block - TODO confirm).
				 */
				rcu_read_unlock();

				/* Render fd in decimal, right-aligned in buf[]. */
				j = NUMBUF;
				i = fd;
				do {
					j--;
					buf[j] = '0' + (i % 10);
					i /= 10;
				} while (i);

				ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
				if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
					/* Re-take the lock so the unlock after the loop balances. */
					rcu_read_lock();
					break;
				}
				rcu_read_lock();
			}
			rcu_read_unlock();
			put_files_struct(files);
	}
out:
	return retval;
}

/*
 * Common readdir for directories described by a pid_entry table.
 *
 * @ents:  table of entries; the walk stops at the first entry whose
 *         ->name is NULL.
 * @nents: number of slots in @ents; a resume index at or beyond it ends
 *         the listing immediately.
 *
 * Emits "." and ".." for f_pos 0 and 1, then the table entries starting at
 * index f_pos - 2.
 *
 * Returns -ENOENT if the task is not alive, 0 if filldir() refused an
 * entry, 1 once the end of the table has been reached.
 */
static int proc_pident_readdir(struct file *filp,
		void *dirent, filldir_t filldir,
		struct pid_entry *ents, unsigned int nents)
{
	int i;
	int pid;
	struct dentry *dentry = filp->f_dentry;
	struct inode *inode = dentry->d_inode;
	struct pid_entry *p;
	ino_t ino;
	int ret;

	ret = -ENOENT;
	if (!pid_alive(proc_task(inode)))
		goto out;

	ret = 0;
	pid = proc_task(inode)->pid;
	i = filp->f_pos;
	switch (i) {
	case 0:
		ino = inode->i_ino;
		if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
			goto out;
		i++;
		filp->f_pos++;
		/* fall through */
	case 1:
		ino = parent_ino(dentry);
		if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
			goto out;
		i++;
		filp->f_pos++;
		/* fall through */
	default:
		/* Convert the directory position into a table index. */
		i -= 2;
		if (i >= nents) {
			ret = 1;
			goto out;
		}
		p = ents + i;
		while (p->name) {
			/*
			 * mode >> 12 is handed to filldir() as the entry type
			 * (presumably mapping the S_IF* bits to DT_* - TODO confirm).
			 */
			if (filldir(dirent, p->name, p->len, filp->f_pos,
				    fake_ino(pid, p->type), p->mode >> 12) < 0)
				goto out;
			filp->f_pos++;
			p++;
		}
	}

	ret = 1;
out:
	return ret;
}

/* readdir for the tgid base directory: walks tgid_base_stuff. */
static int proc_tgid_base_readdir(struct file * filp,
			     void * dirent, filldir_t filldir)
{
	return proc_pident_readdir(filp,dirent,filldir,
				   tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
}

/* readdir for the tid base directory: walks tid_base_stuff. */
static int proc_tid_base_readdir(struct file * filp,
			     void * dirent, filldir_t filldir)
{
	return proc_pident_readdir(filp,dirent,filldir,
				   tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
}

/* building an inode */

/*
 * Returns 1 only when the task has an mm whose ->dumpable field equals 1;
 * returns 0 when there is no mm or the field holds any other value.
 * ->mm is sampled under task_lock().
 */
static int task_dumpable(struct task_struct *task)
{
	int dumpable = 0;
	struct mm_struct *mm;

	task_lock(task);
	mm = task->mm;
	if (mm)
		dumpable = mm->dumpable;
	task_unlock(task);
	if(dumpable == 1)
		return 1;
	return 0;
}


/*
 * Allocate and initialise an inode that represents @task.
 *
 * @sb:   superblock to allocate the inode from.
 * @task: task the inode refers to; a reference is taken on success.
 * @ino:  entry type; stored in ei->type and combined with the pid by
 *        fake_ino() to form i_ino.
 *
 * Ownership is root (0/0) unless the entry is the tgid/tid directory itself
 * or the task is dumpable, in which case the task's euid/egid are used.
 *
 * Returns the new inode, or NULL if allocation failed or the task is no
 * longer alive (in which case no task reference is taken).
 */
static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
{
	struct inode * inode;
	struct proc_inode *ei;

	/* We need a new inode */

	inode = new_inode(sb);
	if (!inode)
		goto out;

	/* Common stuff */
	ei = PROC_I(inode);
	ei->task = NULL;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	inode->i_ino = fake_ino(task->pid, ino);

	if (!pid_alive(task))
		goto out_unlock;

	/*
	 * grab the reference to task.
	 */
	get_task_struct(task);
	ei->task = task;
	ei->type = ino;
	inode->i_uid = 0;
	inode->i_gid = 0;
	if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) {
		inode->i_uid = task->euid;
		inode->i_gid = task->egid;
	}
	security_task_to_inode(task, inode);

out:
	return inode;

	/* Despite the label name, no lock is held here: just discard the inode. */
out_unlock:
	ei->pde = NULL;
	iput(inode);
	return NULL;
}

/* dentry stuff */

/*
 *	Exceptional case: normally we are not allowed to unhash a busy
 * directory. In this case, however, we can do it - no aliasing problems
 * due to the way we treat inodes.
 *
 * Rewrite the inode's ownerships here because the owning task may have
 * performed a setuid(), etc.
1320 */ 1321 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1322 { 1323 struct inode *inode = dentry->d_inode; 1324 struct task_struct *task = proc_task(inode); 1325 if (pid_alive(task)) { 1326 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { 1327 inode->i_uid = task->euid; 1328 inode->i_gid = task->egid; 1329 } else { 1330 inode->i_uid = 0; 1331 inode->i_gid = 0; 1332 } 1333 security_task_to_inode(task, inode); 1334 return 1; 1335 } 1336 d_drop(dentry); 1337 return 0; 1338 } 1339 1340 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1341 { 1342 struct inode *inode = dentry->d_inode; 1343 struct task_struct *task = proc_task(inode); 1344 int fd = proc_type(inode) - PROC_TID_FD_DIR; 1345 struct files_struct *files; 1346 1347 files = get_files_struct(task); 1348 if (files) { 1349 rcu_read_lock(); 1350 if (fcheck_files(files, fd)) { 1351 rcu_read_unlock(); 1352 put_files_struct(files); 1353 if (task_dumpable(task)) { 1354 inode->i_uid = task->euid; 1355 inode->i_gid = task->egid; 1356 } else { 1357 inode->i_uid = 0; 1358 inode->i_gid = 0; 1359 } 1360 security_task_to_inode(task, inode); 1361 return 1; 1362 } 1363 rcu_read_unlock(); 1364 put_files_struct(files); 1365 } 1366 d_drop(dentry); 1367 return 0; 1368 } 1369 1370 static void pid_base_iput(struct dentry *dentry, struct inode *inode) 1371 { 1372 struct task_struct *task = proc_task(inode); 1373 spin_lock(&task->proc_lock); 1374 if (task->proc_dentry == dentry) 1375 task->proc_dentry = NULL; 1376 spin_unlock(&task->proc_lock); 1377 iput(inode); 1378 } 1379 1380 static int pid_delete_dentry(struct dentry * dentry) 1381 { 1382 /* Is the task we represent dead? 1383 * If so, then don't put the dentry on the lru list, 1384 * kill it immediately. 
1385 */ 1386 return !pid_alive(proc_task(dentry->d_inode)); 1387 } 1388 1389 static struct dentry_operations tid_fd_dentry_operations = 1390 { 1391 .d_revalidate = tid_fd_revalidate, 1392 .d_delete = pid_delete_dentry, 1393 }; 1394 1395 static struct dentry_operations pid_dentry_operations = 1396 { 1397 .d_revalidate = pid_revalidate, 1398 .d_delete = pid_delete_dentry, 1399 }; 1400 1401 static struct dentry_operations pid_base_dentry_operations = 1402 { 1403 .d_revalidate = pid_revalidate, 1404 .d_iput = pid_base_iput, 1405 .d_delete = pid_delete_dentry, 1406 }; 1407 1408 /* Lookups */ 1409 1410 static unsigned name_to_int(struct dentry *dentry) 1411 { 1412 const char *name = dentry->d_name.name; 1413 int len = dentry->d_name.len; 1414 unsigned n = 0; 1415 1416 if (len > 1 && *name == '0') 1417 goto out; 1418 while (len-- > 0) { 1419 unsigned c = *name++ - '0'; 1420 if (c > 9) 1421 goto out; 1422 if (n >= (~0U-9)/10) 1423 goto out; 1424 n *= 10; 1425 n += c; 1426 } 1427 return n; 1428 out: 1429 return ~0U; 1430 } 1431 1432 /* SMP-safe */ 1433 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1434 { 1435 struct task_struct *task = proc_task(dir); 1436 unsigned fd = name_to_int(dentry); 1437 struct file * file; 1438 struct files_struct * files; 1439 struct inode *inode; 1440 struct proc_inode *ei; 1441 1442 if (fd == ~0U) 1443 goto out; 1444 if (!pid_alive(task)) 1445 goto out; 1446 1447 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1448 if (!inode) 1449 goto out; 1450 ei = PROC_I(inode); 1451 files = get_files_struct(task); 1452 if (!files) 1453 goto out_unlock; 1454 inode->i_mode = S_IFLNK; 1455 rcu_read_lock(); 1456 file = fcheck_files(files, fd); 1457 if (!file) 1458 goto out_unlock2; 1459 if (file->f_mode & 1) 1460 inode->i_mode |= S_IRUSR | S_IXUSR; 1461 if (file->f_mode & 2) 1462 inode->i_mode |= S_IWUSR | S_IXUSR; 1463 rcu_read_unlock(); 1464 put_files_struct(files); 1465 inode->i_op = 
&proc_pid_link_inode_operations; 1466 inode->i_size = 64; 1467 ei->op.proc_get_link = proc_fd_link; 1468 dentry->d_op = &tid_fd_dentry_operations; 1469 d_add(dentry, inode); 1470 return NULL; 1471 1472 out_unlock2: 1473 rcu_read_unlock(); 1474 put_files_struct(files); 1475 out_unlock: 1476 iput(inode); 1477 out: 1478 return ERR_PTR(-ENOENT); 1479 } 1480 1481 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1482 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1483 1484 static struct file_operations proc_fd_operations = { 1485 .read = generic_read_dir, 1486 .readdir = proc_readfd, 1487 }; 1488 1489 static struct file_operations proc_task_operations = { 1490 .read = generic_read_dir, 1491 .readdir = proc_task_readdir, 1492 }; 1493 1494 /* 1495 * proc directories can do almost nothing.. 1496 */ 1497 static struct inode_operations proc_fd_inode_operations = { 1498 .lookup = proc_lookupfd, 1499 .permission = proc_permission, 1500 }; 1501 1502 static struct inode_operations proc_task_inode_operations = { 1503 .lookup = proc_task_lookup, 1504 .permission = proc_task_permission, 1505 }; 1506 1507 #ifdef CONFIG_SECURITY 1508 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 1509 size_t count, loff_t *ppos) 1510 { 1511 struct inode * inode = file->f_dentry->d_inode; 1512 unsigned long page; 1513 ssize_t length; 1514 struct task_struct *task = proc_task(inode); 1515 1516 if (count > PAGE_SIZE) 1517 count = PAGE_SIZE; 1518 if (!(page = __get_free_page(GFP_KERNEL))) 1519 return -ENOMEM; 1520 1521 length = security_getprocattr(task, 1522 (char*)file->f_dentry->d_name.name, 1523 (void*)page, count); 1524 if (length >= 0) 1525 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1526 free_page(page); 1527 return length; 1528 } 1529 1530 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 1531 size_t count, loff_t *ppos) 
1532 { 1533 struct inode * inode = file->f_dentry->d_inode; 1534 char *page; 1535 ssize_t length; 1536 struct task_struct *task = proc_task(inode); 1537 1538 if (count > PAGE_SIZE) 1539 count = PAGE_SIZE; 1540 if (*ppos != 0) { 1541 /* No partial writes. */ 1542 return -EINVAL; 1543 } 1544 page = (char*)__get_free_page(GFP_USER); 1545 if (!page) 1546 return -ENOMEM; 1547 length = -EFAULT; 1548 if (copy_from_user(page, buf, count)) 1549 goto out; 1550 1551 length = security_setprocattr(task, 1552 (char*)file->f_dentry->d_name.name, 1553 (void*)page, count); 1554 out: 1555 free_page((unsigned long) page); 1556 return length; 1557 } 1558 1559 static struct file_operations proc_pid_attr_operations = { 1560 .read = proc_pid_attr_read, 1561 .write = proc_pid_attr_write, 1562 }; 1563 1564 static struct file_operations proc_tid_attr_operations; 1565 static struct inode_operations proc_tid_attr_inode_operations; 1566 static struct file_operations proc_tgid_attr_operations; 1567 static struct inode_operations proc_tgid_attr_inode_operations; 1568 #endif 1569 1570 static int get_tid_list(int index, unsigned int *tids, struct inode *dir); 1571 1572 /* SMP-safe */ 1573 static struct dentry *proc_pident_lookup(struct inode *dir, 1574 struct dentry *dentry, 1575 struct pid_entry *ents) 1576 { 1577 struct inode *inode; 1578 int error; 1579 struct task_struct *task = proc_task(dir); 1580 struct pid_entry *p; 1581 struct proc_inode *ei; 1582 1583 error = -ENOENT; 1584 inode = NULL; 1585 1586 if (!pid_alive(task)) 1587 goto out; 1588 1589 for (p = ents; p->name; p++) { 1590 if (p->len != dentry->d_name.len) 1591 continue; 1592 if (!memcmp(dentry->d_name.name, p->name, p->len)) 1593 break; 1594 } 1595 if (!p->name) 1596 goto out; 1597 1598 error = -EINVAL; 1599 inode = proc_pid_make_inode(dir->i_sb, task, p->type); 1600 if (!inode) 1601 goto out; 1602 1603 ei = PROC_I(inode); 1604 inode->i_mode = p->mode; 1605 /* 1606 * Yes, it does not scale. And it should not. 
Don't add 1607 * new entries into /proc/<tgid>/ without very good reasons. 1608 */ 1609 switch(p->type) { 1610 case PROC_TGID_TASK: 1611 inode->i_nlink = 2 + get_tid_list(2, NULL, dir); 1612 inode->i_op = &proc_task_inode_operations; 1613 inode->i_fop = &proc_task_operations; 1614 break; 1615 case PROC_TID_FD: 1616 case PROC_TGID_FD: 1617 inode->i_nlink = 2; 1618 inode->i_op = &proc_fd_inode_operations; 1619 inode->i_fop = &proc_fd_operations; 1620 break; 1621 case PROC_TID_EXE: 1622 case PROC_TGID_EXE: 1623 inode->i_op = &proc_pid_link_inode_operations; 1624 ei->op.proc_get_link = proc_exe_link; 1625 break; 1626 case PROC_TID_CWD: 1627 case PROC_TGID_CWD: 1628 inode->i_op = &proc_pid_link_inode_operations; 1629 ei->op.proc_get_link = proc_cwd_link; 1630 break; 1631 case PROC_TID_ROOT: 1632 case PROC_TGID_ROOT: 1633 inode->i_op = &proc_pid_link_inode_operations; 1634 ei->op.proc_get_link = proc_root_link; 1635 break; 1636 case PROC_TID_ENVIRON: 1637 case PROC_TGID_ENVIRON: 1638 inode->i_fop = &proc_info_file_operations; 1639 ei->op.proc_read = proc_pid_environ; 1640 break; 1641 case PROC_TID_AUXV: 1642 case PROC_TGID_AUXV: 1643 inode->i_fop = &proc_info_file_operations; 1644 ei->op.proc_read = proc_pid_auxv; 1645 break; 1646 case PROC_TID_STATUS: 1647 case PROC_TGID_STATUS: 1648 inode->i_fop = &proc_info_file_operations; 1649 ei->op.proc_read = proc_pid_status; 1650 break; 1651 case PROC_TID_STAT: 1652 inode->i_fop = &proc_info_file_operations; 1653 ei->op.proc_read = proc_tid_stat; 1654 break; 1655 case PROC_TGID_STAT: 1656 inode->i_fop = &proc_info_file_operations; 1657 ei->op.proc_read = proc_tgid_stat; 1658 break; 1659 case PROC_TID_CMDLINE: 1660 case PROC_TGID_CMDLINE: 1661 inode->i_fop = &proc_info_file_operations; 1662 ei->op.proc_read = proc_pid_cmdline; 1663 break; 1664 case PROC_TID_STATM: 1665 case PROC_TGID_STATM: 1666 inode->i_fop = &proc_info_file_operations; 1667 ei->op.proc_read = proc_pid_statm; 1668 break; 1669 case PROC_TID_MAPS: 1670 case 
PROC_TGID_MAPS: 1671 inode->i_fop = &proc_maps_operations; 1672 break; 1673 #ifdef CONFIG_NUMA 1674 case PROC_TID_NUMA_MAPS: 1675 case PROC_TGID_NUMA_MAPS: 1676 inode->i_fop = &proc_numa_maps_operations; 1677 break; 1678 #endif 1679 case PROC_TID_MEM: 1680 case PROC_TGID_MEM: 1681 inode->i_op = &proc_mem_inode_operations; 1682 inode->i_fop = &proc_mem_operations; 1683 break; 1684 #ifdef CONFIG_SECCOMP 1685 case PROC_TID_SECCOMP: 1686 case PROC_TGID_SECCOMP: 1687 inode->i_fop = &proc_seccomp_operations; 1688 break; 1689 #endif /* CONFIG_SECCOMP */ 1690 case PROC_TID_MOUNTS: 1691 case PROC_TGID_MOUNTS: 1692 inode->i_fop = &proc_mounts_operations; 1693 break; 1694 #ifdef CONFIG_MMU 1695 case PROC_TID_SMAPS: 1696 case PROC_TGID_SMAPS: 1697 inode->i_fop = &proc_smaps_operations; 1698 break; 1699 #endif 1700 #ifdef CONFIG_SECURITY 1701 case PROC_TID_ATTR: 1702 inode->i_nlink = 2; 1703 inode->i_op = &proc_tid_attr_inode_operations; 1704 inode->i_fop = &proc_tid_attr_operations; 1705 break; 1706 case PROC_TGID_ATTR: 1707 inode->i_nlink = 2; 1708 inode->i_op = &proc_tgid_attr_inode_operations; 1709 inode->i_fop = &proc_tgid_attr_operations; 1710 break; 1711 case PROC_TID_ATTR_CURRENT: 1712 case PROC_TGID_ATTR_CURRENT: 1713 case PROC_TID_ATTR_PREV: 1714 case PROC_TGID_ATTR_PREV: 1715 case PROC_TID_ATTR_EXEC: 1716 case PROC_TGID_ATTR_EXEC: 1717 case PROC_TID_ATTR_FSCREATE: 1718 case PROC_TGID_ATTR_FSCREATE: 1719 inode->i_fop = &proc_pid_attr_operations; 1720 break; 1721 #endif 1722 #ifdef CONFIG_KALLSYMS 1723 case PROC_TID_WCHAN: 1724 case PROC_TGID_WCHAN: 1725 inode->i_fop = &proc_info_file_operations; 1726 ei->op.proc_read = proc_pid_wchan; 1727 break; 1728 #endif 1729 #ifdef CONFIG_SCHEDSTATS 1730 case PROC_TID_SCHEDSTAT: 1731 case PROC_TGID_SCHEDSTAT: 1732 inode->i_fop = &proc_info_file_operations; 1733 ei->op.proc_read = proc_pid_schedstat; 1734 break; 1735 #endif 1736 #ifdef CONFIG_CPUSETS 1737 case PROC_TID_CPUSET: 1738 case PROC_TGID_CPUSET: 1739 inode->i_fop = 
&proc_cpuset_operations; 1740 break; 1741 #endif 1742 case PROC_TID_OOM_SCORE: 1743 case PROC_TGID_OOM_SCORE: 1744 inode->i_fop = &proc_info_file_operations; 1745 ei->op.proc_read = proc_oom_score; 1746 break; 1747 case PROC_TID_OOM_ADJUST: 1748 case PROC_TGID_OOM_ADJUST: 1749 inode->i_fop = &proc_oom_adjust_operations; 1750 break; 1751 #ifdef CONFIG_AUDITSYSCALL 1752 case PROC_TID_LOGINUID: 1753 case PROC_TGID_LOGINUID: 1754 inode->i_fop = &proc_loginuid_operations; 1755 break; 1756 #endif 1757 default: 1758 printk("procfs: impossible type (%d)",p->type); 1759 iput(inode); 1760 return ERR_PTR(-EINVAL); 1761 } 1762 dentry->d_op = &pid_dentry_operations; 1763 d_add(dentry, inode); 1764 return NULL; 1765 1766 out: 1767 return ERR_PTR(error); 1768 } 1769 1770 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1771 return proc_pident_lookup(dir, dentry, tgid_base_stuff); 1772 } 1773 1774 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1775 return proc_pident_lookup(dir, dentry, tid_base_stuff); 1776 } 1777 1778 static struct file_operations proc_tgid_base_operations = { 1779 .read = generic_read_dir, 1780 .readdir = proc_tgid_base_readdir, 1781 }; 1782 1783 static struct file_operations proc_tid_base_operations = { 1784 .read = generic_read_dir, 1785 .readdir = proc_tid_base_readdir, 1786 }; 1787 1788 static struct inode_operations proc_tgid_base_inode_operations = { 1789 .lookup = proc_tgid_base_lookup, 1790 }; 1791 1792 static struct inode_operations proc_tid_base_inode_operations = { 1793 .lookup = proc_tid_base_lookup, 1794 }; 1795 1796 #ifdef CONFIG_SECURITY 1797 static int proc_tgid_attr_readdir(struct file * filp, 1798 void * dirent, filldir_t filldir) 1799 { 1800 return proc_pident_readdir(filp,dirent,filldir, 1801 tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff)); 1802 } 1803 1804 static int proc_tid_attr_readdir(struct file * filp, 1805 void * 
dirent, filldir_t filldir) 1806 { 1807 return proc_pident_readdir(filp,dirent,filldir, 1808 tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); 1809 } 1810 1811 static struct file_operations proc_tgid_attr_operations = { 1812 .read = generic_read_dir, 1813 .readdir = proc_tgid_attr_readdir, 1814 }; 1815 1816 static struct file_operations proc_tid_attr_operations = { 1817 .read = generic_read_dir, 1818 .readdir = proc_tid_attr_readdir, 1819 }; 1820 1821 static struct dentry *proc_tgid_attr_lookup(struct inode *dir, 1822 struct dentry *dentry, struct nameidata *nd) 1823 { 1824 return proc_pident_lookup(dir, dentry, tgid_attr_stuff); 1825 } 1826 1827 static struct dentry *proc_tid_attr_lookup(struct inode *dir, 1828 struct dentry *dentry, struct nameidata *nd) 1829 { 1830 return proc_pident_lookup(dir, dentry, tid_attr_stuff); 1831 } 1832 1833 static struct inode_operations proc_tgid_attr_inode_operations = { 1834 .lookup = proc_tgid_attr_lookup, 1835 }; 1836 1837 static struct inode_operations proc_tid_attr_inode_operations = { 1838 .lookup = proc_tid_attr_lookup, 1839 }; 1840 #endif 1841 1842 /* 1843 * /proc/self: 1844 */ 1845 static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 1846 int buflen) 1847 { 1848 char tmp[30]; 1849 sprintf(tmp, "%d", current->tgid); 1850 return vfs_readlink(dentry,buffer,buflen,tmp); 1851 } 1852 1853 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1854 { 1855 char tmp[30]; 1856 sprintf(tmp, "%d", current->tgid); 1857 return ERR_PTR(vfs_follow_link(nd,tmp)); 1858 } 1859 1860 static struct inode_operations proc_self_inode_operations = { 1861 .readlink = proc_self_readlink, 1862 .follow_link = proc_self_follow_link, 1863 }; 1864 1865 /** 1866 * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. 1867 * @p: task that should be flushed. 1868 * 1869 * Drops the /proc/@pid dcache entry from the hash chains. 
1870 * 1871 * Dropping /proc/@pid entries and detach_pid must be synchroneous, 1872 * otherwise e.g. /proc/@pid/exe might point to the wrong executable, 1873 * if the pid value is immediately reused. This is enforced by 1874 * - caller must acquire spin_lock(p->proc_lock) 1875 * - must be called before detach_pid() 1876 * - proc_pid_lookup acquires proc_lock, and checks that 1877 * the target is not dead by looking at the attach count 1878 * of PIDTYPE_PID. 1879 */ 1880 1881 struct dentry *proc_pid_unhash(struct task_struct *p) 1882 { 1883 struct dentry *proc_dentry; 1884 1885 proc_dentry = p->proc_dentry; 1886 if (proc_dentry != NULL) { 1887 1888 spin_lock(&dcache_lock); 1889 spin_lock(&proc_dentry->d_lock); 1890 if (!d_unhashed(proc_dentry)) { 1891 dget_locked(proc_dentry); 1892 __d_drop(proc_dentry); 1893 spin_unlock(&proc_dentry->d_lock); 1894 } else { 1895 spin_unlock(&proc_dentry->d_lock); 1896 proc_dentry = NULL; 1897 } 1898 spin_unlock(&dcache_lock); 1899 } 1900 return proc_dentry; 1901 } 1902 1903 /** 1904 * proc_pid_flush - recover memory used by stale /proc/@pid/x entries 1905 * @proc_dentry: directoy to prune. 1906 * 1907 * Shrink the /proc directory that was used by the just killed thread. 
*/

void proc_pid_flush(struct dentry *proc_dentry)
{
	might_sleep();
	/* A NULL dentry is accepted and ignored. */
	if(proc_dentry != NULL) {
		shrink_dcache_parent(proc_dentry);
		/* Drop the caller's reference to the directory dentry. */
		dput(proc_dentry);
	}
}

/* SMP-safe */
/*
 * Lookup of a name in the /proc root that may denote a process.
 *
 * "self" yields a symlink inode served by proc_self_inode_operations.
 * A decimal name is looked up as a pid; on success a directory inode for
 * that task is created, added to the dcache and remembered in
 * task->proc_dentry.
 *
 * Returns NULL after d_add() on success, ERR_PTR(-ENOMEM) if the "self"
 * inode cannot be allocated, ERR_PTR(-ENOENT) in every other failure case.
 */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{
	struct task_struct *task;
	struct inode *inode;
	struct proc_inode *ei;
	unsigned tgid;
	int died;

	if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
		inode = new_inode(dir->i_sb);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		ei = PROC_I(inode);
		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
		inode->i_ino = fake_ino(0, PROC_TGID_INO);
		ei->pde = NULL;
		inode->i_mode = S_IFLNK|S_IRWXUGO;
		inode->i_uid = inode->i_gid = 0;
		inode->i_size = 64;
		inode->i_op = &proc_self_inode_operations;
		d_add(dentry, inode);
		return NULL;
	}
	tgid = name_to_int(dentry);
	if (tgid == ~0U)
		goto out;

	/* Find the task and pin it before tasklist_lock is released. */
	read_lock(&tasklist_lock);
	task = find_task_by_pid(tgid);
	if (task)
		get_task_struct(task);
	read_unlock(&tasklist_lock);
	if (!task)
		goto out;

	inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);


	if (!inode) {
		put_task_struct(task);
		goto out;
	}
	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
	inode->i_op = &proc_tgid_base_inode_operations;
	inode->i_fop = &proc_tgid_base_operations;
	inode->i_flags|=S_IMMUTABLE;
	/* One more link with CONFIG_SECURITY (presumably the attr subdirectory - TODO confirm). */
#ifdef CONFIG_SECURITY
	inode->i_nlink = 5;
#else
	inode->i_nlink = 4;
#endif

	dentry->d_op = &pid_base_dentry_operations;

	died = 0;
	d_add(dentry, inode);
	spin_lock(&task->proc_lock);
	task->proc_dentry = dentry;
	/*
	 * The task may have died since the lookup started: if so, unhash the
	 * dentry that was just added and fail the lookup below.
	 */
	if (!pid_alive(task)) {
		dentry = proc_pid_unhash(task);
		died = 1;
	}
	spin_unlock(&task->proc_lock);

	/* Drop the reference taken under tasklist_lock above. */
	put_task_struct(task);
	if (died) {
		/* Flushing is done only after proc_lock has been released. */
		proc_pid_flush(dentry);
		goto out;
	}
	return NULL;
out:
	return ERR_PTR(-ENOENT);
}

/* SMP-safe */
/*
 * Lookup of a numeric name inside a task directory.
 *
 * The name must parse as a pid whose task exists and has the same tgid as
 * the task that owns @dir.  On success a directory inode for that thread
 * is added to the dcache.
 *
 * Returns NULL after d_add() on success, ERR_PTR(-ENOENT) otherwise.
 */
static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{
	struct task_struct *task;
	struct task_struct *leader = proc_task(dir);
	struct inode *inode;
	unsigned tid;

	tid = name_to_int(dentry);
	if (tid == ~0U)
		goto out;

	/* Find the task and pin it before tasklist_lock is released. */
	read_lock(&tasklist_lock);
	task = find_task_by_pid(tid);
	if (task)
		get_task_struct(task);
	read_unlock(&tasklist_lock);
	if (!task)
		goto out;
	/* Reject threads that belong to a different thread group. */
	if (leader->tgid != task->tgid)
		goto out_drop_task;

	inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);


	if (!inode)
		goto out_drop_task;
	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
	inode->i_op = &proc_tid_base_inode_operations;
	inode->i_fop = &proc_tid_base_operations;
	inode->i_flags|=S_IMMUTABLE;
#ifdef CONFIG_SECURITY
	inode->i_nlink = 4;
#else
	inode->i_nlink = 3;
#endif

	dentry->d_op = &pid_base_dentry_operations;

	d_add(dentry, inode);

	put_task_struct(task);
	return NULL;
out_drop_task:
	put_task_struct(task);
out:
	return ERR_PTR(-ENOENT);
}

/* Size of the scratch buffer used to render a pid in decimal. */
#define PROC_NUMBUF 10
/* Number of pids collected per pass under tasklist_lock. */
#define PROC_MAXPIDS 20

/*
 * Get a few tgid's to return for filldir - we need to hold the
 * tasklist lock while doing this, and we must release it before
 * we actually do the filldir itself, so we use a temp buffer..
*/
static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
{
	/*
	 * index:   position of the first wanted entry; it is decremented once
	 *          before use (presumably to discount the "self" entry that
	 *          proc_pid_readdir() emits first - TODO confirm).
	 * version: if non-zero, a pid to resume from; it is honoured only if
	 *          that task exists and is a thread group leader.
	 * tgids:   output array, filled with at most PROC_MAXPIDS pids.
	 * Returns the number of pids stored.
	 */
	struct task_struct *p;
	int nr_tgids = 0;

	index--;
	read_lock(&tasklist_lock);
	p = NULL;
	if (version) {
		p = find_task_by_pid(version);
		if (p && !thread_group_leader(p))
			p = NULL;
	}

	/* Resume exactly at the cached task, or skip `index` live tasks from the start. */
	if (p)
		index = 0;
	else
		p = next_task(&init_task);

	for ( ; p != &init_task; p = next_task(p)) {
		int tgid = p->pid;
		if (!pid_alive(p))
			continue;
		if (--index >= 0)
			continue;
		tgids[nr_tgids] = tgid;
		nr_tgids++;
		if (nr_tgids >= PROC_MAXPIDS)
			break;
	}
	read_unlock(&tasklist_lock);
	return nr_tgids;
}

/*
 * Get a few tid's to return for filldir - we need to hold the
 * tasklist lock while doing this, and we must release it before
 * we actually do the filldir itself, so we use a temp buffer..
 *
 * @index is a directory position: 2 is subtracted to discount the "."
 * and ".." entries.  @tids may be NULL, in which case the threads are only
 * counted.  At most PROC_MAXPIDS threads are stored/counted per call.
 * Returns the number of threads stored/counted (0 if the task owning @dir
 * is not alive).
 */
static int get_tid_list(int index, unsigned int *tids, struct inode *dir)
{
	struct task_struct *leader_task = proc_task(dir);
	struct task_struct *task = leader_task;
	int nr_tids = 0;

	index -= 2;
	read_lock(&tasklist_lock);
	/*
	 * The starting point task (leader_task) might be an already
	 * unlinked task, which cannot be used to access the task-list
	 * via next_thread().
	 */
	if (pid_alive(task)) do {
		int tid = task->pid;

		/* `continue` jumps to the while condition, i.e. advances to the next thread. */
		if (--index >= 0)
			continue;
		if (tids != NULL)
			tids[nr_tids] = tid;
		nr_tids++;
		if (nr_tids >= PROC_MAXPIDS)
			break;
	} while ((task = next_thread(task)) != leader_task);
	read_unlock(&tasklist_lock);
	return nr_tids;
}

/* for the /proc/ directory itself, after non-process stuff has been done */
/*
 * Emits "self" at position FIRST_PROCESS_ENTRY, then one DT_DIR entry per
 * pid returned by get_tgid_list(), named by the decimal pid.  Always
 * returns 0; progress is recorded in f_pos and f_version.
 */
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	unsigned int tgid_array[PROC_MAXPIDS];
	char buf[PROC_NUMBUF];
	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
	unsigned int nr_tgids, i;
	int next_tgid;

	if (!nr) {
		ino_t ino = fake_ino(0,PROC_TGID_INO);
		if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
			return 0;
		filp->f_pos++;
		nr++;
	}

	/* f_version caches the tgid value that the last readdir call couldn't
	 * return. lseek aka telldir automagically resets f_version to 0.
	 */
	next_tgid = filp->f_version;
	filp->f_version = 0;
	for (;;) {
		nr_tgids = get_tgid_list(nr, next_tgid, tgid_array);
		if (!nr_tgids) {
			/* no more entries ! */
			break;
		}
		next_tgid = 0;

		/* do not use the last found pid, reserve it for next_tgid */
		if (nr_tgids == PROC_MAXPIDS) {
			nr_tgids--;
			next_tgid = tgid_array[nr_tgids];
		}

		for (i=0;i<nr_tgids;i++) {
			int tgid = tgid_array[i];
			ino_t ino = fake_ino(tgid,PROC_TGID_INO);
			unsigned long j = PROC_NUMBUF;

			/* Render tgid in decimal, right-aligned in buf[]. */
			do
				buf[--j] = '0' + (tgid % 10);
			while ((tgid /= 10) != 0);

			if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
				/* returning this tgid failed, save it as the first
				 * pid for the next readdir call */
				filp->f_version = tgid_array[i];
				goto out;
			}
			filp->f_pos++;
			nr++;
		}
	}
out:
	return 0;
}

/* for the /proc/TGID/task/ directories */
/*
 * Emits "." and "..", then one DT_DIR entry per thread id returned by a
 * single get_tid_list() call (so at most PROC_MAXPIDS threads per
 * invocation).  Also refreshes the directory's i_nlink.
 *
 * Returns -ENOENT if the owning task is not alive, 0 otherwise.
 */
static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	unsigned int tid_array[PROC_MAXPIDS];
	char buf[PROC_NUMBUF];
	unsigned int nr_tids, i;
	struct dentry *dentry = filp->f_dentry;
	struct inode *inode = dentry->d_inode;
	int retval = -ENOENT;
	ino_t ino;
	unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */

	if (!pid_alive(proc_task(inode)))
		goto out;
	retval = 0;

	switch (pos) {
	case 0:
		ino = inode->i_ino;
		if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
			goto out;
		pos++;
		/* fall through */
	case 1:
		ino = parent_ino(dentry);
		if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
			goto out;
		pos++;
		/* fall through */
	}

	nr_tids = get_tid_list(pos, tid_array, inode);
	/* Link count is set to the current position plus the threads just found. */
	inode->i_nlink = pos + nr_tids;

	for (i = 0; i < nr_tids; i++) {
		unsigned long j = PROC_NUMBUF;
		int tid = tid_array[i];

		ino = fake_ino(tid,PROC_TID_INO);

		/* Render tid in decimal, right-aligned in buf[]. */
		do
			buf[--j] = '0' + (tid % 10);
		while ((tid /= 10) != 0);

		if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0)
			break;
		pos++;
	}
out:
	/* The local position is written back on every exit path. */
	filp->f_pos = pos;
	return retval;
}