1 /* 2 * linux/fs/proc/base.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 * 6 * proc base directory handling functions 7 * 8 * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. 9 * Instead of using magical inumbers to determine the kind of object 10 * we allocate and fill in-core inodes upon lookup. They don't even 11 * go into icache. We cache the reference to task_struct upon lookup too. 12 * Eventually it should become a filesystem in its own. We don't use the 13 * rest of procfs anymore. 14 */ 15 16 #include <asm/uaccess.h> 17 18 #include <linux/config.h> 19 #include <linux/errno.h> 20 #include <linux/time.h> 21 #include <linux/proc_fs.h> 22 #include <linux/stat.h> 23 #include <linux/init.h> 24 #include <linux/file.h> 25 #include <linux/string.h> 26 #include <linux/seq_file.h> 27 #include <linux/namei.h> 28 #include <linux/namespace.h> 29 #include <linux/mm.h> 30 #include <linux/smp_lock.h> 31 #include <linux/kallsyms.h> 32 #include <linux/mount.h> 33 #include <linux/security.h> 34 #include <linux/ptrace.h> 35 #include <linux/seccomp.h> 36 #include <linux/cpuset.h> 37 #include <linux/audit.h> 38 #include "internal.h" 39 40 /* 41 * For hysterical raisins we keep the same inumbers as in the old procfs. 42 * Feel free to change the macro below - just keep the range distinct from 43 * inumbers of the rest of procfs (currently those are in 0x0000--0xffff). 44 * As soon as we'll get a separate superblock we will be able to forget 45 * about magical ranges too. 46 */ 47 48 #define fake_ino(pid,ino) (((pid)<<16)|(ino)) 49 50 enum pid_directory_inos { 51 PROC_TGID_INO = 2, 52 PROC_TGID_TASK, 53 PROC_TGID_STATUS, 54 PROC_TGID_MEM, 55 #ifdef CONFIG_SECCOMP 56 PROC_TGID_SECCOMP, 57 #endif 58 PROC_TGID_CWD, 59 PROC_TGID_ROOT, 60 PROC_TGID_EXE, 61 PROC_TGID_FD, 62 PROC_TGID_ENVIRON, 63 PROC_TGID_AUXV, 64 PROC_TGID_CMDLINE, 65 PROC_TGID_STAT, 66 PROC_TGID_STATM, 67 PROC_TGID_MAPS, 68 PROC_TGID_MOUNTS, 69 PROC_TGID_WCHAN, 70 #ifdef CONFIG_SCHEDSTATS 71 PROC_TGID_SCHEDSTAT, 72 #endif 73 #ifdef CONFIG_CPUSETS 74 PROC_TGID_CPUSET, 75 #endif 76 #ifdef CONFIG_SECURITY 77 PROC_TGID_ATTR, 78 PROC_TGID_ATTR_CURRENT, 79 PROC_TGID_ATTR_PREV, 80 PROC_TGID_ATTR_EXEC, 81 PROC_TGID_ATTR_FSCREATE, 82 #endif 83 #ifdef CONFIG_AUDITSYSCALL 84 PROC_TGID_LOGINUID, 85 #endif 86 PROC_TGID_FD_DIR, 87 PROC_TGID_OOM_SCORE, 88 PROC_TGID_OOM_ADJUST, 89 PROC_TID_INO, 90 PROC_TID_STATUS, 91 PROC_TID_MEM, 92 #ifdef CONFIG_SECCOMP 93 PROC_TID_SECCOMP, 94 #endif 95 PROC_TID_CWD, 96 PROC_TID_ROOT, 97 PROC_TID_EXE, 98 PROC_TID_FD, 99 PROC_TID_ENVIRON, 100 PROC_TID_AUXV, 101 PROC_TID_CMDLINE, 102 PROC_TID_STAT, 103 PROC_TID_STATM, 104 PROC_TID_MAPS, 105 PROC_TID_MOUNTS, 106 PROC_TID_WCHAN, 107 #ifdef CONFIG_SCHEDSTATS 108 PROC_TID_SCHEDSTAT, 109 #endif 110 #ifdef CONFIG_CPUSETS 111 PROC_TID_CPUSET, 112 #endif 113 #ifdef CONFIG_SECURITY 114 PROC_TID_ATTR, 115 PROC_TID_ATTR_CURRENT, 116 PROC_TID_ATTR_PREV, 117 PROC_TID_ATTR_EXEC, 118 PROC_TID_ATTR_FSCREATE, 119 #endif 120 #ifdef CONFIG_AUDITSYSCALL 121 PROC_TID_LOGINUID, 122 #endif 123 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ 124 PROC_TID_OOM_SCORE, 125 PROC_TID_OOM_ADJUST, 126 }; 127 128 struct pid_entry { 129 int type; 130 int len; 131 char *name; 132 mode_t mode; 133 }; 134 135 #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} 136 137 static struct pid_entry tgid_base_stuff[] = { 138 E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), 139 E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 140 E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR), 141 E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR), 142 E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO), 143 E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), 144 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), 145 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), 146 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), 147 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 148 #ifdef CONFIG_SECCOMP 149 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 150 #endif 151 E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 152 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), 153 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), 154 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 155 #ifdef CONFIG_SECURITY 156 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 157 #endif 158 #ifdef CONFIG_KALLSYMS 159 E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), 160 #endif 161 #ifdef CONFIG_SCHEDSTATS 162 E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), 163 #endif 164 #ifdef CONFIG_CPUSETS 165 E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 166 #endif 167 E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 168 E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 169 #ifdef CONFIG_AUDITSYSCALL 170 E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), 171 #endif 172 {0,0,NULL,0} 173 }; 174 static struct pid_entry tid_base_stuff[] = { 175 E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 176 E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR), 177 E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR), 178 E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO), 179 E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), 180 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), 181 E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), 182 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), 183 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 184 #ifdef CONFIG_SECCOMP 185 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 186 #endif 187 E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 188 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), 189 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), 190 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 191 #ifdef CONFIG_SECURITY 192 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 193 #endif 194 #ifdef CONFIG_KALLSYMS 195 E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), 196 #endif 197 #ifdef CONFIG_SCHEDSTATS 198 E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), 199 #endif 200 #ifdef CONFIG_CPUSETS 201 E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 202 #endif 203 E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 204 E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 205 #ifdef CONFIG_AUDITSYSCALL 206 E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), 207 #endif 208 {0,0,NULL,0} 209 }; 210 211 #ifdef CONFIG_SECURITY 212 static struct pid_entry tgid_attr_stuff[] = { 213 E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 214 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 215 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 216 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 217 {0,0,NULL,0} 218 }; 219 static struct pid_entry tid_attr_stuff[] = { 220 E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 221 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 222 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 223 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 224 {0,0,NULL,0} 225 }; 226 #endif 227 228 #undef E 229 230 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 231 { 232 struct task_struct *task = proc_task(inode); 233 struct files_struct *files; 234 struct file *file; 235 int fd = proc_type(inode) - PROC_TID_FD_DIR; 236 237 files = get_files_struct(task); 238 if (files) { 239 spin_lock(&files->file_lock); 240 file = fcheck_files(files, fd); 241 if (file) { 242 *mnt = mntget(file->f_vfsmnt); 243 *dentry = dget(file->f_dentry); 244 spin_unlock(&files->file_lock); 245 put_files_struct(files); 246 return 0; 247 } 248 spin_unlock(&files->file_lock); 249 put_files_struct(files); 250 } 251 return -ENOENT; 252 } 253 254 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 255 { 256 struct fs_struct *fs; 257 int result = -ENOENT; 258 task_lock(proc_task(inode)); 259 fs = proc_task(inode)->fs; 260 if(fs) 261 atomic_inc(&fs->count); 262 task_unlock(proc_task(inode)); 263 if (fs) { 264 read_lock(&fs->lock); 265 *mnt = mntget(fs->pwdmnt); 266 *dentry = dget(fs->pwd); 267 read_unlock(&fs->lock); 268 result = 0; 269 put_fs_struct(fs); 270 } 271 return result; 272 } 273 274 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 275 { 276 struct fs_struct *fs; 277 int result = -ENOENT; 278 task_lock(proc_task(inode)); 279 fs = proc_task(inode)->fs; 280 if(fs) 281 atomic_inc(&fs->count); 282 task_unlock(proc_task(inode)); 283 if (fs) { 284 read_lock(&fs->lock); 285 *mnt = mntget(fs->rootmnt); 286 *dentry = dget(fs->root); 287 read_unlock(&fs->lock); 288 result = 0; 289 put_fs_struct(fs); 290 } 291 return result; 292 } 293 294 #define MAY_PTRACE(task) \ 295 (task == current || \ 296 (task->parent == current && \ 297 (task->ptrace & PT_PTRACED) && \ 298 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ 299 security_ptrace(current,task) == 0)) 300 301 static int may_ptrace_attach(struct task_struct *task) 302 { 303 int retval = 0; 304 305 task_lock(task); 306 307 if (!task->mm) 308 goto out; 309 if (((current->uid != task->euid) || 310 (current->uid != task->suid) || 311 (current->uid != task->uid) || 312 (current->gid != task->egid) || 313 (current->gid != task->sgid) || 314 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) 315 goto out; 316 rmb(); 317 if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) 318 goto out; 319 if (security_ptrace(current, task)) 320 goto out; 321 322 retval = 1; 323 out: 324 task_unlock(task); 325 return retval; 326 } 327 328 static int proc_pid_environ(struct task_struct *task, char * buffer) 329 { 330 int res = 0; 331 struct mm_struct *mm = get_task_mm(task); 332 if (mm) { 333 unsigned int len = mm->env_end - mm->env_start; 334 if (len > PAGE_SIZE) 335 len = PAGE_SIZE; 336 res = access_process_vm(task, mm->env_start, buffer, len, 0); 337 if (!may_ptrace_attach(task)) 338 res = -ESRCH; 339 mmput(mm); 340 } 341 return res; 342 } 343 344 static int proc_pid_cmdline(struct task_struct *task, char * buffer) 345 { 346 int res = 0; 347 unsigned int len; 348 struct mm_struct *mm = get_task_mm(task); 349 if (!mm) 350 goto out; 351 if (!mm->arg_end) 352 goto out_mm; /* Shh! No looking before we're done */ 353 354 len = mm->arg_end - mm->arg_start; 355 356 if (len > PAGE_SIZE) 357 len = PAGE_SIZE; 358 359 res = access_process_vm(task, mm->arg_start, buffer, len, 0); 360 361 // If the nul at the end of args has been overwritten, then 362 // assume application is using setproctitle(3). 363 if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { 364 len = strnlen(buffer, res); 365 if (len < res) { 366 res = len; 367 } else { 368 len = mm->env_end - mm->env_start; 369 if (len > PAGE_SIZE - res) 370 len = PAGE_SIZE - res; 371 res += access_process_vm(task, mm->env_start, buffer+res, len, 0); 372 res = strnlen(buffer, res); 373 } 374 } 375 out_mm: 376 mmput(mm); 377 out: 378 return res; 379 } 380 381 static int proc_pid_auxv(struct task_struct *task, char *buffer) 382 { 383 int res = 0; 384 struct mm_struct *mm = get_task_mm(task); 385 if (mm) { 386 unsigned int nwords = 0; 387 do 388 nwords += 2; 389 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 390 res = nwords * sizeof(mm->saved_auxv[0]); 391 if (res > PAGE_SIZE) 392 res = PAGE_SIZE; 393 memcpy(buffer, mm->saved_auxv, res); 394 mmput(mm); 395 } 396 return res; 397 } 398 399 400 #ifdef CONFIG_KALLSYMS 401 /* 402 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 403 * Returns the resolved symbol. If that fails, simply return the address. 404 */ 405 static int proc_pid_wchan(struct task_struct *task, char *buffer) 406 { 407 char *modname; 408 const char *sym_name; 409 unsigned long wchan, size, offset; 410 char namebuf[KSYM_NAME_LEN+1]; 411 412 wchan = get_wchan(task); 413 414 sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); 415 if (sym_name) 416 return sprintf(buffer, "%s", sym_name); 417 return sprintf(buffer, "%lu", wchan); 418 } 419 #endif /* CONFIG_KALLSYMS */ 420 421 #ifdef CONFIG_SCHEDSTATS 422 /* 423 * Provides /proc/PID/schedstat 424 */ 425 static int proc_pid_schedstat(struct task_struct *task, char *buffer) 426 { 427 return sprintf(buffer, "%lu %lu %lu\n", 428 task->sched_info.cpu_time, 429 task->sched_info.run_delay, 430 task->sched_info.pcnt); 431 } 432 #endif 433 434 /* The badness from the OOM killer */ 435 unsigned long badness(struct task_struct *p, unsigned long uptime); 436 static int proc_oom_score(struct task_struct *task, char *buffer) 437 { 438 unsigned long points; 439 struct timespec uptime; 440 441 do_posix_clock_monotonic_gettime(&uptime); 442 points = badness(task, uptime.tv_sec); 443 return sprintf(buffer, "%lu\n", points); 444 } 445 446 /************************************************************************/ 447 /* Here the fs part begins */ 448 /************************************************************************/ 449 450 /* permission checks */ 451 452 static int proc_check_root(struct inode *inode) 453 { 454 struct dentry *de, *base, *root; 455 struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; 456 int res = 0; 457 458 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ 459 return -ENOENT; 460 read_lock(¤t->fs->lock); 461 our_vfsmnt = mntget(current->fs->rootmnt); 462 base = dget(current->fs->root); 463 read_unlock(¤t->fs->lock); 464 465 spin_lock(&vfsmount_lock); 466 de = root; 467 mnt = vfsmnt; 468 469 while (vfsmnt != our_vfsmnt) { 470 if (vfsmnt == vfsmnt->mnt_parent) 471 goto out; 472 de = vfsmnt->mnt_mountpoint; 473 vfsmnt = vfsmnt->mnt_parent; 474 } 475 476 if (!is_subdir(de, base)) 477 goto out; 478 spin_unlock(&vfsmount_lock); 479 480 exit: 481 dput(base); 482 mntput(our_vfsmnt); 483 dput(root); 484 mntput(mnt); 485 return res; 486 out: 487 spin_unlock(&vfsmount_lock); 488 res = -EACCES; 489 goto exit; 490 } 491 492 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) 493 { 494 if (generic_permission(inode, mask, NULL) != 0) 495 return -EACCES; 496 return proc_check_root(inode); 497 } 498 499 extern struct seq_operations proc_pid_maps_op; 500 static int maps_open(struct inode *inode, struct file *file) 501 { 502 struct task_struct *task = proc_task(inode); 503 int ret = seq_open(file, &proc_pid_maps_op); 504 if (!ret) { 505 struct seq_file *m = file->private_data; 506 m->private = task; 507 } 508 return ret; 509 } 510 511 static struct file_operations proc_maps_operations = { 512 .open = maps_open, 513 .read = seq_read, 514 .llseek = seq_lseek, 515 .release = seq_release, 516 }; 517 518 extern struct seq_operations mounts_op; 519 static int mounts_open(struct inode *inode, struct file *file) 520 { 521 struct task_struct *task = proc_task(inode); 522 int ret = seq_open(file, &mounts_op); 523 524 if (!ret) { 525 struct seq_file *m = file->private_data; 526 struct namespace *namespace; 527 task_lock(task); 528 namespace = task->namespace; 529 if (namespace) 530 get_namespace(namespace); 531 task_unlock(task); 532 533 if (namespace) 534 m->private = namespace; 535 else { 536 seq_release(inode, file); 537 ret = -EINVAL; 538 } 539 } 540 return ret; 541 } 542 543 static int mounts_release(struct inode *inode, struct file *file) 544 { 545 struct seq_file *m = file->private_data; 546 struct namespace *namespace = m->private; 547 put_namespace(namespace); 548 return seq_release(inode, file); 549 } 550 551 static struct file_operations proc_mounts_operations = { 552 .open = mounts_open, 553 .read = seq_read, 554 .llseek = seq_lseek, 555 .release = mounts_release, 556 }; 557 558 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 559 560 static ssize_t proc_info_read(struct file * file, char __user * buf, 561 size_t count, loff_t *ppos) 562 { 563 struct inode * inode = file->f_dentry->d_inode; 564 unsigned long page; 565 ssize_t length; 566 struct task_struct *task = proc_task(inode); 567 568 if (count > PROC_BLOCK_SIZE) 569 count = PROC_BLOCK_SIZE; 570 if (!(page = __get_free_page(GFP_KERNEL))) 571 return -ENOMEM; 572 573 length = PROC_I(inode)->op.proc_read(task, (char*)page); 574 575 if (length >= 0) 576 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 577 free_page(page); 578 return length; 579 } 580 581 static struct file_operations proc_info_file_operations = { 582 .read = proc_info_read, 583 }; 584 585 static int mem_open(struct inode* inode, struct file* file) 586 { 587 file->private_data = (void*)((long)current->self_exec_id); 588 return 0; 589 } 590 591 static ssize_t mem_read(struct file * file, char __user * buf, 592 size_t count, loff_t *ppos) 593 { 594 struct task_struct *task = proc_task(file->f_dentry->d_inode); 595 char *page; 596 unsigned long src = *ppos; 597 int ret = -ESRCH; 598 struct mm_struct *mm; 599 600 if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) 601 goto out; 602 603 ret = -ENOMEM; 604 page = (char *)__get_free_page(GFP_USER); 605 if (!page) 606 goto out; 607 608 ret = 0; 609 610 mm = get_task_mm(task); 611 if (!mm) 612 goto out_free; 613 614 ret = -EIO; 615 616 if (file->private_data != (void*)((long)current->self_exec_id)) 617 goto out_put; 618 619 ret = 0; 620 621 while (count > 0) { 622 int this_len, retval; 623 624 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 625 retval = access_process_vm(task, src, page, this_len, 0); 626 if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { 627 if (!ret) 628 ret = -EIO; 629 break; 630 } 631 632 if (copy_to_user(buf, page, retval)) { 633 ret = -EFAULT; 634 break; 635 } 636 637 ret += retval; 638 src += retval; 639 buf += retval; 640 count -= retval; 641 } 642 *ppos = src; 643 644 out_put: 645 mmput(mm); 646 out_free: 647 free_page((unsigned long) page); 648 out: 649 return ret; 650 } 651 652 #define mem_write NULL 653 654 #ifndef mem_write 655 /* This is a security hazard */ 656 static ssize_t mem_write(struct file * file, const char * buf, 657 size_t count, loff_t *ppos) 658 { 659 int copied = 0; 660 char *page; 661 struct task_struct *task = proc_task(file->f_dentry->d_inode); 662 unsigned long dst = *ppos; 663 664 if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) 665 return -ESRCH; 666 667 page = (char *)__get_free_page(GFP_USER); 668 if (!page) 669 return -ENOMEM; 670 671 while (count > 0) { 672 int this_len, retval; 673 674 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 675 if (copy_from_user(page, buf, this_len)) { 676 copied = -EFAULT; 677 break; 678 } 679 retval = access_process_vm(task, dst, page, this_len, 1); 680 if (!retval) { 681 if (!copied) 682 copied = -EIO; 683 break; 684 } 685 copied += retval; 686 buf += retval; 687 dst += retval; 688 count -= retval; 689 } 690 *ppos = dst; 691 free_page((unsigned long) page); 692 return copied; 693 } 694 #endif 695 696 static loff_t mem_lseek(struct file * file, loff_t offset, int orig) 697 { 698 switch (orig) { 699 case 0: 700 file->f_pos = offset; 701 break; 702 case 1: 703 file->f_pos += offset; 704 break; 705 default: 706 return -EINVAL; 707 } 708 force_successful_syscall_return(); 709 return file->f_pos; 710 } 711 712 static struct file_operations proc_mem_operations = { 713 .llseek = mem_lseek, 714 .read = mem_read, 715 .write = mem_write, 716 .open = mem_open, 717 }; 718 719 static ssize_t oom_adjust_read(struct file *file, char __user *buf, 720 size_t count, loff_t *ppos) 721 { 722 struct task_struct *task = proc_task(file->f_dentry->d_inode); 723 char buffer[8]; 724 size_t len; 725 int oom_adjust = task->oomkilladj; 726 loff_t __ppos = *ppos; 727 728 len = sprintf(buffer, "%i\n", oom_adjust); 729 if (__ppos >= len) 730 return 0; 731 if (count > len-__ppos) 732 count = len-__ppos; 733 if (copy_to_user(buf, buffer + __ppos, count)) 734 return -EFAULT; 735 *ppos = __ppos + count; 736 return count; 737 } 738 739 static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 740 size_t count, loff_t *ppos) 741 { 742 struct task_struct *task = proc_task(file->f_dentry->d_inode); 743 char buffer[8], *end; 744 int oom_adjust; 745 746 if (!capable(CAP_SYS_RESOURCE)) 747 return -EPERM; 748 memset(buffer, 0, 8); 749 if (count > 6) 750 count = 6; 751 if (copy_from_user(buffer, buf, count)) 752 return -EFAULT; 753 oom_adjust = simple_strtol(buffer, &end, 0); 754 if (oom_adjust < -16 || oom_adjust > 15) 755 return -EINVAL; 756 if (*end == '\n') 757 end++; 758 task->oomkilladj = oom_adjust; 759 if (end - buffer == 0) 760 return -EIO; 761 return end - buffer; 762 } 763 764 static struct file_operations proc_oom_adjust_operations = { 765 .read = oom_adjust_read, 766 .write = oom_adjust_write, 767 }; 768 769 static struct inode_operations proc_mem_inode_operations = { 770 .permission = proc_permission, 771 }; 772 773 #ifdef CONFIG_AUDITSYSCALL 774 #define TMPBUFLEN 21 775 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 776 size_t count, loff_t *ppos) 777 { 778 struct inode * inode = file->f_dentry->d_inode; 779 struct task_struct *task = proc_task(inode); 780 ssize_t length; 781 char tmpbuf[TMPBUFLEN]; 782 783 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 784 audit_get_loginuid(task->audit_context)); 785 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 786 } 787 788 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 789 size_t count, loff_t *ppos) 790 { 791 struct inode * inode = file->f_dentry->d_inode; 792 char *page, *tmp; 793 ssize_t length; 794 struct task_struct *task = proc_task(inode); 795 uid_t loginuid; 796 797 if (!capable(CAP_AUDIT_CONTROL)) 798 return -EPERM; 799 800 if (current != task) 801 return -EPERM; 802 803 if (count > PAGE_SIZE) 804 count = PAGE_SIZE; 805 806 if (*ppos != 0) { 807 /* No partial writes. */ 808 return -EINVAL; 809 } 810 page = (char*)__get_free_page(GFP_USER); 811 if (!page) 812 return -ENOMEM; 813 length = -EFAULT; 814 if (copy_from_user(page, buf, count)) 815 goto out_free_page; 816 817 loginuid = simple_strtoul(page, &tmp, 10); 818 if (tmp == page) { 819 length = -EINVAL; 820 goto out_free_page; 821 822 } 823 length = audit_set_loginuid(task->audit_context, loginuid); 824 if (likely(length == 0)) 825 length = count; 826 827 out_free_page: 828 free_page((unsigned long) page); 829 return length; 830 } 831 832 static struct file_operations proc_loginuid_operations = { 833 .read = proc_loginuid_read, 834 .write = proc_loginuid_write, 835 }; 836 #endif 837 838 #ifdef CONFIG_SECCOMP 839 static ssize_t seccomp_read(struct file *file, char __user *buf, 840 size_t count, loff_t *ppos) 841 { 842 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 843 char __buf[20]; 844 loff_t __ppos = *ppos; 845 size_t len; 846 847 /* no need to print the trailing zero, so use only len */ 848 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 849 if (__ppos >= len) 850 return 0; 851 if (count > len - __ppos) 852 count = len - __ppos; 853 if (copy_to_user(buf, __buf + __ppos, count)) 854 return -EFAULT; 855 *ppos = __ppos + count; 856 return count; 857 } 858 859 static ssize_t seccomp_write(struct file *file, const char __user *buf, 860 size_t count, loff_t *ppos) 861 { 862 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 863 char __buf[20], *end; 864 unsigned int seccomp_mode; 865 866 /* can set it only once to be even more secure */ 867 if (unlikely(tsk->seccomp.mode)) 868 return -EPERM; 869 870 memset(__buf, 0, sizeof(__buf)); 871 count = min(count, sizeof(__buf) - 1); 872 if (copy_from_user(__buf, buf, count)) 873 return -EFAULT; 874 seccomp_mode = simple_strtoul(__buf, &end, 0); 875 if (*end == '\n') 876 end++; 877 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 878 tsk->seccomp.mode = seccomp_mode; 879 set_tsk_thread_flag(tsk, TIF_SECCOMP); 880 } else 881 return -EINVAL; 882 if (unlikely(!(end - __buf))) 883 return -EIO; 884 return end - __buf; 885 } 886 887 static struct file_operations proc_seccomp_operations = { 888 .read = seccomp_read, 889 .write = seccomp_write, 890 }; 891 #endif /* CONFIG_SECCOMP */ 892 893 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 894 { 895 struct inode *inode = dentry->d_inode; 896 int error = -EACCES; 897 898 /* We don't need a base pointer in the /proc filesystem */ 899 path_release(nd); 900 901 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 902 goto out; 903 error = proc_check_root(inode); 904 if (error) 905 goto out; 906 907 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 908 nd->last_type = LAST_BIND; 909 out: 910 return error; 911 } 912 913 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, 914 char __user *buffer, int buflen) 915 { 916 struct inode * inode; 917 char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; 918 int len; 919 920 if (!tmp) 921 return -ENOMEM; 922 923 inode = dentry->d_inode; 924 path = d_path(dentry, mnt, tmp, PAGE_SIZE); 925 len = PTR_ERR(path); 926 if (IS_ERR(path)) 927 goto out; 928 len = tmp + PAGE_SIZE - 1 - path; 929 930 if (len > buflen) 931 len = buflen; 932 if (copy_to_user(buffer, path, len)) 933 len = -EFAULT; 934 out: 935 free_page((unsigned long)tmp); 936 return len; 937 } 938 939 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 940 { 941 int error = -EACCES; 942 struct inode *inode = dentry->d_inode; 943 struct dentry *de; 944 struct vfsmount *mnt = NULL; 945 946 lock_kernel(); 947 948 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 949 goto out; 950 error = proc_check_root(inode); 951 if (error) 952 goto out; 953 954 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); 955 if (error) 956 goto out; 957 958 error = do_proc_readlink(de, mnt, buffer, buflen); 959 dput(de); 960 mntput(mnt); 961 out: 962 unlock_kernel(); 963 return error; 964 } 965 966 static struct inode_operations proc_pid_link_inode_operations = { 967 .readlink = proc_pid_readlink, 968 .follow_link = proc_pid_follow_link 969 }; 970 971 #define NUMBUF 10 972 973 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) 974 { 975 struct inode *inode = filp->f_dentry->d_inode; 976 struct task_struct *p = proc_task(inode); 977 unsigned int fd, tid, ino; 978 int retval; 979 char buf[NUMBUF]; 980 struct files_struct * files; 981 982 retval = -ENOENT; 983 if (!pid_alive(p)) 984 goto out; 985 retval = 0; 986 tid = p->pid; 987 988 fd = filp->f_pos; 989 switch (fd) { 990 case 0: 991 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 992 goto out; 993 filp->f_pos++; 994 case 1: 995 ino = fake_ino(tid, PROC_TID_INO); 996 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 997 goto out; 998 filp->f_pos++; 999 default: 1000 files = get_files_struct(p); 1001 if (!files) 1002 goto out; 1003 spin_lock(&files->file_lock); 1004 for (fd = filp->f_pos-2; 1005 fd < files->max_fds; 1006 fd++, filp->f_pos++) { 1007 unsigned int i,j; 1008 1009 if (!fcheck_files(files, fd)) 1010 continue; 1011 spin_unlock(&files->file_lock); 1012 1013 j = NUMBUF; 1014 i = fd; 1015 do { 1016 j--; 1017 buf[j] = '0' + (i % 10); 1018 i /= 10; 1019 } while (i); 1020 1021 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1022 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1023 spin_lock(&files->file_lock); 1024 break; 1025 } 1026 spin_lock(&files->file_lock); 1027 } 1028 spin_unlock(&files->file_lock); 1029 put_files_struct(files); 1030 } 1031 out: 1032 return retval; 1033 } 1034 1035 static int proc_pident_readdir(struct file *filp, 1036 void *dirent, filldir_t filldir, 1037 struct pid_entry *ents, unsigned int nents) 1038 { 1039 int i; 1040 int pid; 1041 struct dentry *dentry = filp->f_dentry; 1042 struct inode *inode = dentry->d_inode; 1043 struct pid_entry *p; 1044 ino_t ino; 1045 int ret; 1046 1047 ret = -ENOENT; 1048 if (!pid_alive(proc_task(inode))) 1049 goto out; 1050 1051 ret = 0; 1052 pid = proc_task(inode)->pid; 1053 i = filp->f_pos; 1054 switch (i) { 1055 case 0: 1056 ino = inode->i_ino; 1057 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 1058 goto out; 1059 i++; 1060 filp->f_pos++; 1061 /* fall through */ 1062 case 1: 1063 ino = parent_ino(dentry); 1064 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 1065 goto out; 1066 i++; 1067 filp->f_pos++; 1068 /* fall through */ 1069 default: 1070 i -= 2; 1071 if (i >= nents) { 1072 ret = 1; 1073 goto out; 1074 } 1075 p = ents + i; 1076 while (p->name) { 1077 if (filldir(dirent, p->name, p->len, filp->f_pos, 1078 fake_ino(pid, p->type), p->mode >> 12) < 0) 1079 goto out; 1080 filp->f_pos++; 1081 p++; 1082 } 1083 } 1084 1085 ret = 1; 1086 out: 1087 return ret; 1088 } 1089 1090 static int proc_tgid_base_readdir(struct file * filp, 1091 void * dirent, filldir_t filldir) 1092 { 1093 return proc_pident_readdir(filp,dirent,filldir, 1094 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); 1095 } 1096 1097 static int proc_tid_base_readdir(struct file * filp, 1098 void * dirent, filldir_t filldir) 1099 { 1100 return proc_pident_readdir(filp,dirent,filldir, 1101 tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); 1102 } 1103 1104 /* building an inode */ 1105 1106 static int task_dumpable(struct task_struct *task) 1107 { 1108 int dumpable = 0; 1109 struct mm_struct *mm; 1110 1111 task_lock(task); 1112 mm = task->mm; 1113 if (mm) 1114 dumpable = mm->dumpable; 1115 task_unlock(task); 1116 return dumpable; 1117 } 1118 1119 1120 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino) 1121 { 1122 struct inode * inode; 1123 struct proc_inode *ei; 1124 1125 /* We need a new inode */ 1126 1127 inode = new_inode(sb); 1128 if (!inode) 1129 goto out; 1130 1131 /* Common stuff */ 1132 ei = PROC_I(inode); 1133 ei->task = NULL; 1134 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1135 inode->i_ino = fake_ino(task->pid, ino); 1136 1137 if (!pid_alive(task)) 1138 goto out_unlock; 1139 1140 /* 1141 * grab the reference to task. 1142 */ 1143 get_task_struct(task); 1144 ei->task = task; 1145 ei->type = ino; 1146 inode->i_uid = 0; 1147 inode->i_gid = 0; 1148 if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { 1149 inode->i_uid = task->euid; 1150 inode->i_gid = task->egid; 1151 } 1152 security_task_to_inode(task, inode); 1153 1154 out: 1155 return inode; 1156 1157 out_unlock: 1158 ei->pde = NULL; 1159 iput(inode); 1160 return NULL; 1161 } 1162 1163 /* dentry stuff */ 1164 1165 /* 1166 * Exceptional case: normally we are not allowed to unhash a busy 1167 * directory. In this case, however, we can do it - no aliasing problems 1168 * due to the way we treat inodes. 1169 * 1170 * Rewrite the inode's ownerships here because the owning task may have 1171 * performed a setuid(), etc. 1172 */ 1173 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1174 { 1175 struct inode *inode = dentry->d_inode; 1176 struct task_struct *task = proc_task(inode); 1177 if (pid_alive(task)) { 1178 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { 1179 inode->i_uid = task->euid; 1180 inode->i_gid = task->egid; 1181 } else { 1182 inode->i_uid = 0; 1183 inode->i_gid = 0; 1184 } 1185 security_task_to_inode(task, inode); 1186 return 1; 1187 } 1188 d_drop(dentry); 1189 return 0; 1190 } 1191 1192 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1193 { 1194 struct inode *inode = dentry->d_inode; 1195 struct task_struct *task = proc_task(inode); 1196 int fd = proc_type(inode) - PROC_TID_FD_DIR; 1197 struct files_struct *files; 1198 1199 files = get_files_struct(task); 1200 if (files) { 1201 spin_lock(&files->file_lock); 1202 if (fcheck_files(files, fd)) { 1203 spin_unlock(&files->file_lock); 1204 put_files_struct(files); 1205 if (task_dumpable(task)) { 1206 inode->i_uid = task->euid; 1207 inode->i_gid = task->egid; 1208 } else { 1209 inode->i_uid = 0; 1210 inode->i_gid = 0; 1211 } 1212 security_task_to_inode(task, inode); 1213 return 1; 1214 } 1215 spin_unlock(&files->file_lock); 1216 put_files_struct(files); 1217 } 1218 d_drop(dentry); 1219 return 0; 1220 } 1221 1222 static void pid_base_iput(struct dentry *dentry, struct inode *inode) 1223 { 1224 struct task_struct *task = proc_task(inode); 1225 spin_lock(&task->proc_lock); 1226 if (task->proc_dentry == dentry) 1227 task->proc_dentry = NULL; 1228 spin_unlock(&task->proc_lock); 1229 iput(inode); 1230 } 1231 1232 static int pid_delete_dentry(struct dentry * dentry) 1233 { 1234 /* Is the task we represent dead? 1235 * If so, then don't put the dentry on the lru list, 1236 * kill it immediately. 1237 */ 1238 return !pid_alive(proc_task(dentry->d_inode)); 1239 } 1240 1241 static struct dentry_operations tid_fd_dentry_operations = 1242 { 1243 .d_revalidate = tid_fd_revalidate, 1244 .d_delete = pid_delete_dentry, 1245 }; 1246 1247 static struct dentry_operations pid_dentry_operations = 1248 { 1249 .d_revalidate = pid_revalidate, 1250 .d_delete = pid_delete_dentry, 1251 }; 1252 1253 static struct dentry_operations pid_base_dentry_operations = 1254 { 1255 .d_revalidate = pid_revalidate, 1256 .d_iput = pid_base_iput, 1257 .d_delete = pid_delete_dentry, 1258 }; 1259 1260 /* Lookups */ 1261 1262 static unsigned name_to_int(struct dentry *dentry) 1263 { 1264 const char *name = dentry->d_name.name; 1265 int len = dentry->d_name.len; 1266 unsigned n = 0; 1267 1268 if (len > 1 && *name == '0') 1269 goto out; 1270 while (len-- > 0) { 1271 unsigned c = *name++ - '0'; 1272 if (c > 9) 1273 goto out; 1274 if (n >= (~0U-9)/10) 1275 goto out; 1276 n *= 10; 1277 n += c; 1278 } 1279 return n; 1280 out: 1281 return ~0U; 1282 } 1283 1284 /* SMP-safe */ 1285 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1286 { 1287 struct task_struct *task = proc_task(dir); 1288 unsigned fd = name_to_int(dentry); 1289 struct file * file; 1290 struct files_struct * files; 1291 struct inode *inode; 1292 struct proc_inode *ei; 1293 1294 if (fd == ~0U) 1295 goto out; 1296 if (!pid_alive(task)) 1297 goto out; 1298 1299 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1300 if (!inode) 1301 goto out; 1302 ei = PROC_I(inode); 1303 files = get_files_struct(task); 1304 if (!files) 1305 goto out_unlock; 1306 inode->i_mode = S_IFLNK; 1307 spin_lock(&files->file_lock); 1308 file = fcheck_files(files, fd); 1309 if (!file) 1310 goto out_unlock2; 1311 if (file->f_mode & 1) 1312 inode->i_mode |= S_IRUSR | S_IXUSR; 1313 if (file->f_mode & 2) 1314 inode->i_mode |= S_IWUSR | S_IXUSR; 1315 spin_unlock(&files->file_lock); 1316 put_files_struct(files); 1317 inode->i_op = &proc_pid_link_inode_operations; 1318 inode->i_size = 64; 1319 ei->op.proc_get_link = proc_fd_link; 1320 dentry->d_op = &tid_fd_dentry_operations; 1321 d_add(dentry, inode); 1322 return NULL; 1323 1324 out_unlock2: 1325 spin_unlock(&files->file_lock); 1326 put_files_struct(files); 1327 out_unlock: 1328 iput(inode); 1329 out: 1330 return ERR_PTR(-ENOENT); 1331 } 1332 1333 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1334 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1335 1336 static struct file_operations proc_fd_operations = { 1337 .read = generic_read_dir, 1338 .readdir = proc_readfd, 1339 }; 1340 1341 static struct file_operations proc_task_operations = { 1342 .read = generic_read_dir, 1343 .readdir = proc_task_readdir, 1344 }; 1345 1346 /* 1347 * proc directories can do almost nothing.. 1348 */ 1349 static struct inode_operations proc_fd_inode_operations = { 1350 .lookup = proc_lookupfd, 1351 .permission = proc_permission, 1352 }; 1353 1354 static struct inode_operations proc_task_inode_operations = { 1355 .lookup = proc_task_lookup, 1356 .permission = proc_permission, 1357 }; 1358 1359 #ifdef CONFIG_SECURITY 1360 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 1361 size_t count, loff_t *ppos) 1362 { 1363 struct inode * inode = file->f_dentry->d_inode; 1364 unsigned long page; 1365 ssize_t length; 1366 struct task_struct *task = proc_task(inode); 1367 1368 if (count > PAGE_SIZE) 1369 count = PAGE_SIZE; 1370 if (!(page = __get_free_page(GFP_KERNEL))) 1371 return -ENOMEM; 1372 1373 length = security_getprocattr(task, 1374 (char*)file->f_dentry->d_name.name, 1375 (void*)page, count); 1376 if (length >= 0) 1377 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1378 free_page(page); 1379 return length; 1380 } 1381 1382 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 1383 size_t count, loff_t *ppos) 1384 { 1385 struct inode * inode = file->f_dentry->d_inode; 1386 char *page; 1387 ssize_t length; 1388 struct task_struct *task = proc_task(inode); 1389 1390 if (count > PAGE_SIZE) 1391 count = PAGE_SIZE; 1392 if (*ppos != 0) { 1393 /* No partial writes. */ 1394 return -EINVAL; 1395 } 1396 page = (char*)__get_free_page(GFP_USER); 1397 if (!page) 1398 return -ENOMEM; 1399 length = -EFAULT; 1400 if (copy_from_user(page, buf, count)) 1401 goto out; 1402 1403 length = security_setprocattr(task, 1404 (char*)file->f_dentry->d_name.name, 1405 (void*)page, count); 1406 out: 1407 free_page((unsigned long) page); 1408 return length; 1409 } 1410 1411 static struct file_operations proc_pid_attr_operations = { 1412 .read = proc_pid_attr_read, 1413 .write = proc_pid_attr_write, 1414 }; 1415 1416 static struct file_operations proc_tid_attr_operations; 1417 static struct inode_operations proc_tid_attr_inode_operations; 1418 static struct file_operations proc_tgid_attr_operations; 1419 static struct inode_operations proc_tgid_attr_inode_operations; 1420 #endif 1421 1422 /* SMP-safe */ 1423 static struct dentry *proc_pident_lookup(struct inode *dir, 1424 struct dentry *dentry, 1425 struct pid_entry *ents) 1426 { 1427 struct inode *inode; 1428 int error; 1429 struct task_struct *task = proc_task(dir); 1430 struct pid_entry *p; 1431 struct proc_inode *ei; 1432 1433 error = -ENOENT; 1434 inode = NULL; 1435 1436 if (!pid_alive(task)) 1437 goto out; 1438 1439 for (p = ents; p->name; p++) { 1440 if (p->len != dentry->d_name.len) 1441 continue; 1442 if (!memcmp(dentry->d_name.name, p->name, p->len)) 1443 break; 1444 } 1445 if (!p->name) 1446 goto out; 1447 1448 error = -EINVAL; 1449 inode = proc_pid_make_inode(dir->i_sb, task, p->type); 1450 if (!inode) 1451 goto out; 1452 1453 ei = PROC_I(inode); 1454 inode->i_mode = p->mode; 1455 /* 1456 * Yes, it does not scale. And it should not. Don't add 1457 * new entries into /proc/<tgid>/ without very good reasons. 1458 */ 1459 switch(p->type) { 1460 case PROC_TGID_TASK: 1461 inode->i_nlink = 3; 1462 inode->i_op = &proc_task_inode_operations; 1463 inode->i_fop = &proc_task_operations; 1464 break; 1465 case PROC_TID_FD: 1466 case PROC_TGID_FD: 1467 inode->i_nlink = 2; 1468 inode->i_op = &proc_fd_inode_operations; 1469 inode->i_fop = &proc_fd_operations; 1470 break; 1471 case PROC_TID_EXE: 1472 case PROC_TGID_EXE: 1473 inode->i_op = &proc_pid_link_inode_operations; 1474 ei->op.proc_get_link = proc_exe_link; 1475 break; 1476 case PROC_TID_CWD: 1477 case PROC_TGID_CWD: 1478 inode->i_op = &proc_pid_link_inode_operations; 1479 ei->op.proc_get_link = proc_cwd_link; 1480 break; 1481 case PROC_TID_ROOT: 1482 case PROC_TGID_ROOT: 1483 inode->i_op = &proc_pid_link_inode_operations; 1484 ei->op.proc_get_link = proc_root_link; 1485 break; 1486 case PROC_TID_ENVIRON: 1487 case PROC_TGID_ENVIRON: 1488 inode->i_fop = &proc_info_file_operations; 1489 ei->op.proc_read = proc_pid_environ; 1490 break; 1491 case PROC_TID_AUXV: 1492 case PROC_TGID_AUXV: 1493 inode->i_fop = &proc_info_file_operations; 1494 ei->op.proc_read = proc_pid_auxv; 1495 break; 1496 case PROC_TID_STATUS: 1497 case PROC_TGID_STATUS: 1498 inode->i_fop = &proc_info_file_operations; 1499 ei->op.proc_read = proc_pid_status; 1500 break; 1501 case PROC_TID_STAT: 1502 inode->i_fop = &proc_info_file_operations; 1503 ei->op.proc_read = proc_tid_stat; 1504 break; 1505 case PROC_TGID_STAT: 1506 inode->i_fop = &proc_info_file_operations; 1507 ei->op.proc_read = proc_tgid_stat; 1508 break; 1509 case PROC_TID_CMDLINE: 1510 case PROC_TGID_CMDLINE: 1511 inode->i_fop = &proc_info_file_operations; 1512 ei->op.proc_read = proc_pid_cmdline; 1513 break; 1514 case PROC_TID_STATM: 1515 case PROC_TGID_STATM: 1516 inode->i_fop = &proc_info_file_operations; 1517 ei->op.proc_read = proc_pid_statm; 1518 break; 1519 case PROC_TID_MAPS: 1520 case PROC_TGID_MAPS: 1521 inode->i_fop = &proc_maps_operations; 1522 break; 1523 case PROC_TID_MEM: 1524 case PROC_TGID_MEM: 1525 inode->i_op = &proc_mem_inode_operations; 1526 inode->i_fop = &proc_mem_operations; 1527 break; 1528 #ifdef CONFIG_SECCOMP 1529 case PROC_TID_SECCOMP: 1530 case PROC_TGID_SECCOMP: 1531 inode->i_fop = &proc_seccomp_operations; 1532 break; 1533 #endif /* CONFIG_SECCOMP */ 1534 case PROC_TID_MOUNTS: 1535 case PROC_TGID_MOUNTS: 1536 inode->i_fop = &proc_mounts_operations; 1537 break; 1538 #ifdef CONFIG_SECURITY 1539 case PROC_TID_ATTR: 1540 inode->i_nlink = 2; 1541 inode->i_op = &proc_tid_attr_inode_operations; 1542 inode->i_fop = &proc_tid_attr_operations; 1543 break; 1544 case PROC_TGID_ATTR: 1545 inode->i_nlink = 2; 1546 inode->i_op = &proc_tgid_attr_inode_operations; 1547 inode->i_fop = &proc_tgid_attr_operations; 1548 break; 1549 case PROC_TID_ATTR_CURRENT: 1550 case PROC_TGID_ATTR_CURRENT: 1551 case PROC_TID_ATTR_PREV: 1552 case PROC_TGID_ATTR_PREV: 1553 case PROC_TID_ATTR_EXEC: 1554 case PROC_TGID_ATTR_EXEC: 1555 case PROC_TID_ATTR_FSCREATE: 1556 case PROC_TGID_ATTR_FSCREATE: 1557 inode->i_fop = &proc_pid_attr_operations; 1558 break; 1559 #endif 1560 #ifdef CONFIG_KALLSYMS 1561 case PROC_TID_WCHAN: 1562 case PROC_TGID_WCHAN: 1563 inode->i_fop = &proc_info_file_operations; 1564 ei->op.proc_read = proc_pid_wchan; 1565 break; 1566 #endif 1567 #ifdef CONFIG_SCHEDSTATS 1568 case PROC_TID_SCHEDSTAT: 1569 case PROC_TGID_SCHEDSTAT: 1570 inode->i_fop = &proc_info_file_operations; 1571 ei->op.proc_read = proc_pid_schedstat; 1572 break; 1573 #endif 1574 #ifdef CONFIG_CPUSETS 1575 case PROC_TID_CPUSET: 1576 case PROC_TGID_CPUSET: 1577 inode->i_fop = &proc_cpuset_operations; 1578 break; 1579 #endif 1580 case PROC_TID_OOM_SCORE: 1581 case PROC_TGID_OOM_SCORE: 1582 inode->i_fop = &proc_info_file_operations; 1583 ei->op.proc_read = proc_oom_score; 1584 break; 1585 case PROC_TID_OOM_ADJUST: 1586 case PROC_TGID_OOM_ADJUST: 1587 inode->i_fop = &proc_oom_adjust_operations; 1588 break; 1589 #ifdef CONFIG_AUDITSYSCALL 1590 case PROC_TID_LOGINUID: 1591 case PROC_TGID_LOGINUID: 1592 inode->i_fop = &proc_loginuid_operations; 1593 break; 1594 #endif 1595 default: 1596 printk("procfs: impossible type (%d)",p->type); 1597 iput(inode); 1598 return ERR_PTR(-EINVAL); 1599 } 1600 dentry->d_op = &pid_dentry_operations; 1601 d_add(dentry, inode); 1602 return NULL; 1603 1604 out: 1605 return ERR_PTR(error); 1606 } 1607 1608 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1609 return proc_pident_lookup(dir, dentry, tgid_base_stuff); 1610 } 1611 1612 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1613 return proc_pident_lookup(dir, dentry, tid_base_stuff); 1614 } 1615 1616 static struct file_operations proc_tgid_base_operations = { 1617 .read = generic_read_dir, 1618 .readdir = proc_tgid_base_readdir, 1619 }; 1620 1621 static struct file_operations proc_tid_base_operations = { 1622 .read = generic_read_dir, 1623 .readdir = proc_tid_base_readdir, 1624 }; 1625 1626 static struct inode_operations proc_tgid_base_inode_operations = { 1627 .lookup = proc_tgid_base_lookup, 1628 }; 1629 1630 static struct inode_operations proc_tid_base_inode_operations = { 1631 .lookup = proc_tid_base_lookup, 1632 }; 1633 1634 #ifdef CONFIG_SECURITY 1635 static int proc_tgid_attr_readdir(struct file * filp, 1636 void * dirent, filldir_t filldir) 1637 { 1638 return proc_pident_readdir(filp,dirent,filldir, 1639 tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff)); 1640 } 1641 1642 static int proc_tid_attr_readdir(struct file * filp, 1643 void * dirent, filldir_t filldir) 1644 { 1645 return proc_pident_readdir(filp,dirent,filldir, 1646 tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); 1647 } 1648 1649 static struct file_operations proc_tgid_attr_operations = { 1650 .read = generic_read_dir, 1651 .readdir = proc_tgid_attr_readdir, 1652 }; 1653 1654 static struct file_operations proc_tid_attr_operations = { 1655 .read = generic_read_dir, 1656 .readdir = proc_tid_attr_readdir, 1657 }; 1658 1659 static struct dentry *proc_tgid_attr_lookup(struct inode *dir, 1660 struct dentry *dentry, struct nameidata *nd) 1661 { 1662 return proc_pident_lookup(dir, dentry, tgid_attr_stuff); 1663 } 1664 1665 static struct dentry *proc_tid_attr_lookup(struct inode *dir, 1666 struct dentry *dentry, struct nameidata *nd) 1667 { 1668 return proc_pident_lookup(dir, dentry, tid_attr_stuff); 1669 } 1670 1671 static struct inode_operations proc_tgid_attr_inode_operations = { 1672 .lookup = proc_tgid_attr_lookup, 1673 }; 1674 1675 static struct inode_operations proc_tid_attr_inode_operations = { 1676 .lookup = proc_tid_attr_lookup, 1677 }; 1678 #endif 1679 1680 /* 1681 * /proc/self: 1682 */ 1683 static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 1684 int buflen) 1685 { 1686 char tmp[30]; 1687 sprintf(tmp, "%d", current->tgid); 1688 return vfs_readlink(dentry,buffer,buflen,tmp); 1689 } 1690 1691 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1692 { 1693 char tmp[30]; 1694 sprintf(tmp, "%d", current->tgid); 1695 return vfs_follow_link(nd,tmp); 1696 } 1697 1698 static struct inode_operations proc_self_inode_operations = { 1699 .readlink = proc_self_readlink, 1700 .follow_link = proc_self_follow_link, 1701 }; 1702 1703 /** 1704 * proc_pid_unhash - Unhash /proc/<pid> entry from the dcache. 1705 * @p: task that should be flushed. 1706 * 1707 * Drops the /proc/<pid> dcache entry from the hash chains. 1708 * 1709 * Dropping /proc/<pid> entries and detach_pid must be synchroneous, 1710 * otherwise e.g. /proc/<pid>/exe might point to the wrong executable, 1711 * if the pid value is immediately reused. This is enforced by 1712 * - caller must acquire spin_lock(p->proc_lock) 1713 * - must be called before detach_pid() 1714 * - proc_pid_lookup acquires proc_lock, and checks that 1715 * the target is not dead by looking at the attach count 1716 * of PIDTYPE_PID. 1717 */ 1718 1719 struct dentry *proc_pid_unhash(struct task_struct *p) 1720 { 1721 struct dentry *proc_dentry; 1722 1723 proc_dentry = p->proc_dentry; 1724 if (proc_dentry != NULL) { 1725 1726 spin_lock(&dcache_lock); 1727 spin_lock(&proc_dentry->d_lock); 1728 if (!d_unhashed(proc_dentry)) { 1729 dget_locked(proc_dentry); 1730 __d_drop(proc_dentry); 1731 spin_unlock(&proc_dentry->d_lock); 1732 } else { 1733 spin_unlock(&proc_dentry->d_lock); 1734 proc_dentry = NULL; 1735 } 1736 spin_unlock(&dcache_lock); 1737 } 1738 return proc_dentry; 1739 } 1740 1741 /** 1742 * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries 1743 * @proc_entry: directoy to prune. 1744 * 1745 * Shrink the /proc directory that was used by the just killed thread. 1746 */ 1747 1748 void proc_pid_flush(struct dentry *proc_dentry) 1749 { 1750 might_sleep(); 1751 if(proc_dentry != NULL) { 1752 shrink_dcache_parent(proc_dentry); 1753 dput(proc_dentry); 1754 } 1755 } 1756 1757 /* SMP-safe */ 1758 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1759 { 1760 struct task_struct *task; 1761 struct inode *inode; 1762 struct proc_inode *ei; 1763 unsigned tgid; 1764 int died; 1765 1766 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { 1767 inode = new_inode(dir->i_sb); 1768 if (!inode) 1769 return ERR_PTR(-ENOMEM); 1770 ei = PROC_I(inode); 1771 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1772 inode->i_ino = fake_ino(0, PROC_TGID_INO); 1773 ei->pde = NULL; 1774 inode->i_mode = S_IFLNK|S_IRWXUGO; 1775 inode->i_uid = inode->i_gid = 0; 1776 inode->i_size = 64; 1777 inode->i_op = &proc_self_inode_operations; 1778 d_add(dentry, inode); 1779 return NULL; 1780 } 1781 tgid = name_to_int(dentry); 1782 if (tgid == ~0U) 1783 goto out; 1784 1785 read_lock(&tasklist_lock); 1786 task = find_task_by_pid(tgid); 1787 if (task) 1788 get_task_struct(task); 1789 read_unlock(&tasklist_lock); 1790 if (!task) 1791 goto out; 1792 1793 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); 1794 1795 1796 if (!inode) { 1797 put_task_struct(task); 1798 goto out; 1799 } 1800 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 1801 inode->i_op = &proc_tgid_base_inode_operations; 1802 inode->i_fop = &proc_tgid_base_operations; 1803 inode->i_nlink = 3; 1804 inode->i_flags|=S_IMMUTABLE; 1805 1806 dentry->d_op = &pid_base_dentry_operations; 1807 1808 died = 0; 1809 d_add(dentry, inode); 1810 spin_lock(&task->proc_lock); 1811 task->proc_dentry = dentry; 1812 if (!pid_alive(task)) { 1813 dentry = proc_pid_unhash(task); 1814 died = 1; 1815 } 1816 spin_unlock(&task->proc_lock); 1817 1818 put_task_struct(task); 1819 if (died) { 1820 proc_pid_flush(dentry); 1821 goto out; 1822 } 1823 return NULL; 1824 out: 1825 return ERR_PTR(-ENOENT); 1826 } 1827 1828 /* SMP-safe */ 1829 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1830 { 1831 struct task_struct *task; 1832 struct task_struct *leader = proc_task(dir); 1833 struct inode *inode; 1834 unsigned tid; 1835 1836 tid = name_to_int(dentry); 1837 if (tid == ~0U) 1838 goto out; 1839 1840 read_lock(&tasklist_lock); 1841 task = find_task_by_pid(tid); 1842 if (task) 1843 get_task_struct(task); 1844 read_unlock(&tasklist_lock); 1845 if (!task) 1846 goto out; 1847 if (leader->tgid != task->tgid) 1848 goto out_drop_task; 1849 1850 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); 1851 1852 1853 if (!inode) 1854 goto out_drop_task; 1855 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 1856 inode->i_op = &proc_tid_base_inode_operations; 1857 inode->i_fop = &proc_tid_base_operations; 1858 inode->i_nlink = 3; 1859 inode->i_flags|=S_IMMUTABLE; 1860 1861 dentry->d_op = &pid_base_dentry_operations; 1862 1863 d_add(dentry, inode); 1864 1865 put_task_struct(task); 1866 return NULL; 1867 out_drop_task: 1868 put_task_struct(task); 1869 out: 1870 return ERR_PTR(-ENOENT); 1871 } 1872 1873 #define PROC_NUMBUF 10 1874 #define PROC_MAXPIDS 20 1875 1876 /* 1877 * Get a few tgid's to return for filldir - we need to hold the 1878 * tasklist lock while doing this, and we must release it before 1879 * we actually do the filldir itself, so we use a temp buffer.. 1880 */ 1881 static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) 1882 { 1883 struct task_struct *p; 1884 int nr_tgids = 0; 1885 1886 index--; 1887 read_lock(&tasklist_lock); 1888 p = NULL; 1889 if (version) { 1890 p = find_task_by_pid(version); 1891 if (p && !thread_group_leader(p)) 1892 p = NULL; 1893 } 1894 1895 if (p) 1896 index = 0; 1897 else 1898 p = next_task(&init_task); 1899 1900 for ( ; p != &init_task; p = next_task(p)) { 1901 int tgid = p->pid; 1902 if (!pid_alive(p)) 1903 continue; 1904 if (--index >= 0) 1905 continue; 1906 tgids[nr_tgids] = tgid; 1907 nr_tgids++; 1908 if (nr_tgids >= PROC_MAXPIDS) 1909 break; 1910 } 1911 read_unlock(&tasklist_lock); 1912 return nr_tgids; 1913 } 1914 1915 /* 1916 * Get a few tid's to return for filldir - we need to hold the 1917 * tasklist lock while doing this, and we must release it before 1918 * we actually do the filldir itself, so we use a temp buffer.. 1919 */ 1920 static int get_tid_list(int index, unsigned int *tids, struct inode *dir) 1921 { 1922 struct task_struct *leader_task = proc_task(dir); 1923 struct task_struct *task = leader_task; 1924 int nr_tids = 0; 1925 1926 index -= 2; 1927 read_lock(&tasklist_lock); 1928 /* 1929 * The starting point task (leader_task) might be an already 1930 * unlinked task, which cannot be used to access the task-list 1931 * via next_thread(). 1932 */ 1933 if (pid_alive(task)) do { 1934 int tid = task->pid; 1935 1936 if (--index >= 0) 1937 continue; 1938 tids[nr_tids] = tid; 1939 nr_tids++; 1940 if (nr_tids >= PROC_MAXPIDS) 1941 break; 1942 } while ((task = next_thread(task)) != leader_task); 1943 read_unlock(&tasklist_lock); 1944 return nr_tids; 1945 } 1946 1947 /* for the /proc/ directory itself, after non-process stuff has been done */ 1948 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 1949 { 1950 unsigned int tgid_array[PROC_MAXPIDS]; 1951 char buf[PROC_NUMBUF]; 1952 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 1953 unsigned int nr_tgids, i; 1954 int next_tgid; 1955 1956 if (!nr) { 1957 ino_t ino = fake_ino(0,PROC_TGID_INO); 1958 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) 1959 return 0; 1960 filp->f_pos++; 1961 nr++; 1962 } 1963 1964 /* f_version caches the tgid value that the last readdir call couldn't 1965 * return. lseek aka telldir automagically resets f_version to 0. 1966 */ 1967 next_tgid = filp->f_version; 1968 filp->f_version = 0; 1969 for (;;) { 1970 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); 1971 if (!nr_tgids) { 1972 /* no more entries ! */ 1973 break; 1974 } 1975 next_tgid = 0; 1976 1977 /* do not use the last found pid, reserve it for next_tgid */ 1978 if (nr_tgids == PROC_MAXPIDS) { 1979 nr_tgids--; 1980 next_tgid = tgid_array[nr_tgids]; 1981 } 1982 1983 for (i=0;i<nr_tgids;i++) { 1984 int tgid = tgid_array[i]; 1985 ino_t ino = fake_ino(tgid,PROC_TGID_INO); 1986 unsigned long j = PROC_NUMBUF; 1987 1988 do 1989 buf[--j] = '0' + (tgid % 10); 1990 while ((tgid /= 10) != 0); 1991 1992 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { 1993 /* returning this tgid failed, save it as the first 1994 * pid for the next readir call */ 1995 filp->f_version = tgid_array[i]; 1996 goto out; 1997 } 1998 filp->f_pos++; 1999 nr++; 2000 } 2001 } 2002 out: 2003 return 0; 2004 } 2005 2006 /* for the /proc/TGID/task/ directories */ 2007 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 2008 { 2009 unsigned int tid_array[PROC_MAXPIDS]; 2010 char buf[PROC_NUMBUF]; 2011 unsigned int nr_tids, i; 2012 struct dentry *dentry = filp->f_dentry; 2013 struct inode *inode = dentry->d_inode; 2014 int retval = -ENOENT; 2015 ino_t ino; 2016 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2017 2018 if (!pid_alive(proc_task(inode))) 2019 goto out; 2020 retval = 0; 2021 2022 switch (pos) { 2023 case 0: 2024 ino = inode->i_ino; 2025 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) 2026 goto out; 2027 pos++; 2028 /* fall through */ 2029 case 1: 2030 ino = parent_ino(dentry); 2031 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) 2032 goto out; 2033 pos++; 2034 /* fall through */ 2035 } 2036 2037 nr_tids = get_tid_list(pos, tid_array, inode); 2038 2039 for (i = 0; i < nr_tids; i++) { 2040 unsigned long j = PROC_NUMBUF; 2041 int tid = tid_array[i]; 2042 2043 ino = fake_ino(tid,PROC_TID_INO); 2044 2045 do 2046 buf[--j] = '0' + (tid % 10); 2047 while ((tid /= 10) != 0); 2048 2049 if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) 2050 break; 2051 pos++; 2052 } 2053 out: 2054 filp->f_pos = pos; 2055 return retval; 2056 } 2057