1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/errno.h> 12 #include <linux/time.h> 13 #include <linux/proc_fs.h> 14 #include <linux/stat.h> 15 #include <linux/module.h> 16 #include <linux/mount.h> 17 #include <linux/smp_lock.h> 18 #include <linux/init.h> 19 #include <linux/idr.h> 20 #include <linux/namei.h> 21 #include <linux/bitops.h> 22 #include <linux/spinlock.h> 23 #include <asm/uaccess.h> 24 25 #include "internal.h" 26 27 static ssize_t proc_file_read(struct file *file, char __user *buf, 28 size_t nbytes, loff_t *ppos); 29 static ssize_t proc_file_write(struct file *file, const char __user *buffer, 30 size_t count, loff_t *ppos); 31 static loff_t proc_file_lseek(struct file *, loff_t, int); 32 33 DEFINE_SPINLOCK(proc_subdir_lock); 34 35 static int proc_match(int len, const char *name, struct proc_dir_entry *de) 36 { 37 if (de->namelen != len) 38 return 0; 39 return !memcmp(name, de->name, len); 40 } 41 42 static const struct file_operations proc_file_operations = { 43 .llseek = proc_file_lseek, 44 .read = proc_file_read, 45 .write = proc_file_write, 46 }; 47 48 /* buffer size is one page but our output routines use some slack for overruns */ 49 #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 50 51 static ssize_t 52 proc_file_read(struct file *file, char __user *buf, size_t nbytes, 53 loff_t *ppos) 54 { 55 struct inode * inode = file->f_path.dentry->d_inode; 56 char *page; 57 ssize_t retval=0; 58 int eof=0; 59 ssize_t n, count; 60 char *start; 61 struct proc_dir_entry * dp; 62 unsigned long long pos; 63 64 /* 65 * Gaah, please just use "seq_file" instead. The legacy /proc 66 * interfaces cut loff_t down to off_t for reads, and ignore 67 * the offset entirely for writes.. 68 */ 69 pos = *ppos; 70 if (pos > MAX_NON_LFS) 71 return 0; 72 if (nbytes > MAX_NON_LFS - pos) 73 nbytes = MAX_NON_LFS - pos; 74 75 dp = PDE(inode); 76 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 77 return -ENOMEM; 78 79 while ((nbytes > 0) && !eof) { 80 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 81 82 start = NULL; 83 if (dp->get_info) { 84 /* Handle old net routines */ 85 n = dp->get_info(page, &start, *ppos, count); 86 if (n < count) 87 eof = 1; 88 } else if (dp->read_proc) { 89 /* 90 * How to be a proc read function 91 * ------------------------------ 92 * Prototype: 93 * int f(char *buffer, char **start, off_t offset, 94 * int count, int *peof, void *dat) 95 * 96 * Assume that the buffer is "count" bytes in size. 97 * 98 * If you know you have supplied all the data you 99 * have, set *peof. 100 * 101 * You have three ways to return data: 102 * 0) Leave *start = NULL. (This is the default.) 103 * Put the data of the requested offset at that 104 * offset within the buffer. Return the number (n) 105 * of bytes there are from the beginning of the 106 * buffer up to the last byte of data. If the 107 * number of supplied bytes (= n - offset) is 108 * greater than zero and you didn't signal eof 109 * and the reader is prepared to take more data 110 * you will be called again with the requested 111 * offset advanced by the number of bytes 112 * absorbed. This interface is useful for files 113 * no larger than the buffer. 114 * 1) Set *start = an unsigned long value less than 115 * the buffer address but greater than zero. 116 * Put the data of the requested offset at the 117 * beginning of the buffer. Return the number of 118 * bytes of data placed there. If this number is 119 * greater than zero and you didn't signal eof 120 * and the reader is prepared to take more data 121 * you will be called again with the requested 122 * offset advanced by *start. This interface is 123 * useful when you have a large file consisting 124 * of a series of blocks which you want to count 125 * and return as wholes. 126 * (Hack by Paul.Russell@rustcorp.com.au) 127 * 2) Set *start = an address within the buffer. 128 * Put the data of the requested offset at *start. 129 * Return the number of bytes of data placed there. 130 * If this number is greater than zero and you 131 * didn't signal eof and the reader is prepared to 132 * take more data you will be called again with the 133 * requested offset advanced by the number of bytes 134 * absorbed. 135 */ 136 n = dp->read_proc(page, &start, *ppos, 137 count, &eof, dp->data); 138 } else 139 break; 140 141 if (n == 0) /* end of file */ 142 break; 143 if (n < 0) { /* error */ 144 if (retval == 0) 145 retval = n; 146 break; 147 } 148 149 if (start == NULL) { 150 if (n > PAGE_SIZE) { 151 printk(KERN_ERR 152 "proc_file_read: Apparent buffer overflow!\n"); 153 n = PAGE_SIZE; 154 } 155 n -= *ppos; 156 if (n <= 0) 157 break; 158 if (n > count) 159 n = count; 160 start = page + *ppos; 161 } else if (start < page) { 162 if (n > PAGE_SIZE) { 163 printk(KERN_ERR 164 "proc_file_read: Apparent buffer overflow!\n"); 165 n = PAGE_SIZE; 166 } 167 if (n > count) { 168 /* 169 * Don't reduce n because doing so might 170 * cut off part of a data block. 171 */ 172 printk(KERN_WARNING 173 "proc_file_read: Read count exceeded\n"); 174 } 175 } else /* start >= page */ { 176 unsigned long startoff = (unsigned long)(start - page); 177 if (n > (PAGE_SIZE - startoff)) { 178 printk(KERN_ERR 179 "proc_file_read: Apparent buffer overflow!\n"); 180 n = PAGE_SIZE - startoff; 181 } 182 if (n > count) 183 n = count; 184 } 185 186 n -= copy_to_user(buf, start < page ? page : start, n); 187 if (n == 0) { 188 if (retval == 0) 189 retval = -EFAULT; 190 break; 191 } 192 193 *ppos += start < page ? (unsigned long)start : n; 194 nbytes -= n; 195 buf += n; 196 retval += n; 197 } 198 free_page((unsigned long) page); 199 return retval; 200 } 201 202 static ssize_t 203 proc_file_write(struct file *file, const char __user *buffer, 204 size_t count, loff_t *ppos) 205 { 206 struct inode *inode = file->f_path.dentry->d_inode; 207 struct proc_dir_entry * dp; 208 209 dp = PDE(inode); 210 211 if (!dp->write_proc) 212 return -EIO; 213 214 /* FIXME: does this routine need ppos? probably... */ 215 return dp->write_proc(file, buffer, count, dp->data); 216 } 217 218 219 static loff_t 220 proc_file_lseek(struct file *file, loff_t offset, int orig) 221 { 222 loff_t retval = -EINVAL; 223 switch (orig) { 224 case 1: 225 offset += file->f_pos; 226 /* fallthrough */ 227 case 0: 228 if (offset < 0 || offset > MAX_NON_LFS) 229 break; 230 file->f_pos = retval = offset; 231 } 232 return retval; 233 } 234 235 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 236 { 237 struct inode *inode = dentry->d_inode; 238 struct proc_dir_entry *de = PDE(inode); 239 int error; 240 241 error = inode_change_ok(inode, iattr); 242 if (error) 243 goto out; 244 245 error = inode_setattr(inode, iattr); 246 if (error) 247 goto out; 248 249 de->uid = inode->i_uid; 250 de->gid = inode->i_gid; 251 de->mode = inode->i_mode; 252 out: 253 return error; 254 } 255 256 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 257 struct kstat *stat) 258 { 259 struct inode *inode = dentry->d_inode; 260 struct proc_dir_entry *de = PROC_I(inode)->pde; 261 if (de && de->nlink) 262 inode->i_nlink = de->nlink; 263 264 generic_fillattr(inode, stat); 265 return 0; 266 } 267 268 static const struct inode_operations proc_file_inode_operations = { 269 .setattr = proc_notify_change, 270 }; 271 272 /* 273 * This function parses a name such as "tty/driver/serial", and 274 * returns the struct proc_dir_entry for "/proc/tty/driver", and 275 * returns "serial" in residual. 276 */ 277 static int xlate_proc_name(const char *name, 278 struct proc_dir_entry **ret, const char **residual) 279 { 280 const char *cp = name, *next; 281 struct proc_dir_entry *de; 282 int len; 283 int rtn = 0; 284 285 spin_lock(&proc_subdir_lock); 286 de = &proc_root; 287 while (1) { 288 next = strchr(cp, '/'); 289 if (!next) 290 break; 291 292 len = next - cp; 293 for (de = de->subdir; de ; de = de->next) { 294 if (proc_match(len, cp, de)) 295 break; 296 } 297 if (!de) { 298 rtn = -ENOENT; 299 goto out; 300 } 301 cp += len + 1; 302 } 303 *residual = cp; 304 *ret = de; 305 out: 306 spin_unlock(&proc_subdir_lock); 307 return rtn; 308 } 309 310 static DEFINE_IDR(proc_inum_idr); 311 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 312 313 #define PROC_DYNAMIC_FIRST 0xF0000000UL 314 315 /* 316 * Return an inode number between PROC_DYNAMIC_FIRST and 317 * 0xffffffff, or zero on failure. 318 */ 319 static unsigned int get_inode_number(void) 320 { 321 int i, inum = 0; 322 int error; 323 324 retry: 325 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 326 return 0; 327 328 spin_lock(&proc_inum_lock); 329 error = idr_get_new(&proc_inum_idr, NULL, &i); 330 spin_unlock(&proc_inum_lock); 331 if (error == -EAGAIN) 332 goto retry; 333 else if (error) 334 return 0; 335 336 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 337 338 /* inum will never be more than 0xf0ffffff, so no check 339 * for overflow. 340 */ 341 342 return inum; 343 } 344 345 static void release_inode_number(unsigned int inum) 346 { 347 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; 348 349 spin_lock(&proc_inum_lock); 350 idr_remove(&proc_inum_idr, id); 351 spin_unlock(&proc_inum_lock); 352 } 353 354 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 355 { 356 nd_set_link(nd, PDE(dentry->d_inode)->data); 357 return NULL; 358 } 359 360 static const struct inode_operations proc_link_inode_operations = { 361 .readlink = generic_readlink, 362 .follow_link = proc_follow_link, 363 }; 364 365 /* 366 * As some entries in /proc are volatile, we want to 367 * get rid of unused dentries. This could be made 368 * smarter: we could keep a "volatile" flag in the 369 * inode to indicate which ones to keep. 370 */ 371 static int proc_delete_dentry(struct dentry * dentry) 372 { 373 return 1; 374 } 375 376 static struct dentry_operations proc_dentry_operations = 377 { 378 .d_delete = proc_delete_dentry, 379 }; 380 381 /* 382 * Don't create negative dentries here, return -ENOENT by hand 383 * instead. 384 */ 385 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 386 { 387 struct inode *inode = NULL; 388 struct proc_dir_entry * de; 389 int error = -ENOENT; 390 391 lock_kernel(); 392 spin_lock(&proc_subdir_lock); 393 de = PDE(dir); 394 if (de) { 395 for (de = de->subdir; de ; de = de->next) { 396 if (de->namelen != dentry->d_name.len) 397 continue; 398 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 399 unsigned int ino = de->low_ino; 400 401 de_get(de); 402 spin_unlock(&proc_subdir_lock); 403 error = -EINVAL; 404 inode = proc_get_inode(dir->i_sb, ino, de); 405 spin_lock(&proc_subdir_lock); 406 break; 407 } 408 } 409 } 410 spin_unlock(&proc_subdir_lock); 411 unlock_kernel(); 412 413 if (inode) { 414 dentry->d_op = &proc_dentry_operations; 415 d_add(dentry, inode); 416 return NULL; 417 } 418 de_put(de); 419 return ERR_PTR(error); 420 } 421 422 /* 423 * This returns non-zero if at EOF, so that the /proc 424 * root directory can use this and check if it should 425 * continue with the <pid> entries.. 426 * 427 * Note that the VFS-layer doesn't care about the return 428 * value of the readdir() call, as long as it's non-negative 429 * for success.. 430 */ 431 int proc_readdir(struct file * filp, 432 void * dirent, filldir_t filldir) 433 { 434 struct proc_dir_entry * de; 435 unsigned int ino; 436 int i; 437 struct inode *inode = filp->f_path.dentry->d_inode; 438 int ret = 0; 439 440 lock_kernel(); 441 442 ino = inode->i_ino; 443 de = PDE(inode); 444 if (!de) { 445 ret = -EINVAL; 446 goto out; 447 } 448 i = filp->f_pos; 449 switch (i) { 450 case 0: 451 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 452 goto out; 453 i++; 454 filp->f_pos++; 455 /* fall through */ 456 case 1: 457 if (filldir(dirent, "..", 2, i, 458 parent_ino(filp->f_path.dentry), 459 DT_DIR) < 0) 460 goto out; 461 i++; 462 filp->f_pos++; 463 /* fall through */ 464 default: 465 spin_lock(&proc_subdir_lock); 466 de = de->subdir; 467 i -= 2; 468 for (;;) { 469 if (!de) { 470 ret = 1; 471 spin_unlock(&proc_subdir_lock); 472 goto out; 473 } 474 if (!i) 475 break; 476 de = de->next; 477 i--; 478 } 479 480 do { 481 struct proc_dir_entry *next; 482 483 /* filldir passes info to user space */ 484 de_get(de); 485 spin_unlock(&proc_subdir_lock); 486 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 487 de->low_ino, de->mode >> 12) < 0) { 488 de_put(de); 489 goto out; 490 } 491 spin_lock(&proc_subdir_lock); 492 filp->f_pos++; 493 next = de->next; 494 de_put(de); 495 de = next; 496 } while (de); 497 spin_unlock(&proc_subdir_lock); 498 } 499 ret = 1; 500 out: unlock_kernel(); 501 return ret; 502 } 503 504 /* 505 * These are the generic /proc directory operations. They 506 * use the in-memory "struct proc_dir_entry" tree to parse 507 * the /proc directory. 508 */ 509 static const struct file_operations proc_dir_operations = { 510 .read = generic_read_dir, 511 .readdir = proc_readdir, 512 }; 513 514 /* 515 * proc directories can do almost nothing.. 516 */ 517 static const struct inode_operations proc_dir_inode_operations = { 518 .lookup = proc_lookup, 519 .getattr = proc_getattr, 520 .setattr = proc_notify_change, 521 }; 522 523 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 524 { 525 unsigned int i; 526 527 i = get_inode_number(); 528 if (i == 0) 529 return -EAGAIN; 530 dp->low_ino = i; 531 532 spin_lock(&proc_subdir_lock); 533 dp->next = dir->subdir; 534 dp->parent = dir; 535 dir->subdir = dp; 536 spin_unlock(&proc_subdir_lock); 537 538 if (S_ISDIR(dp->mode)) { 539 if (dp->proc_iops == NULL) { 540 dp->proc_fops = &proc_dir_operations; 541 dp->proc_iops = &proc_dir_inode_operations; 542 } 543 dir->nlink++; 544 } else if (S_ISLNK(dp->mode)) { 545 if (dp->proc_iops == NULL) 546 dp->proc_iops = &proc_link_inode_operations; 547 } else if (S_ISREG(dp->mode)) { 548 if (dp->proc_fops == NULL) 549 dp->proc_fops = &proc_file_operations; 550 if (dp->proc_iops == NULL) 551 dp->proc_iops = &proc_file_inode_operations; 552 } 553 return 0; 554 } 555 556 /* 557 * Kill an inode that got unregistered.. 558 */ 559 static void proc_kill_inodes(struct proc_dir_entry *de) 560 { 561 struct list_head *p; 562 struct super_block *sb = proc_mnt->mnt_sb; 563 564 /* 565 * Actually it's a partial revoke(). 566 */ 567 file_list_lock(); 568 list_for_each(p, &sb->s_files) { 569 struct file * filp = list_entry(p, struct file, f_u.fu_list); 570 struct dentry * dentry = filp->f_path.dentry; 571 struct inode * inode; 572 const struct file_operations *fops; 573 574 if (dentry->d_op != &proc_dentry_operations) 575 continue; 576 inode = dentry->d_inode; 577 if (PDE(inode) != de) 578 continue; 579 fops = filp->f_op; 580 filp->f_op = NULL; 581 fops_put(fops); 582 } 583 file_list_unlock(); 584 } 585 586 static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, 587 const char *name, 588 mode_t mode, 589 nlink_t nlink) 590 { 591 struct proc_dir_entry *ent = NULL; 592 const char *fn = name; 593 int len; 594 595 /* make sure name is valid */ 596 if (!name || !strlen(name)) goto out; 597 598 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 599 goto out; 600 601 /* At this point there must not be any '/' characters beyond *fn */ 602 if (strchr(fn, '/')) 603 goto out; 604 605 len = strlen(fn); 606 607 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 608 if (!ent) goto out; 609 610 memset(ent, 0, sizeof(struct proc_dir_entry)); 611 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 612 ent->name = ((char *) ent) + sizeof(*ent); 613 ent->namelen = len; 614 ent->mode = mode; 615 ent->nlink = nlink; 616 out: 617 return ent; 618 } 619 620 struct proc_dir_entry *proc_symlink(const char *name, 621 struct proc_dir_entry *parent, const char *dest) 622 { 623 struct proc_dir_entry *ent; 624 625 ent = proc_create(&parent,name, 626 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 627 628 if (ent) { 629 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 630 if (ent->data) { 631 strcpy((char*)ent->data,dest); 632 if (proc_register(parent, ent) < 0) { 633 kfree(ent->data); 634 kfree(ent); 635 ent = NULL; 636 } 637 } else { 638 kfree(ent); 639 ent = NULL; 640 } 641 } 642 return ent; 643 } 644 645 struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 646 struct proc_dir_entry *parent) 647 { 648 struct proc_dir_entry *ent; 649 650 ent = proc_create(&parent, name, S_IFDIR | mode, 2); 651 if (ent) { 652 ent->proc_fops = &proc_dir_operations; 653 ent->proc_iops = &proc_dir_inode_operations; 654 655 if (proc_register(parent, ent) < 0) { 656 kfree(ent); 657 ent = NULL; 658 } 659 } 660 return ent; 661 } 662 663 struct proc_dir_entry *proc_mkdir(const char *name, 664 struct proc_dir_entry *parent) 665 { 666 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 667 } 668 669 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 670 struct proc_dir_entry *parent) 671 { 672 struct proc_dir_entry *ent; 673 nlink_t nlink; 674 675 if (S_ISDIR(mode)) { 676 if ((mode & S_IALLUGO) == 0) 677 mode |= S_IRUGO | S_IXUGO; 678 nlink = 2; 679 } else { 680 if ((mode & S_IFMT) == 0) 681 mode |= S_IFREG; 682 if ((mode & S_IALLUGO) == 0) 683 mode |= S_IRUGO; 684 nlink = 1; 685 } 686 687 ent = proc_create(&parent,name,mode,nlink); 688 if (ent) { 689 if (S_ISDIR(mode)) { 690 ent->proc_fops = &proc_dir_operations; 691 ent->proc_iops = &proc_dir_inode_operations; 692 } 693 if (proc_register(parent, ent) < 0) { 694 kfree(ent); 695 ent = NULL; 696 } 697 } 698 return ent; 699 } 700 701 void free_proc_entry(struct proc_dir_entry *de) 702 { 703 unsigned int ino = de->low_ino; 704 705 if (ino < PROC_DYNAMIC_FIRST) 706 return; 707 708 release_inode_number(ino); 709 710 if (S_ISLNK(de->mode) && de->data) 711 kfree(de->data); 712 kfree(de); 713 } 714 715 /* 716 * Remove a /proc entry and free it if it's not currently in use. 717 * If it is in use, we set the 'deleted' flag. 718 */ 719 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 720 { 721 struct proc_dir_entry **p; 722 struct proc_dir_entry *de; 723 const char *fn = name; 724 int len; 725 726 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 727 goto out; 728 len = strlen(fn); 729 730 spin_lock(&proc_subdir_lock); 731 for (p = &parent->subdir; *p; p=&(*p)->next ) { 732 if (!proc_match(len, fn, *p)) 733 continue; 734 de = *p; 735 *p = de->next; 736 de->next = NULL; 737 if (S_ISDIR(de->mode)) 738 parent->nlink--; 739 proc_kill_inodes(de); 740 de->nlink = 0; 741 WARN_ON(de->subdir); 742 if (!atomic_read(&de->count)) 743 free_proc_entry(de); 744 else { 745 de->deleted = 1; 746 printk("remove_proc_entry: %s/%s busy, count=%d\n", 747 parent->name, de->name, atomic_read(&de->count)); 748 } 749 break; 750 } 751 spin_unlock(&proc_subdir_lock); 752 out: 753 return; 754 } 755