1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/errno.h> 12 #include <linux/time.h> 13 #include <linux/proc_fs.h> 14 #include <linux/stat.h> 15 #include <linux/module.h> 16 #include <linux/mount.h> 17 #include <linux/smp_lock.h> 18 #include <linux/init.h> 19 #include <linux/idr.h> 20 #include <linux/namei.h> 21 #include <linux/bitops.h> 22 #include <linux/spinlock.h> 23 #include <linux/completion.h> 24 #include <asm/uaccess.h> 25 26 #include "internal.h" 27 28 DEFINE_SPINLOCK(proc_subdir_lock); 29 30 static int proc_match(int len, const char *name, struct proc_dir_entry *de) 31 { 32 if (de->namelen != len) 33 return 0; 34 return !memcmp(name, de->name, len); 35 } 36 37 /* buffer size is one page but our output routines use some slack for overruns */ 38 #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 39 40 static ssize_t 41 proc_file_read(struct file *file, char __user *buf, size_t nbytes, 42 loff_t *ppos) 43 { 44 struct inode * inode = file->f_path.dentry->d_inode; 45 char *page; 46 ssize_t retval=0; 47 int eof=0; 48 ssize_t n, count; 49 char *start; 50 struct proc_dir_entry * dp; 51 unsigned long long pos; 52 53 /* 54 * Gaah, please just use "seq_file" instead. The legacy /proc 55 * interfaces cut loff_t down to off_t for reads, and ignore 56 * the offset entirely for writes.. 57 */ 58 pos = *ppos; 59 if (pos > MAX_NON_LFS) 60 return 0; 61 if (nbytes > MAX_NON_LFS - pos) 62 nbytes = MAX_NON_LFS - pos; 63 64 dp = PDE(inode); 65 if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) 66 return -ENOMEM; 67 68 while ((nbytes > 0) && !eof) { 69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 70 71 start = NULL; 72 if (dp->get_info) { 73 /* Handle old net routines */ 74 n = dp->get_info(page, &start, *ppos, count); 75 if (n < count) 76 eof = 1; 77 } else if (dp->read_proc) { 78 /* 79 * How to be a proc read function 80 * ------------------------------ 81 * Prototype: 82 * int f(char *buffer, char **start, off_t offset, 83 * int count, int *peof, void *dat) 84 * 85 * Assume that the buffer is "count" bytes in size. 86 * 87 * If you know you have supplied all the data you 88 * have, set *peof. 89 * 90 * You have three ways to return data: 91 * 0) Leave *start = NULL. (This is the default.) 92 * Put the data of the requested offset at that 93 * offset within the buffer. Return the number (n) 94 * of bytes there are from the beginning of the 95 * buffer up to the last byte of data. If the 96 * number of supplied bytes (= n - offset) is 97 * greater than zero and you didn't signal eof 98 * and the reader is prepared to take more data 99 * you will be called again with the requested 100 * offset advanced by the number of bytes 101 * absorbed. This interface is useful for files 102 * no larger than the buffer. 103 * 1) Set *start = an unsigned long value less than 104 * the buffer address but greater than zero. 105 * Put the data of the requested offset at the 106 * beginning of the buffer. Return the number of 107 * bytes of data placed there. If this number is 108 * greater than zero and you didn't signal eof 109 * and the reader is prepared to take more data 110 * you will be called again with the requested 111 * offset advanced by *start. This interface is 112 * useful when you have a large file consisting 113 * of a series of blocks which you want to count 114 * and return as wholes. 115 * (Hack by Paul.Russell@rustcorp.com.au) 116 * 2) Set *start = an address within the buffer. 117 * Put the data of the requested offset at *start. 118 * Return the number of bytes of data placed there. 119 * If this number is greater than zero and you 120 * didn't signal eof and the reader is prepared to 121 * take more data you will be called again with the 122 * requested offset advanced by the number of bytes 123 * absorbed. 124 */ 125 n = dp->read_proc(page, &start, *ppos, 126 count, &eof, dp->data); 127 } else 128 break; 129 130 if (n == 0) /* end of file */ 131 break; 132 if (n < 0) { /* error */ 133 if (retval == 0) 134 retval = n; 135 break; 136 } 137 138 if (start == NULL) { 139 if (n > PAGE_SIZE) { 140 printk(KERN_ERR 141 "proc_file_read: Apparent buffer overflow!\n"); 142 n = PAGE_SIZE; 143 } 144 n -= *ppos; 145 if (n <= 0) 146 break; 147 if (n > count) 148 n = count; 149 start = page + *ppos; 150 } else if (start < page) { 151 if (n > PAGE_SIZE) { 152 printk(KERN_ERR 153 "proc_file_read: Apparent buffer overflow!\n"); 154 n = PAGE_SIZE; 155 } 156 if (n > count) { 157 /* 158 * Don't reduce n because doing so might 159 * cut off part of a data block. 160 */ 161 printk(KERN_WARNING 162 "proc_file_read: Read count exceeded\n"); 163 } 164 } else /* start >= page */ { 165 unsigned long startoff = (unsigned long)(start - page); 166 if (n > (PAGE_SIZE - startoff)) { 167 printk(KERN_ERR 168 "proc_file_read: Apparent buffer overflow!\n"); 169 n = PAGE_SIZE - startoff; 170 } 171 if (n > count) 172 n = count; 173 } 174 175 n -= copy_to_user(buf, start < page ? page : start, n); 176 if (n == 0) { 177 if (retval == 0) 178 retval = -EFAULT; 179 break; 180 } 181 182 *ppos += start < page ? (unsigned long)start : n; 183 nbytes -= n; 184 buf += n; 185 retval += n; 186 } 187 free_page((unsigned long) page); 188 return retval; 189 } 190 191 static ssize_t 192 proc_file_write(struct file *file, const char __user *buffer, 193 size_t count, loff_t *ppos) 194 { 195 struct inode *inode = file->f_path.dentry->d_inode; 196 struct proc_dir_entry * dp; 197 198 dp = PDE(inode); 199 200 if (!dp->write_proc) 201 return -EIO; 202 203 /* FIXME: does this routine need ppos? probably... */ 204 return dp->write_proc(file, buffer, count, dp->data); 205 } 206 207 208 static loff_t 209 proc_file_lseek(struct file *file, loff_t offset, int orig) 210 { 211 loff_t retval = -EINVAL; 212 switch (orig) { 213 case 1: 214 offset += file->f_pos; 215 /* fallthrough */ 216 case 0: 217 if (offset < 0 || offset > MAX_NON_LFS) 218 break; 219 file->f_pos = retval = offset; 220 } 221 return retval; 222 } 223 224 static const struct file_operations proc_file_operations = { 225 .llseek = proc_file_lseek, 226 .read = proc_file_read, 227 .write = proc_file_write, 228 }; 229 230 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 231 { 232 struct inode *inode = dentry->d_inode; 233 struct proc_dir_entry *de = PDE(inode); 234 int error; 235 236 error = inode_change_ok(inode, iattr); 237 if (error) 238 goto out; 239 240 error = inode_setattr(inode, iattr); 241 if (error) 242 goto out; 243 244 de->uid = inode->i_uid; 245 de->gid = inode->i_gid; 246 de->mode = inode->i_mode; 247 out: 248 return error; 249 } 250 251 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 252 struct kstat *stat) 253 { 254 struct inode *inode = dentry->d_inode; 255 struct proc_dir_entry *de = PROC_I(inode)->pde; 256 if (de && de->nlink) 257 inode->i_nlink = de->nlink; 258 259 generic_fillattr(inode, stat); 260 return 0; 261 } 262 263 static const struct inode_operations proc_file_inode_operations = { 264 .setattr = proc_notify_change, 265 }; 266 267 /* 268 * This function parses a name such as "tty/driver/serial", and 269 * returns the struct proc_dir_entry for "/proc/tty/driver", and 270 * returns "serial" in residual. 271 */ 272 static int xlate_proc_name(const char *name, 273 struct proc_dir_entry **ret, const char **residual) 274 { 275 const char *cp = name, *next; 276 struct proc_dir_entry *de; 277 int len; 278 int rtn = 0; 279 280 spin_lock(&proc_subdir_lock); 281 de = &proc_root; 282 while (1) { 283 next = strchr(cp, '/'); 284 if (!next) 285 break; 286 287 len = next - cp; 288 for (de = de->subdir; de ; de = de->next) { 289 if (proc_match(len, cp, de)) 290 break; 291 } 292 if (!de) { 293 rtn = -ENOENT; 294 goto out; 295 } 296 cp += len + 1; 297 } 298 *residual = cp; 299 *ret = de; 300 out: 301 spin_unlock(&proc_subdir_lock); 302 return rtn; 303 } 304 305 static DEFINE_IDR(proc_inum_idr); 306 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 307 308 #define PROC_DYNAMIC_FIRST 0xF0000000UL 309 310 /* 311 * Return an inode number between PROC_DYNAMIC_FIRST and 312 * 0xffffffff, or zero on failure. 313 */ 314 static unsigned int get_inode_number(void) 315 { 316 int i, inum = 0; 317 int error; 318 319 retry: 320 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 321 return 0; 322 323 spin_lock(&proc_inum_lock); 324 error = idr_get_new(&proc_inum_idr, NULL, &i); 325 spin_unlock(&proc_inum_lock); 326 if (error == -EAGAIN) 327 goto retry; 328 else if (error) 329 return 0; 330 331 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 332 333 /* inum will never be more than 0xf0ffffff, so no check 334 * for overflow. 335 */ 336 337 return inum; 338 } 339 340 static void release_inode_number(unsigned int inum) 341 { 342 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; 343 344 spin_lock(&proc_inum_lock); 345 idr_remove(&proc_inum_idr, id); 346 spin_unlock(&proc_inum_lock); 347 } 348 349 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 350 { 351 nd_set_link(nd, PDE(dentry->d_inode)->data); 352 return NULL; 353 } 354 355 static const struct inode_operations proc_link_inode_operations = { 356 .readlink = generic_readlink, 357 .follow_link = proc_follow_link, 358 }; 359 360 /* 361 * As some entries in /proc are volatile, we want to 362 * get rid of unused dentries. This could be made 363 * smarter: we could keep a "volatile" flag in the 364 * inode to indicate which ones to keep. 365 */ 366 static int proc_delete_dentry(struct dentry * dentry) 367 { 368 return 1; 369 } 370 371 static struct dentry_operations proc_dentry_operations = 372 { 373 .d_delete = proc_delete_dentry, 374 }; 375 376 /* 377 * Don't create negative dentries here, return -ENOENT by hand 378 * instead. 379 */ 380 struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, 381 struct dentry *dentry) 382 { 383 struct inode *inode = NULL; 384 int error = -ENOENT; 385 386 lock_kernel(); 387 spin_lock(&proc_subdir_lock); 388 if (de) { 389 for (de = de->subdir; de ; de = de->next) { 390 if (de->namelen != dentry->d_name.len) 391 continue; 392 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 393 unsigned int ino; 394 395 ino = de->low_ino; 396 de_get(de); 397 spin_unlock(&proc_subdir_lock); 398 error = -EINVAL; 399 inode = proc_get_inode(dir->i_sb, ino, de); 400 goto out_unlock; 401 } 402 } 403 } 404 spin_unlock(&proc_subdir_lock); 405 out_unlock: 406 unlock_kernel(); 407 408 if (inode) { 409 dentry->d_op = &proc_dentry_operations; 410 d_add(dentry, inode); 411 return NULL; 412 } 413 de_put(de); 414 return ERR_PTR(error); 415 } 416 417 struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, 418 struct nameidata *nd) 419 { 420 return proc_lookup_de(PDE(dir), dir, dentry); 421 } 422 423 /* 424 * This returns non-zero if at EOF, so that the /proc 425 * root directory can use this and check if it should 426 * continue with the <pid> entries.. 427 * 428 * Note that the VFS-layer doesn't care about the return 429 * value of the readdir() call, as long as it's non-negative 430 * for success.. 431 */ 432 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 433 filldir_t filldir) 434 { 435 unsigned int ino; 436 int i; 437 struct inode *inode = filp->f_path.dentry->d_inode; 438 int ret = 0; 439 440 lock_kernel(); 441 442 ino = inode->i_ino; 443 if (!de) { 444 ret = -EINVAL; 445 goto out; 446 } 447 i = filp->f_pos; 448 switch (i) { 449 case 0: 450 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 451 goto out; 452 i++; 453 filp->f_pos++; 454 /* fall through */ 455 case 1: 456 if (filldir(dirent, "..", 2, i, 457 parent_ino(filp->f_path.dentry), 458 DT_DIR) < 0) 459 goto out; 460 i++; 461 filp->f_pos++; 462 /* fall through */ 463 default: 464 spin_lock(&proc_subdir_lock); 465 de = de->subdir; 466 i -= 2; 467 for (;;) { 468 if (!de) { 469 ret = 1; 470 spin_unlock(&proc_subdir_lock); 471 goto out; 472 } 473 if (!i) 474 break; 475 de = de->next; 476 i--; 477 } 478 479 do { 480 struct proc_dir_entry *next; 481 482 /* filldir passes info to user space */ 483 de_get(de); 484 spin_unlock(&proc_subdir_lock); 485 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 486 de->low_ino, de->mode >> 12) < 0) { 487 de_put(de); 488 goto out; 489 } 490 spin_lock(&proc_subdir_lock); 491 filp->f_pos++; 492 next = de->next; 493 de_put(de); 494 de = next; 495 } while (de); 496 spin_unlock(&proc_subdir_lock); 497 } 498 ret = 1; 499 out: unlock_kernel(); 500 return ret; 501 } 502 503 int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) 504 { 505 struct inode *inode = filp->f_path.dentry->d_inode; 506 507 return proc_readdir_de(PDE(inode), filp, dirent, filldir); 508 } 509 510 /* 511 * These are the generic /proc directory operations. They 512 * use the in-memory "struct proc_dir_entry" tree to parse 513 * the /proc directory. 514 */ 515 static const struct file_operations proc_dir_operations = { 516 .read = generic_read_dir, 517 .readdir = proc_readdir, 518 }; 519 520 /* 521 * proc directories can do almost nothing.. 522 */ 523 static const struct inode_operations proc_dir_inode_operations = { 524 .lookup = proc_lookup, 525 .getattr = proc_getattr, 526 .setattr = proc_notify_change, 527 }; 528 529 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 530 { 531 unsigned int i; 532 struct proc_dir_entry *tmp; 533 534 i = get_inode_number(); 535 if (i == 0) 536 return -EAGAIN; 537 dp->low_ino = i; 538 539 if (S_ISDIR(dp->mode)) { 540 if (dp->proc_iops == NULL) { 541 dp->proc_fops = &proc_dir_operations; 542 dp->proc_iops = &proc_dir_inode_operations; 543 } 544 dir->nlink++; 545 } else if (S_ISLNK(dp->mode)) { 546 if (dp->proc_iops == NULL) 547 dp->proc_iops = &proc_link_inode_operations; 548 } else if (S_ISREG(dp->mode)) { 549 if (dp->proc_fops == NULL) 550 dp->proc_fops = &proc_file_operations; 551 if (dp->proc_iops == NULL) 552 dp->proc_iops = &proc_file_inode_operations; 553 } 554 555 spin_lock(&proc_subdir_lock); 556 557 for (tmp = dir->subdir; tmp; tmp = tmp->next) 558 if (strcmp(tmp->name, dp->name) == 0) { 559 printk(KERN_WARNING "proc_dir_entry '%s' already " 560 "registered\n", dp->name); 561 dump_stack(); 562 break; 563 } 564 565 dp->next = dir->subdir; 566 dp->parent = dir; 567 dir->subdir = dp; 568 spin_unlock(&proc_subdir_lock); 569 570 return 0; 571 } 572 573 static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, 574 const char *name, 575 mode_t mode, 576 nlink_t nlink) 577 { 578 struct proc_dir_entry *ent = NULL; 579 const char *fn = name; 580 int len; 581 582 /* make sure name is valid */ 583 if (!name || !strlen(name)) goto out; 584 585 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 586 goto out; 587 588 /* At this point there must not be any '/' characters beyond *fn */ 589 if (strchr(fn, '/')) 590 goto out; 591 592 len = strlen(fn); 593 594 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 595 if (!ent) goto out; 596 597 memset(ent, 0, sizeof(struct proc_dir_entry)); 598 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 599 ent->name = ((char *) ent) + sizeof(*ent); 600 ent->namelen = len; 601 ent->mode = mode; 602 ent->nlink = nlink; 603 atomic_set(&ent->count, 1); 604 ent->pde_users = 0; 605 spin_lock_init(&ent->pde_unload_lock); 606 ent->pde_unload_completion = NULL; 607 out: 608 return ent; 609 } 610 611 struct proc_dir_entry *proc_symlink(const char *name, 612 struct proc_dir_entry *parent, const char *dest) 613 { 614 struct proc_dir_entry *ent; 615 616 ent = __proc_create(&parent, name, 617 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 618 619 if (ent) { 620 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 621 if (ent->data) { 622 strcpy((char*)ent->data,dest); 623 if (proc_register(parent, ent) < 0) { 624 kfree(ent->data); 625 kfree(ent); 626 ent = NULL; 627 } 628 } else { 629 kfree(ent); 630 ent = NULL; 631 } 632 } 633 return ent; 634 } 635 636 struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 637 struct proc_dir_entry *parent) 638 { 639 struct proc_dir_entry *ent; 640 641 ent = __proc_create(&parent, name, S_IFDIR | mode, 2); 642 if (ent) { 643 if (proc_register(parent, ent) < 0) { 644 kfree(ent); 645 ent = NULL; 646 } 647 } 648 return ent; 649 } 650 651 struct proc_dir_entry *proc_mkdir(const char *name, 652 struct proc_dir_entry *parent) 653 { 654 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 655 } 656 657 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 658 struct proc_dir_entry *parent) 659 { 660 struct proc_dir_entry *ent; 661 nlink_t nlink; 662 663 if (S_ISDIR(mode)) { 664 if ((mode & S_IALLUGO) == 0) 665 mode |= S_IRUGO | S_IXUGO; 666 nlink = 2; 667 } else { 668 if ((mode & S_IFMT) == 0) 669 mode |= S_IFREG; 670 if ((mode & S_IALLUGO) == 0) 671 mode |= S_IRUGO; 672 nlink = 1; 673 } 674 675 ent = __proc_create(&parent, name, mode, nlink); 676 if (ent) { 677 if (proc_register(parent, ent) < 0) { 678 kfree(ent); 679 ent = NULL; 680 } 681 } 682 return ent; 683 } 684 685 struct proc_dir_entry *proc_create(const char *name, mode_t mode, 686 struct proc_dir_entry *parent, 687 const struct file_operations *proc_fops) 688 { 689 struct proc_dir_entry *pde; 690 nlink_t nlink; 691 692 if (S_ISDIR(mode)) { 693 if ((mode & S_IALLUGO) == 0) 694 mode |= S_IRUGO | S_IXUGO; 695 nlink = 2; 696 } else { 697 if ((mode & S_IFMT) == 0) 698 mode |= S_IFREG; 699 if ((mode & S_IALLUGO) == 0) 700 mode |= S_IRUGO; 701 nlink = 1; 702 } 703 704 pde = __proc_create(&parent, name, mode, nlink); 705 if (!pde) 706 goto out; 707 pde->proc_fops = proc_fops; 708 if (proc_register(parent, pde) < 0) 709 goto out_free; 710 return pde; 711 out_free: 712 kfree(pde); 713 out: 714 return NULL; 715 } 716 717 void free_proc_entry(struct proc_dir_entry *de) 718 { 719 unsigned int ino = de->low_ino; 720 721 if (ino < PROC_DYNAMIC_FIRST) 722 return; 723 724 release_inode_number(ino); 725 726 if (S_ISLNK(de->mode)) 727 kfree(de->data); 728 kfree(de); 729 } 730 731 /* 732 * Remove a /proc entry and free it if it's not currently in use. 733 */ 734 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 735 { 736 struct proc_dir_entry **p; 737 struct proc_dir_entry *de; 738 const char *fn = name; 739 int len; 740 741 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 742 goto out; 743 len = strlen(fn); 744 745 spin_lock(&proc_subdir_lock); 746 for (p = &parent->subdir; *p; p=&(*p)->next ) { 747 if (!proc_match(len, fn, *p)) 748 continue; 749 de = *p; 750 *p = de->next; 751 de->next = NULL; 752 753 spin_lock(&de->pde_unload_lock); 754 /* 755 * Stop accepting new callers into module. If you're 756 * dynamically allocating ->proc_fops, save a pointer somewhere. 757 */ 758 de->proc_fops = NULL; 759 /* Wait until all existing callers into module are done. */ 760 if (de->pde_users > 0) { 761 DECLARE_COMPLETION_ONSTACK(c); 762 763 if (!de->pde_unload_completion) 764 de->pde_unload_completion = &c; 765 766 spin_unlock(&de->pde_unload_lock); 767 spin_unlock(&proc_subdir_lock); 768 769 wait_for_completion(de->pde_unload_completion); 770 771 spin_lock(&proc_subdir_lock); 772 goto continue_removing; 773 } 774 spin_unlock(&de->pde_unload_lock); 775 776 continue_removing: 777 if (S_ISDIR(de->mode)) 778 parent->nlink--; 779 de->nlink = 0; 780 WARN_ON(de->subdir); 781 if (atomic_dec_and_test(&de->count)) 782 free_proc_entry(de); 783 break; 784 } 785 spin_unlock(&proc_subdir_lock); 786 out: 787 return; 788 } 789