1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/errno.h> 12 #include <linux/time.h> 13 #include <linux/proc_fs.h> 14 #include <linux/stat.h> 15 #include <linux/module.h> 16 #include <linux/mount.h> 17 #include <linux/smp_lock.h> 18 #include <linux/init.h> 19 #include <linux/idr.h> 20 #include <linux/namei.h> 21 #include <linux/bitops.h> 22 #include <linux/spinlock.h> 23 #include <linux/completion.h> 24 #include <asm/uaccess.h> 25 26 #include "internal.h" 27 28 DEFINE_SPINLOCK(proc_subdir_lock); 29 30 static int proc_match(int len, const char *name, struct proc_dir_entry *de) 31 { 32 if (de->namelen != len) 33 return 0; 34 return !memcmp(name, de->name, len); 35 } 36 37 /* buffer size is one page but our output routines use some slack for overruns */ 38 #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 39 40 static ssize_t 41 proc_file_read(struct file *file, char __user *buf, size_t nbytes, 42 loff_t *ppos) 43 { 44 struct inode * inode = file->f_path.dentry->d_inode; 45 char *page; 46 ssize_t retval=0; 47 int eof=0; 48 ssize_t n, count; 49 char *start; 50 struct proc_dir_entry * dp; 51 unsigned long long pos; 52 53 /* 54 * Gaah, please just use "seq_file" instead. The legacy /proc 55 * interfaces cut loff_t down to off_t for reads, and ignore 56 * the offset entirely for writes.. 57 */ 58 pos = *ppos; 59 if (pos > MAX_NON_LFS) 60 return 0; 61 if (nbytes > MAX_NON_LFS - pos) 62 nbytes = MAX_NON_LFS - pos; 63 64 dp = PDE(inode); 65 if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) 66 return -ENOMEM; 67 68 while ((nbytes > 0) && !eof) { 69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 70 71 start = NULL; 72 if (dp->read_proc) { 73 /* 74 * How to be a proc read function 75 * ------------------------------ 76 * Prototype: 77 * int f(char *buffer, char **start, off_t offset, 78 * int count, int *peof, void *dat) 79 * 80 * Assume that the buffer is "count" bytes in size. 81 * 82 * If you know you have supplied all the data you 83 * have, set *peof. 84 * 85 * You have three ways to return data: 86 * 0) Leave *start = NULL. (This is the default.) 87 * Put the data of the requested offset at that 88 * offset within the buffer. Return the number (n) 89 * of bytes there are from the beginning of the 90 * buffer up to the last byte of data. If the 91 * number of supplied bytes (= n - offset) is 92 * greater than zero and you didn't signal eof 93 * and the reader is prepared to take more data 94 * you will be called again with the requested 95 * offset advanced by the number of bytes 96 * absorbed. This interface is useful for files 97 * no larger than the buffer. 98 * 1) Set *start = an unsigned long value less than 99 * the buffer address but greater than zero. 100 * Put the data of the requested offset at the 101 * beginning of the buffer. Return the number of 102 * bytes of data placed there. If this number is 103 * greater than zero and you didn't signal eof 104 * and the reader is prepared to take more data 105 * you will be called again with the requested 106 * offset advanced by *start. This interface is 107 * useful when you have a large file consisting 108 * of a series of blocks which you want to count 109 * and return as wholes. 110 * (Hack by Paul.Russell@rustcorp.com.au) 111 * 2) Set *start = an address within the buffer. 112 * Put the data of the requested offset at *start. 113 * Return the number of bytes of data placed there. 114 * If this number is greater than zero and you 115 * didn't signal eof and the reader is prepared to 116 * take more data you will be called again with the 117 * requested offset advanced by the number of bytes 118 * absorbed. 119 */ 120 n = dp->read_proc(page, &start, *ppos, 121 count, &eof, dp->data); 122 } else 123 break; 124 125 if (n == 0) /* end of file */ 126 break; 127 if (n < 0) { /* error */ 128 if (retval == 0) 129 retval = n; 130 break; 131 } 132 133 if (start == NULL) { 134 if (n > PAGE_SIZE) { 135 printk(KERN_ERR 136 "proc_file_read: Apparent buffer overflow!\n"); 137 n = PAGE_SIZE; 138 } 139 n -= *ppos; 140 if (n <= 0) 141 break; 142 if (n > count) 143 n = count; 144 start = page + *ppos; 145 } else if (start < page) { 146 if (n > PAGE_SIZE) { 147 printk(KERN_ERR 148 "proc_file_read: Apparent buffer overflow!\n"); 149 n = PAGE_SIZE; 150 } 151 if (n > count) { 152 /* 153 * Don't reduce n because doing so might 154 * cut off part of a data block. 155 */ 156 printk(KERN_WARNING 157 "proc_file_read: Read count exceeded\n"); 158 } 159 } else /* start >= page */ { 160 unsigned long startoff = (unsigned long)(start - page); 161 if (n > (PAGE_SIZE - startoff)) { 162 printk(KERN_ERR 163 "proc_file_read: Apparent buffer overflow!\n"); 164 n = PAGE_SIZE - startoff; 165 } 166 if (n > count) 167 n = count; 168 } 169 170 n -= copy_to_user(buf, start < page ? page : start, n); 171 if (n == 0) { 172 if (retval == 0) 173 retval = -EFAULT; 174 break; 175 } 176 177 *ppos += start < page ? (unsigned long)start : n; 178 nbytes -= n; 179 buf += n; 180 retval += n; 181 } 182 free_page((unsigned long) page); 183 return retval; 184 } 185 186 static ssize_t 187 proc_file_write(struct file *file, const char __user *buffer, 188 size_t count, loff_t *ppos) 189 { 190 struct inode *inode = file->f_path.dentry->d_inode; 191 struct proc_dir_entry * dp; 192 193 dp = PDE(inode); 194 195 if (!dp->write_proc) 196 return -EIO; 197 198 /* FIXME: does this routine need ppos? probably... */ 199 return dp->write_proc(file, buffer, count, dp->data); 200 } 201 202 203 static loff_t 204 proc_file_lseek(struct file *file, loff_t offset, int orig) 205 { 206 loff_t retval = -EINVAL; 207 switch (orig) { 208 case 1: 209 offset += file->f_pos; 210 /* fallthrough */ 211 case 0: 212 if (offset < 0 || offset > MAX_NON_LFS) 213 break; 214 file->f_pos = retval = offset; 215 } 216 return retval; 217 } 218 219 static const struct file_operations proc_file_operations = { 220 .llseek = proc_file_lseek, 221 .read = proc_file_read, 222 .write = proc_file_write, 223 }; 224 225 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 226 { 227 struct inode *inode = dentry->d_inode; 228 struct proc_dir_entry *de = PDE(inode); 229 int error; 230 231 error = inode_change_ok(inode, iattr); 232 if (error) 233 goto out; 234 235 error = inode_setattr(inode, iattr); 236 if (error) 237 goto out; 238 239 de->uid = inode->i_uid; 240 de->gid = inode->i_gid; 241 de->mode = inode->i_mode; 242 out: 243 return error; 244 } 245 246 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 247 struct kstat *stat) 248 { 249 struct inode *inode = dentry->d_inode; 250 struct proc_dir_entry *de = PROC_I(inode)->pde; 251 if (de && de->nlink) 252 inode->i_nlink = de->nlink; 253 254 generic_fillattr(inode, stat); 255 return 0; 256 } 257 258 static const struct inode_operations proc_file_inode_operations = { 259 .setattr = proc_notify_change, 260 }; 261 262 /* 263 * This function parses a name such as "tty/driver/serial", and 264 * returns the struct proc_dir_entry for "/proc/tty/driver", and 265 * returns "serial" in residual. 266 */ 267 static int xlate_proc_name(const char *name, 268 struct proc_dir_entry **ret, const char **residual) 269 { 270 const char *cp = name, *next; 271 struct proc_dir_entry *de; 272 int len; 273 int rtn = 0; 274 275 de = *ret; 276 if (!de) 277 de = &proc_root; 278 279 spin_lock(&proc_subdir_lock); 280 while (1) { 281 next = strchr(cp, '/'); 282 if (!next) 283 break; 284 285 len = next - cp; 286 for (de = de->subdir; de ; de = de->next) { 287 if (proc_match(len, cp, de)) 288 break; 289 } 290 if (!de) { 291 rtn = -ENOENT; 292 goto out; 293 } 294 cp += len + 1; 295 } 296 *residual = cp; 297 *ret = de; 298 out: 299 spin_unlock(&proc_subdir_lock); 300 return rtn; 301 } 302 303 static DEFINE_IDR(proc_inum_idr); 304 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 305 306 #define PROC_DYNAMIC_FIRST 0xF0000000UL 307 308 /* 309 * Return an inode number between PROC_DYNAMIC_FIRST and 310 * 0xffffffff, or zero on failure. 311 */ 312 static unsigned int get_inode_number(void) 313 { 314 int i, inum = 0; 315 int error; 316 317 retry: 318 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 319 return 0; 320 321 spin_lock(&proc_inum_lock); 322 error = idr_get_new(&proc_inum_idr, NULL, &i); 323 spin_unlock(&proc_inum_lock); 324 if (error == -EAGAIN) 325 goto retry; 326 else if (error) 327 return 0; 328 329 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 330 331 /* inum will never be more than 0xf0ffffff, so no check 332 * for overflow. 333 */ 334 335 return inum; 336 } 337 338 static void release_inode_number(unsigned int inum) 339 { 340 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; 341 342 spin_lock(&proc_inum_lock); 343 idr_remove(&proc_inum_idr, id); 344 spin_unlock(&proc_inum_lock); 345 } 346 347 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 348 { 349 nd_set_link(nd, PDE(dentry->d_inode)->data); 350 return NULL; 351 } 352 353 static const struct inode_operations proc_link_inode_operations = { 354 .readlink = generic_readlink, 355 .follow_link = proc_follow_link, 356 }; 357 358 /* 359 * As some entries in /proc are volatile, we want to 360 * get rid of unused dentries. This could be made 361 * smarter: we could keep a "volatile" flag in the 362 * inode to indicate which ones to keep. 363 */ 364 static int proc_delete_dentry(struct dentry * dentry) 365 { 366 return 1; 367 } 368 369 static struct dentry_operations proc_dentry_operations = 370 { 371 .d_delete = proc_delete_dentry, 372 }; 373 374 /* 375 * Don't create negative dentries here, return -ENOENT by hand 376 * instead. 377 */ 378 struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, 379 struct dentry *dentry) 380 { 381 struct inode *inode = NULL; 382 int error = -ENOENT; 383 384 lock_kernel(); 385 spin_lock(&proc_subdir_lock); 386 for (de = de->subdir; de ; de = de->next) { 387 if (de->namelen != dentry->d_name.len) 388 continue; 389 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 390 unsigned int ino; 391 392 ino = de->low_ino; 393 de_get(de); 394 spin_unlock(&proc_subdir_lock); 395 error = -EINVAL; 396 inode = proc_get_inode(dir->i_sb, ino, de); 397 goto out_unlock; 398 } 399 } 400 spin_unlock(&proc_subdir_lock); 401 out_unlock: 402 unlock_kernel(); 403 404 if (inode) { 405 dentry->d_op = &proc_dentry_operations; 406 d_add(dentry, inode); 407 return NULL; 408 } 409 if (de) 410 de_put(de); 411 return ERR_PTR(error); 412 } 413 414 struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, 415 struct nameidata *nd) 416 { 417 return proc_lookup_de(PDE(dir), dir, dentry); 418 } 419 420 /* 421 * This returns non-zero if at EOF, so that the /proc 422 * root directory can use this and check if it should 423 * continue with the <pid> entries.. 424 * 425 * Note that the VFS-layer doesn't care about the return 426 * value of the readdir() call, as long as it's non-negative 427 * for success.. 428 */ 429 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 430 filldir_t filldir) 431 { 432 unsigned int ino; 433 int i; 434 struct inode *inode = filp->f_path.dentry->d_inode; 435 int ret = 0; 436 437 lock_kernel(); 438 439 ino = inode->i_ino; 440 i = filp->f_pos; 441 switch (i) { 442 case 0: 443 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 444 goto out; 445 i++; 446 filp->f_pos++; 447 /* fall through */ 448 case 1: 449 if (filldir(dirent, "..", 2, i, 450 parent_ino(filp->f_path.dentry), 451 DT_DIR) < 0) 452 goto out; 453 i++; 454 filp->f_pos++; 455 /* fall through */ 456 default: 457 spin_lock(&proc_subdir_lock); 458 de = de->subdir; 459 i -= 2; 460 for (;;) { 461 if (!de) { 462 ret = 1; 463 spin_unlock(&proc_subdir_lock); 464 goto out; 465 } 466 if (!i) 467 break; 468 de = de->next; 469 i--; 470 } 471 472 do { 473 struct proc_dir_entry *next; 474 475 /* filldir passes info to user space */ 476 de_get(de); 477 spin_unlock(&proc_subdir_lock); 478 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 479 de->low_ino, de->mode >> 12) < 0) { 480 de_put(de); 481 goto out; 482 } 483 spin_lock(&proc_subdir_lock); 484 filp->f_pos++; 485 next = de->next; 486 de_put(de); 487 de = next; 488 } while (de); 489 spin_unlock(&proc_subdir_lock); 490 } 491 ret = 1; 492 out: unlock_kernel(); 493 return ret; 494 } 495 496 int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) 497 { 498 struct inode *inode = filp->f_path.dentry->d_inode; 499 500 return proc_readdir_de(PDE(inode), filp, dirent, filldir); 501 } 502 503 /* 504 * These are the generic /proc directory operations. They 505 * use the in-memory "struct proc_dir_entry" tree to parse 506 * the /proc directory. 507 */ 508 static const struct file_operations proc_dir_operations = { 509 .read = generic_read_dir, 510 .readdir = proc_readdir, 511 }; 512 513 /* 514 * proc directories can do almost nothing.. 515 */ 516 static const struct inode_operations proc_dir_inode_operations = { 517 .lookup = proc_lookup, 518 .getattr = proc_getattr, 519 .setattr = proc_notify_change, 520 }; 521 522 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 523 { 524 unsigned int i; 525 struct proc_dir_entry *tmp; 526 527 i = get_inode_number(); 528 if (i == 0) 529 return -EAGAIN; 530 dp->low_ino = i; 531 532 if (S_ISDIR(dp->mode)) { 533 if (dp->proc_iops == NULL) { 534 dp->proc_fops = &proc_dir_operations; 535 dp->proc_iops = &proc_dir_inode_operations; 536 } 537 dir->nlink++; 538 } else if (S_ISLNK(dp->mode)) { 539 if (dp->proc_iops == NULL) 540 dp->proc_iops = &proc_link_inode_operations; 541 } else if (S_ISREG(dp->mode)) { 542 if (dp->proc_fops == NULL) 543 dp->proc_fops = &proc_file_operations; 544 if (dp->proc_iops == NULL) 545 dp->proc_iops = &proc_file_inode_operations; 546 } 547 548 spin_lock(&proc_subdir_lock); 549 550 for (tmp = dir->subdir; tmp; tmp = tmp->next) 551 if (strcmp(tmp->name, dp->name) == 0) { 552 printk(KERN_WARNING "proc_dir_entry '%s' already " 553 "registered\n", dp->name); 554 dump_stack(); 555 break; 556 } 557 558 dp->next = dir->subdir; 559 dp->parent = dir; 560 dir->subdir = dp; 561 spin_unlock(&proc_subdir_lock); 562 563 return 0; 564 } 565 566 static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, 567 const char *name, 568 mode_t mode, 569 nlink_t nlink) 570 { 571 struct proc_dir_entry *ent = NULL; 572 const char *fn = name; 573 int len; 574 575 /* make sure name is valid */ 576 if (!name || !strlen(name)) goto out; 577 578 if (xlate_proc_name(name, parent, &fn) != 0) 579 goto out; 580 581 /* At this point there must not be any '/' characters beyond *fn */ 582 if (strchr(fn, '/')) 583 goto out; 584 585 len = strlen(fn); 586 587 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 588 if (!ent) goto out; 589 590 memset(ent, 0, sizeof(struct proc_dir_entry)); 591 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 592 ent->name = ((char *) ent) + sizeof(*ent); 593 ent->namelen = len; 594 ent->mode = mode; 595 ent->nlink = nlink; 596 atomic_set(&ent->count, 1); 597 ent->pde_users = 0; 598 spin_lock_init(&ent->pde_unload_lock); 599 ent->pde_unload_completion = NULL; 600 out: 601 return ent; 602 } 603 604 struct proc_dir_entry *proc_symlink(const char *name, 605 struct proc_dir_entry *parent, const char *dest) 606 { 607 struct proc_dir_entry *ent; 608 609 ent = __proc_create(&parent, name, 610 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 611 612 if (ent) { 613 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 614 if (ent->data) { 615 strcpy((char*)ent->data,dest); 616 if (proc_register(parent, ent) < 0) { 617 kfree(ent->data); 618 kfree(ent); 619 ent = NULL; 620 } 621 } else { 622 kfree(ent); 623 ent = NULL; 624 } 625 } 626 return ent; 627 } 628 629 struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 630 struct proc_dir_entry *parent) 631 { 632 struct proc_dir_entry *ent; 633 634 ent = __proc_create(&parent, name, S_IFDIR | mode, 2); 635 if (ent) { 636 if (proc_register(parent, ent) < 0) { 637 kfree(ent); 638 ent = NULL; 639 } 640 } 641 return ent; 642 } 643 644 struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, 645 struct proc_dir_entry *parent) 646 { 647 struct proc_dir_entry *ent; 648 649 ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2); 650 if (ent) { 651 ent->data = net; 652 if (proc_register(parent, ent) < 0) { 653 kfree(ent); 654 ent = NULL; 655 } 656 } 657 return ent; 658 } 659 EXPORT_SYMBOL_GPL(proc_net_mkdir); 660 661 struct proc_dir_entry *proc_mkdir(const char *name, 662 struct proc_dir_entry *parent) 663 { 664 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 665 } 666 667 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 668 struct proc_dir_entry *parent) 669 { 670 struct proc_dir_entry *ent; 671 nlink_t nlink; 672 673 if (S_ISDIR(mode)) { 674 if ((mode & S_IALLUGO) == 0) 675 mode |= S_IRUGO | S_IXUGO; 676 nlink = 2; 677 } else { 678 if ((mode & S_IFMT) == 0) 679 mode |= S_IFREG; 680 if ((mode & S_IALLUGO) == 0) 681 mode |= S_IRUGO; 682 nlink = 1; 683 } 684 685 ent = __proc_create(&parent, name, mode, nlink); 686 if (ent) { 687 if (proc_register(parent, ent) < 0) { 688 kfree(ent); 689 ent = NULL; 690 } 691 } 692 return ent; 693 } 694 695 struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, 696 struct proc_dir_entry *parent, 697 const struct file_operations *proc_fops, 698 void *data) 699 { 700 struct proc_dir_entry *pde; 701 nlink_t nlink; 702 703 if (S_ISDIR(mode)) { 704 if ((mode & S_IALLUGO) == 0) 705 mode |= S_IRUGO | S_IXUGO; 706 nlink = 2; 707 } else { 708 if ((mode & S_IFMT) == 0) 709 mode |= S_IFREG; 710 if ((mode & S_IALLUGO) == 0) 711 mode |= S_IRUGO; 712 nlink = 1; 713 } 714 715 pde = __proc_create(&parent, name, mode, nlink); 716 if (!pde) 717 goto out; 718 pde->proc_fops = proc_fops; 719 pde->data = data; 720 if (proc_register(parent, pde) < 0) 721 goto out_free; 722 return pde; 723 out_free: 724 kfree(pde); 725 out: 726 return NULL; 727 } 728 729 void free_proc_entry(struct proc_dir_entry *de) 730 { 731 unsigned int ino = de->low_ino; 732 733 if (ino < PROC_DYNAMIC_FIRST) 734 return; 735 736 release_inode_number(ino); 737 738 if (S_ISLNK(de->mode)) 739 kfree(de->data); 740 kfree(de); 741 } 742 743 /* 744 * Remove a /proc entry and free it if it's not currently in use. 745 */ 746 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 747 { 748 struct proc_dir_entry **p; 749 struct proc_dir_entry *de = NULL; 750 const char *fn = name; 751 int len; 752 753 if (xlate_proc_name(name, &parent, &fn) != 0) 754 return; 755 len = strlen(fn); 756 757 spin_lock(&proc_subdir_lock); 758 for (p = &parent->subdir; *p; p=&(*p)->next ) { 759 if (proc_match(len, fn, *p)) { 760 de = *p; 761 *p = de->next; 762 de->next = NULL; 763 break; 764 } 765 } 766 spin_unlock(&proc_subdir_lock); 767 if (!de) 768 return; 769 770 spin_lock(&de->pde_unload_lock); 771 /* 772 * Stop accepting new callers into module. If you're 773 * dynamically allocating ->proc_fops, save a pointer somewhere. 774 */ 775 de->proc_fops = NULL; 776 /* Wait until all existing callers into module are done. */ 777 if (de->pde_users > 0) { 778 DECLARE_COMPLETION_ONSTACK(c); 779 780 if (!de->pde_unload_completion) 781 de->pde_unload_completion = &c; 782 783 spin_unlock(&de->pde_unload_lock); 784 785 wait_for_completion(de->pde_unload_completion); 786 787 goto continue_removing; 788 } 789 spin_unlock(&de->pde_unload_lock); 790 791 continue_removing: 792 if (S_ISDIR(de->mode)) 793 parent->nlink--; 794 de->nlink = 0; 795 if (de->subdir) { 796 printk(KERN_WARNING "%s: removing non-empty directory " 797 "'%s/%s', leaking at least '%s'\n", __func__, 798 de->parent->name, de->name, de->subdir->name); 799 WARN_ON(1); 800 } 801 if (atomic_dec_and_test(&de->count)) 802 free_proc_entry(de); 803 } 804