1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/errno.h> 12 #include <linux/time.h> 13 #include <linux/proc_fs.h> 14 #include <linux/stat.h> 15 #include <linux/module.h> 16 #include <linux/mount.h> 17 #include <linux/smp_lock.h> 18 #include <linux/init.h> 19 #include <linux/idr.h> 20 #include <linux/namei.h> 21 #include <linux/bitops.h> 22 #include <asm/uaccess.h> 23 24 static ssize_t proc_file_read(struct file *file, char __user *buf, 25 size_t nbytes, loff_t *ppos); 26 static ssize_t proc_file_write(struct file *file, const char __user *buffer, 27 size_t count, loff_t *ppos); 28 static loff_t proc_file_lseek(struct file *, loff_t, int); 29 30 int proc_match(int len, const char *name, struct proc_dir_entry *de) 31 { 32 if (de->namelen != len) 33 return 0; 34 return !memcmp(name, de->name, len); 35 } 36 37 static struct file_operations proc_file_operations = { 38 .llseek = proc_file_lseek, 39 .read = proc_file_read, 40 .write = proc_file_write, 41 }; 42 43 /* buffer size is one page but our output routines use some slack for overruns */ 44 #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 45 46 static ssize_t 47 proc_file_read(struct file *file, char __user *buf, size_t nbytes, 48 loff_t *ppos) 49 { 50 struct inode * inode = file->f_dentry->d_inode; 51 char *page; 52 ssize_t retval=0; 53 int eof=0; 54 ssize_t n, count; 55 char *start; 56 struct proc_dir_entry * dp; 57 58 dp = PDE(inode); 59 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 60 return -ENOMEM; 61 62 while ((nbytes > 0) && !eof) { 63 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 64 65 start = NULL; 66 if (dp->get_info) { 67 /* Handle old net routines */ 68 n = dp->get_info(page, &start, *ppos, count); 69 if (n < count) 70 eof = 1; 71 } else if (dp->read_proc) { 72 /* 73 * How to be a proc read function 74 * ------------------------------ 75 * Prototype: 76 * int f(char *buffer, char **start, off_t offset, 77 * int count, int *peof, void *dat) 78 * 79 * Assume that the buffer is "count" bytes in size. 80 * 81 * If you know you have supplied all the data you 82 * have, set *peof. 83 * 84 * You have three ways to return data: 85 * 0) Leave *start = NULL. (This is the default.) 86 * Put the data of the requested offset at that 87 * offset within the buffer. Return the number (n) 88 * of bytes there are from the beginning of the 89 * buffer up to the last byte of data. If the 90 * number of supplied bytes (= n - offset) is 91 * greater than zero and you didn't signal eof 92 * and the reader is prepared to take more data 93 * you will be called again with the requested 94 * offset advanced by the number of bytes 95 * absorbed. This interface is useful for files 96 * no larger than the buffer. 97 * 1) Set *start = an unsigned long value less than 98 * the buffer address but greater than zero. 99 * Put the data of the requested offset at the 100 * beginning of the buffer. Return the number of 101 * bytes of data placed there. If this number is 102 * greater than zero and you didn't signal eof 103 * and the reader is prepared to take more data 104 * you will be called again with the requested 105 * offset advanced by *start. This interface is 106 * useful when you have a large file consisting 107 * of a series of blocks which you want to count 108 * and return as wholes. 109 * (Hack by Paul.Russell@rustcorp.com.au) 110 * 2) Set *start = an address within the buffer. 111 * Put the data of the requested offset at *start. 112 * Return the number of bytes of data placed there. 113 * If this number is greater than zero and you 114 * didn't signal eof and the reader is prepared to 115 * take more data you will be called again with the 116 * requested offset advanced by the number of bytes 117 * absorbed. 118 */ 119 n = dp->read_proc(page, &start, *ppos, 120 count, &eof, dp->data); 121 } else 122 break; 123 124 if (n == 0) /* end of file */ 125 break; 126 if (n < 0) { /* error */ 127 if (retval == 0) 128 retval = n; 129 break; 130 } 131 132 if (start == NULL) { 133 if (n > PAGE_SIZE) { 134 printk(KERN_ERR 135 "proc_file_read: Apparent buffer overflow!\n"); 136 n = PAGE_SIZE; 137 } 138 n -= *ppos; 139 if (n <= 0) 140 break; 141 if (n > count) 142 n = count; 143 start = page + *ppos; 144 } else if (start < page) { 145 if (n > PAGE_SIZE) { 146 printk(KERN_ERR 147 "proc_file_read: Apparent buffer overflow!\n"); 148 n = PAGE_SIZE; 149 } 150 if (n > count) { 151 /* 152 * Don't reduce n because doing so might 153 * cut off part of a data block. 154 */ 155 printk(KERN_WARNING 156 "proc_file_read: Read count exceeded\n"); 157 } 158 } else /* start >= page */ { 159 unsigned long startoff = (unsigned long)(start - page); 160 if (n > (PAGE_SIZE - startoff)) { 161 printk(KERN_ERR 162 "proc_file_read: Apparent buffer overflow!\n"); 163 n = PAGE_SIZE - startoff; 164 } 165 if (n > count) 166 n = count; 167 } 168 169 n -= copy_to_user(buf, start < page ? page : start, n); 170 if (n == 0) { 171 if (retval == 0) 172 retval = -EFAULT; 173 break; 174 } 175 176 *ppos += start < page ? (unsigned long)start : n; 177 nbytes -= n; 178 buf += n; 179 retval += n; 180 } 181 free_page((unsigned long) page); 182 return retval; 183 } 184 185 static ssize_t 186 proc_file_write(struct file *file, const char __user *buffer, 187 size_t count, loff_t *ppos) 188 { 189 struct inode *inode = file->f_dentry->d_inode; 190 struct proc_dir_entry * dp; 191 192 dp = PDE(inode); 193 194 if (!dp->write_proc) 195 return -EIO; 196 197 /* FIXME: does this routine need ppos? probably... */ 198 return dp->write_proc(file, buffer, count, dp->data); 199 } 200 201 202 static loff_t 203 proc_file_lseek(struct file *file, loff_t offset, int orig) 204 { 205 lock_kernel(); 206 207 switch (orig) { 208 case 0: 209 if (offset < 0) 210 goto out; 211 file->f_pos = offset; 212 unlock_kernel(); 213 return(file->f_pos); 214 case 1: 215 if (offset + file->f_pos < 0) 216 goto out; 217 file->f_pos += offset; 218 unlock_kernel(); 219 return(file->f_pos); 220 case 2: 221 goto out; 222 default: 223 goto out; 224 } 225 226 out: 227 unlock_kernel(); 228 return -EINVAL; 229 } 230 231 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 232 { 233 struct inode *inode = dentry->d_inode; 234 struct proc_dir_entry *de = PDE(inode); 235 int error; 236 237 error = inode_change_ok(inode, iattr); 238 if (error) 239 goto out; 240 241 error = inode_setattr(inode, iattr); 242 if (error) 243 goto out; 244 245 de->uid = inode->i_uid; 246 de->gid = inode->i_gid; 247 de->mode = inode->i_mode; 248 out: 249 return error; 250 } 251 252 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 253 struct kstat *stat) 254 { 255 struct inode *inode = dentry->d_inode; 256 struct proc_dir_entry *de = PROC_I(inode)->pde; 257 if (de && de->nlink) 258 inode->i_nlink = de->nlink; 259 260 generic_fillattr(inode, stat); 261 return 0; 262 } 263 264 static struct inode_operations proc_file_inode_operations = { 265 .setattr = proc_notify_change, 266 }; 267 268 /* 269 * This function parses a name such as "tty/driver/serial", and 270 * returns the struct proc_dir_entry for "/proc/tty/driver", and 271 * returns "serial" in residual. 272 */ 273 static int xlate_proc_name(const char *name, 274 struct proc_dir_entry **ret, const char **residual) 275 { 276 const char *cp = name, *next; 277 struct proc_dir_entry *de; 278 int len; 279 280 de = &proc_root; 281 while (1) { 282 next = strchr(cp, '/'); 283 if (!next) 284 break; 285 286 len = next - cp; 287 for (de = de->subdir; de ; de = de->next) { 288 if (proc_match(len, cp, de)) 289 break; 290 } 291 if (!de) 292 return -ENOENT; 293 cp += len + 1; 294 } 295 *residual = cp; 296 *ret = de; 297 return 0; 298 } 299 300 static DEFINE_IDR(proc_inum_idr); 301 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 302 303 #define PROC_DYNAMIC_FIRST 0xF0000000UL 304 305 /* 306 * Return an inode number between PROC_DYNAMIC_FIRST and 307 * 0xffffffff, or zero on failure. 308 */ 309 static unsigned int get_inode_number(void) 310 { 311 int i, inum = 0; 312 int error; 313 314 retry: 315 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 316 return 0; 317 318 spin_lock(&proc_inum_lock); 319 error = idr_get_new(&proc_inum_idr, NULL, &i); 320 spin_unlock(&proc_inum_lock); 321 if (error == -EAGAIN) 322 goto retry; 323 else if (error) 324 return 0; 325 326 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 327 328 /* inum will never be more than 0xf0ffffff, so no check 329 * for overflow. 330 */ 331 332 return inum; 333 } 334 335 static void release_inode_number(unsigned int inum) 336 { 337 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; 338 339 spin_lock(&proc_inum_lock); 340 idr_remove(&proc_inum_idr, id); 341 spin_unlock(&proc_inum_lock); 342 } 343 344 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 345 { 346 nd_set_link(nd, PDE(dentry->d_inode)->data); 347 return NULL; 348 } 349 350 static struct inode_operations proc_link_inode_operations = { 351 .readlink = generic_readlink, 352 .follow_link = proc_follow_link, 353 }; 354 355 /* 356 * As some entries in /proc are volatile, we want to 357 * get rid of unused dentries. This could be made 358 * smarter: we could keep a "volatile" flag in the 359 * inode to indicate which ones to keep. 360 */ 361 static int proc_delete_dentry(struct dentry * dentry) 362 { 363 return 1; 364 } 365 366 static struct dentry_operations proc_dentry_operations = 367 { 368 .d_delete = proc_delete_dentry, 369 }; 370 371 /* 372 * Don't create negative dentries here, return -ENOENT by hand 373 * instead. 374 */ 375 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 376 { 377 struct inode *inode = NULL; 378 struct proc_dir_entry * de; 379 int error = -ENOENT; 380 381 lock_kernel(); 382 de = PDE(dir); 383 if (de) { 384 for (de = de->subdir; de ; de = de->next) { 385 if (de->namelen != dentry->d_name.len) 386 continue; 387 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 388 unsigned int ino = de->low_ino; 389 390 error = -EINVAL; 391 inode = proc_get_inode(dir->i_sb, ino, de); 392 break; 393 } 394 } 395 } 396 unlock_kernel(); 397 398 if (inode) { 399 dentry->d_op = &proc_dentry_operations; 400 d_add(dentry, inode); 401 return NULL; 402 } 403 return ERR_PTR(error); 404 } 405 406 /* 407 * This returns non-zero if at EOF, so that the /proc 408 * root directory can use this and check if it should 409 * continue with the <pid> entries.. 410 * 411 * Note that the VFS-layer doesn't care about the return 412 * value of the readdir() call, as long as it's non-negative 413 * for success.. 414 */ 415 int proc_readdir(struct file * filp, 416 void * dirent, filldir_t filldir) 417 { 418 struct proc_dir_entry * de; 419 unsigned int ino; 420 int i; 421 struct inode *inode = filp->f_dentry->d_inode; 422 int ret = 0; 423 424 lock_kernel(); 425 426 ino = inode->i_ino; 427 de = PDE(inode); 428 if (!de) { 429 ret = -EINVAL; 430 goto out; 431 } 432 i = filp->f_pos; 433 switch (i) { 434 case 0: 435 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 436 goto out; 437 i++; 438 filp->f_pos++; 439 /* fall through */ 440 case 1: 441 if (filldir(dirent, "..", 2, i, 442 parent_ino(filp->f_dentry), 443 DT_DIR) < 0) 444 goto out; 445 i++; 446 filp->f_pos++; 447 /* fall through */ 448 default: 449 de = de->subdir; 450 i -= 2; 451 for (;;) { 452 if (!de) { 453 ret = 1; 454 goto out; 455 } 456 if (!i) 457 break; 458 de = de->next; 459 i--; 460 } 461 462 do { 463 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 464 de->low_ino, de->mode >> 12) < 0) 465 goto out; 466 filp->f_pos++; 467 de = de->next; 468 } while (de); 469 } 470 ret = 1; 471 out: unlock_kernel(); 472 return ret; 473 } 474 475 /* 476 * These are the generic /proc directory operations. They 477 * use the in-memory "struct proc_dir_entry" tree to parse 478 * the /proc directory. 479 */ 480 static struct file_operations proc_dir_operations = { 481 .read = generic_read_dir, 482 .readdir = proc_readdir, 483 }; 484 485 /* 486 * proc directories can do almost nothing.. 487 */ 488 static struct inode_operations proc_dir_inode_operations = { 489 .lookup = proc_lookup, 490 .getattr = proc_getattr, 491 .setattr = proc_notify_change, 492 }; 493 494 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 495 { 496 unsigned int i; 497 498 i = get_inode_number(); 499 if (i == 0) 500 return -EAGAIN; 501 dp->low_ino = i; 502 dp->next = dir->subdir; 503 dp->parent = dir; 504 dir->subdir = dp; 505 if (S_ISDIR(dp->mode)) { 506 if (dp->proc_iops == NULL) { 507 dp->proc_fops = &proc_dir_operations; 508 dp->proc_iops = &proc_dir_inode_operations; 509 } 510 dir->nlink++; 511 } else if (S_ISLNK(dp->mode)) { 512 if (dp->proc_iops == NULL) 513 dp->proc_iops = &proc_link_inode_operations; 514 } else if (S_ISREG(dp->mode)) { 515 if (dp->proc_fops == NULL) 516 dp->proc_fops = &proc_file_operations; 517 if (dp->proc_iops == NULL) 518 dp->proc_iops = &proc_file_inode_operations; 519 } 520 return 0; 521 } 522 523 /* 524 * Kill an inode that got unregistered.. 525 */ 526 static void proc_kill_inodes(struct proc_dir_entry *de) 527 { 528 struct list_head *p; 529 struct super_block *sb = proc_mnt->mnt_sb; 530 531 /* 532 * Actually it's a partial revoke(). 533 */ 534 file_list_lock(); 535 list_for_each(p, &sb->s_files) { 536 struct file * filp = list_entry(p, struct file, f_u.fu_list); 537 struct dentry * dentry = filp->f_dentry; 538 struct inode * inode; 539 struct file_operations *fops; 540 541 if (dentry->d_op != &proc_dentry_operations) 542 continue; 543 inode = dentry->d_inode; 544 if (PDE(inode) != de) 545 continue; 546 fops = filp->f_op; 547 filp->f_op = NULL; 548 fops_put(fops); 549 } 550 file_list_unlock(); 551 } 552 553 static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, 554 const char *name, 555 mode_t mode, 556 nlink_t nlink) 557 { 558 struct proc_dir_entry *ent = NULL; 559 const char *fn = name; 560 int len; 561 562 /* make sure name is valid */ 563 if (!name || !strlen(name)) goto out; 564 565 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 566 goto out; 567 568 /* At this point there must not be any '/' characters beyond *fn */ 569 if (strchr(fn, '/')) 570 goto out; 571 572 len = strlen(fn); 573 574 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 575 if (!ent) goto out; 576 577 memset(ent, 0, sizeof(struct proc_dir_entry)); 578 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 579 ent->name = ((char *) ent) + sizeof(*ent); 580 ent->namelen = len; 581 ent->mode = mode; 582 ent->nlink = nlink; 583 out: 584 return ent; 585 } 586 587 struct proc_dir_entry *proc_symlink(const char *name, 588 struct proc_dir_entry *parent, const char *dest) 589 { 590 struct proc_dir_entry *ent; 591 592 ent = proc_create(&parent,name, 593 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 594 595 if (ent) { 596 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 597 if (ent->data) { 598 strcpy((char*)ent->data,dest); 599 if (proc_register(parent, ent) < 0) { 600 kfree(ent->data); 601 kfree(ent); 602 ent = NULL; 603 } 604 } else { 605 kfree(ent); 606 ent = NULL; 607 } 608 } 609 return ent; 610 } 611 612 struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 613 struct proc_dir_entry *parent) 614 { 615 struct proc_dir_entry *ent; 616 617 ent = proc_create(&parent, name, S_IFDIR | mode, 2); 618 if (ent) { 619 ent->proc_fops = &proc_dir_operations; 620 ent->proc_iops = &proc_dir_inode_operations; 621 622 if (proc_register(parent, ent) < 0) { 623 kfree(ent); 624 ent = NULL; 625 } 626 } 627 return ent; 628 } 629 630 struct proc_dir_entry *proc_mkdir(const char *name, 631 struct proc_dir_entry *parent) 632 { 633 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 634 } 635 636 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 637 struct proc_dir_entry *parent) 638 { 639 struct proc_dir_entry *ent; 640 nlink_t nlink; 641 642 if (S_ISDIR(mode)) { 643 if ((mode & S_IALLUGO) == 0) 644 mode |= S_IRUGO | S_IXUGO; 645 nlink = 2; 646 } else { 647 if ((mode & S_IFMT) == 0) 648 mode |= S_IFREG; 649 if ((mode & S_IALLUGO) == 0) 650 mode |= S_IRUGO; 651 nlink = 1; 652 } 653 654 ent = proc_create(&parent,name,mode,nlink); 655 if (ent) { 656 if (S_ISDIR(mode)) { 657 ent->proc_fops = &proc_dir_operations; 658 ent->proc_iops = &proc_dir_inode_operations; 659 } 660 if (proc_register(parent, ent) < 0) { 661 kfree(ent); 662 ent = NULL; 663 } 664 } 665 return ent; 666 } 667 668 void free_proc_entry(struct proc_dir_entry *de) 669 { 670 unsigned int ino = de->low_ino; 671 672 if (ino < PROC_DYNAMIC_FIRST) 673 return; 674 675 release_inode_number(ino); 676 677 if (S_ISLNK(de->mode) && de->data) 678 kfree(de->data); 679 kfree(de); 680 } 681 682 /* 683 * Remove a /proc entry and free it if it's not currently in use. 684 * If it is in use, we set the 'deleted' flag. 685 */ 686 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 687 { 688 struct proc_dir_entry **p; 689 struct proc_dir_entry *de; 690 const char *fn = name; 691 int len; 692 693 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 694 goto out; 695 len = strlen(fn); 696 for (p = &parent->subdir; *p; p=&(*p)->next ) { 697 if (!proc_match(len, fn, *p)) 698 continue; 699 de = *p; 700 *p = de->next; 701 de->next = NULL; 702 if (S_ISDIR(de->mode)) 703 parent->nlink--; 704 proc_kill_inodes(de); 705 de->nlink = 0; 706 WARN_ON(de->subdir); 707 if (!atomic_read(&de->count)) 708 free_proc_entry(de); 709 else { 710 de->deleted = 1; 711 printk("remove_proc_entry: %s/%s busy, count=%d\n", 712 parent->name, de->name, atomic_read(&de->count)); 713 } 714 break; 715 } 716 out: 717 return; 718 } 719