1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/errno.h> 12 #include <linux/time.h> 13 #include <linux/proc_fs.h> 14 #include <linux/stat.h> 15 #include <linux/module.h> 16 #include <linux/mount.h> 17 #include <linux/smp_lock.h> 18 #include <linux/init.h> 19 #include <linux/idr.h> 20 #include <linux/namei.h> 21 #include <linux/bitops.h> 22 #include <asm/uaccess.h> 23 24 #include "internal.h" 25 26 static ssize_t proc_file_read(struct file *file, char __user *buf, 27 size_t nbytes, loff_t *ppos); 28 static ssize_t proc_file_write(struct file *file, const char __user *buffer, 29 size_t count, loff_t *ppos); 30 static loff_t proc_file_lseek(struct file *, loff_t, int); 31 32 int proc_match(int len, const char *name, struct proc_dir_entry *de) 33 { 34 if (de->namelen != len) 35 return 0; 36 return !memcmp(name, de->name, len); 37 } 38 39 static struct file_operations proc_file_operations = { 40 .llseek = proc_file_lseek, 41 .read = proc_file_read, 42 .write = proc_file_write, 43 }; 44 45 /* buffer size is one page but our output routines use some slack for overruns */ 46 #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 47 48 static ssize_t 49 proc_file_read(struct file *file, char __user *buf, size_t nbytes, 50 loff_t *ppos) 51 { 52 struct inode * inode = file->f_dentry->d_inode; 53 char *page; 54 ssize_t retval=0; 55 int eof=0; 56 ssize_t n, count; 57 char *start; 58 struct proc_dir_entry * dp; 59 unsigned long long pos; 60 61 /* 62 * Gaah, please just use "seq_file" instead. The legacy /proc 63 * interfaces cut loff_t down to off_t for reads, and ignore 64 * the offset entirely for writes.. 65 */ 66 pos = *ppos; 67 if (pos > MAX_NON_LFS) 68 return 0; 69 if (nbytes > MAX_NON_LFS - pos) 70 nbytes = MAX_NON_LFS - pos; 71 72 dp = PDE(inode); 73 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 74 return -ENOMEM; 75 76 while ((nbytes > 0) && !eof) { 77 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 78 79 start = NULL; 80 if (dp->get_info) { 81 /* Handle old net routines */ 82 n = dp->get_info(page, &start, *ppos, count); 83 if (n < count) 84 eof = 1; 85 } else if (dp->read_proc) { 86 /* 87 * How to be a proc read function 88 * ------------------------------ 89 * Prototype: 90 * int f(char *buffer, char **start, off_t offset, 91 * int count, int *peof, void *dat) 92 * 93 * Assume that the buffer is "count" bytes in size. 94 * 95 * If you know you have supplied all the data you 96 * have, set *peof. 97 * 98 * You have three ways to return data: 99 * 0) Leave *start = NULL. (This is the default.) 100 * Put the data of the requested offset at that 101 * offset within the buffer. Return the number (n) 102 * of bytes there are from the beginning of the 103 * buffer up to the last byte of data. If the 104 * number of supplied bytes (= n - offset) is 105 * greater than zero and you didn't signal eof 106 * and the reader is prepared to take more data 107 * you will be called again with the requested 108 * offset advanced by the number of bytes 109 * absorbed. This interface is useful for files 110 * no larger than the buffer. 111 * 1) Set *start = an unsigned long value less than 112 * the buffer address but greater than zero. 113 * Put the data of the requested offset at the 114 * beginning of the buffer. Return the number of 115 * bytes of data placed there. If this number is 116 * greater than zero and you didn't signal eof 117 * and the reader is prepared to take more data 118 * you will be called again with the requested 119 * offset advanced by *start. This interface is 120 * useful when you have a large file consisting 121 * of a series of blocks which you want to count 122 * and return as wholes. 123 * (Hack by Paul.Russell@rustcorp.com.au) 124 * 2) Set *start = an address within the buffer. 125 * Put the data of the requested offset at *start. 126 * Return the number of bytes of data placed there. 127 * If this number is greater than zero and you 128 * didn't signal eof and the reader is prepared to 129 * take more data you will be called again with the 130 * requested offset advanced by the number of bytes 131 * absorbed. 132 */ 133 n = dp->read_proc(page, &start, *ppos, 134 count, &eof, dp->data); 135 } else 136 break; 137 138 if (n == 0) /* end of file */ 139 break; 140 if (n < 0) { /* error */ 141 if (retval == 0) 142 retval = n; 143 break; 144 } 145 146 if (start == NULL) { 147 if (n > PAGE_SIZE) { 148 printk(KERN_ERR 149 "proc_file_read: Apparent buffer overflow!\n"); 150 n = PAGE_SIZE; 151 } 152 n -= *ppos; 153 if (n <= 0) 154 break; 155 if (n > count) 156 n = count; 157 start = page + *ppos; 158 } else if (start < page) { 159 if (n > PAGE_SIZE) { 160 printk(KERN_ERR 161 "proc_file_read: Apparent buffer overflow!\n"); 162 n = PAGE_SIZE; 163 } 164 if (n > count) { 165 /* 166 * Don't reduce n because doing so might 167 * cut off part of a data block. 168 */ 169 printk(KERN_WARNING 170 "proc_file_read: Read count exceeded\n"); 171 } 172 } else /* start >= page */ { 173 unsigned long startoff = (unsigned long)(start - page); 174 if (n > (PAGE_SIZE - startoff)) { 175 printk(KERN_ERR 176 "proc_file_read: Apparent buffer overflow!\n"); 177 n = PAGE_SIZE - startoff; 178 } 179 if (n > count) 180 n = count; 181 } 182 183 n -= copy_to_user(buf, start < page ? page : start, n); 184 if (n == 0) { 185 if (retval == 0) 186 retval = -EFAULT; 187 break; 188 } 189 190 *ppos += start < page ? (unsigned long)start : n; 191 nbytes -= n; 192 buf += n; 193 retval += n; 194 } 195 free_page((unsigned long) page); 196 return retval; 197 } 198 199 static ssize_t 200 proc_file_write(struct file *file, const char __user *buffer, 201 size_t count, loff_t *ppos) 202 { 203 struct inode *inode = file->f_dentry->d_inode; 204 struct proc_dir_entry * dp; 205 206 dp = PDE(inode); 207 208 if (!dp->write_proc) 209 return -EIO; 210 211 /* FIXME: does this routine need ppos? probably... */ 212 return dp->write_proc(file, buffer, count, dp->data); 213 } 214 215 216 static loff_t 217 proc_file_lseek(struct file *file, loff_t offset, int orig) 218 { 219 loff_t retval = -EINVAL; 220 switch (orig) { 221 case 1: 222 offset += file->f_pos; 223 /* fallthrough */ 224 case 0: 225 if (offset < 0 || offset > MAX_NON_LFS) 226 break; 227 file->f_pos = retval = offset; 228 } 229 return retval; 230 } 231 232 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 233 { 234 struct inode *inode = dentry->d_inode; 235 struct proc_dir_entry *de = PDE(inode); 236 int error; 237 238 error = inode_change_ok(inode, iattr); 239 if (error) 240 goto out; 241 242 error = inode_setattr(inode, iattr); 243 if (error) 244 goto out; 245 246 de->uid = inode->i_uid; 247 de->gid = inode->i_gid; 248 de->mode = inode->i_mode; 249 out: 250 return error; 251 } 252 253 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 254 struct kstat *stat) 255 { 256 struct inode *inode = dentry->d_inode; 257 struct proc_dir_entry *de = PROC_I(inode)->pde; 258 if (de && de->nlink) 259 inode->i_nlink = de->nlink; 260 261 generic_fillattr(inode, stat); 262 return 0; 263 } 264 265 static struct inode_operations proc_file_inode_operations = { 266 .setattr = proc_notify_change, 267 }; 268 269 /* 270 * This function parses a name such as "tty/driver/serial", and 271 * returns the struct proc_dir_entry for "/proc/tty/driver", and 272 * returns "serial" in residual. 273 */ 274 static int xlate_proc_name(const char *name, 275 struct proc_dir_entry **ret, const char **residual) 276 { 277 const char *cp = name, *next; 278 struct proc_dir_entry *de; 279 int len; 280 281 de = &proc_root; 282 while (1) { 283 next = strchr(cp, '/'); 284 if (!next) 285 break; 286 287 len = next - cp; 288 for (de = de->subdir; de ; de = de->next) { 289 if (proc_match(len, cp, de)) 290 break; 291 } 292 if (!de) 293 return -ENOENT; 294 cp += len + 1; 295 } 296 *residual = cp; 297 *ret = de; 298 return 0; 299 } 300 301 static DEFINE_IDR(proc_inum_idr); 302 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 303 304 #define PROC_DYNAMIC_FIRST 0xF0000000UL 305 306 /* 307 * Return an inode number between PROC_DYNAMIC_FIRST and 308 * 0xffffffff, or zero on failure. 309 */ 310 static unsigned int get_inode_number(void) 311 { 312 int i, inum = 0; 313 int error; 314 315 retry: 316 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 317 return 0; 318 319 spin_lock(&proc_inum_lock); 320 error = idr_get_new(&proc_inum_idr, NULL, &i); 321 spin_unlock(&proc_inum_lock); 322 if (error == -EAGAIN) 323 goto retry; 324 else if (error) 325 return 0; 326 327 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 328 329 /* inum will never be more than 0xf0ffffff, so no check 330 * for overflow. 331 */ 332 333 return inum; 334 } 335 336 static void release_inode_number(unsigned int inum) 337 { 338 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; 339 340 spin_lock(&proc_inum_lock); 341 idr_remove(&proc_inum_idr, id); 342 spin_unlock(&proc_inum_lock); 343 } 344 345 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 346 { 347 nd_set_link(nd, PDE(dentry->d_inode)->data); 348 return NULL; 349 } 350 351 static struct inode_operations proc_link_inode_operations = { 352 .readlink = generic_readlink, 353 .follow_link = proc_follow_link, 354 }; 355 356 /* 357 * As some entries in /proc are volatile, we want to 358 * get rid of unused dentries. This could be made 359 * smarter: we could keep a "volatile" flag in the 360 * inode to indicate which ones to keep. 361 */ 362 static int proc_delete_dentry(struct dentry * dentry) 363 { 364 return 1; 365 } 366 367 static struct dentry_operations proc_dentry_operations = 368 { 369 .d_delete = proc_delete_dentry, 370 }; 371 372 /* 373 * Don't create negative dentries here, return -ENOENT by hand 374 * instead. 375 */ 376 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 377 { 378 struct inode *inode = NULL; 379 struct proc_dir_entry * de; 380 int error = -ENOENT; 381 382 lock_kernel(); 383 de = PDE(dir); 384 if (de) { 385 for (de = de->subdir; de ; de = de->next) { 386 if (de->namelen != dentry->d_name.len) 387 continue; 388 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 389 unsigned int ino = de->low_ino; 390 391 error = -EINVAL; 392 inode = proc_get_inode(dir->i_sb, ino, de); 393 break; 394 } 395 } 396 } 397 unlock_kernel(); 398 399 if (inode) { 400 dentry->d_op = &proc_dentry_operations; 401 d_add(dentry, inode); 402 return NULL; 403 } 404 return ERR_PTR(error); 405 } 406 407 /* 408 * This returns non-zero if at EOF, so that the /proc 409 * root directory can use this and check if it should 410 * continue with the <pid> entries.. 411 * 412 * Note that the VFS-layer doesn't care about the return 413 * value of the readdir() call, as long as it's non-negative 414 * for success.. 415 */ 416 int proc_readdir(struct file * filp, 417 void * dirent, filldir_t filldir) 418 { 419 struct proc_dir_entry * de; 420 unsigned int ino; 421 int i; 422 struct inode *inode = filp->f_dentry->d_inode; 423 int ret = 0; 424 425 lock_kernel(); 426 427 ino = inode->i_ino; 428 de = PDE(inode); 429 if (!de) { 430 ret = -EINVAL; 431 goto out; 432 } 433 i = filp->f_pos; 434 switch (i) { 435 case 0: 436 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 437 goto out; 438 i++; 439 filp->f_pos++; 440 /* fall through */ 441 case 1: 442 if (filldir(dirent, "..", 2, i, 443 parent_ino(filp->f_dentry), 444 DT_DIR) < 0) 445 goto out; 446 i++; 447 filp->f_pos++; 448 /* fall through */ 449 default: 450 de = de->subdir; 451 i -= 2; 452 for (;;) { 453 if (!de) { 454 ret = 1; 455 goto out; 456 } 457 if (!i) 458 break; 459 de = de->next; 460 i--; 461 } 462 463 do { 464 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 465 de->low_ino, de->mode >> 12) < 0) 466 goto out; 467 filp->f_pos++; 468 de = de->next; 469 } while (de); 470 } 471 ret = 1; 472 out: unlock_kernel(); 473 return ret; 474 } 475 476 /* 477 * These are the generic /proc directory operations. They 478 * use the in-memory "struct proc_dir_entry" tree to parse 479 * the /proc directory. 480 */ 481 static struct file_operations proc_dir_operations = { 482 .read = generic_read_dir, 483 .readdir = proc_readdir, 484 }; 485 486 /* 487 * proc directories can do almost nothing.. 488 */ 489 static struct inode_operations proc_dir_inode_operations = { 490 .lookup = proc_lookup, 491 .getattr = proc_getattr, 492 .setattr = proc_notify_change, 493 }; 494 495 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 496 { 497 unsigned int i; 498 499 i = get_inode_number(); 500 if (i == 0) 501 return -EAGAIN; 502 dp->low_ino = i; 503 dp->next = dir->subdir; 504 dp->parent = dir; 505 dir->subdir = dp; 506 if (S_ISDIR(dp->mode)) { 507 if (dp->proc_iops == NULL) { 508 dp->proc_fops = &proc_dir_operations; 509 dp->proc_iops = &proc_dir_inode_operations; 510 } 511 dir->nlink++; 512 } else if (S_ISLNK(dp->mode)) { 513 if (dp->proc_iops == NULL) 514 dp->proc_iops = &proc_link_inode_operations; 515 } else if (S_ISREG(dp->mode)) { 516 if (dp->proc_fops == NULL) 517 dp->proc_fops = &proc_file_operations; 518 if (dp->proc_iops == NULL) 519 dp->proc_iops = &proc_file_inode_operations; 520 } 521 return 0; 522 } 523 524 /* 525 * Kill an inode that got unregistered.. 526 */ 527 static void proc_kill_inodes(struct proc_dir_entry *de) 528 { 529 struct list_head *p; 530 struct super_block *sb = proc_mnt->mnt_sb; 531 532 /* 533 * Actually it's a partial revoke(). 534 */ 535 file_list_lock(); 536 list_for_each(p, &sb->s_files) { 537 struct file * filp = list_entry(p, struct file, f_u.fu_list); 538 struct dentry * dentry = filp->f_dentry; 539 struct inode * inode; 540 struct file_operations *fops; 541 542 if (dentry->d_op != &proc_dentry_operations) 543 continue; 544 inode = dentry->d_inode; 545 if (PDE(inode) != de) 546 continue; 547 fops = filp->f_op; 548 filp->f_op = NULL; 549 fops_put(fops); 550 } 551 file_list_unlock(); 552 } 553 554 static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, 555 const char *name, 556 mode_t mode, 557 nlink_t nlink) 558 { 559 struct proc_dir_entry *ent = NULL; 560 const char *fn = name; 561 int len; 562 563 /* make sure name is valid */ 564 if (!name || !strlen(name)) goto out; 565 566 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 567 goto out; 568 569 /* At this point there must not be any '/' characters beyond *fn */ 570 if (strchr(fn, '/')) 571 goto out; 572 573 len = strlen(fn); 574 575 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 576 if (!ent) goto out; 577 578 memset(ent, 0, sizeof(struct proc_dir_entry)); 579 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 580 ent->name = ((char *) ent) + sizeof(*ent); 581 ent->namelen = len; 582 ent->mode = mode; 583 ent->nlink = nlink; 584 out: 585 return ent; 586 } 587 588 struct proc_dir_entry *proc_symlink(const char *name, 589 struct proc_dir_entry *parent, const char *dest) 590 { 591 struct proc_dir_entry *ent; 592 593 ent = proc_create(&parent,name, 594 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 595 596 if (ent) { 597 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 598 if (ent->data) { 599 strcpy((char*)ent->data,dest); 600 if (proc_register(parent, ent) < 0) { 601 kfree(ent->data); 602 kfree(ent); 603 ent = NULL; 604 } 605 } else { 606 kfree(ent); 607 ent = NULL; 608 } 609 } 610 return ent; 611 } 612 613 struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 614 struct proc_dir_entry *parent) 615 { 616 struct proc_dir_entry *ent; 617 618 ent = proc_create(&parent, name, S_IFDIR | mode, 2); 619 if (ent) { 620 ent->proc_fops = &proc_dir_operations; 621 ent->proc_iops = &proc_dir_inode_operations; 622 623 if (proc_register(parent, ent) < 0) { 624 kfree(ent); 625 ent = NULL; 626 } 627 } 628 return ent; 629 } 630 631 struct proc_dir_entry *proc_mkdir(const char *name, 632 struct proc_dir_entry *parent) 633 { 634 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 635 } 636 637 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 638 struct proc_dir_entry *parent) 639 { 640 struct proc_dir_entry *ent; 641 nlink_t nlink; 642 643 if (S_ISDIR(mode)) { 644 if ((mode & S_IALLUGO) == 0) 645 mode |= S_IRUGO | S_IXUGO; 646 nlink = 2; 647 } else { 648 if ((mode & S_IFMT) == 0) 649 mode |= S_IFREG; 650 if ((mode & S_IALLUGO) == 0) 651 mode |= S_IRUGO; 652 nlink = 1; 653 } 654 655 ent = proc_create(&parent,name,mode,nlink); 656 if (ent) { 657 if (S_ISDIR(mode)) { 658 ent->proc_fops = &proc_dir_operations; 659 ent->proc_iops = &proc_dir_inode_operations; 660 } 661 if (proc_register(parent, ent) < 0) { 662 kfree(ent); 663 ent = NULL; 664 } 665 } 666 return ent; 667 } 668 669 void free_proc_entry(struct proc_dir_entry *de) 670 { 671 unsigned int ino = de->low_ino; 672 673 if (ino < PROC_DYNAMIC_FIRST) 674 return; 675 676 release_inode_number(ino); 677 678 if (S_ISLNK(de->mode) && de->data) 679 kfree(de->data); 680 kfree(de); 681 } 682 683 /* 684 * Remove a /proc entry and free it if it's not currently in use. 685 * If it is in use, we set the 'deleted' flag. 686 */ 687 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 688 { 689 struct proc_dir_entry **p; 690 struct proc_dir_entry *de; 691 const char *fn = name; 692 int len; 693 694 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 695 goto out; 696 len = strlen(fn); 697 for (p = &parent->subdir; *p; p=&(*p)->next ) { 698 if (!proc_match(len, fn, *p)) 699 continue; 700 de = *p; 701 *p = de->next; 702 de->next = NULL; 703 if (S_ISDIR(de->mode)) 704 parent->nlink--; 705 proc_kill_inodes(de); 706 de->nlink = 0; 707 WARN_ON(de->subdir); 708 if (!atomic_read(&de->count)) 709 free_proc_entry(de); 710 else { 711 de->deleted = 1; 712 printk("remove_proc_entry: %s/%s busy, count=%d\n", 713 parent->name, de->name, atomic_read(&de->count)); 714 } 715 break; 716 } 717 out: 718 return; 719 } 720