1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/errno.h> 12 #include <linux/time.h> 13 #include <linux/proc_fs.h> 14 #include <linux/stat.h> 15 #include <linux/module.h> 16 #include <linux/mount.h> 17 #include <linux/smp_lock.h> 18 #include <linux/init.h> 19 #include <linux/idr.h> 20 #include <linux/namei.h> 21 #include <linux/bitops.h> 22 #include <asm/uaccess.h> 23 24 static ssize_t proc_file_read(struct file *file, char __user *buf, 25 size_t nbytes, loff_t *ppos); 26 static ssize_t proc_file_write(struct file *file, const char __user *buffer, 27 size_t count, loff_t *ppos); 28 static loff_t proc_file_lseek(struct file *, loff_t, int); 29 30 int proc_match(int len, const char *name, struct proc_dir_entry *de) 31 { 32 if (de->namelen != len) 33 return 0; 34 return !memcmp(name, de->name, len); 35 } 36 37 static struct file_operations proc_file_operations = { 38 .llseek = proc_file_lseek, 39 .read = proc_file_read, 40 .write = proc_file_write, 41 }; 42 43 /* buffer size is one page but our output routines use some slack for overruns */ 44 #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 45 46 static ssize_t 47 proc_file_read(struct file *file, char __user *buf, size_t nbytes, 48 loff_t *ppos) 49 { 50 struct inode * inode = file->f_dentry->d_inode; 51 char *page; 52 ssize_t retval=0; 53 int eof=0; 54 ssize_t n, count; 55 char *start; 56 struct proc_dir_entry * dp; 57 58 dp = PDE(inode); 59 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 60 return -ENOMEM; 61 62 while ((nbytes > 0) && !eof) { 63 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 64 65 start = NULL; 66 if (dp->get_info) { 67 /* Handle old net routines */ 68 n = dp->get_info(page, &start, *ppos, count); 69 if (n < count) 70 eof = 1; 71 } else if (dp->read_proc) { 72 /* 73 * How to be a proc read function 74 * ------------------------------ 75 * Prototype: 76 * int f(char *buffer, char **start, off_t offset, 77 * int count, int *peof, void *dat) 78 * 79 * Assume that the buffer is "count" bytes in size. 80 * 81 * If you know you have supplied all the data you 82 * have, set *peof. 83 * 84 * You have three ways to return data: 85 * 0) Leave *start = NULL. (This is the default.) 86 * Put the data of the requested offset at that 87 * offset within the buffer. Return the number (n) 88 * of bytes there are from the beginning of the 89 * buffer up to the last byte of data. If the 90 * number of supplied bytes (= n - offset) is 91 * greater than zero and you didn't signal eof 92 * and the reader is prepared to take more data 93 * you will be called again with the requested 94 * offset advanced by the number of bytes 95 * absorbed. This interface is useful for files 96 * no larger than the buffer. 97 * 1) Set *start = an unsigned long value less than 98 * the buffer address but greater than zero. 99 * Put the data of the requested offset at the 100 * beginning of the buffer. Return the number of 101 * bytes of data placed there. If this number is 102 * greater than zero and you didn't signal eof 103 * and the reader is prepared to take more data 104 * you will be called again with the requested 105 * offset advanced by *start. This interface is 106 * useful when you have a large file consisting 107 * of a series of blocks which you want to count 108 * and return as wholes. 109 * (Hack by Paul.Russell@rustcorp.com.au) 110 * 2) Set *start = an address within the buffer. 111 * Put the data of the requested offset at *start. 112 * Return the number of bytes of data placed there. 113 * If this number is greater than zero and you 114 * didn't signal eof and the reader is prepared to 115 * take more data you will be called again with the 116 * requested offset advanced by the number of bytes 117 * absorbed. 118 */ 119 n = dp->read_proc(page, &start, *ppos, 120 count, &eof, dp->data); 121 } else 122 break; 123 124 if (n == 0) /* end of file */ 125 break; 126 if (n < 0) { /* error */ 127 if (retval == 0) 128 retval = n; 129 break; 130 } 131 132 if (start == NULL) { 133 if (n > PAGE_SIZE) { 134 printk(KERN_ERR 135 "proc_file_read: Apparent buffer overflow!\n"); 136 n = PAGE_SIZE; 137 } 138 n -= *ppos; 139 if (n <= 0) 140 break; 141 if (n > count) 142 n = count; 143 start = page + *ppos; 144 } else if (start < page) { 145 if (n > PAGE_SIZE) { 146 printk(KERN_ERR 147 "proc_file_read: Apparent buffer overflow!\n"); 148 n = PAGE_SIZE; 149 } 150 if (n > count) { 151 /* 152 * Don't reduce n because doing so might 153 * cut off part of a data block. 154 */ 155 printk(KERN_WARNING 156 "proc_file_read: Read count exceeded\n"); 157 } 158 } else /* start >= page */ { 159 unsigned long startoff = (unsigned long)(start - page); 160 if (n > (PAGE_SIZE - startoff)) { 161 printk(KERN_ERR 162 "proc_file_read: Apparent buffer overflow!\n"); 163 n = PAGE_SIZE - startoff; 164 } 165 if (n > count) 166 n = count; 167 } 168 169 n -= copy_to_user(buf, start < page ? page : start, n); 170 if (n == 0) { 171 if (retval == 0) 172 retval = -EFAULT; 173 break; 174 } 175 176 *ppos += start < page ? (unsigned long)start : n; 177 nbytes -= n; 178 buf += n; 179 retval += n; 180 } 181 free_page((unsigned long) page); 182 return retval; 183 } 184 185 static ssize_t 186 proc_file_write(struct file *file, const char __user *buffer, 187 size_t count, loff_t *ppos) 188 { 189 struct inode *inode = file->f_dentry->d_inode; 190 struct proc_dir_entry * dp; 191 192 dp = PDE(inode); 193 194 if (!dp->write_proc) 195 return -EIO; 196 197 /* FIXME: does this routine need ppos? probably... */ 198 return dp->write_proc(file, buffer, count, dp->data); 199 } 200 201 202 static loff_t 203 proc_file_lseek(struct file *file, loff_t offset, int orig) 204 { 205 lock_kernel(); 206 207 switch (orig) { 208 case 0: 209 if (offset < 0) 210 goto out; 211 file->f_pos = offset; 212 unlock_kernel(); 213 return(file->f_pos); 214 case 1: 215 if (offset + file->f_pos < 0) 216 goto out; 217 file->f_pos += offset; 218 unlock_kernel(); 219 return(file->f_pos); 220 case 2: 221 goto out; 222 default: 223 goto out; 224 } 225 226 out: 227 unlock_kernel(); 228 return -EINVAL; 229 } 230 231 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 232 { 233 struct inode *inode = dentry->d_inode; 234 struct proc_dir_entry *de = PDE(inode); 235 int error; 236 237 error = inode_change_ok(inode, iattr); 238 if (error) 239 goto out; 240 241 error = inode_setattr(inode, iattr); 242 if (error) 243 goto out; 244 245 de->uid = inode->i_uid; 246 de->gid = inode->i_gid; 247 de->mode = inode->i_mode; 248 out: 249 return error; 250 } 251 252 static struct inode_operations proc_file_inode_operations = { 253 .setattr = proc_notify_change, 254 }; 255 256 /* 257 * This function parses a name such as "tty/driver/serial", and 258 * returns the struct proc_dir_entry for "/proc/tty/driver", and 259 * returns "serial" in residual. 260 */ 261 static int xlate_proc_name(const char *name, 262 struct proc_dir_entry **ret, const char **residual) 263 { 264 const char *cp = name, *next; 265 struct proc_dir_entry *de; 266 int len; 267 268 de = &proc_root; 269 while (1) { 270 next = strchr(cp, '/'); 271 if (!next) 272 break; 273 274 len = next - cp; 275 for (de = de->subdir; de ; de = de->next) { 276 if (proc_match(len, cp, de)) 277 break; 278 } 279 if (!de) 280 return -ENOENT; 281 cp += len + 1; 282 } 283 *residual = cp; 284 *ret = de; 285 return 0; 286 } 287 288 static DEFINE_IDR(proc_inum_idr); 289 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 290 291 #define PROC_DYNAMIC_FIRST 0xF0000000UL 292 293 /* 294 * Return an inode number between PROC_DYNAMIC_FIRST and 295 * 0xffffffff, or zero on failure. 296 */ 297 static unsigned int get_inode_number(void) 298 { 299 int i, inum = 0; 300 int error; 301 302 retry: 303 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 304 return 0; 305 306 spin_lock(&proc_inum_lock); 307 error = idr_get_new(&proc_inum_idr, NULL, &i); 308 spin_unlock(&proc_inum_lock); 309 if (error == -EAGAIN) 310 goto retry; 311 else if (error) 312 return 0; 313 314 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 315 316 /* inum will never be more than 0xf0ffffff, so no check 317 * for overflow. 318 */ 319 320 return inum; 321 } 322 323 static void release_inode_number(unsigned int inum) 324 { 325 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; 326 327 spin_lock(&proc_inum_lock); 328 idr_remove(&proc_inum_idr, id); 329 spin_unlock(&proc_inum_lock); 330 } 331 332 static int proc_follow_link(struct dentry *dentry, struct nameidata *nd) 333 { 334 nd_set_link(nd, PDE(dentry->d_inode)->data); 335 return 0; 336 } 337 338 static struct inode_operations proc_link_inode_operations = { 339 .readlink = generic_readlink, 340 .follow_link = proc_follow_link, 341 }; 342 343 /* 344 * As some entries in /proc are volatile, we want to 345 * get rid of unused dentries. This could be made 346 * smarter: we could keep a "volatile" flag in the 347 * inode to indicate which ones to keep. 348 */ 349 static int proc_delete_dentry(struct dentry * dentry) 350 { 351 return 1; 352 } 353 354 static struct dentry_operations proc_dentry_operations = 355 { 356 .d_delete = proc_delete_dentry, 357 }; 358 359 /* 360 * Don't create negative dentries here, return -ENOENT by hand 361 * instead. 362 */ 363 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 364 { 365 struct inode *inode = NULL; 366 struct proc_dir_entry * de; 367 int error = -ENOENT; 368 369 lock_kernel(); 370 de = PDE(dir); 371 if (de) { 372 for (de = de->subdir; de ; de = de->next) { 373 if (de->namelen != dentry->d_name.len) 374 continue; 375 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 376 unsigned int ino = de->low_ino; 377 378 error = -EINVAL; 379 inode = proc_get_inode(dir->i_sb, ino, de); 380 break; 381 } 382 } 383 } 384 unlock_kernel(); 385 386 if (inode) { 387 dentry->d_op = &proc_dentry_operations; 388 d_add(dentry, inode); 389 return NULL; 390 } 391 return ERR_PTR(error); 392 } 393 394 /* 395 * This returns non-zero if at EOF, so that the /proc 396 * root directory can use this and check if it should 397 * continue with the <pid> entries.. 398 * 399 * Note that the VFS-layer doesn't care about the return 400 * value of the readdir() call, as long as it's non-negative 401 * for success.. 402 */ 403 int proc_readdir(struct file * filp, 404 void * dirent, filldir_t filldir) 405 { 406 struct proc_dir_entry * de; 407 unsigned int ino; 408 int i; 409 struct inode *inode = filp->f_dentry->d_inode; 410 int ret = 0; 411 412 lock_kernel(); 413 414 ino = inode->i_ino; 415 de = PDE(inode); 416 if (!de) { 417 ret = -EINVAL; 418 goto out; 419 } 420 i = filp->f_pos; 421 switch (i) { 422 case 0: 423 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 424 goto out; 425 i++; 426 filp->f_pos++; 427 /* fall through */ 428 case 1: 429 if (filldir(dirent, "..", 2, i, 430 parent_ino(filp->f_dentry), 431 DT_DIR) < 0) 432 goto out; 433 i++; 434 filp->f_pos++; 435 /* fall through */ 436 default: 437 de = de->subdir; 438 i -= 2; 439 for (;;) { 440 if (!de) { 441 ret = 1; 442 goto out; 443 } 444 if (!i) 445 break; 446 de = de->next; 447 i--; 448 } 449 450 do { 451 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 452 de->low_ino, de->mode >> 12) < 0) 453 goto out; 454 filp->f_pos++; 455 de = de->next; 456 } while (de); 457 } 458 ret = 1; 459 out: unlock_kernel(); 460 return ret; 461 } 462 463 /* 464 * These are the generic /proc directory operations. They 465 * use the in-memory "struct proc_dir_entry" tree to parse 466 * the /proc directory. 467 */ 468 static struct file_operations proc_dir_operations = { 469 .read = generic_read_dir, 470 .readdir = proc_readdir, 471 }; 472 473 /* 474 * proc directories can do almost nothing.. 475 */ 476 static struct inode_operations proc_dir_inode_operations = { 477 .lookup = proc_lookup, 478 .setattr = proc_notify_change, 479 }; 480 481 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 482 { 483 unsigned int i; 484 485 i = get_inode_number(); 486 if (i == 0) 487 return -EAGAIN; 488 dp->low_ino = i; 489 dp->next = dir->subdir; 490 dp->parent = dir; 491 dir->subdir = dp; 492 if (S_ISDIR(dp->mode)) { 493 if (dp->proc_iops == NULL) { 494 dp->proc_fops = &proc_dir_operations; 495 dp->proc_iops = &proc_dir_inode_operations; 496 } 497 dir->nlink++; 498 } else if (S_ISLNK(dp->mode)) { 499 if (dp->proc_iops == NULL) 500 dp->proc_iops = &proc_link_inode_operations; 501 } else if (S_ISREG(dp->mode)) { 502 if (dp->proc_fops == NULL) 503 dp->proc_fops = &proc_file_operations; 504 if (dp->proc_iops == NULL) 505 dp->proc_iops = &proc_file_inode_operations; 506 } 507 return 0; 508 } 509 510 /* 511 * Kill an inode that got unregistered.. 512 */ 513 static void proc_kill_inodes(struct proc_dir_entry *de) 514 { 515 struct list_head *p; 516 struct super_block *sb = proc_mnt->mnt_sb; 517 518 /* 519 * Actually it's a partial revoke(). 520 */ 521 file_list_lock(); 522 list_for_each(p, &sb->s_files) { 523 struct file * filp = list_entry(p, struct file, f_list); 524 struct dentry * dentry = filp->f_dentry; 525 struct inode * inode; 526 struct file_operations *fops; 527 528 if (dentry->d_op != &proc_dentry_operations) 529 continue; 530 inode = dentry->d_inode; 531 if (PDE(inode) != de) 532 continue; 533 fops = filp->f_op; 534 filp->f_op = NULL; 535 fops_put(fops); 536 } 537 file_list_unlock(); 538 } 539 540 static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, 541 const char *name, 542 mode_t mode, 543 nlink_t nlink) 544 { 545 struct proc_dir_entry *ent = NULL; 546 const char *fn = name; 547 int len; 548 549 /* make sure name is valid */ 550 if (!name || !strlen(name)) goto out; 551 552 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 553 goto out; 554 555 /* At this point there must not be any '/' characters beyond *fn */ 556 if (strchr(fn, '/')) 557 goto out; 558 559 len = strlen(fn); 560 561 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 562 if (!ent) goto out; 563 564 memset(ent, 0, sizeof(struct proc_dir_entry)); 565 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 566 ent->name = ((char *) ent) + sizeof(*ent); 567 ent->namelen = len; 568 ent->mode = mode; 569 ent->nlink = nlink; 570 out: 571 return ent; 572 } 573 574 struct proc_dir_entry *proc_symlink(const char *name, 575 struct proc_dir_entry *parent, const char *dest) 576 { 577 struct proc_dir_entry *ent; 578 579 ent = proc_create(&parent,name, 580 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 581 582 if (ent) { 583 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 584 if (ent->data) { 585 strcpy((char*)ent->data,dest); 586 if (proc_register(parent, ent) < 0) { 587 kfree(ent->data); 588 kfree(ent); 589 ent = NULL; 590 } 591 } else { 592 kfree(ent); 593 ent = NULL; 594 } 595 } 596 return ent; 597 } 598 599 struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 600 struct proc_dir_entry *parent) 601 { 602 struct proc_dir_entry *ent; 603 604 ent = proc_create(&parent, name, S_IFDIR | mode, 2); 605 if (ent) { 606 ent->proc_fops = &proc_dir_operations; 607 ent->proc_iops = &proc_dir_inode_operations; 608 609 if (proc_register(parent, ent) < 0) { 610 kfree(ent); 611 ent = NULL; 612 } 613 } 614 return ent; 615 } 616 617 struct proc_dir_entry *proc_mkdir(const char *name, 618 struct proc_dir_entry *parent) 619 { 620 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 621 } 622 623 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 624 struct proc_dir_entry *parent) 625 { 626 struct proc_dir_entry *ent; 627 nlink_t nlink; 628 629 if (S_ISDIR(mode)) { 630 if ((mode & S_IALLUGO) == 0) 631 mode |= S_IRUGO | S_IXUGO; 632 nlink = 2; 633 } else { 634 if ((mode & S_IFMT) == 0) 635 mode |= S_IFREG; 636 if ((mode & S_IALLUGO) == 0) 637 mode |= S_IRUGO; 638 nlink = 1; 639 } 640 641 ent = proc_create(&parent,name,mode,nlink); 642 if (ent) { 643 if (S_ISDIR(mode)) { 644 ent->proc_fops = &proc_dir_operations; 645 ent->proc_iops = &proc_dir_inode_operations; 646 } 647 if (proc_register(parent, ent) < 0) { 648 kfree(ent); 649 ent = NULL; 650 } 651 } 652 return ent; 653 } 654 655 void free_proc_entry(struct proc_dir_entry *de) 656 { 657 unsigned int ino = de->low_ino; 658 659 if (ino < PROC_DYNAMIC_FIRST) 660 return; 661 662 release_inode_number(ino); 663 664 if (S_ISLNK(de->mode) && de->data) 665 kfree(de->data); 666 kfree(de); 667 } 668 669 /* 670 * Remove a /proc entry and free it if it's not currently in use. 671 * If it is in use, we set the 'deleted' flag. 672 */ 673 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 674 { 675 struct proc_dir_entry **p; 676 struct proc_dir_entry *de; 677 const char *fn = name; 678 int len; 679 680 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 681 goto out; 682 len = strlen(fn); 683 for (p = &parent->subdir; *p; p=&(*p)->next ) { 684 if (!proc_match(len, fn, *p)) 685 continue; 686 de = *p; 687 *p = de->next; 688 de->next = NULL; 689 if (S_ISDIR(de->mode)) 690 parent->nlink--; 691 proc_kill_inodes(de); 692 de->nlink = 0; 693 WARN_ON(de->subdir); 694 if (!atomic_read(&de->count)) 695 free_proc_entry(de); 696 else { 697 de->deleted = 1; 698 printk("remove_proc_entry: %s/%s busy, count=%d\n", 699 parent->name, de->name, atomic_read(&de->count)); 700 } 701 break; 702 } 703 out: 704 return; 705 } 706