1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/cache.h> 12 #include <linux/errno.h> 13 #include <linux/time.h> 14 #include <linux/proc_fs.h> 15 #include <linux/stat.h> 16 #include <linux/mm.h> 17 #include <linux/module.h> 18 #include <linux/namei.h> 19 #include <linux/slab.h> 20 #include <linux/printk.h> 21 #include <linux/mount.h> 22 #include <linux/init.h> 23 #include <linux/idr.h> 24 #include <linux/bitops.h> 25 #include <linux/spinlock.h> 26 #include <linux/completion.h> 27 #include <linux/uaccess.h> 28 #include <linux/seq_file.h> 29 30 #include "internal.h" 31 32 static DEFINE_RWLOCK(proc_subdir_lock); 33 34 struct kmem_cache *proc_dir_entry_cache __ro_after_init; 35 36 void pde_free(struct proc_dir_entry *pde) 37 { 38 if (S_ISLNK(pde->mode)) 39 kfree(pde->data); 40 if (pde->name != pde->inline_name) 41 kfree(pde->name); 42 kmem_cache_free(proc_dir_entry_cache, pde); 43 } 44 45 static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) 46 { 47 if (len < de->namelen) 48 return -1; 49 if (len > de->namelen) 50 return 1; 51 52 return memcmp(name, de->name, len); 53 } 54 55 static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) 56 { 57 return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, 58 subdir_node); 59 } 60 61 static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) 62 { 63 return rb_entry_safe(rb_next(&dir->subdir_node), struct proc_dir_entry, 64 subdir_node); 65 } 66 67 static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, 68 const char *name, 69 unsigned int len) 70 { 71 struct rb_node *node = dir->subdir.rb_node; 72 73 while (node) { 74 struct proc_dir_entry *de = rb_entry(node, 75 struct proc_dir_entry, 76 subdir_node); 77 int result = proc_match(name, de, len); 78 79 if (result < 0) 80 node = node->rb_left; 81 else if (result > 0) 82 node = node->rb_right; 83 else 84 return de; 85 } 86 return NULL; 87 } 88 89 static bool pde_subdir_insert(struct proc_dir_entry *dir, 90 struct proc_dir_entry *de) 91 { 92 struct rb_root *root = &dir->subdir; 93 struct rb_node **new = &root->rb_node, *parent = NULL; 94 95 /* Figure out where to put new node */ 96 while (*new) { 97 struct proc_dir_entry *this = rb_entry(*new, 98 struct proc_dir_entry, 99 subdir_node); 100 int result = proc_match(de->name, this, de->namelen); 101 102 parent = *new; 103 if (result < 0) 104 new = &(*new)->rb_left; 105 else if (result > 0) 106 new = &(*new)->rb_right; 107 else 108 return false; 109 } 110 111 /* Add new node and rebalance tree. */ 112 rb_link_node(&de->subdir_node, parent, new); 113 rb_insert_color(&de->subdir_node, root); 114 return true; 115 } 116 117 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 118 { 119 struct inode *inode = d_inode(dentry); 120 struct proc_dir_entry *de = PDE(inode); 121 int error; 122 123 error = setattr_prepare(dentry, iattr); 124 if (error) 125 return error; 126 127 setattr_copy(inode, iattr); 128 mark_inode_dirty(inode); 129 130 proc_set_user(de, inode->i_uid, inode->i_gid); 131 de->mode = inode->i_mode; 132 return 0; 133 } 134 135 static int proc_getattr(const struct path *path, struct kstat *stat, 136 u32 request_mask, unsigned int query_flags) 137 { 138 struct inode *inode = d_inode(path->dentry); 139 struct proc_dir_entry *de = PDE(inode); 140 if (de && de->nlink) 141 set_nlink(inode, de->nlink); 142 143 generic_fillattr(inode, stat); 144 return 0; 145 } 146 147 static const struct inode_operations proc_file_inode_operations = { 148 .setattr = proc_notify_change, 149 }; 150 151 /* 152 * This function parses a name such as "tty/driver/serial", and 153 * returns the struct proc_dir_entry for "/proc/tty/driver", and 154 * returns "serial" in residual. 155 */ 156 static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, 157 const char **residual) 158 { 159 const char *cp = name, *next; 160 struct proc_dir_entry *de; 161 unsigned int len; 162 163 de = *ret; 164 if (!de) 165 de = &proc_root; 166 167 while (1) { 168 next = strchr(cp, '/'); 169 if (!next) 170 break; 171 172 len = next - cp; 173 de = pde_subdir_find(de, cp, len); 174 if (!de) { 175 WARN(1, "name '%s'\n", name); 176 return -ENOENT; 177 } 178 cp += len + 1; 179 } 180 *residual = cp; 181 *ret = de; 182 return 0; 183 } 184 185 static int xlate_proc_name(const char *name, struct proc_dir_entry **ret, 186 const char **residual) 187 { 188 int rv; 189 190 read_lock(&proc_subdir_lock); 191 rv = __xlate_proc_name(name, ret, residual); 192 read_unlock(&proc_subdir_lock); 193 return rv; 194 } 195 196 static DEFINE_IDA(proc_inum_ida); 197 198 #define PROC_DYNAMIC_FIRST 0xF0000000U 199 200 /* 201 * Return an inode number between PROC_DYNAMIC_FIRST and 202 * 0xffffffff, or zero on failure. 203 */ 204 int proc_alloc_inum(unsigned int *inum) 205 { 206 int i; 207 208 i = ida_simple_get(&proc_inum_ida, 0, UINT_MAX - PROC_DYNAMIC_FIRST + 1, 209 GFP_KERNEL); 210 if (i < 0) 211 return i; 212 213 *inum = PROC_DYNAMIC_FIRST + (unsigned int)i; 214 return 0; 215 } 216 217 void proc_free_inum(unsigned int inum) 218 { 219 ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); 220 } 221 222 static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags) 223 { 224 if (flags & LOOKUP_RCU) 225 return -ECHILD; 226 227 if (atomic_read(&PDE(d_inode(dentry))->in_use) < 0) 228 return 0; /* revalidate */ 229 return 1; 230 } 231 232 static int proc_misc_d_delete(const struct dentry *dentry) 233 { 234 return atomic_read(&PDE(d_inode(dentry))->in_use) < 0; 235 } 236 237 static const struct dentry_operations proc_misc_dentry_ops = { 238 .d_revalidate = proc_misc_d_revalidate, 239 .d_delete = proc_misc_d_delete, 240 }; 241 242 /* 243 * Don't create negative dentries here, return -ENOENT by hand 244 * instead. 245 */ 246 struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, 247 struct proc_dir_entry *de) 248 { 249 struct inode *inode; 250 251 read_lock(&proc_subdir_lock); 252 de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); 253 if (de) { 254 pde_get(de); 255 read_unlock(&proc_subdir_lock); 256 inode = proc_get_inode(dir->i_sb, de); 257 if (!inode) 258 return ERR_PTR(-ENOMEM); 259 d_set_d_op(dentry, de->proc_dops); 260 return d_splice_alias(inode, dentry); 261 } 262 read_unlock(&proc_subdir_lock); 263 return ERR_PTR(-ENOENT); 264 } 265 266 struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, 267 unsigned int flags) 268 { 269 return proc_lookup_de(dir, dentry, PDE(dir)); 270 } 271 272 /* 273 * This returns non-zero if at EOF, so that the /proc 274 * root directory can use this and check if it should 275 * continue with the <pid> entries.. 276 * 277 * Note that the VFS-layer doesn't care about the return 278 * value of the readdir() call, as long as it's non-negative 279 * for success.. 280 */ 281 int proc_readdir_de(struct file *file, struct dir_context *ctx, 282 struct proc_dir_entry *de) 283 { 284 int i; 285 286 if (!dir_emit_dots(file, ctx)) 287 return 0; 288 289 i = ctx->pos - 2; 290 read_lock(&proc_subdir_lock); 291 de = pde_subdir_first(de); 292 for (;;) { 293 if (!de) { 294 read_unlock(&proc_subdir_lock); 295 return 0; 296 } 297 if (!i) 298 break; 299 de = pde_subdir_next(de); 300 i--; 301 } 302 303 do { 304 struct proc_dir_entry *next; 305 pde_get(de); 306 read_unlock(&proc_subdir_lock); 307 if (!dir_emit(ctx, de->name, de->namelen, 308 de->low_ino, de->mode >> 12)) { 309 pde_put(de); 310 return 0; 311 } 312 ctx->pos++; 313 read_lock(&proc_subdir_lock); 314 next = pde_subdir_next(de); 315 pde_put(de); 316 de = next; 317 } while (de); 318 read_unlock(&proc_subdir_lock); 319 return 1; 320 } 321 322 int proc_readdir(struct file *file, struct dir_context *ctx) 323 { 324 struct inode *inode = file_inode(file); 325 326 return proc_readdir_de(file, ctx, PDE(inode)); 327 } 328 329 /* 330 * These are the generic /proc directory operations. They 331 * use the in-memory "struct proc_dir_entry" tree to parse 332 * the /proc directory. 333 */ 334 static const struct file_operations proc_dir_operations = { 335 .llseek = generic_file_llseek, 336 .read = generic_read_dir, 337 .iterate_shared = proc_readdir, 338 }; 339 340 /* 341 * proc directories can do almost nothing.. 342 */ 343 static const struct inode_operations proc_dir_inode_operations = { 344 .lookup = proc_lookup, 345 .getattr = proc_getattr, 346 .setattr = proc_notify_change, 347 }; 348 349 /* returns the registered entry, or frees dp and returns NULL on failure */ 350 struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, 351 struct proc_dir_entry *dp) 352 { 353 if (proc_alloc_inum(&dp->low_ino)) 354 goto out_free_entry; 355 356 write_lock(&proc_subdir_lock); 357 dp->parent = dir; 358 if (pde_subdir_insert(dir, dp) == false) { 359 WARN(1, "proc_dir_entry '%s/%s' already registered\n", 360 dir->name, dp->name); 361 write_unlock(&proc_subdir_lock); 362 goto out_free_inum; 363 } 364 write_unlock(&proc_subdir_lock); 365 366 return dp; 367 out_free_inum: 368 proc_free_inum(dp->low_ino); 369 out_free_entry: 370 pde_free(dp); 371 return NULL; 372 } 373 374 static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, 375 const char *name, 376 umode_t mode, 377 nlink_t nlink) 378 { 379 struct proc_dir_entry *ent = NULL; 380 const char *fn; 381 struct qstr qstr; 382 383 if (xlate_proc_name(name, parent, &fn) != 0) 384 goto out; 385 qstr.name = fn; 386 qstr.len = strlen(fn); 387 if (qstr.len == 0 || qstr.len >= 256) { 388 WARN(1, "name len %u\n", qstr.len); 389 return NULL; 390 } 391 if (qstr.len == 1 && fn[0] == '.') { 392 WARN(1, "name '.'\n"); 393 return NULL; 394 } 395 if (qstr.len == 2 && fn[0] == '.' && fn[1] == '.') { 396 WARN(1, "name '..'\n"); 397 return NULL; 398 } 399 if (*parent == &proc_root && name_to_int(&qstr) != ~0U) { 400 WARN(1, "create '/proc/%s' by hand\n", qstr.name); 401 return NULL; 402 } 403 if (is_empty_pde(*parent)) { 404 WARN(1, "attempt to add to permanently empty directory"); 405 return NULL; 406 } 407 408 ent = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); 409 if (!ent) 410 goto out; 411 412 if (qstr.len + 1 <= SIZEOF_PDE_INLINE_NAME) { 413 ent->name = ent->inline_name; 414 } else { 415 ent->name = kmalloc(qstr.len + 1, GFP_KERNEL); 416 if (!ent->name) { 417 pde_free(ent); 418 return NULL; 419 } 420 } 421 422 memcpy(ent->name, fn, qstr.len + 1); 423 ent->namelen = qstr.len; 424 ent->mode = mode; 425 ent->nlink = nlink; 426 ent->subdir = RB_ROOT; 427 refcount_set(&ent->refcnt, 1); 428 spin_lock_init(&ent->pde_unload_lock); 429 INIT_LIST_HEAD(&ent->pde_openers); 430 proc_set_user(ent, (*parent)->uid, (*parent)->gid); 431 432 ent->proc_dops = &proc_misc_dentry_ops; 433 434 out: 435 return ent; 436 } 437 438 struct proc_dir_entry *proc_symlink(const char *name, 439 struct proc_dir_entry *parent, const char *dest) 440 { 441 struct proc_dir_entry *ent; 442 443 ent = __proc_create(&parent, name, 444 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 445 446 if (ent) { 447 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 448 if (ent->data) { 449 strcpy((char*)ent->data,dest); 450 ent->proc_iops = &proc_link_inode_operations; 451 ent = proc_register(parent, ent); 452 } else { 453 pde_free(ent); 454 ent = NULL; 455 } 456 } 457 return ent; 458 } 459 EXPORT_SYMBOL(proc_symlink); 460 461 struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, 462 struct proc_dir_entry *parent, void *data) 463 { 464 struct proc_dir_entry *ent; 465 466 if (mode == 0) 467 mode = S_IRUGO | S_IXUGO; 468 469 ent = __proc_create(&parent, name, S_IFDIR | mode, 2); 470 if (ent) { 471 ent->data = data; 472 ent->proc_fops = &proc_dir_operations; 473 ent->proc_iops = &proc_dir_inode_operations; 474 parent->nlink++; 475 ent = proc_register(parent, ent); 476 if (!ent) 477 parent->nlink--; 478 } 479 return ent; 480 } 481 EXPORT_SYMBOL_GPL(proc_mkdir_data); 482 483 struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, 484 struct proc_dir_entry *parent) 485 { 486 return proc_mkdir_data(name, mode, parent, NULL); 487 } 488 EXPORT_SYMBOL(proc_mkdir_mode); 489 490 struct proc_dir_entry *proc_mkdir(const char *name, 491 struct proc_dir_entry *parent) 492 { 493 return proc_mkdir_data(name, 0, parent, NULL); 494 } 495 EXPORT_SYMBOL(proc_mkdir); 496 497 struct proc_dir_entry *proc_create_mount_point(const char *name) 498 { 499 umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO; 500 struct proc_dir_entry *ent, *parent = NULL; 501 502 ent = __proc_create(&parent, name, mode, 2); 503 if (ent) { 504 ent->data = NULL; 505 ent->proc_fops = NULL; 506 ent->proc_iops = NULL; 507 parent->nlink++; 508 ent = proc_register(parent, ent); 509 if (!ent) 510 parent->nlink--; 511 } 512 return ent; 513 } 514 EXPORT_SYMBOL(proc_create_mount_point); 515 516 struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, 517 struct proc_dir_entry **parent, void *data) 518 { 519 struct proc_dir_entry *p; 520 521 if ((mode & S_IFMT) == 0) 522 mode |= S_IFREG; 523 if ((mode & S_IALLUGO) == 0) 524 mode |= S_IRUGO; 525 if (WARN_ON_ONCE(!S_ISREG(mode))) 526 return NULL; 527 528 p = __proc_create(parent, name, mode, 1); 529 if (p) { 530 p->proc_iops = &proc_file_inode_operations; 531 p->data = data; 532 } 533 return p; 534 } 535 536 struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, 537 struct proc_dir_entry *parent, 538 const struct file_operations *proc_fops, void *data) 539 { 540 struct proc_dir_entry *p; 541 542 BUG_ON(proc_fops == NULL); 543 544 p = proc_create_reg(name, mode, &parent, data); 545 if (!p) 546 return NULL; 547 p->proc_fops = proc_fops; 548 return proc_register(parent, p); 549 } 550 EXPORT_SYMBOL(proc_create_data); 551 552 struct proc_dir_entry *proc_create(const char *name, umode_t mode, 553 struct proc_dir_entry *parent, 554 const struct file_operations *proc_fops) 555 { 556 return proc_create_data(name, mode, parent, proc_fops, NULL); 557 } 558 EXPORT_SYMBOL(proc_create); 559 560 static int proc_seq_open(struct inode *inode, struct file *file) 561 { 562 struct proc_dir_entry *de = PDE(inode); 563 564 if (de->state_size) 565 return seq_open_private(file, de->seq_ops, de->state_size); 566 return seq_open(file, de->seq_ops); 567 } 568 569 static int proc_seq_release(struct inode *inode, struct file *file) 570 { 571 struct proc_dir_entry *de = PDE(inode); 572 573 if (de->state_size) 574 return seq_release_private(inode, file); 575 return seq_release(inode, file); 576 } 577 578 static const struct file_operations proc_seq_fops = { 579 .open = proc_seq_open, 580 .read = seq_read, 581 .llseek = seq_lseek, 582 .release = proc_seq_release, 583 }; 584 585 struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, 586 struct proc_dir_entry *parent, const struct seq_operations *ops, 587 unsigned int state_size, void *data) 588 { 589 struct proc_dir_entry *p; 590 591 p = proc_create_reg(name, mode, &parent, data); 592 if (!p) 593 return NULL; 594 p->proc_fops = &proc_seq_fops; 595 p->seq_ops = ops; 596 p->state_size = state_size; 597 return proc_register(parent, p); 598 } 599 EXPORT_SYMBOL(proc_create_seq_private); 600 601 static int proc_single_open(struct inode *inode, struct file *file) 602 { 603 struct proc_dir_entry *de = PDE(inode); 604 605 return single_open(file, de->single_show, de->data); 606 } 607 608 static const struct file_operations proc_single_fops = { 609 .open = proc_single_open, 610 .read = seq_read, 611 .llseek = seq_lseek, 612 .release = single_release, 613 }; 614 615 struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, 616 struct proc_dir_entry *parent, 617 int (*show)(struct seq_file *, void *), void *data) 618 { 619 struct proc_dir_entry *p; 620 621 p = proc_create_reg(name, mode, &parent, data); 622 if (!p) 623 return NULL; 624 p->proc_fops = &proc_single_fops; 625 p->single_show = show; 626 return proc_register(parent, p); 627 } 628 EXPORT_SYMBOL(proc_create_single_data); 629 630 void proc_set_size(struct proc_dir_entry *de, loff_t size) 631 { 632 de->size = size; 633 } 634 EXPORT_SYMBOL(proc_set_size); 635 636 void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) 637 { 638 de->uid = uid; 639 de->gid = gid; 640 } 641 EXPORT_SYMBOL(proc_set_user); 642 643 void pde_put(struct proc_dir_entry *pde) 644 { 645 if (refcount_dec_and_test(&pde->refcnt)) { 646 proc_free_inum(pde->low_ino); 647 pde_free(pde); 648 } 649 } 650 651 /* 652 * Remove a /proc entry and free it if it's not currently in use. 653 */ 654 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 655 { 656 struct proc_dir_entry *de = NULL; 657 const char *fn = name; 658 unsigned int len; 659 660 write_lock(&proc_subdir_lock); 661 if (__xlate_proc_name(name, &parent, &fn) != 0) { 662 write_unlock(&proc_subdir_lock); 663 return; 664 } 665 len = strlen(fn); 666 667 de = pde_subdir_find(parent, fn, len); 668 if (de) 669 rb_erase(&de->subdir_node, &parent->subdir); 670 write_unlock(&proc_subdir_lock); 671 if (!de) { 672 WARN(1, "name '%s'\n", name); 673 return; 674 } 675 676 proc_entry_rundown(de); 677 678 if (S_ISDIR(de->mode)) 679 parent->nlink--; 680 de->nlink = 0; 681 WARN(pde_subdir_first(de), 682 "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n", 683 __func__, de->parent->name, de->name, pde_subdir_first(de)->name); 684 pde_put(de); 685 } 686 EXPORT_SYMBOL(remove_proc_entry); 687 688 int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) 689 { 690 struct proc_dir_entry *root = NULL, *de, *next; 691 const char *fn = name; 692 unsigned int len; 693 694 write_lock(&proc_subdir_lock); 695 if (__xlate_proc_name(name, &parent, &fn) != 0) { 696 write_unlock(&proc_subdir_lock); 697 return -ENOENT; 698 } 699 len = strlen(fn); 700 701 root = pde_subdir_find(parent, fn, len); 702 if (!root) { 703 write_unlock(&proc_subdir_lock); 704 return -ENOENT; 705 } 706 rb_erase(&root->subdir_node, &parent->subdir); 707 708 de = root; 709 while (1) { 710 next = pde_subdir_first(de); 711 if (next) { 712 rb_erase(&next->subdir_node, &de->subdir); 713 de = next; 714 continue; 715 } 716 write_unlock(&proc_subdir_lock); 717 718 proc_entry_rundown(de); 719 next = de->parent; 720 if (S_ISDIR(de->mode)) 721 next->nlink--; 722 de->nlink = 0; 723 if (de == root) 724 break; 725 pde_put(de); 726 727 write_lock(&proc_subdir_lock); 728 de = next; 729 } 730 pde_put(root); 731 return 0; 732 } 733 EXPORT_SYMBOL(remove_proc_subtree); 734 735 void *proc_get_parent_data(const struct inode *inode) 736 { 737 struct proc_dir_entry *de = PDE(inode); 738 return de->parent->data; 739 } 740 EXPORT_SYMBOL_GPL(proc_get_parent_data); 741 742 void proc_remove(struct proc_dir_entry *de) 743 { 744 if (de) 745 remove_proc_subtree(de->name, de->parent); 746 } 747 EXPORT_SYMBOL(proc_remove); 748 749 void *PDE_DATA(const struct inode *inode) 750 { 751 return __PDE_DATA(inode); 752 } 753 EXPORT_SYMBOL(PDE_DATA); 754 755 /* 756 * Pull a user buffer into memory and pass it to the file's write handler if 757 * one is supplied. The ->write() method is permitted to modify the 758 * kernel-side buffer. 759 */ 760 ssize_t proc_simple_write(struct file *f, const char __user *ubuf, size_t size, 761 loff_t *_pos) 762 { 763 struct proc_dir_entry *pde = PDE(file_inode(f)); 764 char *buf; 765 int ret; 766 767 if (!pde->write) 768 return -EACCES; 769 if (size == 0 || size > PAGE_SIZE - 1) 770 return -EINVAL; 771 buf = memdup_user_nul(ubuf, size); 772 if (IS_ERR(buf)) 773 return PTR_ERR(buf); 774 ret = pde->write(f, buf, size); 775 kfree(buf); 776 return ret == 0 ? size : ret; 777 } 778