1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * proc/fs/generic.c --- generic routines for the proc-fs 4 * 5 * This file contains generic proc-fs routines for handling 6 * directories and files. 7 * 8 * Copyright (C) 1991, 1992 Linus Torvalds. 9 * Copyright (C) 1997 Theodore Ts'o 10 */ 11 12 #include <linux/cache.h> 13 #include <linux/errno.h> 14 #include <linux/time.h> 15 #include <linux/proc_fs.h> 16 #include <linux/stat.h> 17 #include <linux/mm.h> 18 #include <linux/module.h> 19 #include <linux/namei.h> 20 #include <linux/slab.h> 21 #include <linux/printk.h> 22 #include <linux/mount.h> 23 #include <linux/init.h> 24 #include <linux/idr.h> 25 #include <linux/bitops.h> 26 #include <linux/spinlock.h> 27 #include <linux/completion.h> 28 #include <linux/uaccess.h> 29 #include <linux/seq_file.h> 30 31 #include "internal.h" 32 33 static DEFINE_RWLOCK(proc_subdir_lock); 34 35 struct kmem_cache *proc_dir_entry_cache __ro_after_init; 36 37 void pde_free(struct proc_dir_entry *pde) 38 { 39 if (S_ISLNK(pde->mode)) 40 kfree(pde->data); 41 if (pde->name != pde->inline_name) 42 kfree(pde->name); 43 kmem_cache_free(proc_dir_entry_cache, pde); 44 } 45 46 static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) 47 { 48 if (len < de->namelen) 49 return -1; 50 if (len > de->namelen) 51 return 1; 52 53 return memcmp(name, de->name, len); 54 } 55 56 static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) 57 { 58 return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, 59 subdir_node); 60 } 61 62 static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) 63 { 64 return rb_entry_safe(rb_next(&dir->subdir_node), struct proc_dir_entry, 65 subdir_node); 66 } 67 68 static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, 69 const char *name, 70 unsigned int len) 71 { 72 struct rb_node *node = dir->subdir.rb_node; 73 74 while (node) { 75 struct proc_dir_entry *de = rb_entry(node, 76 struct proc_dir_entry, 77 subdir_node); 78 int result = proc_match(name, de, len); 79 80 if (result < 0) 81 node = node->rb_left; 82 else if (result > 0) 83 node = node->rb_right; 84 else 85 return de; 86 } 87 return NULL; 88 } 89 90 static bool pde_subdir_insert(struct proc_dir_entry *dir, 91 struct proc_dir_entry *de) 92 { 93 struct rb_root *root = &dir->subdir; 94 struct rb_node **new = &root->rb_node, *parent = NULL; 95 96 /* Figure out where to put new node */ 97 while (*new) { 98 struct proc_dir_entry *this = rb_entry(*new, 99 struct proc_dir_entry, 100 subdir_node); 101 int result = proc_match(de->name, this, de->namelen); 102 103 parent = *new; 104 if (result < 0) 105 new = &(*new)->rb_left; 106 else if (result > 0) 107 new = &(*new)->rb_right; 108 else 109 return false; 110 } 111 112 /* Add new node and rebalance tree. */ 113 rb_link_node(&de->subdir_node, parent, new); 114 rb_insert_color(&de->subdir_node, root); 115 return true; 116 } 117 118 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 119 { 120 struct inode *inode = d_inode(dentry); 121 struct proc_dir_entry *de = PDE(inode); 122 int error; 123 124 error = setattr_prepare(dentry, iattr); 125 if (error) 126 return error; 127 128 setattr_copy(inode, iattr); 129 mark_inode_dirty(inode); 130 131 proc_set_user(de, inode->i_uid, inode->i_gid); 132 de->mode = inode->i_mode; 133 return 0; 134 } 135 136 static int proc_getattr(const struct path *path, struct kstat *stat, 137 u32 request_mask, unsigned int query_flags) 138 { 139 struct inode *inode = d_inode(path->dentry); 140 struct proc_dir_entry *de = PDE(inode); 141 if (de) { 142 nlink_t nlink = READ_ONCE(de->nlink); 143 if (nlink > 0) { 144 set_nlink(inode, nlink); 145 } 146 } 147 148 generic_fillattr(inode, stat); 149 return 0; 150 } 151 152 static const struct inode_operations proc_file_inode_operations = { 153 .setattr = proc_notify_change, 154 }; 155 156 /* 157 * This function parses a name such as "tty/driver/serial", and 158 * returns the struct proc_dir_entry for "/proc/tty/driver", and 159 * returns "serial" in residual. 160 */ 161 static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, 162 const char **residual) 163 { 164 const char *cp = name, *next; 165 struct proc_dir_entry *de; 166 167 de = *ret; 168 if (!de) 169 de = &proc_root; 170 171 while (1) { 172 next = strchr(cp, '/'); 173 if (!next) 174 break; 175 176 de = pde_subdir_find(de, cp, next - cp); 177 if (!de) { 178 WARN(1, "name '%s'\n", name); 179 return -ENOENT; 180 } 181 cp = next + 1; 182 } 183 *residual = cp; 184 *ret = de; 185 return 0; 186 } 187 188 static int xlate_proc_name(const char *name, struct proc_dir_entry **ret, 189 const char **residual) 190 { 191 int rv; 192 193 read_lock(&proc_subdir_lock); 194 rv = __xlate_proc_name(name, ret, residual); 195 read_unlock(&proc_subdir_lock); 196 return rv; 197 } 198 199 static DEFINE_IDA(proc_inum_ida); 200 201 #define PROC_DYNAMIC_FIRST 0xF0000000U 202 203 /* 204 * Return an inode number between PROC_DYNAMIC_FIRST and 205 * 0xffffffff, or zero on failure. 206 */ 207 int proc_alloc_inum(unsigned int *inum) 208 { 209 int i; 210 211 i = ida_simple_get(&proc_inum_ida, 0, UINT_MAX - PROC_DYNAMIC_FIRST + 1, 212 GFP_KERNEL); 213 if (i < 0) 214 return i; 215 216 *inum = PROC_DYNAMIC_FIRST + (unsigned int)i; 217 return 0; 218 } 219 220 void proc_free_inum(unsigned int inum) 221 { 222 ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); 223 } 224 225 static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags) 226 { 227 if (flags & LOOKUP_RCU) 228 return -ECHILD; 229 230 if (atomic_read(&PDE(d_inode(dentry))->in_use) < 0) 231 return 0; /* revalidate */ 232 return 1; 233 } 234 235 static int proc_misc_d_delete(const struct dentry *dentry) 236 { 237 return atomic_read(&PDE(d_inode(dentry))->in_use) < 0; 238 } 239 240 static const struct dentry_operations proc_misc_dentry_ops = { 241 .d_revalidate = proc_misc_d_revalidate, 242 .d_delete = proc_misc_d_delete, 243 }; 244 245 /* 246 * Don't create negative dentries here, return -ENOENT by hand 247 * instead. 248 */ 249 struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, 250 struct proc_dir_entry *de) 251 { 252 struct inode *inode; 253 254 read_lock(&proc_subdir_lock); 255 de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); 256 if (de) { 257 pde_get(de); 258 read_unlock(&proc_subdir_lock); 259 inode = proc_get_inode(dir->i_sb, de); 260 if (!inode) 261 return ERR_PTR(-ENOMEM); 262 d_set_d_op(dentry, de->proc_dops); 263 return d_splice_alias(inode, dentry); 264 } 265 read_unlock(&proc_subdir_lock); 266 return ERR_PTR(-ENOENT); 267 } 268 269 struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, 270 unsigned int flags) 271 { 272 return proc_lookup_de(dir, dentry, PDE(dir)); 273 } 274 275 /* 276 * This returns non-zero if at EOF, so that the /proc 277 * root directory can use this and check if it should 278 * continue with the <pid> entries.. 279 * 280 * Note that the VFS-layer doesn't care about the return 281 * value of the readdir() call, as long as it's non-negative 282 * for success.. 283 */ 284 int proc_readdir_de(struct file *file, struct dir_context *ctx, 285 struct proc_dir_entry *de) 286 { 287 int i; 288 289 if (!dir_emit_dots(file, ctx)) 290 return 0; 291 292 i = ctx->pos - 2; 293 read_lock(&proc_subdir_lock); 294 de = pde_subdir_first(de); 295 for (;;) { 296 if (!de) { 297 read_unlock(&proc_subdir_lock); 298 return 0; 299 } 300 if (!i) 301 break; 302 de = pde_subdir_next(de); 303 i--; 304 } 305 306 do { 307 struct proc_dir_entry *next; 308 pde_get(de); 309 read_unlock(&proc_subdir_lock); 310 if (!dir_emit(ctx, de->name, de->namelen, 311 de->low_ino, de->mode >> 12)) { 312 pde_put(de); 313 return 0; 314 } 315 ctx->pos++; 316 read_lock(&proc_subdir_lock); 317 next = pde_subdir_next(de); 318 pde_put(de); 319 de = next; 320 } while (de); 321 read_unlock(&proc_subdir_lock); 322 return 1; 323 } 324 325 int proc_readdir(struct file *file, struct dir_context *ctx) 326 { 327 struct inode *inode = file_inode(file); 328 329 return proc_readdir_de(file, ctx, PDE(inode)); 330 } 331 332 /* 333 * These are the generic /proc directory operations. They 334 * use the in-memory "struct proc_dir_entry" tree to parse 335 * the /proc directory. 336 */ 337 static const struct file_operations proc_dir_operations = { 338 .llseek = generic_file_llseek, 339 .read = generic_read_dir, 340 .iterate_shared = proc_readdir, 341 }; 342 343 /* 344 * proc directories can do almost nothing.. 345 */ 346 static const struct inode_operations proc_dir_inode_operations = { 347 .lookup = proc_lookup, 348 .getattr = proc_getattr, 349 .setattr = proc_notify_change, 350 }; 351 352 /* returns the registered entry, or frees dp and returns NULL on failure */ 353 struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, 354 struct proc_dir_entry *dp) 355 { 356 if (proc_alloc_inum(&dp->low_ino)) 357 goto out_free_entry; 358 359 write_lock(&proc_subdir_lock); 360 dp->parent = dir; 361 if (pde_subdir_insert(dir, dp) == false) { 362 WARN(1, "proc_dir_entry '%s/%s' already registered\n", 363 dir->name, dp->name); 364 write_unlock(&proc_subdir_lock); 365 goto out_free_inum; 366 } 367 dir->nlink++; 368 write_unlock(&proc_subdir_lock); 369 370 return dp; 371 out_free_inum: 372 proc_free_inum(dp->low_ino); 373 out_free_entry: 374 pde_free(dp); 375 return NULL; 376 } 377 378 static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, 379 const char *name, 380 umode_t mode, 381 nlink_t nlink) 382 { 383 struct proc_dir_entry *ent = NULL; 384 const char *fn; 385 struct qstr qstr; 386 387 if (xlate_proc_name(name, parent, &fn) != 0) 388 goto out; 389 qstr.name = fn; 390 qstr.len = strlen(fn); 391 if (qstr.len == 0 || qstr.len >= 256) { 392 WARN(1, "name len %u\n", qstr.len); 393 return NULL; 394 } 395 if (qstr.len == 1 && fn[0] == '.') { 396 WARN(1, "name '.'\n"); 397 return NULL; 398 } 399 if (qstr.len == 2 && fn[0] == '.' && fn[1] == '.') { 400 WARN(1, "name '..'\n"); 401 return NULL; 402 } 403 if (*parent == &proc_root && name_to_int(&qstr) != ~0U) { 404 WARN(1, "create '/proc/%s' by hand\n", qstr.name); 405 return NULL; 406 } 407 if (is_empty_pde(*parent)) { 408 WARN(1, "attempt to add to permanently empty directory"); 409 return NULL; 410 } 411 412 ent = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); 413 if (!ent) 414 goto out; 415 416 if (qstr.len + 1 <= SIZEOF_PDE_INLINE_NAME) { 417 ent->name = ent->inline_name; 418 } else { 419 ent->name = kmalloc(qstr.len + 1, GFP_KERNEL); 420 if (!ent->name) { 421 pde_free(ent); 422 return NULL; 423 } 424 } 425 426 memcpy(ent->name, fn, qstr.len + 1); 427 ent->namelen = qstr.len; 428 ent->mode = mode; 429 ent->nlink = nlink; 430 ent->subdir = RB_ROOT; 431 refcount_set(&ent->refcnt, 1); 432 spin_lock_init(&ent->pde_unload_lock); 433 INIT_LIST_HEAD(&ent->pde_openers); 434 proc_set_user(ent, (*parent)->uid, (*parent)->gid); 435 436 ent->proc_dops = &proc_misc_dentry_ops; 437 438 out: 439 return ent; 440 } 441 442 struct proc_dir_entry *proc_symlink(const char *name, 443 struct proc_dir_entry *parent, const char *dest) 444 { 445 struct proc_dir_entry *ent; 446 447 ent = __proc_create(&parent, name, 448 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 449 450 if (ent) { 451 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 452 if (ent->data) { 453 strcpy((char*)ent->data,dest); 454 ent->proc_iops = &proc_link_inode_operations; 455 ent = proc_register(parent, ent); 456 } else { 457 pde_free(ent); 458 ent = NULL; 459 } 460 } 461 return ent; 462 } 463 EXPORT_SYMBOL(proc_symlink); 464 465 struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, 466 struct proc_dir_entry *parent, void *data) 467 { 468 struct proc_dir_entry *ent; 469 470 if (mode == 0) 471 mode = S_IRUGO | S_IXUGO; 472 473 ent = __proc_create(&parent, name, S_IFDIR | mode, 2); 474 if (ent) { 475 ent->data = data; 476 ent->proc_dir_ops = &proc_dir_operations; 477 ent->proc_iops = &proc_dir_inode_operations; 478 ent = proc_register(parent, ent); 479 } 480 return ent; 481 } 482 EXPORT_SYMBOL_GPL(proc_mkdir_data); 483 484 struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, 485 struct proc_dir_entry *parent) 486 { 487 return proc_mkdir_data(name, mode, parent, NULL); 488 } 489 EXPORT_SYMBOL(proc_mkdir_mode); 490 491 struct proc_dir_entry *proc_mkdir(const char *name, 492 struct proc_dir_entry *parent) 493 { 494 return proc_mkdir_data(name, 0, parent, NULL); 495 } 496 EXPORT_SYMBOL(proc_mkdir); 497 498 struct proc_dir_entry *proc_create_mount_point(const char *name) 499 { 500 umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO; 501 struct proc_dir_entry *ent, *parent = NULL; 502 503 ent = __proc_create(&parent, name, mode, 2); 504 if (ent) { 505 ent->data = NULL; 506 ent->proc_dir_ops = NULL; 507 ent->proc_iops = NULL; 508 ent = proc_register(parent, ent); 509 } 510 return ent; 511 } 512 EXPORT_SYMBOL(proc_create_mount_point); 513 514 struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, 515 struct proc_dir_entry **parent, void *data) 516 { 517 struct proc_dir_entry *p; 518 519 if ((mode & S_IFMT) == 0) 520 mode |= S_IFREG; 521 if ((mode & S_IALLUGO) == 0) 522 mode |= S_IRUGO; 523 if (WARN_ON_ONCE(!S_ISREG(mode))) 524 return NULL; 525 526 p = __proc_create(parent, name, mode, 1); 527 if (p) { 528 p->proc_iops = &proc_file_inode_operations; 529 p->data = data; 530 } 531 return p; 532 } 533 534 struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, 535 struct proc_dir_entry *parent, 536 const struct proc_ops *proc_ops, void *data) 537 { 538 struct proc_dir_entry *p; 539 540 p = proc_create_reg(name, mode, &parent, data); 541 if (!p) 542 return NULL; 543 p->proc_ops = proc_ops; 544 return proc_register(parent, p); 545 } 546 EXPORT_SYMBOL(proc_create_data); 547 548 struct proc_dir_entry *proc_create(const char *name, umode_t mode, 549 struct proc_dir_entry *parent, 550 const struct proc_ops *proc_ops) 551 { 552 return proc_create_data(name, mode, parent, proc_ops, NULL); 553 } 554 EXPORT_SYMBOL(proc_create); 555 556 static int proc_seq_open(struct inode *inode, struct file *file) 557 { 558 struct proc_dir_entry *de = PDE(inode); 559 560 if (de->state_size) 561 return seq_open_private(file, de->seq_ops, de->state_size); 562 return seq_open(file, de->seq_ops); 563 } 564 565 static int proc_seq_release(struct inode *inode, struct file *file) 566 { 567 struct proc_dir_entry *de = PDE(inode); 568 569 if (de->state_size) 570 return seq_release_private(inode, file); 571 return seq_release(inode, file); 572 } 573 574 static const struct proc_ops proc_seq_ops = { 575 .proc_open = proc_seq_open, 576 .proc_read = seq_read, 577 .proc_lseek = seq_lseek, 578 .proc_release = proc_seq_release, 579 }; 580 581 struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, 582 struct proc_dir_entry *parent, const struct seq_operations *ops, 583 unsigned int state_size, void *data) 584 { 585 struct proc_dir_entry *p; 586 587 p = proc_create_reg(name, mode, &parent, data); 588 if (!p) 589 return NULL; 590 p->proc_ops = &proc_seq_ops; 591 p->seq_ops = ops; 592 p->state_size = state_size; 593 return proc_register(parent, p); 594 } 595 EXPORT_SYMBOL(proc_create_seq_private); 596 597 static int proc_single_open(struct inode *inode, struct file *file) 598 { 599 struct proc_dir_entry *de = PDE(inode); 600 601 return single_open(file, de->single_show, de->data); 602 } 603 604 static const struct proc_ops proc_single_ops = { 605 .proc_open = proc_single_open, 606 .proc_read = seq_read, 607 .proc_lseek = seq_lseek, 608 .proc_release = single_release, 609 }; 610 611 struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, 612 struct proc_dir_entry *parent, 613 int (*show)(struct seq_file *, void *), void *data) 614 { 615 struct proc_dir_entry *p; 616 617 p = proc_create_reg(name, mode, &parent, data); 618 if (!p) 619 return NULL; 620 p->proc_ops = &proc_single_ops; 621 p->single_show = show; 622 return proc_register(parent, p); 623 } 624 EXPORT_SYMBOL(proc_create_single_data); 625 626 void proc_set_size(struct proc_dir_entry *de, loff_t size) 627 { 628 de->size = size; 629 } 630 EXPORT_SYMBOL(proc_set_size); 631 632 void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) 633 { 634 de->uid = uid; 635 de->gid = gid; 636 } 637 EXPORT_SYMBOL(proc_set_user); 638 639 void pde_put(struct proc_dir_entry *pde) 640 { 641 if (refcount_dec_and_test(&pde->refcnt)) { 642 proc_free_inum(pde->low_ino); 643 pde_free(pde); 644 } 645 } 646 647 /* 648 * Remove a /proc entry and free it if it's not currently in use. 649 */ 650 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 651 { 652 struct proc_dir_entry *de = NULL; 653 const char *fn = name; 654 unsigned int len; 655 656 write_lock(&proc_subdir_lock); 657 if (__xlate_proc_name(name, &parent, &fn) != 0) { 658 write_unlock(&proc_subdir_lock); 659 return; 660 } 661 len = strlen(fn); 662 663 de = pde_subdir_find(parent, fn, len); 664 if (de) { 665 rb_erase(&de->subdir_node, &parent->subdir); 666 if (S_ISDIR(de->mode)) { 667 parent->nlink--; 668 } 669 } 670 write_unlock(&proc_subdir_lock); 671 if (!de) { 672 WARN(1, "name '%s'\n", name); 673 return; 674 } 675 676 proc_entry_rundown(de); 677 678 WARN(pde_subdir_first(de), 679 "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n", 680 __func__, de->parent->name, de->name, pde_subdir_first(de)->name); 681 pde_put(de); 682 } 683 EXPORT_SYMBOL(remove_proc_entry); 684 685 int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) 686 { 687 struct proc_dir_entry *root = NULL, *de, *next; 688 const char *fn = name; 689 unsigned int len; 690 691 write_lock(&proc_subdir_lock); 692 if (__xlate_proc_name(name, &parent, &fn) != 0) { 693 write_unlock(&proc_subdir_lock); 694 return -ENOENT; 695 } 696 len = strlen(fn); 697 698 root = pde_subdir_find(parent, fn, len); 699 if (!root) { 700 write_unlock(&proc_subdir_lock); 701 return -ENOENT; 702 } 703 rb_erase(&root->subdir_node, &parent->subdir); 704 705 de = root; 706 while (1) { 707 next = pde_subdir_first(de); 708 if (next) { 709 rb_erase(&next->subdir_node, &de->subdir); 710 de = next; 711 continue; 712 } 713 next = de->parent; 714 if (S_ISDIR(de->mode)) 715 next->nlink--; 716 write_unlock(&proc_subdir_lock); 717 718 proc_entry_rundown(de); 719 if (de == root) 720 break; 721 pde_put(de); 722 723 write_lock(&proc_subdir_lock); 724 de = next; 725 } 726 pde_put(root); 727 return 0; 728 } 729 EXPORT_SYMBOL(remove_proc_subtree); 730 731 void *proc_get_parent_data(const struct inode *inode) 732 { 733 struct proc_dir_entry *de = PDE(inode); 734 return de->parent->data; 735 } 736 EXPORT_SYMBOL_GPL(proc_get_parent_data); 737 738 void proc_remove(struct proc_dir_entry *de) 739 { 740 if (de) 741 remove_proc_subtree(de->name, de->parent); 742 } 743 EXPORT_SYMBOL(proc_remove); 744 745 void *PDE_DATA(const struct inode *inode) 746 { 747 return __PDE_DATA(inode); 748 } 749 EXPORT_SYMBOL(PDE_DATA); 750 751 /* 752 * Pull a user buffer into memory and pass it to the file's write handler if 753 * one is supplied. The ->write() method is permitted to modify the 754 * kernel-side buffer. 755 */ 756 ssize_t proc_simple_write(struct file *f, const char __user *ubuf, size_t size, 757 loff_t *_pos) 758 { 759 struct proc_dir_entry *pde = PDE(file_inode(f)); 760 char *buf; 761 int ret; 762 763 if (!pde->write) 764 return -EACCES; 765 if (size == 0 || size > PAGE_SIZE - 1) 766 return -EINVAL; 767 buf = memdup_user_nul(ubuf, size); 768 if (IS_ERR(buf)) 769 return PTR_ERR(buf); 770 ret = pde->write(f, buf, size); 771 kfree(buf); 772 return ret == 0 ? size : ret; 773 } 774