1 /* 2 * fs/kernfs/dir.c - kernfs directory implementation 3 * 4 * Copyright (c) 2001-3 Patrick Mochel 5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> 7 * 8 * This file is released under the GPLv2. 9 */ 10 11 #include <linux/sched.h> 12 #include <linux/fs.h> 13 #include <linux/namei.h> 14 #include <linux/idr.h> 15 #include <linux/slab.h> 16 #include <linux/security.h> 17 #include <linux/hash.h> 18 19 #include "kernfs-internal.h" 20 21 DEFINE_MUTEX(kernfs_mutex); 22 static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ 23 static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ 24 25 #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) 26 27 static bool kernfs_active(struct kernfs_node *kn) 28 { 29 lockdep_assert_held(&kernfs_mutex); 30 return atomic_read(&kn->active) >= 0; 31 } 32 33 static bool kernfs_lockdep(struct kernfs_node *kn) 34 { 35 #ifdef CONFIG_DEBUG_LOCK_ALLOC 36 return kn->flags & KERNFS_LOCKDEP; 37 #else 38 return false; 39 #endif 40 } 41 42 static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) 43 { 44 return strlcpy(buf, kn->parent ? kn->name : "/", buflen); 45 } 46 47 static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf, 48 size_t buflen) 49 { 50 char *p = buf + buflen; 51 int len; 52 53 *--p = '\0'; 54 55 do { 56 len = strlen(kn->name); 57 if (p - buf < len + 1) { 58 buf[0] = '\0'; 59 p = NULL; 60 break; 61 } 62 p -= len; 63 memcpy(p, kn->name, len); 64 *--p = '/'; 65 kn = kn->parent; 66 } while (kn && kn->parent); 67 68 return p; 69 } 70 71 /** 72 * kernfs_name - obtain the name of a given node 73 * @kn: kernfs_node of interest 74 * @buf: buffer to copy @kn's name into 75 * @buflen: size of @buf 76 * 77 * Copies the name of @kn into @buf of @buflen bytes. The behavior is 78 * similar to strlcpy(). It returns the length of @kn's name and if @buf 79 * isn't long enough, it's filled upto @buflen-1 and nul terminated. 80 * 81 * This function can be called from any context. 82 */ 83 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) 84 { 85 unsigned long flags; 86 int ret; 87 88 spin_lock_irqsave(&kernfs_rename_lock, flags); 89 ret = kernfs_name_locked(kn, buf, buflen); 90 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 91 return ret; 92 } 93 94 /** 95 * kernfs_path - build full path of a given node 96 * @kn: kernfs_node of interest 97 * @buf: buffer to copy @kn's name into 98 * @buflen: size of @buf 99 * 100 * Builds and returns the full path of @kn in @buf of @buflen bytes. The 101 * path is built from the end of @buf so the returned pointer usually 102 * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated 103 * and %NULL is returned. 104 */ 105 char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) 106 { 107 unsigned long flags; 108 char *p; 109 110 spin_lock_irqsave(&kernfs_rename_lock, flags); 111 p = kernfs_path_locked(kn, buf, buflen); 112 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 113 return p; 114 } 115 EXPORT_SYMBOL_GPL(kernfs_path); 116 117 /** 118 * pr_cont_kernfs_name - pr_cont name of a kernfs_node 119 * @kn: kernfs_node of interest 120 * 121 * This function can be called from any context. 122 */ 123 void pr_cont_kernfs_name(struct kernfs_node *kn) 124 { 125 unsigned long flags; 126 127 spin_lock_irqsave(&kernfs_rename_lock, flags); 128 129 kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); 130 pr_cont("%s", kernfs_pr_cont_buf); 131 132 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 133 } 134 135 /** 136 * pr_cont_kernfs_path - pr_cont path of a kernfs_node 137 * @kn: kernfs_node of interest 138 * 139 * This function can be called from any context. 140 */ 141 void pr_cont_kernfs_path(struct kernfs_node *kn) 142 { 143 unsigned long flags; 144 char *p; 145 146 spin_lock_irqsave(&kernfs_rename_lock, flags); 147 148 p = kernfs_path_locked(kn, kernfs_pr_cont_buf, 149 sizeof(kernfs_pr_cont_buf)); 150 if (p) 151 pr_cont("%s", p); 152 else 153 pr_cont("<name too long>"); 154 155 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 156 } 157 158 /** 159 * kernfs_get_parent - determine the parent node and pin it 160 * @kn: kernfs_node of interest 161 * 162 * Determines @kn's parent, pins and returns it. This function can be 163 * called from any context. 164 */ 165 struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) 166 { 167 struct kernfs_node *parent; 168 unsigned long flags; 169 170 spin_lock_irqsave(&kernfs_rename_lock, flags); 171 parent = kn->parent; 172 kernfs_get(parent); 173 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 174 175 return parent; 176 } 177 178 /** 179 * kernfs_name_hash 180 * @name: Null terminated string to hash 181 * @ns: Namespace tag to hash 182 * 183 * Returns 31 bit hash of ns + name (so it fits in an off_t ) 184 */ 185 static unsigned int kernfs_name_hash(const char *name, const void *ns) 186 { 187 unsigned long hash = init_name_hash(); 188 unsigned int len = strlen(name); 189 while (len--) 190 hash = partial_name_hash(*name++, hash); 191 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); 192 hash &= 0x7fffffffU; 193 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ 194 if (hash < 2) 195 hash += 2; 196 if (hash >= INT_MAX) 197 hash = INT_MAX - 1; 198 return hash; 199 } 200 201 static int kernfs_name_compare(unsigned int hash, const char *name, 202 const void *ns, const struct kernfs_node *kn) 203 { 204 if (hash != kn->hash) 205 return hash - kn->hash; 206 if (ns != kn->ns) 207 return ns - kn->ns; 208 return strcmp(name, kn->name); 209 } 210 211 static int kernfs_sd_compare(const struct kernfs_node *left, 212 const struct kernfs_node *right) 213 { 214 return kernfs_name_compare(left->hash, left->name, left->ns, right); 215 } 216 217 /** 218 * kernfs_link_sibling - link kernfs_node into sibling rbtree 219 * @kn: kernfs_node of interest 220 * 221 * Link @kn into its sibling rbtree which starts from 222 * @kn->parent->dir.children. 223 * 224 * Locking: 225 * mutex_lock(kernfs_mutex) 226 * 227 * RETURNS: 228 * 0 on susccess -EEXIST on failure. 229 */ 230 static int kernfs_link_sibling(struct kernfs_node *kn) 231 { 232 struct rb_node **node = &kn->parent->dir.children.rb_node; 233 struct rb_node *parent = NULL; 234 235 if (kernfs_type(kn) == KERNFS_DIR) 236 kn->parent->dir.subdirs++; 237 238 while (*node) { 239 struct kernfs_node *pos; 240 int result; 241 242 pos = rb_to_kn(*node); 243 parent = *node; 244 result = kernfs_sd_compare(kn, pos); 245 if (result < 0) 246 node = &pos->rb.rb_left; 247 else if (result > 0) 248 node = &pos->rb.rb_right; 249 else 250 return -EEXIST; 251 } 252 /* add new node and rebalance the tree */ 253 rb_link_node(&kn->rb, parent, node); 254 rb_insert_color(&kn->rb, &kn->parent->dir.children); 255 return 0; 256 } 257 258 /** 259 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree 260 * @kn: kernfs_node of interest 261 * 262 * Try to unlink @kn from its sibling rbtree which starts from 263 * kn->parent->dir.children. Returns %true if @kn was actually 264 * removed, %false if @kn wasn't on the rbtree. 265 * 266 * Locking: 267 * mutex_lock(kernfs_mutex) 268 */ 269 static bool kernfs_unlink_sibling(struct kernfs_node *kn) 270 { 271 if (RB_EMPTY_NODE(&kn->rb)) 272 return false; 273 274 if (kernfs_type(kn) == KERNFS_DIR) 275 kn->parent->dir.subdirs--; 276 277 rb_erase(&kn->rb, &kn->parent->dir.children); 278 RB_CLEAR_NODE(&kn->rb); 279 return true; 280 } 281 282 /** 283 * kernfs_get_active - get an active reference to kernfs_node 284 * @kn: kernfs_node to get an active reference to 285 * 286 * Get an active reference of @kn. This function is noop if @kn 287 * is NULL. 288 * 289 * RETURNS: 290 * Pointer to @kn on success, NULL on failure. 291 */ 292 struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) 293 { 294 if (unlikely(!kn)) 295 return NULL; 296 297 if (!atomic_inc_unless_negative(&kn->active)) 298 return NULL; 299 300 if (kernfs_lockdep(kn)) 301 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); 302 return kn; 303 } 304 305 /** 306 * kernfs_put_active - put an active reference to kernfs_node 307 * @kn: kernfs_node to put an active reference to 308 * 309 * Put an active reference to @kn. This function is noop if @kn 310 * is NULL. 311 */ 312 void kernfs_put_active(struct kernfs_node *kn) 313 { 314 struct kernfs_root *root = kernfs_root(kn); 315 int v; 316 317 if (unlikely(!kn)) 318 return; 319 320 if (kernfs_lockdep(kn)) 321 rwsem_release(&kn->dep_map, 1, _RET_IP_); 322 v = atomic_dec_return(&kn->active); 323 if (likely(v != KN_DEACTIVATED_BIAS)) 324 return; 325 326 wake_up_all(&root->deactivate_waitq); 327 } 328 329 /** 330 * kernfs_drain - drain kernfs_node 331 * @kn: kernfs_node to drain 332 * 333 * Drain existing usages and nuke all existing mmaps of @kn. Mutiple 334 * removers may invoke this function concurrently on @kn and all will 335 * return after draining is complete. 336 */ 337 static void kernfs_drain(struct kernfs_node *kn) 338 __releases(&kernfs_mutex) __acquires(&kernfs_mutex) 339 { 340 struct kernfs_root *root = kernfs_root(kn); 341 342 lockdep_assert_held(&kernfs_mutex); 343 WARN_ON_ONCE(kernfs_active(kn)); 344 345 mutex_unlock(&kernfs_mutex); 346 347 if (kernfs_lockdep(kn)) { 348 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); 349 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) 350 lock_contended(&kn->dep_map, _RET_IP_); 351 } 352 353 /* but everyone should wait for draining */ 354 wait_event(root->deactivate_waitq, 355 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); 356 357 if (kernfs_lockdep(kn)) { 358 lock_acquired(&kn->dep_map, _RET_IP_); 359 rwsem_release(&kn->dep_map, 1, _RET_IP_); 360 } 361 362 kernfs_unmap_bin_file(kn); 363 364 mutex_lock(&kernfs_mutex); 365 } 366 367 /** 368 * kernfs_get - get a reference count on a kernfs_node 369 * @kn: the target kernfs_node 370 */ 371 void kernfs_get(struct kernfs_node *kn) 372 { 373 if (kn) { 374 WARN_ON(!atomic_read(&kn->count)); 375 atomic_inc(&kn->count); 376 } 377 } 378 EXPORT_SYMBOL_GPL(kernfs_get); 379 380 /** 381 * kernfs_put - put a reference count on a kernfs_node 382 * @kn: the target kernfs_node 383 * 384 * Put a reference count of @kn and destroy it if it reached zero. 385 */ 386 void kernfs_put(struct kernfs_node *kn) 387 { 388 struct kernfs_node *parent; 389 struct kernfs_root *root; 390 391 if (!kn || !atomic_dec_and_test(&kn->count)) 392 return; 393 root = kernfs_root(kn); 394 repeat: 395 /* 396 * Moving/renaming is always done while holding reference. 397 * kn->parent won't change beneath us. 398 */ 399 parent = kn->parent; 400 401 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, 402 "kernfs_put: %s/%s: released with incorrect active_ref %d\n", 403 parent ? parent->name : "", kn->name, atomic_read(&kn->active)); 404 405 if (kernfs_type(kn) == KERNFS_LINK) 406 kernfs_put(kn->symlink.target_kn); 407 if (!(kn->flags & KERNFS_STATIC_NAME)) 408 kfree(kn->name); 409 if (kn->iattr) { 410 if (kn->iattr->ia_secdata) 411 security_release_secctx(kn->iattr->ia_secdata, 412 kn->iattr->ia_secdata_len); 413 simple_xattrs_free(&kn->iattr->xattrs); 414 } 415 kfree(kn->iattr); 416 ida_simple_remove(&root->ino_ida, kn->ino); 417 kmem_cache_free(kernfs_node_cache, kn); 418 419 kn = parent; 420 if (kn) { 421 if (atomic_dec_and_test(&kn->count)) 422 goto repeat; 423 } else { 424 /* just released the root kn, free @root too */ 425 ida_destroy(&root->ino_ida); 426 kfree(root); 427 } 428 } 429 EXPORT_SYMBOL_GPL(kernfs_put); 430 431 static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) 432 { 433 struct kernfs_node *kn; 434 435 if (flags & LOOKUP_RCU) 436 return -ECHILD; 437 438 /* Always perform fresh lookup for negatives */ 439 if (!dentry->d_inode) 440 goto out_bad_unlocked; 441 442 kn = dentry->d_fsdata; 443 mutex_lock(&kernfs_mutex); 444 445 /* The kernfs node has been deactivated */ 446 if (!kernfs_active(kn)) 447 goto out_bad; 448 449 /* The kernfs node has been moved? */ 450 if (dentry->d_parent->d_fsdata != kn->parent) 451 goto out_bad; 452 453 /* The kernfs node has been renamed */ 454 if (strcmp(dentry->d_name.name, kn->name) != 0) 455 goto out_bad; 456 457 /* The kernfs node has been moved to a different namespace */ 458 if (kn->parent && kernfs_ns_enabled(kn->parent) && 459 kernfs_info(dentry->d_sb)->ns != kn->ns) 460 goto out_bad; 461 462 mutex_unlock(&kernfs_mutex); 463 out_valid: 464 return 1; 465 out_bad: 466 mutex_unlock(&kernfs_mutex); 467 out_bad_unlocked: 468 /* 469 * @dentry doesn't match the underlying kernfs node, drop the 470 * dentry and force lookup. If we have submounts we must allow the 471 * vfs caches to lie about the state of the filesystem to prevent 472 * leaks and other nasty things, so use check_submounts_and_drop() 473 * instead of d_drop(). 474 */ 475 if (check_submounts_and_drop(dentry) != 0) 476 goto out_valid; 477 478 return 0; 479 } 480 481 static void kernfs_dop_release(struct dentry *dentry) 482 { 483 kernfs_put(dentry->d_fsdata); 484 } 485 486 const struct dentry_operations kernfs_dops = { 487 .d_revalidate = kernfs_dop_revalidate, 488 .d_release = kernfs_dop_release, 489 }; 490 491 /** 492 * kernfs_node_from_dentry - determine kernfs_node associated with a dentry 493 * @dentry: the dentry in question 494 * 495 * Return the kernfs_node associated with @dentry. If @dentry is not a 496 * kernfs one, %NULL is returned. 497 * 498 * While the returned kernfs_node will stay accessible as long as @dentry 499 * is accessible, the returned node can be in any state and the caller is 500 * fully responsible for determining what's accessible. 501 */ 502 struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) 503 { 504 if (dentry->d_sb->s_op == &kernfs_sops) 505 return dentry->d_fsdata; 506 return NULL; 507 } 508 509 static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, 510 const char *name, umode_t mode, 511 unsigned flags) 512 { 513 char *dup_name = NULL; 514 struct kernfs_node *kn; 515 int ret; 516 517 if (!(flags & KERNFS_STATIC_NAME)) { 518 name = dup_name = kstrdup(name, GFP_KERNEL); 519 if (!name) 520 return NULL; 521 } 522 523 kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); 524 if (!kn) 525 goto err_out1; 526 527 ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL); 528 if (ret < 0) 529 goto err_out2; 530 kn->ino = ret; 531 532 atomic_set(&kn->count, 1); 533 atomic_set(&kn->active, KN_DEACTIVATED_BIAS); 534 RB_CLEAR_NODE(&kn->rb); 535 536 kn->name = name; 537 kn->mode = mode; 538 kn->flags = flags; 539 540 return kn; 541 542 err_out2: 543 kmem_cache_free(kernfs_node_cache, kn); 544 err_out1: 545 kfree(dup_name); 546 return NULL; 547 } 548 549 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, 550 const char *name, umode_t mode, 551 unsigned flags) 552 { 553 struct kernfs_node *kn; 554 555 kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags); 556 if (kn) { 557 kernfs_get(parent); 558 kn->parent = parent; 559 } 560 return kn; 561 } 562 563 /** 564 * kernfs_add_one - add kernfs_node to parent without warning 565 * @kn: kernfs_node to be added 566 * 567 * The caller must already have initialized @kn->parent. This 568 * function increments nlink of the parent's inode if @kn is a 569 * directory and link into the children list of the parent. 570 * 571 * RETURNS: 572 * 0 on success, -EEXIST if entry with the given name already 573 * exists. 574 */ 575 int kernfs_add_one(struct kernfs_node *kn) 576 { 577 struct kernfs_node *parent = kn->parent; 578 struct kernfs_iattrs *ps_iattr; 579 bool has_ns; 580 int ret; 581 582 mutex_lock(&kernfs_mutex); 583 584 ret = -EINVAL; 585 has_ns = kernfs_ns_enabled(parent); 586 if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", 587 has_ns ? "required" : "invalid", parent->name, kn->name)) 588 goto out_unlock; 589 590 if (kernfs_type(parent) != KERNFS_DIR) 591 goto out_unlock; 592 593 ret = -ENOENT; 594 if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) 595 goto out_unlock; 596 597 kn->hash = kernfs_name_hash(kn->name, kn->ns); 598 599 ret = kernfs_link_sibling(kn); 600 if (ret) 601 goto out_unlock; 602 603 /* Update timestamps on the parent */ 604 ps_iattr = parent->iattr; 605 if (ps_iattr) { 606 struct iattr *ps_iattrs = &ps_iattr->ia_iattr; 607 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; 608 } 609 610 mutex_unlock(&kernfs_mutex); 611 612 /* 613 * Activate the new node unless CREATE_DEACTIVATED is requested. 614 * If not activated here, the kernfs user is responsible for 615 * activating the node with kernfs_activate(). A node which hasn't 616 * been activated is not visible to userland and its removal won't 617 * trigger deactivation. 618 */ 619 if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) 620 kernfs_activate(kn); 621 return 0; 622 623 out_unlock: 624 mutex_unlock(&kernfs_mutex); 625 return ret; 626 } 627 628 /** 629 * kernfs_find_ns - find kernfs_node with the given name 630 * @parent: kernfs_node to search under 631 * @name: name to look for 632 * @ns: the namespace tag to use 633 * 634 * Look for kernfs_node with name @name under @parent. Returns pointer to 635 * the found kernfs_node on success, %NULL on failure. 636 */ 637 static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, 638 const unsigned char *name, 639 const void *ns) 640 { 641 struct rb_node *node = parent->dir.children.rb_node; 642 bool has_ns = kernfs_ns_enabled(parent); 643 unsigned int hash; 644 645 lockdep_assert_held(&kernfs_mutex); 646 647 if (has_ns != (bool)ns) { 648 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", 649 has_ns ? "required" : "invalid", parent->name, name); 650 return NULL; 651 } 652 653 hash = kernfs_name_hash(name, ns); 654 while (node) { 655 struct kernfs_node *kn; 656 int result; 657 658 kn = rb_to_kn(node); 659 result = kernfs_name_compare(hash, name, ns, kn); 660 if (result < 0) 661 node = node->rb_left; 662 else if (result > 0) 663 node = node->rb_right; 664 else 665 return kn; 666 } 667 return NULL; 668 } 669 670 /** 671 * kernfs_find_and_get_ns - find and get kernfs_node with the given name 672 * @parent: kernfs_node to search under 673 * @name: name to look for 674 * @ns: the namespace tag to use 675 * 676 * Look for kernfs_node with name @name under @parent and get a reference 677 * if found. This function may sleep and returns pointer to the found 678 * kernfs_node on success, %NULL on failure. 679 */ 680 struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, 681 const char *name, const void *ns) 682 { 683 struct kernfs_node *kn; 684 685 mutex_lock(&kernfs_mutex); 686 kn = kernfs_find_ns(parent, name, ns); 687 kernfs_get(kn); 688 mutex_unlock(&kernfs_mutex); 689 690 return kn; 691 } 692 EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); 693 694 /** 695 * kernfs_create_root - create a new kernfs hierarchy 696 * @scops: optional syscall operations for the hierarchy 697 * @flags: KERNFS_ROOT_* flags 698 * @priv: opaque data associated with the new directory 699 * 700 * Returns the root of the new hierarchy on success, ERR_PTR() value on 701 * failure. 702 */ 703 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, 704 unsigned int flags, void *priv) 705 { 706 struct kernfs_root *root; 707 struct kernfs_node *kn; 708 709 root = kzalloc(sizeof(*root), GFP_KERNEL); 710 if (!root) 711 return ERR_PTR(-ENOMEM); 712 713 ida_init(&root->ino_ida); 714 715 kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, 716 KERNFS_DIR); 717 if (!kn) { 718 ida_destroy(&root->ino_ida); 719 kfree(root); 720 return ERR_PTR(-ENOMEM); 721 } 722 723 kn->priv = priv; 724 kn->dir.root = root; 725 726 root->syscall_ops = scops; 727 root->flags = flags; 728 root->kn = kn; 729 init_waitqueue_head(&root->deactivate_waitq); 730 731 if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) 732 kernfs_activate(kn); 733 734 return root; 735 } 736 737 /** 738 * kernfs_destroy_root - destroy a kernfs hierarchy 739 * @root: root of the hierarchy to destroy 740 * 741 * Destroy the hierarchy anchored at @root by removing all existing 742 * directories and destroying @root. 743 */ 744 void kernfs_destroy_root(struct kernfs_root *root) 745 { 746 kernfs_remove(root->kn); /* will also free @root */ 747 } 748 749 /** 750 * kernfs_create_dir_ns - create a directory 751 * @parent: parent in which to create a new directory 752 * @name: name of the new directory 753 * @mode: mode of the new directory 754 * @priv: opaque data associated with the new directory 755 * @ns: optional namespace tag of the directory 756 * 757 * Returns the created node on success, ERR_PTR() value on failure. 758 */ 759 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, 760 const char *name, umode_t mode, 761 void *priv, const void *ns) 762 { 763 struct kernfs_node *kn; 764 int rc; 765 766 /* allocate */ 767 kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR); 768 if (!kn) 769 return ERR_PTR(-ENOMEM); 770 771 kn->dir.root = parent->dir.root; 772 kn->ns = ns; 773 kn->priv = priv; 774 775 /* link in */ 776 rc = kernfs_add_one(kn); 777 if (!rc) 778 return kn; 779 780 kernfs_put(kn); 781 return ERR_PTR(rc); 782 } 783 784 static struct dentry *kernfs_iop_lookup(struct inode *dir, 785 struct dentry *dentry, 786 unsigned int flags) 787 { 788 struct dentry *ret; 789 struct kernfs_node *parent = dentry->d_parent->d_fsdata; 790 struct kernfs_node *kn; 791 struct inode *inode; 792 const void *ns = NULL; 793 794 mutex_lock(&kernfs_mutex); 795 796 if (kernfs_ns_enabled(parent)) 797 ns = kernfs_info(dir->i_sb)->ns; 798 799 kn = kernfs_find_ns(parent, dentry->d_name.name, ns); 800 801 /* no such entry */ 802 if (!kn || !kernfs_active(kn)) { 803 ret = NULL; 804 goto out_unlock; 805 } 806 kernfs_get(kn); 807 dentry->d_fsdata = kn; 808 809 /* attach dentry and inode */ 810 inode = kernfs_get_inode(dir->i_sb, kn); 811 if (!inode) { 812 ret = ERR_PTR(-ENOMEM); 813 goto out_unlock; 814 } 815 816 /* instantiate and hash dentry */ 817 ret = d_materialise_unique(dentry, inode); 818 out_unlock: 819 mutex_unlock(&kernfs_mutex); 820 return ret; 821 } 822 823 static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, 824 umode_t mode) 825 { 826 struct kernfs_node *parent = dir->i_private; 827 struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; 828 int ret; 829 830 if (!scops || !scops->mkdir) 831 return -EPERM; 832 833 if (!kernfs_get_active(parent)) 834 return -ENODEV; 835 836 ret = scops->mkdir(parent, dentry->d_name.name, mode); 837 838 kernfs_put_active(parent); 839 return ret; 840 } 841 842 static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) 843 { 844 struct kernfs_node *kn = dentry->d_fsdata; 845 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; 846 int ret; 847 848 if (!scops || !scops->rmdir) 849 return -EPERM; 850 851 if (!kernfs_get_active(kn)) 852 return -ENODEV; 853 854 ret = scops->rmdir(kn); 855 856 kernfs_put_active(kn); 857 return ret; 858 } 859 860 static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, 861 struct inode *new_dir, struct dentry *new_dentry) 862 { 863 struct kernfs_node *kn = old_dentry->d_fsdata; 864 struct kernfs_node *new_parent = new_dir->i_private; 865 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; 866 int ret; 867 868 if (!scops || !scops->rename) 869 return -EPERM; 870 871 if (!kernfs_get_active(kn)) 872 return -ENODEV; 873 874 if (!kernfs_get_active(new_parent)) { 875 kernfs_put_active(kn); 876 return -ENODEV; 877 } 878 879 ret = scops->rename(kn, new_parent, new_dentry->d_name.name); 880 881 kernfs_put_active(new_parent); 882 kernfs_put_active(kn); 883 return ret; 884 } 885 886 const struct inode_operations kernfs_dir_iops = { 887 .lookup = kernfs_iop_lookup, 888 .permission = kernfs_iop_permission, 889 .setattr = kernfs_iop_setattr, 890 .getattr = kernfs_iop_getattr, 891 .setxattr = kernfs_iop_setxattr, 892 .removexattr = kernfs_iop_removexattr, 893 .getxattr = kernfs_iop_getxattr, 894 .listxattr = kernfs_iop_listxattr, 895 896 .mkdir = kernfs_iop_mkdir, 897 .rmdir = kernfs_iop_rmdir, 898 .rename = kernfs_iop_rename, 899 }; 900 901 static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos) 902 { 903 struct kernfs_node *last; 904 905 while (true) { 906 struct rb_node *rbn; 907 908 last = pos; 909 910 if (kernfs_type(pos) != KERNFS_DIR) 911 break; 912 913 rbn = rb_first(&pos->dir.children); 914 if (!rbn) 915 break; 916 917 pos = rb_to_kn(rbn); 918 } 919 920 return last; 921 } 922 923 /** 924 * kernfs_next_descendant_post - find the next descendant for post-order walk 925 * @pos: the current position (%NULL to initiate traversal) 926 * @root: kernfs_node whose descendants to walk 927 * 928 * Find the next descendant to visit for post-order traversal of @root's 929 * descendants. @root is included in the iteration and the last node to be 930 * visited. 931 */ 932 static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, 933 struct kernfs_node *root) 934 { 935 struct rb_node *rbn; 936 937 lockdep_assert_held(&kernfs_mutex); 938 939 /* if first iteration, visit leftmost descendant which may be root */ 940 if (!pos) 941 return kernfs_leftmost_descendant(root); 942 943 /* if we visited @root, we're done */ 944 if (pos == root) 945 return NULL; 946 947 /* if there's an unvisited sibling, visit its leftmost descendant */ 948 rbn = rb_next(&pos->rb); 949 if (rbn) 950 return kernfs_leftmost_descendant(rb_to_kn(rbn)); 951 952 /* no sibling left, visit parent */ 953 return pos->parent; 954 } 955 956 /** 957 * kernfs_activate - activate a node which started deactivated 958 * @kn: kernfs_node whose subtree is to be activated 959 * 960 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node 961 * needs to be explicitly activated. A node which hasn't been activated 962 * isn't visible to userland and deactivation is skipped during its 963 * removal. This is useful to construct atomic init sequences where 964 * creation of multiple nodes should either succeed or fail atomically. 965 * 966 * The caller is responsible for ensuring that this function is not called 967 * after kernfs_remove*() is invoked on @kn. 968 */ 969 void kernfs_activate(struct kernfs_node *kn) 970 { 971 struct kernfs_node *pos; 972 973 mutex_lock(&kernfs_mutex); 974 975 pos = NULL; 976 while ((pos = kernfs_next_descendant_post(pos, kn))) { 977 if (!pos || (pos->flags & KERNFS_ACTIVATED)) 978 continue; 979 980 WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb)); 981 WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS); 982 983 atomic_sub(KN_DEACTIVATED_BIAS, &pos->active); 984 pos->flags |= KERNFS_ACTIVATED; 985 } 986 987 mutex_unlock(&kernfs_mutex); 988 } 989 990 static void __kernfs_remove(struct kernfs_node *kn) 991 { 992 struct kernfs_node *pos; 993 994 lockdep_assert_held(&kernfs_mutex); 995 996 /* 997 * Short-circuit if non-root @kn has already finished removal. 998 * This is for kernfs_remove_self() which plays with active ref 999 * after removal. 1000 */ 1001 if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb))) 1002 return; 1003 1004 pr_debug("kernfs %s: removing\n", kn->name); 1005 1006 /* prevent any new usage under @kn by deactivating all nodes */ 1007 pos = NULL; 1008 while ((pos = kernfs_next_descendant_post(pos, kn))) 1009 if (kernfs_active(pos)) 1010 atomic_add(KN_DEACTIVATED_BIAS, &pos->active); 1011 1012 /* deactivate and unlink the subtree node-by-node */ 1013 do { 1014 pos = kernfs_leftmost_descendant(kn); 1015 1016 /* 1017 * kernfs_drain() drops kernfs_mutex temporarily and @pos's 1018 * base ref could have been put by someone else by the time 1019 * the function returns. Make sure it doesn't go away 1020 * underneath us. 1021 */ 1022 kernfs_get(pos); 1023 1024 /* 1025 * Drain iff @kn was activated. This avoids draining and 1026 * its lockdep annotations for nodes which have never been 1027 * activated and allows embedding kernfs_remove() in create 1028 * error paths without worrying about draining. 1029 */ 1030 if (kn->flags & KERNFS_ACTIVATED) 1031 kernfs_drain(pos); 1032 else 1033 WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); 1034 1035 /* 1036 * kernfs_unlink_sibling() succeeds once per node. Use it 1037 * to decide who's responsible for cleanups. 1038 */ 1039 if (!pos->parent || kernfs_unlink_sibling(pos)) { 1040 struct kernfs_iattrs *ps_iattr = 1041 pos->parent ? pos->parent->iattr : NULL; 1042 1043 /* update timestamps on the parent */ 1044 if (ps_iattr) { 1045 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; 1046 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; 1047 } 1048 1049 kernfs_put(pos); 1050 } 1051 1052 kernfs_put(pos); 1053 } while (pos != kn); 1054 } 1055 1056 /** 1057 * kernfs_remove - remove a kernfs_node recursively 1058 * @kn: the kernfs_node to remove 1059 * 1060 * Remove @kn along with all its subdirectories and files. 1061 */ 1062 void kernfs_remove(struct kernfs_node *kn) 1063 { 1064 mutex_lock(&kernfs_mutex); 1065 __kernfs_remove(kn); 1066 mutex_unlock(&kernfs_mutex); 1067 } 1068 1069 /** 1070 * kernfs_break_active_protection - break out of active protection 1071 * @kn: the self kernfs_node 1072 * 1073 * The caller must be running off of a kernfs operation which is invoked 1074 * with an active reference - e.g. one of kernfs_ops. Each invocation of 1075 * this function must also be matched with an invocation of 1076 * kernfs_unbreak_active_protection(). 1077 * 1078 * This function releases the active reference of @kn the caller is 1079 * holding. Once this function is called, @kn may be removed at any point 1080 * and the caller is solely responsible for ensuring that the objects it 1081 * dereferences are accessible. 1082 */ 1083 void kernfs_break_active_protection(struct kernfs_node *kn) 1084 { 1085 /* 1086 * Take out ourself out of the active ref dependency chain. If 1087 * we're called without an active ref, lockdep will complain. 1088 */ 1089 kernfs_put_active(kn); 1090 } 1091 1092 /** 1093 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection() 1094 * @kn: the self kernfs_node 1095 * 1096 * If kernfs_break_active_protection() was called, this function must be 1097 * invoked before finishing the kernfs operation. Note that while this 1098 * function restores the active reference, it doesn't and can't actually 1099 * restore the active protection - @kn may already or be in the process of 1100 * being removed. Once kernfs_break_active_protection() is invoked, that 1101 * protection is irreversibly gone for the kernfs operation instance. 1102 * 1103 * While this function may be called at any point after 1104 * kernfs_break_active_protection() is invoked, its most useful location 1105 * would be right before the enclosing kernfs operation returns. 1106 */ 1107 void kernfs_unbreak_active_protection(struct kernfs_node *kn) 1108 { 1109 /* 1110 * @kn->active could be in any state; however, the increment we do 1111 * here will be undone as soon as the enclosing kernfs operation 1112 * finishes and this temporary bump can't break anything. If @kn 1113 * is alive, nothing changes. If @kn is being deactivated, the 1114 * soon-to-follow put will either finish deactivation or restore 1115 * deactivated state. If @kn is already removed, the temporary 1116 * bump is guaranteed to be gone before @kn is released. 1117 */ 1118 atomic_inc(&kn->active); 1119 if (kernfs_lockdep(kn)) 1120 rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_); 1121 } 1122 1123 /** 1124 * kernfs_remove_self - remove a kernfs_node from its own method 1125 * @kn: the self kernfs_node to remove 1126 * 1127 * The caller must be running off of a kernfs operation which is invoked 1128 * with an active reference - e.g. one of kernfs_ops. This can be used to 1129 * implement a file operation which deletes itself. 1130 * 1131 * For example, the "delete" file for a sysfs device directory can be 1132 * implemented by invoking kernfs_remove_self() on the "delete" file 1133 * itself. This function breaks the circular dependency of trying to 1134 * deactivate self while holding an active ref itself. It isn't necessary 1135 * to modify the usual removal path to use kernfs_remove_self(). The 1136 * "delete" implementation can simply invoke kernfs_remove_self() on self 1137 * before proceeding with the usual removal path. kernfs will ignore later 1138 * kernfs_remove() on self. 1139 * 1140 * kernfs_remove_self() can be called multiple times concurrently on the 1141 * same kernfs_node. Only the first one actually performs removal and 1142 * returns %true. All others will wait until the kernfs operation which 1143 * won self-removal finishes and return %false. Note that the losers wait 1144 * for the completion of not only the winning kernfs_remove_self() but also 1145 * the whole kernfs_ops which won the arbitration. This can be used to 1146 * guarantee, for example, all concurrent writes to a "delete" file to 1147 * finish only after the whole operation is complete. 1148 */ 1149 bool kernfs_remove_self(struct kernfs_node *kn) 1150 { 1151 bool ret; 1152 1153 mutex_lock(&kernfs_mutex); 1154 kernfs_break_active_protection(kn); 1155 1156 /* 1157 * SUICIDAL is used to arbitrate among competing invocations. Only 1158 * the first one will actually perform removal. When the removal 1159 * is complete, SUICIDED is set and the active ref is restored 1160 * while holding kernfs_mutex. The ones which lost arbitration 1161 * waits for SUICDED && drained which can happen only after the 1162 * enclosing kernfs operation which executed the winning instance 1163 * of kernfs_remove_self() finished. 1164 */ 1165 if (!(kn->flags & KERNFS_SUICIDAL)) { 1166 kn->flags |= KERNFS_SUICIDAL; 1167 __kernfs_remove(kn); 1168 kn->flags |= KERNFS_SUICIDED; 1169 ret = true; 1170 } else { 1171 wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq; 1172 DEFINE_WAIT(wait); 1173 1174 while (true) { 1175 prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE); 1176 1177 if ((kn->flags & KERNFS_SUICIDED) && 1178 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS) 1179 break; 1180 1181 mutex_unlock(&kernfs_mutex); 1182 schedule(); 1183 mutex_lock(&kernfs_mutex); 1184 } 1185 finish_wait(waitq, &wait); 1186 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb)); 1187 ret = false; 1188 } 1189 1190 /* 1191 * This must be done while holding kernfs_mutex; otherwise, waiting 1192 * for SUICIDED && deactivated could finish prematurely. 1193 */ 1194 kernfs_unbreak_active_protection(kn); 1195 1196 mutex_unlock(&kernfs_mutex); 1197 return ret; 1198 } 1199 1200 /** 1201 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it 1202 * @parent: parent of the target 1203 * @name: name of the kernfs_node to remove 1204 * @ns: namespace tag of the kernfs_node to remove 1205 * 1206 * Look for the kernfs_node with @name and @ns under @parent and remove it. 1207 * Returns 0 on success, -ENOENT if such entry doesn't exist. 1208 */ 1209 int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, 1210 const void *ns) 1211 { 1212 struct kernfs_node *kn; 1213 1214 if (!parent) { 1215 WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n", 1216 name); 1217 return -ENOENT; 1218 } 1219 1220 mutex_lock(&kernfs_mutex); 1221 1222 kn = kernfs_find_ns(parent, name, ns); 1223 if (kn) 1224 __kernfs_remove(kn); 1225 1226 mutex_unlock(&kernfs_mutex); 1227 1228 if (kn) 1229 return 0; 1230 else 1231 return -ENOENT; 1232 } 1233 1234 /** 1235 * kernfs_rename_ns - move and rename a kernfs_node 1236 * @kn: target node 1237 * @new_parent: new parent to put @sd under 1238 * @new_name: new name 1239 * @new_ns: new namespace tag 1240 */ 1241 int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, 1242 const char *new_name, const void *new_ns) 1243 { 1244 struct kernfs_node *old_parent; 1245 const char *old_name = NULL; 1246 int error; 1247 1248 /* can't move or rename root */ 1249 if (!kn->parent) 1250 return -EINVAL; 1251 1252 mutex_lock(&kernfs_mutex); 1253 1254 error = -ENOENT; 1255 if (!kernfs_active(kn) || !kernfs_active(new_parent)) 1256 goto out; 1257 1258 error = 0; 1259 if ((kn->parent == new_parent) && (kn->ns == new_ns) && 1260 (strcmp(kn->name, new_name) == 0)) 1261 goto out; /* nothing to rename */ 1262 1263 error = -EEXIST; 1264 if (kernfs_find_ns(new_parent, new_name, new_ns)) 1265 goto out; 1266 1267 /* rename kernfs_node */ 1268 if (strcmp(kn->name, new_name) != 0) { 1269 error = -ENOMEM; 1270 new_name = kstrdup(new_name, GFP_KERNEL); 1271 if (!new_name) 1272 goto out; 1273 } else { 1274 new_name = NULL; 1275 } 1276 1277 /* 1278 * Move to the appropriate place in the appropriate directories rbtree. 1279 */ 1280 kernfs_unlink_sibling(kn); 1281 kernfs_get(new_parent); 1282 1283 /* rename_lock protects ->parent and ->name accessors */ 1284 spin_lock_irq(&kernfs_rename_lock); 1285 1286 old_parent = kn->parent; 1287 kn->parent = new_parent; 1288 1289 kn->ns = new_ns; 1290 if (new_name) { 1291 if (!(kn->flags & KERNFS_STATIC_NAME)) 1292 old_name = kn->name; 1293 kn->flags &= ~KERNFS_STATIC_NAME; 1294 kn->name = new_name; 1295 } 1296 1297 spin_unlock_irq(&kernfs_rename_lock); 1298 1299 kn->hash = kernfs_name_hash(kn->name, kn->ns); 1300 kernfs_link_sibling(kn); 1301 1302 kernfs_put(old_parent); 1303 kfree(old_name); 1304 1305 error = 0; 1306 out: 1307 mutex_unlock(&kernfs_mutex); 1308 return error; 1309 } 1310 1311 /* Relationship between s_mode and the DT_xxx types */ 1312 static inline unsigned char dt_type(struct kernfs_node *kn) 1313 { 1314 return (kn->mode >> 12) & 15; 1315 } 1316 1317 static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) 1318 { 1319 kernfs_put(filp->private_data); 1320 return 0; 1321 } 1322 1323 static struct kernfs_node *kernfs_dir_pos(const void *ns, 1324 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) 1325 { 1326 if (pos) { 1327 int valid = kernfs_active(pos) && 1328 pos->parent == parent && hash == pos->hash; 1329 kernfs_put(pos); 1330 if (!valid) 1331 pos = NULL; 1332 } 1333 if (!pos && (hash > 1) && (hash < INT_MAX)) { 1334 struct rb_node *node = parent->dir.children.rb_node; 1335 while (node) { 1336 pos = rb_to_kn(node); 1337 1338 if (hash < pos->hash) 1339 node = node->rb_left; 1340 else if (hash > pos->hash) 1341 node = node->rb_right; 1342 else 1343 break; 1344 } 1345 } 1346 /* Skip over entries which are dying/dead or in the wrong namespace */ 1347 while (pos && (!kernfs_active(pos) || pos->ns != ns)) { 1348 struct rb_node *node = rb_next(&pos->rb); 1349 if (!node) 1350 pos = NULL; 1351 else 1352 pos = rb_to_kn(node); 1353 } 1354 return pos; 1355 } 1356 1357 static struct kernfs_node *kernfs_dir_next_pos(const void *ns, 1358 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) 1359 { 1360 pos = kernfs_dir_pos(ns, parent, ino, pos); 1361 if (pos) { 1362 do { 1363 struct rb_node *node = rb_next(&pos->rb); 1364 if (!node) 1365 pos = NULL; 1366 else 1367 pos = rb_to_kn(node); 1368 } while (pos && (!kernfs_active(pos) || pos->ns != ns)); 1369 } 1370 return pos; 1371 } 1372 1373 static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) 1374 { 1375 struct dentry *dentry = file->f_path.dentry; 1376 struct kernfs_node *parent = dentry->d_fsdata; 1377 struct kernfs_node *pos = file->private_data; 1378 const void *ns = NULL; 1379 1380 if (!dir_emit_dots(file, ctx)) 1381 return 0; 1382 mutex_lock(&kernfs_mutex); 1383 1384 if (kernfs_ns_enabled(parent)) 1385 ns = kernfs_info(dentry->d_sb)->ns; 1386 1387 for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos); 1388 pos; 1389 pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) { 1390 const char *name = pos->name; 1391 unsigned int type = dt_type(pos); 1392 int len = strlen(name); 1393 ino_t ino = pos->ino; 1394 1395 ctx->pos = pos->hash; 1396 file->private_data = pos; 1397 kernfs_get(pos); 1398 1399 mutex_unlock(&kernfs_mutex); 1400 if (!dir_emit(ctx, name, len, ino, type)) 1401 return 0; 1402 mutex_lock(&kernfs_mutex); 1403 } 1404 mutex_unlock(&kernfs_mutex); 1405 file->private_data = NULL; 1406 ctx->pos = INT_MAX; 1407 return 0; 1408 } 1409 1410 static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset, 1411 int whence) 1412 { 1413 struct inode *inode = file_inode(file); 1414 loff_t ret; 1415 1416 mutex_lock(&inode->i_mutex); 1417 ret = generic_file_llseek(file, offset, whence); 1418 mutex_unlock(&inode->i_mutex); 1419 1420 return ret; 1421 } 1422 1423 const struct file_operations kernfs_dir_fops = { 1424 .read = generic_read_dir, 1425 .iterate = kernfs_fop_readdir, 1426 .release = kernfs_dir_fop_release, 1427 .llseek = kernfs_dir_fop_llseek, 1428 }; 1429