1 /* 2 * fs/kernfs/dir.c - kernfs directory implementation 3 * 4 * Copyright (c) 2001-3 Patrick Mochel 5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> 7 * 8 * This file is released under the GPLv2. 9 */ 10 11 #include <linux/sched.h> 12 #include <linux/fs.h> 13 #include <linux/namei.h> 14 #include <linux/idr.h> 15 #include <linux/slab.h> 16 #include <linux/security.h> 17 #include <linux/hash.h> 18 19 #include "kernfs-internal.h" 20 21 DEFINE_MUTEX(kernfs_mutex); 22 static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ 23 static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ 24 25 #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) 26 27 static bool kernfs_active(struct kernfs_node *kn) 28 { 29 lockdep_assert_held(&kernfs_mutex); 30 return atomic_read(&kn->active) >= 0; 31 } 32 33 static bool kernfs_lockdep(struct kernfs_node *kn) 34 { 35 #ifdef CONFIG_DEBUG_LOCK_ALLOC 36 return kn->flags & KERNFS_LOCKDEP; 37 #else 38 return false; 39 #endif 40 } 41 42 static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) 43 { 44 return strlcpy(buf, kn->parent ? kn->name : "/", buflen); 45 } 46 47 static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf, 48 size_t buflen) 49 { 50 char *p = buf + buflen; 51 int len; 52 53 *--p = '\0'; 54 55 do { 56 len = strlen(kn->name); 57 if (p - buf < len + 1) { 58 buf[0] = '\0'; 59 p = NULL; 60 break; 61 } 62 p -= len; 63 memcpy(p, kn->name, len); 64 *--p = '/'; 65 kn = kn->parent; 66 } while (kn && kn->parent); 67 68 return p; 69 } 70 71 /** 72 * kernfs_name - obtain the name of a given node 73 * @kn: kernfs_node of interest 74 * @buf: buffer to copy @kn's name into 75 * @buflen: size of @buf 76 * 77 * Copies the name of @kn into @buf of @buflen bytes. The behavior is 78 * similar to strlcpy(). It returns the length of @kn's name and if @buf 79 * isn't long enough, it's filled upto @buflen-1 and nul terminated. 80 * 81 * This function can be called from any context. 82 */ 83 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) 84 { 85 unsigned long flags; 86 int ret; 87 88 spin_lock_irqsave(&kernfs_rename_lock, flags); 89 ret = kernfs_name_locked(kn, buf, buflen); 90 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 91 return ret; 92 } 93 94 /** 95 * kernfs_path - build full path of a given node 96 * @kn: kernfs_node of interest 97 * @buf: buffer to copy @kn's name into 98 * @buflen: size of @buf 99 * 100 * Builds and returns the full path of @kn in @buf of @buflen bytes. The 101 * path is built from the end of @buf so the returned pointer usually 102 * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated 103 * and %NULL is returned. 104 */ 105 char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) 106 { 107 unsigned long flags; 108 char *p; 109 110 spin_lock_irqsave(&kernfs_rename_lock, flags); 111 p = kernfs_path_locked(kn, buf, buflen); 112 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 113 return p; 114 } 115 EXPORT_SYMBOL_GPL(kernfs_path); 116 117 /** 118 * pr_cont_kernfs_name - pr_cont name of a kernfs_node 119 * @kn: kernfs_node of interest 120 * 121 * This function can be called from any context. 122 */ 123 void pr_cont_kernfs_name(struct kernfs_node *kn) 124 { 125 unsigned long flags; 126 127 spin_lock_irqsave(&kernfs_rename_lock, flags); 128 129 kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); 130 pr_cont("%s", kernfs_pr_cont_buf); 131 132 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 133 } 134 135 /** 136 * pr_cont_kernfs_path - pr_cont path of a kernfs_node 137 * @kn: kernfs_node of interest 138 * 139 * This function can be called from any context. 140 */ 141 void pr_cont_kernfs_path(struct kernfs_node *kn) 142 { 143 unsigned long flags; 144 char *p; 145 146 spin_lock_irqsave(&kernfs_rename_lock, flags); 147 148 p = kernfs_path_locked(kn, kernfs_pr_cont_buf, 149 sizeof(kernfs_pr_cont_buf)); 150 if (p) 151 pr_cont("%s", p); 152 else 153 pr_cont("<name too long>"); 154 155 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 156 } 157 158 /** 159 * kernfs_get_parent - determine the parent node and pin it 160 * @kn: kernfs_node of interest 161 * 162 * Determines @kn's parent, pins and returns it. This function can be 163 * called from any context. 164 */ 165 struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) 166 { 167 struct kernfs_node *parent; 168 unsigned long flags; 169 170 spin_lock_irqsave(&kernfs_rename_lock, flags); 171 parent = kn->parent; 172 kernfs_get(parent); 173 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 174 175 return parent; 176 } 177 178 /** 179 * kernfs_name_hash 180 * @name: Null terminated string to hash 181 * @ns: Namespace tag to hash 182 * 183 * Returns 31 bit hash of ns + name (so it fits in an off_t ) 184 */ 185 static unsigned int kernfs_name_hash(const char *name, const void *ns) 186 { 187 unsigned long hash = init_name_hash(); 188 unsigned int len = strlen(name); 189 while (len--) 190 hash = partial_name_hash(*name++, hash); 191 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); 192 hash &= 0x7fffffffU; 193 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ 194 if (hash < 2) 195 hash += 2; 196 if (hash >= INT_MAX) 197 hash = INT_MAX - 1; 198 return hash; 199 } 200 201 static int kernfs_name_compare(unsigned int hash, const char *name, 202 const void *ns, const struct kernfs_node *kn) 203 { 204 if (hash != kn->hash) 205 return hash - kn->hash; 206 if (ns != kn->ns) 207 return ns - kn->ns; 208 return strcmp(name, kn->name); 209 } 210 211 static int kernfs_sd_compare(const struct kernfs_node *left, 212 const struct kernfs_node *right) 213 { 214 return kernfs_name_compare(left->hash, left->name, left->ns, right); 215 } 216 217 /** 218 * kernfs_link_sibling - link kernfs_node into sibling rbtree 219 * @kn: kernfs_node of interest 220 * 221 * Link @kn into its sibling rbtree which starts from 222 * @kn->parent->dir.children. 223 * 224 * Locking: 225 * mutex_lock(kernfs_mutex) 226 * 227 * RETURNS: 228 * 0 on susccess -EEXIST on failure. 229 */ 230 static int kernfs_link_sibling(struct kernfs_node *kn) 231 { 232 struct rb_node **node = &kn->parent->dir.children.rb_node; 233 struct rb_node *parent = NULL; 234 235 while (*node) { 236 struct kernfs_node *pos; 237 int result; 238 239 pos = rb_to_kn(*node); 240 parent = *node; 241 result = kernfs_sd_compare(kn, pos); 242 if (result < 0) 243 node = &pos->rb.rb_left; 244 else if (result > 0) 245 node = &pos->rb.rb_right; 246 else 247 return -EEXIST; 248 } 249 250 /* add new node and rebalance the tree */ 251 rb_link_node(&kn->rb, parent, node); 252 rb_insert_color(&kn->rb, &kn->parent->dir.children); 253 254 /* successfully added, account subdir number */ 255 if (kernfs_type(kn) == KERNFS_DIR) 256 kn->parent->dir.subdirs++; 257 258 return 0; 259 } 260 261 /** 262 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree 263 * @kn: kernfs_node of interest 264 * 265 * Try to unlink @kn from its sibling rbtree which starts from 266 * kn->parent->dir.children. Returns %true if @kn was actually 267 * removed, %false if @kn wasn't on the rbtree. 268 * 269 * Locking: 270 * mutex_lock(kernfs_mutex) 271 */ 272 static bool kernfs_unlink_sibling(struct kernfs_node *kn) 273 { 274 if (RB_EMPTY_NODE(&kn->rb)) 275 return false; 276 277 if (kernfs_type(kn) == KERNFS_DIR) 278 kn->parent->dir.subdirs--; 279 280 rb_erase(&kn->rb, &kn->parent->dir.children); 281 RB_CLEAR_NODE(&kn->rb); 282 return true; 283 } 284 285 /** 286 * kernfs_get_active - get an active reference to kernfs_node 287 * @kn: kernfs_node to get an active reference to 288 * 289 * Get an active reference of @kn. This function is noop if @kn 290 * is NULL. 291 * 292 * RETURNS: 293 * Pointer to @kn on success, NULL on failure. 294 */ 295 struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) 296 { 297 if (unlikely(!kn)) 298 return NULL; 299 300 if (!atomic_inc_unless_negative(&kn->active)) 301 return NULL; 302 303 if (kernfs_lockdep(kn)) 304 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); 305 return kn; 306 } 307 308 /** 309 * kernfs_put_active - put an active reference to kernfs_node 310 * @kn: kernfs_node to put an active reference to 311 * 312 * Put an active reference to @kn. This function is noop if @kn 313 * is NULL. 314 */ 315 void kernfs_put_active(struct kernfs_node *kn) 316 { 317 struct kernfs_root *root = kernfs_root(kn); 318 int v; 319 320 if (unlikely(!kn)) 321 return; 322 323 if (kernfs_lockdep(kn)) 324 rwsem_release(&kn->dep_map, 1, _RET_IP_); 325 v = atomic_dec_return(&kn->active); 326 if (likely(v != KN_DEACTIVATED_BIAS)) 327 return; 328 329 wake_up_all(&root->deactivate_waitq); 330 } 331 332 /** 333 * kernfs_drain - drain kernfs_node 334 * @kn: kernfs_node to drain 335 * 336 * Drain existing usages and nuke all existing mmaps of @kn. Mutiple 337 * removers may invoke this function concurrently on @kn and all will 338 * return after draining is complete. 339 */ 340 static void kernfs_drain(struct kernfs_node *kn) 341 __releases(&kernfs_mutex) __acquires(&kernfs_mutex) 342 { 343 struct kernfs_root *root = kernfs_root(kn); 344 345 lockdep_assert_held(&kernfs_mutex); 346 WARN_ON_ONCE(kernfs_active(kn)); 347 348 mutex_unlock(&kernfs_mutex); 349 350 if (kernfs_lockdep(kn)) { 351 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); 352 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) 353 lock_contended(&kn->dep_map, _RET_IP_); 354 } 355 356 /* but everyone should wait for draining */ 357 wait_event(root->deactivate_waitq, 358 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); 359 360 if (kernfs_lockdep(kn)) { 361 lock_acquired(&kn->dep_map, _RET_IP_); 362 rwsem_release(&kn->dep_map, 1, _RET_IP_); 363 } 364 365 kernfs_unmap_bin_file(kn); 366 367 mutex_lock(&kernfs_mutex); 368 } 369 370 /** 371 * kernfs_get - get a reference count on a kernfs_node 372 * @kn: the target kernfs_node 373 */ 374 void kernfs_get(struct kernfs_node *kn) 375 { 376 if (kn) { 377 WARN_ON(!atomic_read(&kn->count)); 378 atomic_inc(&kn->count); 379 } 380 } 381 EXPORT_SYMBOL_GPL(kernfs_get); 382 383 /** 384 * kernfs_put - put a reference count on a kernfs_node 385 * @kn: the target kernfs_node 386 * 387 * Put a reference count of @kn and destroy it if it reached zero. 388 */ 389 void kernfs_put(struct kernfs_node *kn) 390 { 391 struct kernfs_node *parent; 392 struct kernfs_root *root; 393 394 if (!kn || !atomic_dec_and_test(&kn->count)) 395 return; 396 root = kernfs_root(kn); 397 repeat: 398 /* 399 * Moving/renaming is always done while holding reference. 400 * kn->parent won't change beneath us. 401 */ 402 parent = kn->parent; 403 404 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, 405 "kernfs_put: %s/%s: released with incorrect active_ref %d\n", 406 parent ? parent->name : "", kn->name, atomic_read(&kn->active)); 407 408 if (kernfs_type(kn) == KERNFS_LINK) 409 kernfs_put(kn->symlink.target_kn); 410 if (!(kn->flags & KERNFS_STATIC_NAME)) 411 kfree(kn->name); 412 if (kn->iattr) { 413 if (kn->iattr->ia_secdata) 414 security_release_secctx(kn->iattr->ia_secdata, 415 kn->iattr->ia_secdata_len); 416 simple_xattrs_free(&kn->iattr->xattrs); 417 } 418 kfree(kn->iattr); 419 ida_simple_remove(&root->ino_ida, kn->ino); 420 kmem_cache_free(kernfs_node_cache, kn); 421 422 kn = parent; 423 if (kn) { 424 if (atomic_dec_and_test(&kn->count)) 425 goto repeat; 426 } else { 427 /* just released the root kn, free @root too */ 428 ida_destroy(&root->ino_ida); 429 kfree(root); 430 } 431 } 432 EXPORT_SYMBOL_GPL(kernfs_put); 433 434 static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) 435 { 436 struct kernfs_node *kn; 437 438 if (flags & LOOKUP_RCU) 439 return -ECHILD; 440 441 /* Always perform fresh lookup for negatives */ 442 if (!dentry->d_inode) 443 goto out_bad_unlocked; 444 445 kn = dentry->d_fsdata; 446 mutex_lock(&kernfs_mutex); 447 448 /* The kernfs node has been deactivated */ 449 if (!kernfs_active(kn)) 450 goto out_bad; 451 452 /* The kernfs node has been moved? */ 453 if (dentry->d_parent->d_fsdata != kn->parent) 454 goto out_bad; 455 456 /* The kernfs node has been renamed */ 457 if (strcmp(dentry->d_name.name, kn->name) != 0) 458 goto out_bad; 459 460 /* The kernfs node has been moved to a different namespace */ 461 if (kn->parent && kernfs_ns_enabled(kn->parent) && 462 kernfs_info(dentry->d_sb)->ns != kn->ns) 463 goto out_bad; 464 465 mutex_unlock(&kernfs_mutex); 466 return 1; 467 out_bad: 468 mutex_unlock(&kernfs_mutex); 469 out_bad_unlocked: 470 return 0; 471 } 472 473 static void kernfs_dop_release(struct dentry *dentry) 474 { 475 kernfs_put(dentry->d_fsdata); 476 } 477 478 const struct dentry_operations kernfs_dops = { 479 .d_revalidate = kernfs_dop_revalidate, 480 .d_release = kernfs_dop_release, 481 }; 482 483 /** 484 * kernfs_node_from_dentry - determine kernfs_node associated with a dentry 485 * @dentry: the dentry in question 486 * 487 * Return the kernfs_node associated with @dentry. If @dentry is not a 488 * kernfs one, %NULL is returned. 489 * 490 * While the returned kernfs_node will stay accessible as long as @dentry 491 * is accessible, the returned node can be in any state and the caller is 492 * fully responsible for determining what's accessible. 493 */ 494 struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) 495 { 496 if (dentry->d_sb->s_op == &kernfs_sops) 497 return dentry->d_fsdata; 498 return NULL; 499 } 500 501 static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, 502 const char *name, umode_t mode, 503 unsigned flags) 504 { 505 char *dup_name = NULL; 506 struct kernfs_node *kn; 507 int ret; 508 509 if (!(flags & KERNFS_STATIC_NAME)) { 510 name = dup_name = kstrdup(name, GFP_KERNEL); 511 if (!name) 512 return NULL; 513 } 514 515 kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); 516 if (!kn) 517 goto err_out1; 518 519 ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL); 520 if (ret < 0) 521 goto err_out2; 522 kn->ino = ret; 523 524 atomic_set(&kn->count, 1); 525 atomic_set(&kn->active, KN_DEACTIVATED_BIAS); 526 RB_CLEAR_NODE(&kn->rb); 527 528 kn->name = name; 529 kn->mode = mode; 530 kn->flags = flags; 531 532 return kn; 533 534 err_out2: 535 kmem_cache_free(kernfs_node_cache, kn); 536 err_out1: 537 kfree(dup_name); 538 return NULL; 539 } 540 541 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, 542 const char *name, umode_t mode, 543 unsigned flags) 544 { 545 struct kernfs_node *kn; 546 547 kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags); 548 if (kn) { 549 kernfs_get(parent); 550 kn->parent = parent; 551 } 552 return kn; 553 } 554 555 /** 556 * kernfs_add_one - add kernfs_node to parent without warning 557 * @kn: kernfs_node to be added 558 * 559 * The caller must already have initialized @kn->parent. This 560 * function increments nlink of the parent's inode if @kn is a 561 * directory and link into the children list of the parent. 562 * 563 * RETURNS: 564 * 0 on success, -EEXIST if entry with the given name already 565 * exists. 566 */ 567 int kernfs_add_one(struct kernfs_node *kn) 568 { 569 struct kernfs_node *parent = kn->parent; 570 struct kernfs_iattrs *ps_iattr; 571 bool has_ns; 572 int ret; 573 574 mutex_lock(&kernfs_mutex); 575 576 ret = -EINVAL; 577 has_ns = kernfs_ns_enabled(parent); 578 if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", 579 has_ns ? "required" : "invalid", parent->name, kn->name)) 580 goto out_unlock; 581 582 if (kernfs_type(parent) != KERNFS_DIR) 583 goto out_unlock; 584 585 ret = -ENOENT; 586 if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) 587 goto out_unlock; 588 589 kn->hash = kernfs_name_hash(kn->name, kn->ns); 590 591 ret = kernfs_link_sibling(kn); 592 if (ret) 593 goto out_unlock; 594 595 /* Update timestamps on the parent */ 596 ps_iattr = parent->iattr; 597 if (ps_iattr) { 598 struct iattr *ps_iattrs = &ps_iattr->ia_iattr; 599 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; 600 } 601 602 mutex_unlock(&kernfs_mutex); 603 604 /* 605 * Activate the new node unless CREATE_DEACTIVATED is requested. 606 * If not activated here, the kernfs user is responsible for 607 * activating the node with kernfs_activate(). A node which hasn't 608 * been activated is not visible to userland and its removal won't 609 * trigger deactivation. 610 */ 611 if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) 612 kernfs_activate(kn); 613 return 0; 614 615 out_unlock: 616 mutex_unlock(&kernfs_mutex); 617 return ret; 618 } 619 620 /** 621 * kernfs_find_ns - find kernfs_node with the given name 622 * @parent: kernfs_node to search under 623 * @name: name to look for 624 * @ns: the namespace tag to use 625 * 626 * Look for kernfs_node with name @name under @parent. Returns pointer to 627 * the found kernfs_node on success, %NULL on failure. 628 */ 629 static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, 630 const unsigned char *name, 631 const void *ns) 632 { 633 struct rb_node *node = parent->dir.children.rb_node; 634 bool has_ns = kernfs_ns_enabled(parent); 635 unsigned int hash; 636 637 lockdep_assert_held(&kernfs_mutex); 638 639 if (has_ns != (bool)ns) { 640 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", 641 has_ns ? "required" : "invalid", parent->name, name); 642 return NULL; 643 } 644 645 hash = kernfs_name_hash(name, ns); 646 while (node) { 647 struct kernfs_node *kn; 648 int result; 649 650 kn = rb_to_kn(node); 651 result = kernfs_name_compare(hash, name, ns, kn); 652 if (result < 0) 653 node = node->rb_left; 654 else if (result > 0) 655 node = node->rb_right; 656 else 657 return kn; 658 } 659 return NULL; 660 } 661 662 /** 663 * kernfs_find_and_get_ns - find and get kernfs_node with the given name 664 * @parent: kernfs_node to search under 665 * @name: name to look for 666 * @ns: the namespace tag to use 667 * 668 * Look for kernfs_node with name @name under @parent and get a reference 669 * if found. This function may sleep and returns pointer to the found 670 * kernfs_node on success, %NULL on failure. 671 */ 672 struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, 673 const char *name, const void *ns) 674 { 675 struct kernfs_node *kn; 676 677 mutex_lock(&kernfs_mutex); 678 kn = kernfs_find_ns(parent, name, ns); 679 kernfs_get(kn); 680 mutex_unlock(&kernfs_mutex); 681 682 return kn; 683 } 684 EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); 685 686 /** 687 * kernfs_create_root - create a new kernfs hierarchy 688 * @scops: optional syscall operations for the hierarchy 689 * @flags: KERNFS_ROOT_* flags 690 * @priv: opaque data associated with the new directory 691 * 692 * Returns the root of the new hierarchy on success, ERR_PTR() value on 693 * failure. 694 */ 695 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, 696 unsigned int flags, void *priv) 697 { 698 struct kernfs_root *root; 699 struct kernfs_node *kn; 700 701 root = kzalloc(sizeof(*root), GFP_KERNEL); 702 if (!root) 703 return ERR_PTR(-ENOMEM); 704 705 ida_init(&root->ino_ida); 706 INIT_LIST_HEAD(&root->supers); 707 708 kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, 709 KERNFS_DIR); 710 if (!kn) { 711 ida_destroy(&root->ino_ida); 712 kfree(root); 713 return ERR_PTR(-ENOMEM); 714 } 715 716 kn->priv = priv; 717 kn->dir.root = root; 718 719 root->syscall_ops = scops; 720 root->flags = flags; 721 root->kn = kn; 722 init_waitqueue_head(&root->deactivate_waitq); 723 724 if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) 725 kernfs_activate(kn); 726 727 return root; 728 } 729 730 /** 731 * kernfs_destroy_root - destroy a kernfs hierarchy 732 * @root: root of the hierarchy to destroy 733 * 734 * Destroy the hierarchy anchored at @root by removing all existing 735 * directories and destroying @root. 736 */ 737 void kernfs_destroy_root(struct kernfs_root *root) 738 { 739 kernfs_remove(root->kn); /* will also free @root */ 740 } 741 742 /** 743 * kernfs_create_dir_ns - create a directory 744 * @parent: parent in which to create a new directory 745 * @name: name of the new directory 746 * @mode: mode of the new directory 747 * @priv: opaque data associated with the new directory 748 * @ns: optional namespace tag of the directory 749 * 750 * Returns the created node on success, ERR_PTR() value on failure. 751 */ 752 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, 753 const char *name, umode_t mode, 754 void *priv, const void *ns) 755 { 756 struct kernfs_node *kn; 757 int rc; 758 759 /* allocate */ 760 kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR); 761 if (!kn) 762 return ERR_PTR(-ENOMEM); 763 764 kn->dir.root = parent->dir.root; 765 kn->ns = ns; 766 kn->priv = priv; 767 768 /* link in */ 769 rc = kernfs_add_one(kn); 770 if (!rc) 771 return kn; 772 773 kernfs_put(kn); 774 return ERR_PTR(rc); 775 } 776 777 static struct dentry *kernfs_iop_lookup(struct inode *dir, 778 struct dentry *dentry, 779 unsigned int flags) 780 { 781 struct dentry *ret; 782 struct kernfs_node *parent = dentry->d_parent->d_fsdata; 783 struct kernfs_node *kn; 784 struct inode *inode; 785 const void *ns = NULL; 786 787 mutex_lock(&kernfs_mutex); 788 789 if (kernfs_ns_enabled(parent)) 790 ns = kernfs_info(dir->i_sb)->ns; 791 792 kn = kernfs_find_ns(parent, dentry->d_name.name, ns); 793 794 /* no such entry */ 795 if (!kn || !kernfs_active(kn)) { 796 ret = NULL; 797 goto out_unlock; 798 } 799 kernfs_get(kn); 800 dentry->d_fsdata = kn; 801 802 /* attach dentry and inode */ 803 inode = kernfs_get_inode(dir->i_sb, kn); 804 if (!inode) { 805 ret = ERR_PTR(-ENOMEM); 806 goto out_unlock; 807 } 808 809 /* instantiate and hash dentry */ 810 ret = d_materialise_unique(dentry, inode); 811 out_unlock: 812 mutex_unlock(&kernfs_mutex); 813 return ret; 814 } 815 816 static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, 817 umode_t mode) 818 { 819 struct kernfs_node *parent = dir->i_private; 820 struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; 821 int ret; 822 823 if (!scops || !scops->mkdir) 824 return -EPERM; 825 826 if (!kernfs_get_active(parent)) 827 return -ENODEV; 828 829 ret = scops->mkdir(parent, dentry->d_name.name, mode); 830 831 kernfs_put_active(parent); 832 return ret; 833 } 834 835 static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) 836 { 837 struct kernfs_node *kn = dentry->d_fsdata; 838 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; 839 int ret; 840 841 if (!scops || !scops->rmdir) 842 return -EPERM; 843 844 if (!kernfs_get_active(kn)) 845 return -ENODEV; 846 847 ret = scops->rmdir(kn); 848 849 kernfs_put_active(kn); 850 return ret; 851 } 852 853 static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, 854 struct inode *new_dir, struct dentry *new_dentry) 855 { 856 struct kernfs_node *kn = old_dentry->d_fsdata; 857 struct kernfs_node *new_parent = new_dir->i_private; 858 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; 859 int ret; 860 861 if (!scops || !scops->rename) 862 return -EPERM; 863 864 if (!kernfs_get_active(kn)) 865 return -ENODEV; 866 867 if (!kernfs_get_active(new_parent)) { 868 kernfs_put_active(kn); 869 return -ENODEV; 870 } 871 872 ret = scops->rename(kn, new_parent, new_dentry->d_name.name); 873 874 kernfs_put_active(new_parent); 875 kernfs_put_active(kn); 876 return ret; 877 } 878 879 const struct inode_operations kernfs_dir_iops = { 880 .lookup = kernfs_iop_lookup, 881 .permission = kernfs_iop_permission, 882 .setattr = kernfs_iop_setattr, 883 .getattr = kernfs_iop_getattr, 884 .setxattr = kernfs_iop_setxattr, 885 .removexattr = kernfs_iop_removexattr, 886 .getxattr = kernfs_iop_getxattr, 887 .listxattr = kernfs_iop_listxattr, 888 889 .mkdir = kernfs_iop_mkdir, 890 .rmdir = kernfs_iop_rmdir, 891 .rename = kernfs_iop_rename, 892 }; 893 894 static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos) 895 { 896 struct kernfs_node *last; 897 898 while (true) { 899 struct rb_node *rbn; 900 901 last = pos; 902 903 if (kernfs_type(pos) != KERNFS_DIR) 904 break; 905 906 rbn = rb_first(&pos->dir.children); 907 if (!rbn) 908 break; 909 910 pos = rb_to_kn(rbn); 911 } 912 913 return last; 914 } 915 916 /** 917 * kernfs_next_descendant_post - find the next descendant for post-order walk 918 * @pos: the current position (%NULL to initiate traversal) 919 * @root: kernfs_node whose descendants to walk 920 * 921 * Find the next descendant to visit for post-order traversal of @root's 922 * descendants. @root is included in the iteration and the last node to be 923 * visited. 924 */ 925 static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, 926 struct kernfs_node *root) 927 { 928 struct rb_node *rbn; 929 930 lockdep_assert_held(&kernfs_mutex); 931 932 /* if first iteration, visit leftmost descendant which may be root */ 933 if (!pos) 934 return kernfs_leftmost_descendant(root); 935 936 /* if we visited @root, we're done */ 937 if (pos == root) 938 return NULL; 939 940 /* if there's an unvisited sibling, visit its leftmost descendant */ 941 rbn = rb_next(&pos->rb); 942 if (rbn) 943 return kernfs_leftmost_descendant(rb_to_kn(rbn)); 944 945 /* no sibling left, visit parent */ 946 return pos->parent; 947 } 948 949 /** 950 * kernfs_activate - activate a node which started deactivated 951 * @kn: kernfs_node whose subtree is to be activated 952 * 953 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node 954 * needs to be explicitly activated. A node which hasn't been activated 955 * isn't visible to userland and deactivation is skipped during its 956 * removal. This is useful to construct atomic init sequences where 957 * creation of multiple nodes should either succeed or fail atomically. 958 * 959 * The caller is responsible for ensuring that this function is not called 960 * after kernfs_remove*() is invoked on @kn. 961 */ 962 void kernfs_activate(struct kernfs_node *kn) 963 { 964 struct kernfs_node *pos; 965 966 mutex_lock(&kernfs_mutex); 967 968 pos = NULL; 969 while ((pos = kernfs_next_descendant_post(pos, kn))) { 970 if (!pos || (pos->flags & KERNFS_ACTIVATED)) 971 continue; 972 973 WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb)); 974 WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS); 975 976 atomic_sub(KN_DEACTIVATED_BIAS, &pos->active); 977 pos->flags |= KERNFS_ACTIVATED; 978 } 979 980 mutex_unlock(&kernfs_mutex); 981 } 982 983 static void __kernfs_remove(struct kernfs_node *kn) 984 { 985 struct kernfs_node *pos; 986 987 lockdep_assert_held(&kernfs_mutex); 988 989 /* 990 * Short-circuit if non-root @kn has already finished removal. 991 * This is for kernfs_remove_self() which plays with active ref 992 * after removal. 993 */ 994 if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb))) 995 return; 996 997 pr_debug("kernfs %s: removing\n", kn->name); 998 999 /* prevent any new usage under @kn by deactivating all nodes */ 1000 pos = NULL; 1001 while ((pos = kernfs_next_descendant_post(pos, kn))) 1002 if (kernfs_active(pos)) 1003 atomic_add(KN_DEACTIVATED_BIAS, &pos->active); 1004 1005 /* deactivate and unlink the subtree node-by-node */ 1006 do { 1007 pos = kernfs_leftmost_descendant(kn); 1008 1009 /* 1010 * kernfs_drain() drops kernfs_mutex temporarily and @pos's 1011 * base ref could have been put by someone else by the time 1012 * the function returns. Make sure it doesn't go away 1013 * underneath us. 1014 */ 1015 kernfs_get(pos); 1016 1017 /* 1018 * Drain iff @kn was activated. This avoids draining and 1019 * its lockdep annotations for nodes which have never been 1020 * activated and allows embedding kernfs_remove() in create 1021 * error paths without worrying about draining. 1022 */ 1023 if (kn->flags & KERNFS_ACTIVATED) 1024 kernfs_drain(pos); 1025 else 1026 WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); 1027 1028 /* 1029 * kernfs_unlink_sibling() succeeds once per node. Use it 1030 * to decide who's responsible for cleanups. 1031 */ 1032 if (!pos->parent || kernfs_unlink_sibling(pos)) { 1033 struct kernfs_iattrs *ps_iattr = 1034 pos->parent ? pos->parent->iattr : NULL; 1035 1036 /* update timestamps on the parent */ 1037 if (ps_iattr) { 1038 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; 1039 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; 1040 } 1041 1042 kernfs_put(pos); 1043 } 1044 1045 kernfs_put(pos); 1046 } while (pos != kn); 1047 } 1048 1049 /** 1050 * kernfs_remove - remove a kernfs_node recursively 1051 * @kn: the kernfs_node to remove 1052 * 1053 * Remove @kn along with all its subdirectories and files. 1054 */ 1055 void kernfs_remove(struct kernfs_node *kn) 1056 { 1057 mutex_lock(&kernfs_mutex); 1058 __kernfs_remove(kn); 1059 mutex_unlock(&kernfs_mutex); 1060 } 1061 1062 /** 1063 * kernfs_break_active_protection - break out of active protection 1064 * @kn: the self kernfs_node 1065 * 1066 * The caller must be running off of a kernfs operation which is invoked 1067 * with an active reference - e.g. one of kernfs_ops. Each invocation of 1068 * this function must also be matched with an invocation of 1069 * kernfs_unbreak_active_protection(). 1070 * 1071 * This function releases the active reference of @kn the caller is 1072 * holding. Once this function is called, @kn may be removed at any point 1073 * and the caller is solely responsible for ensuring that the objects it 1074 * dereferences are accessible. 1075 */ 1076 void kernfs_break_active_protection(struct kernfs_node *kn) 1077 { 1078 /* 1079 * Take out ourself out of the active ref dependency chain. If 1080 * we're called without an active ref, lockdep will complain. 1081 */ 1082 kernfs_put_active(kn); 1083 } 1084 1085 /** 1086 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection() 1087 * @kn: the self kernfs_node 1088 * 1089 * If kernfs_break_active_protection() was called, this function must be 1090 * invoked before finishing the kernfs operation. Note that while this 1091 * function restores the active reference, it doesn't and can't actually 1092 * restore the active protection - @kn may already or be in the process of 1093 * being removed. Once kernfs_break_active_protection() is invoked, that 1094 * protection is irreversibly gone for the kernfs operation instance. 1095 * 1096 * While this function may be called at any point after 1097 * kernfs_break_active_protection() is invoked, its most useful location 1098 * would be right before the enclosing kernfs operation returns. 1099 */ 1100 void kernfs_unbreak_active_protection(struct kernfs_node *kn) 1101 { 1102 /* 1103 * @kn->active could be in any state; however, the increment we do 1104 * here will be undone as soon as the enclosing kernfs operation 1105 * finishes and this temporary bump can't break anything. If @kn 1106 * is alive, nothing changes. If @kn is being deactivated, the 1107 * soon-to-follow put will either finish deactivation or restore 1108 * deactivated state. If @kn is already removed, the temporary 1109 * bump is guaranteed to be gone before @kn is released. 1110 */ 1111 atomic_inc(&kn->active); 1112 if (kernfs_lockdep(kn)) 1113 rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_); 1114 } 1115 1116 /** 1117 * kernfs_remove_self - remove a kernfs_node from its own method 1118 * @kn: the self kernfs_node to remove 1119 * 1120 * The caller must be running off of a kernfs operation which is invoked 1121 * with an active reference - e.g. one of kernfs_ops. This can be used to 1122 * implement a file operation which deletes itself. 1123 * 1124 * For example, the "delete" file for a sysfs device directory can be 1125 * implemented by invoking kernfs_remove_self() on the "delete" file 1126 * itself. This function breaks the circular dependency of trying to 1127 * deactivate self while holding an active ref itself. It isn't necessary 1128 * to modify the usual removal path to use kernfs_remove_self(). The 1129 * "delete" implementation can simply invoke kernfs_remove_self() on self 1130 * before proceeding with the usual removal path. kernfs will ignore later 1131 * kernfs_remove() on self. 1132 * 1133 * kernfs_remove_self() can be called multiple times concurrently on the 1134 * same kernfs_node. Only the first one actually performs removal and 1135 * returns %true. All others will wait until the kernfs operation which 1136 * won self-removal finishes and return %false. Note that the losers wait 1137 * for the completion of not only the winning kernfs_remove_self() but also 1138 * the whole kernfs_ops which won the arbitration. This can be used to 1139 * guarantee, for example, all concurrent writes to a "delete" file to 1140 * finish only after the whole operation is complete. 1141 */ 1142 bool kernfs_remove_self(struct kernfs_node *kn) 1143 { 1144 bool ret; 1145 1146 mutex_lock(&kernfs_mutex); 1147 kernfs_break_active_protection(kn); 1148 1149 /* 1150 * SUICIDAL is used to arbitrate among competing invocations. Only 1151 * the first one will actually perform removal. When the removal 1152 * is complete, SUICIDED is set and the active ref is restored 1153 * while holding kernfs_mutex. The ones which lost arbitration 1154 * waits for SUICDED && drained which can happen only after the 1155 * enclosing kernfs operation which executed the winning instance 1156 * of kernfs_remove_self() finished. 1157 */ 1158 if (!(kn->flags & KERNFS_SUICIDAL)) { 1159 kn->flags |= KERNFS_SUICIDAL; 1160 __kernfs_remove(kn); 1161 kn->flags |= KERNFS_SUICIDED; 1162 ret = true; 1163 } else { 1164 wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq; 1165 DEFINE_WAIT(wait); 1166 1167 while (true) { 1168 prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE); 1169 1170 if ((kn->flags & KERNFS_SUICIDED) && 1171 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS) 1172 break; 1173 1174 mutex_unlock(&kernfs_mutex); 1175 schedule(); 1176 mutex_lock(&kernfs_mutex); 1177 } 1178 finish_wait(waitq, &wait); 1179 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb)); 1180 ret = false; 1181 } 1182 1183 /* 1184 * This must be done while holding kernfs_mutex; otherwise, waiting 1185 * for SUICIDED && deactivated could finish prematurely. 1186 */ 1187 kernfs_unbreak_active_protection(kn); 1188 1189 mutex_unlock(&kernfs_mutex); 1190 return ret; 1191 } 1192 1193 /** 1194 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it 1195 * @parent: parent of the target 1196 * @name: name of the kernfs_node to remove 1197 * @ns: namespace tag of the kernfs_node to remove 1198 * 1199 * Look for the kernfs_node with @name and @ns under @parent and remove it. 1200 * Returns 0 on success, -ENOENT if such entry doesn't exist. 1201 */ 1202 int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, 1203 const void *ns) 1204 { 1205 struct kernfs_node *kn; 1206 1207 if (!parent) { 1208 WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n", 1209 name); 1210 return -ENOENT; 1211 } 1212 1213 mutex_lock(&kernfs_mutex); 1214 1215 kn = kernfs_find_ns(parent, name, ns); 1216 if (kn) 1217 __kernfs_remove(kn); 1218 1219 mutex_unlock(&kernfs_mutex); 1220 1221 if (kn) 1222 return 0; 1223 else 1224 return -ENOENT; 1225 } 1226 1227 /** 1228 * kernfs_rename_ns - move and rename a kernfs_node 1229 * @kn: target node 1230 * @new_parent: new parent to put @sd under 1231 * @new_name: new name 1232 * @new_ns: new namespace tag 1233 */ 1234 int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, 1235 const char *new_name, const void *new_ns) 1236 { 1237 struct kernfs_node *old_parent; 1238 const char *old_name = NULL; 1239 int error; 1240 1241 /* can't move or rename root */ 1242 if (!kn->parent) 1243 return -EINVAL; 1244 1245 mutex_lock(&kernfs_mutex); 1246 1247 error = -ENOENT; 1248 if (!kernfs_active(kn) || !kernfs_active(new_parent)) 1249 goto out; 1250 1251 error = 0; 1252 if ((kn->parent == new_parent) && (kn->ns == new_ns) && 1253 (strcmp(kn->name, new_name) == 0)) 1254 goto out; /* nothing to rename */ 1255 1256 error = -EEXIST; 1257 if (kernfs_find_ns(new_parent, new_name, new_ns)) 1258 goto out; 1259 1260 /* rename kernfs_node */ 1261 if (strcmp(kn->name, new_name) != 0) { 1262 error = -ENOMEM; 1263 new_name = kstrdup(new_name, GFP_KERNEL); 1264 if (!new_name) 1265 goto out; 1266 } else { 1267 new_name = NULL; 1268 } 1269 1270 /* 1271 * Move to the appropriate place in the appropriate directories rbtree. 1272 */ 1273 kernfs_unlink_sibling(kn); 1274 kernfs_get(new_parent); 1275 1276 /* rename_lock protects ->parent and ->name accessors */ 1277 spin_lock_irq(&kernfs_rename_lock); 1278 1279 old_parent = kn->parent; 1280 kn->parent = new_parent; 1281 1282 kn->ns = new_ns; 1283 if (new_name) { 1284 if (!(kn->flags & KERNFS_STATIC_NAME)) 1285 old_name = kn->name; 1286 kn->flags &= ~KERNFS_STATIC_NAME; 1287 kn->name = new_name; 1288 } 1289 1290 spin_unlock_irq(&kernfs_rename_lock); 1291 1292 kn->hash = kernfs_name_hash(kn->name, kn->ns); 1293 kernfs_link_sibling(kn); 1294 1295 kernfs_put(old_parent); 1296 kfree(old_name); 1297 1298 error = 0; 1299 out: 1300 mutex_unlock(&kernfs_mutex); 1301 return error; 1302 } 1303 1304 /* Relationship between s_mode and the DT_xxx types */ 1305 static inline unsigned char dt_type(struct kernfs_node *kn) 1306 { 1307 return (kn->mode >> 12) & 15; 1308 } 1309 1310 static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) 1311 { 1312 kernfs_put(filp->private_data); 1313 return 0; 1314 } 1315 1316 static struct kernfs_node *kernfs_dir_pos(const void *ns, 1317 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) 1318 { 1319 if (pos) { 1320 int valid = kernfs_active(pos) && 1321 pos->parent == parent && hash == pos->hash; 1322 kernfs_put(pos); 1323 if (!valid) 1324 pos = NULL; 1325 } 1326 if (!pos && (hash > 1) && (hash < INT_MAX)) { 1327 struct rb_node *node = parent->dir.children.rb_node; 1328 while (node) { 1329 pos = rb_to_kn(node); 1330 1331 if (hash < pos->hash) 1332 node = node->rb_left; 1333 else if (hash > pos->hash) 1334 node = node->rb_right; 1335 else 1336 break; 1337 } 1338 } 1339 /* Skip over entries which are dying/dead or in the wrong namespace */ 1340 while (pos && (!kernfs_active(pos) || pos->ns != ns)) { 1341 struct rb_node *node = rb_next(&pos->rb); 1342 if (!node) 1343 pos = NULL; 1344 else 1345 pos = rb_to_kn(node); 1346 } 1347 return pos; 1348 } 1349 1350 static struct kernfs_node *kernfs_dir_next_pos(const void *ns, 1351 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) 1352 { 1353 pos = kernfs_dir_pos(ns, parent, ino, pos); 1354 if (pos) { 1355 do { 1356 struct rb_node *node = rb_next(&pos->rb); 1357 if (!node) 1358 pos = NULL; 1359 else 1360 pos = rb_to_kn(node); 1361 } while (pos && (!kernfs_active(pos) || pos->ns != ns)); 1362 } 1363 return pos; 1364 } 1365 1366 static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) 1367 { 1368 struct dentry *dentry = file->f_path.dentry; 1369 struct kernfs_node *parent = dentry->d_fsdata; 1370 struct kernfs_node *pos = file->private_data; 1371 const void *ns = NULL; 1372 1373 if (!dir_emit_dots(file, ctx)) 1374 return 0; 1375 mutex_lock(&kernfs_mutex); 1376 1377 if (kernfs_ns_enabled(parent)) 1378 ns = kernfs_info(dentry->d_sb)->ns; 1379 1380 for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos); 1381 pos; 1382 pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) { 1383 const char *name = pos->name; 1384 unsigned int type = dt_type(pos); 1385 int len = strlen(name); 1386 ino_t ino = pos->ino; 1387 1388 ctx->pos = pos->hash; 1389 file->private_data = pos; 1390 kernfs_get(pos); 1391 1392 mutex_unlock(&kernfs_mutex); 1393 if (!dir_emit(ctx, name, len, ino, type)) 1394 return 0; 1395 mutex_lock(&kernfs_mutex); 1396 } 1397 mutex_unlock(&kernfs_mutex); 1398 file->private_data = NULL; 1399 ctx->pos = INT_MAX; 1400 return 0; 1401 } 1402 1403 static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset, 1404 int whence) 1405 { 1406 struct inode *inode = file_inode(file); 1407 loff_t ret; 1408 1409 mutex_lock(&inode->i_mutex); 1410 ret = generic_file_llseek(file, offset, whence); 1411 mutex_unlock(&inode->i_mutex); 1412 1413 return ret; 1414 } 1415 1416 const struct file_operations kernfs_dir_fops = { 1417 .read = generic_read_dir, 1418 .iterate = kernfs_fop_readdir, 1419 .release = kernfs_dir_fop_release, 1420 .llseek = kernfs_dir_fop_llseek, 1421 }; 1422