// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/kernfs/dir.c - kernfs directory implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 */

#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/hash.h>

#include "kernfs-internal.h"

static DEFINE_SPINLOCK(kernfs_rename_lock);	/* kn->parent and ->name */
/*
 * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to
 * call pr_cont() while holding rename_lock. Because sometimes pr_cont()
 * will perform wakeups when releasing console_sem. Holding rename_lock
 * will introduce deadlock if the scheduler reads the kernfs_name in the
 * wakeup path.
 */
static DEFINE_SPINLOCK(kernfs_pr_cont_lock);
static char kernfs_pr_cont_buf[PATH_MAX];	/* protected by pr_cont_lock */
static DEFINE_SPINLOCK(kernfs_idr_lock);	/* root->ino_idr */

#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)

/*
 * A node is active while its ->active count is non-negative; deactivated
 * nodes sit at KN_DEACTIVATED_BIAS.  The root's kernfs_rwsem must be held
 * (read or write) to get a stable answer.
 */
static bool kernfs_active(struct kernfs_node *kn)
{
	lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem);
	return atomic_read(&kn->active) >= 0;
}

/* true iff @kn's active references carry lockdep annotations */
static bool kernfs_lockdep(struct kernfs_node *kn)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	return kn->flags & KERNFS_LOCKDEP;
#else
	return false;
#endif
}

/*
 * Copy @kn's name into @buf - "(null)" for a NULL node, "/" for the root
 * node (the only node without a parent).  Caller must hold
 * kernfs_rename_lock, which protects ->name against concurrent renames.
 * Return value follows strlcpy() semantics.
 */
static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
{
	if (!kn)
		return strlcpy(buf, "(null)", buflen);

	return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
}

/* kernfs_node_depth - compute depth from @from to @to */
static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
{
	size_t depth = 0;

	while (to->parent && to != from) {
		depth++;
		to = to->parent;
	}
	return depth;
}

/*
 * Find the closest common ancestor of @a and @b, or NULL if they are on
 * different kernfs roots.  First walk the deeper node up until both are
 * at equal depth, then walk both up in lockstep until they meet.
 */
static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
						  struct kernfs_node *b)
{
	size_t da, db;
	struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);

	if (ra != rb)
		return NULL;

	da = kernfs_depth(ra->kn, a);
	db = kernfs_depth(rb->kn, b);

	while (da > db) {
		a = a->parent;
		da--;
	}
	while (db > da) {
		b = b->parent;
		db--;
	}

	/* worst case b and a will be the same at root */
	while (b != a) {
		b = b->parent;
		a = a->parent;
	}

	return a;
}

/**
 * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to,
 * where kn_from is treated as root of the path.
 * @kn_from: kernfs node which should be treated as root for the path
 * @kn_to: kernfs node to which path is needed
 * @buf: buffer to copy the path into
 * @buflen: size of @buf
 *
 * We need to handle couple of scenarios here:
 * [1] when @kn_from is an ancestor of @kn_to at some level
 *	kn_from: /n1/n2/n3
 *	kn_to:   /n1/n2/n3/n4/n5
 *	result:  /n4/n5
 *
 * [2] when @kn_from is on a different hierarchy and we need to find common
 * ancestor between @kn_from and @kn_to.
 *	kn_from: /n1/n2/n3/n4
 *	kn_to:   /n1/n2/n5
 *	result:  /../../n5
 *	OR
 *	kn_from: /n1/n2/n3/n4/n5   [depth=5]
 *	kn_to:   /n1/n2/n3         [depth=3]
 *	result:  /../..
 *
 * [3] when @kn_to is %NULL result will be "(null)"
 *
 * Returns the length of the full path. If the full length is equal to or
 * greater than @buflen, @buf contains the truncated path with the trailing
 * '\0'. On error, -errno is returned.
128 */ 129 static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, 130 struct kernfs_node *kn_from, 131 char *buf, size_t buflen) 132 { 133 struct kernfs_node *kn, *common; 134 const char parent_str[] = "/.."; 135 size_t depth_from, depth_to, len = 0; 136 int i, j; 137 138 if (!kn_to) 139 return strlcpy(buf, "(null)", buflen); 140 141 if (!kn_from) 142 kn_from = kernfs_root(kn_to)->kn; 143 144 if (kn_from == kn_to) 145 return strlcpy(buf, "/", buflen); 146 147 if (!buf) 148 return -EINVAL; 149 150 common = kernfs_common_ancestor(kn_from, kn_to); 151 if (WARN_ON(!common)) 152 return -EINVAL; 153 154 depth_to = kernfs_depth(common, kn_to); 155 depth_from = kernfs_depth(common, kn_from); 156 157 buf[0] = '\0'; 158 159 for (i = 0; i < depth_from; i++) 160 len += strlcpy(buf + len, parent_str, 161 len < buflen ? buflen - len : 0); 162 163 /* Calculate how many bytes we need for the rest */ 164 for (i = depth_to - 1; i >= 0; i--) { 165 for (kn = kn_to, j = 0; j < i; j++) 166 kn = kn->parent; 167 len += strlcpy(buf + len, "/", 168 len < buflen ? buflen - len : 0); 169 len += strlcpy(buf + len, kn->name, 170 len < buflen ? buflen - len : 0); 171 } 172 173 return len; 174 } 175 176 /** 177 * kernfs_name - obtain the name of a given node 178 * @kn: kernfs_node of interest 179 * @buf: buffer to copy @kn's name into 180 * @buflen: size of @buf 181 * 182 * Copies the name of @kn into @buf of @buflen bytes. The behavior is 183 * similar to strlcpy(). It returns the length of @kn's name and if @buf 184 * isn't long enough, it's filled upto @buflen-1 and nul terminated. 185 * 186 * Fills buffer with "(null)" if @kn is NULL. 187 * 188 * This function can be called from any context. 
189 */ 190 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) 191 { 192 unsigned long flags; 193 int ret; 194 195 spin_lock_irqsave(&kernfs_rename_lock, flags); 196 ret = kernfs_name_locked(kn, buf, buflen); 197 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 198 return ret; 199 } 200 201 /** 202 * kernfs_path_from_node - build path of node @to relative to @from. 203 * @from: parent kernfs_node relative to which we need to build the path 204 * @to: kernfs_node of interest 205 * @buf: buffer to copy @to's path into 206 * @buflen: size of @buf 207 * 208 * Builds @to's path relative to @from in @buf. @from and @to must 209 * be on the same kernfs-root. If @from is not parent of @to, then a relative 210 * path (which includes '..'s) as needed to reach from @from to @to is 211 * returned. 212 * 213 * Returns the length of the full path. If the full length is equal to or 214 * greater than @buflen, @buf contains the truncated path with the trailing 215 * '\0'. On error, -errno is returned. 216 */ 217 int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, 218 char *buf, size_t buflen) 219 { 220 unsigned long flags; 221 int ret; 222 223 spin_lock_irqsave(&kernfs_rename_lock, flags); 224 ret = kernfs_path_from_node_locked(to, from, buf, buflen); 225 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 226 return ret; 227 } 228 EXPORT_SYMBOL_GPL(kernfs_path_from_node); 229 230 /** 231 * pr_cont_kernfs_name - pr_cont name of a kernfs_node 232 * @kn: kernfs_node of interest 233 * 234 * This function can be called from any context. 
 */
void pr_cont_kernfs_name(struct kernfs_node *kn)
{
	unsigned long flags;

	/* pr_cont_lock protects the shared kernfs_pr_cont_buf */
	spin_lock_irqsave(&kernfs_pr_cont_lock, flags);

	kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
	pr_cont("%s", kernfs_pr_cont_buf);

	spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
}

/**
 * pr_cont_kernfs_path - pr_cont path of a kernfs_node
 * @kn: kernfs_node of interest
 *
 * This function can be called from any context.
 */
void pr_cont_kernfs_path(struct kernfs_node *kn)
{
	unsigned long flags;
	int sz;

	spin_lock_irqsave(&kernfs_pr_cont_lock, flags);

	/* path relative to the root of @kn's hierarchy (@from == NULL) */
	sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf,
				   sizeof(kernfs_pr_cont_buf));
	if (sz < 0) {
		pr_cont("(error)");
		goto out;
	}

	/* sz is the untruncated length; >= bufsize means truncation */
	if (sz >= sizeof(kernfs_pr_cont_buf)) {
		pr_cont("(name too long)");
		goto out;
	}

	pr_cont("%s", kernfs_pr_cont_buf);

out:
	spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
}

/**
 * kernfs_get_parent - determine the parent node and pin it
 * @kn: kernfs_node of interest
 *
 * Determines @kn's parent, pins and returns it. This function can be
 * called from any context.
 */
struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
{
	struct kernfs_node *parent;
	unsigned long flags;

	/* rename_lock pairs reading ->parent with taking the reference */
	spin_lock_irqsave(&kernfs_rename_lock, flags);
	parent = kn->parent;
	kernfs_get(parent);
	spin_unlock_irqrestore(&kernfs_rename_lock, flags);

	return parent;
}

/**
 * kernfs_name_hash - compute the rbtree key hash for a node
 * @name: Null terminated string to hash
 * @ns: Namespace tag to hash
 *
 * Returns 31 bit hash of ns + name (so it fits in an off_t )
 */
static unsigned int kernfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash(ns);
	unsigned int len = strlen(name);
	while (len--)
		hash = partial_name_hash(*name++, hash);
	hash = end_name_hash(hash);
	hash &= 0x7fffffffU;
	/* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;
	return hash;
}

/*
 * Total order over (hash, ns, name) used as the sibling rbtree key.
 * Returns <0, 0 or >0 like strcmp().
 */
static int kernfs_name_compare(unsigned int hash, const char *name,
			       const void *ns, const struct kernfs_node *kn)
{
	if (hash < kn->hash)
		return -1;
	if (hash > kn->hash)
		return 1;
	if (ns < kn->ns)
		return -1;
	if (ns > kn->ns)
		return 1;
	return strcmp(name, kn->name);
}

/* compare two sibling nodes by their precomputed rbtree keys */
static int kernfs_sd_compare(const struct kernfs_node *left,
			     const struct kernfs_node *right)
{
	return kernfs_name_compare(left->hash, left->name, left->ns, right);
}

/**
 * kernfs_link_sibling - link kernfs_node into sibling rbtree
 * @kn: kernfs_node of interest
 *
 * Link @kn into its sibling rbtree which starts from
 * @kn->parent->dir.children.
 *
 * Locking:
 *	kernfs_rwsem held exclusive
 *
 * RETURNS:
 *	0 on success, -EEXIST on failure.
 */
static int kernfs_link_sibling(struct kernfs_node *kn)
{
	struct rb_node **node = &kn->parent->dir.children.rb_node;
	struct rb_node *parent = NULL;

	/* standard rbtree descent keyed by (hash, ns, name) */
	while (*node) {
		struct kernfs_node *pos;
		int result;

		pos = rb_to_kn(*node);
		parent = *node;
		result = kernfs_sd_compare(kn, pos);
		if (result < 0)
			node = &pos->rb.rb_left;
		else if (result > 0)
			node = &pos->rb.rb_right;
		else
			return -EEXIST;
	}

	/* add new node and rebalance the tree */
	rb_link_node(&kn->rb, parent, node);
	rb_insert_color(&kn->rb, &kn->parent->dir.children);

	/* successfully added, account subdir number */
	if (kernfs_type(kn) == KERNFS_DIR)
		kn->parent->dir.subdirs++;
	/* bump parent's revision so negative dentries get revalidated */
	kernfs_inc_rev(kn->parent);

	return 0;
}

/**
 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
 * @kn: kernfs_node of interest
 *
 * Try to unlink @kn from its sibling rbtree which starts from
 * kn->parent->dir.children. Returns %true if @kn was actually
 * removed, %false if @kn wasn't on the rbtree.
 *
 * Locking:
 *	kernfs_rwsem held exclusive
 */
static bool kernfs_unlink_sibling(struct kernfs_node *kn)
{
	if (RB_EMPTY_NODE(&kn->rb))
		return false;

	if (kernfs_type(kn) == KERNFS_DIR)
		kn->parent->dir.subdirs--;
	kernfs_inc_rev(kn->parent);

	rb_erase(&kn->rb, &kn->parent->dir.children);
	/* mark as unlinked so a second unlink is a no-op */
	RB_CLEAR_NODE(&kn->rb);
	return true;
}

/**
 * kernfs_get_active - get an active reference to kernfs_node
 * @kn: kernfs_node to get an active reference to
 *
 * Get an active reference of @kn. This function is noop if @kn
 * is NULL.
 *
 * RETURNS:
 *	Pointer to @kn on success, %NULL on failure.
 */
struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
{
	if (unlikely(!kn))
		return NULL;

	/* fails once deactivation has pushed ->active negative */
	if (!atomic_inc_unless_negative(&kn->active))
		return NULL;

	if (kernfs_lockdep(kn))
		rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
	return kn;
}

/**
 * kernfs_put_active - put an active reference to kernfs_node
 * @kn: kernfs_node to put an active reference to
 *
 * Put an active reference to @kn. This function is noop if @kn
 * is NULL.
 */
void kernfs_put_active(struct kernfs_node *kn)
{
	int v;

	if (unlikely(!kn))
		return;

	if (kernfs_lockdep(kn))
		rwsem_release(&kn->dep_map, _RET_IP_);
	v = atomic_dec_return(&kn->active);
	if (likely(v != KN_DEACTIVATED_BIAS))
		return;

	/* last active ref of a deactivated node - wake up the drainer(s) */
	wake_up_all(&kernfs_root(kn)->deactivate_waitq);
}

/**
 * kernfs_drain - drain kernfs_node
 * @kn: kernfs_node to drain
 *
 * Drain existing usages and nuke all existing mmaps of @kn. Multiple
 * removers may invoke this function concurrently on @kn and all will
 * return after draining is complete.
 *
 * Temporarily drops and re-acquires the root's kernfs_rwsem; callers
 * must revalidate any state derived under the lock afterwards.
 */
static void kernfs_drain(struct kernfs_node *kn)
	__releases(&kernfs_root(kn)->kernfs_rwsem)
	__acquires(&kernfs_root(kn)->kernfs_rwsem)
{
	struct kernfs_root *root = kernfs_root(kn);

	lockdep_assert_held_write(&root->kernfs_rwsem);
	WARN_ON_ONCE(kernfs_active(kn));

	/*
	 * Skip draining if already fully drained. This avoids draining and its
	 * lockdep annotations for nodes which have never been activated
	 * allowing embedding kernfs_remove() in create error paths without
	 * worrying about draining.
	 */
	if (atomic_read(&kn->active) == KN_DEACTIVATED_BIAS &&
	    !kernfs_should_drain_open_files(kn))
		return;

	/* must not sleep in wait_event() while holding the rwsem */
	up_write(&root->kernfs_rwsem);

	if (kernfs_lockdep(kn)) {
		rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
		if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
			lock_contended(&kn->dep_map, _RET_IP_);
	}

	/* wait for all active refs to be put (see kernfs_put_active()) */
	wait_event(root->deactivate_waitq,
		   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);

	if (kernfs_lockdep(kn)) {
		lock_acquired(&kn->dep_map, _RET_IP_);
		rwsem_release(&kn->dep_map, _RET_IP_);
	}

	if (kernfs_should_drain_open_files(kn))
		kernfs_drain_open_files(kn);

	down_write(&root->kernfs_rwsem);
}

/**
 * kernfs_get - get a reference count on a kernfs_node
 * @kn: the target kernfs_node
 */
void kernfs_get(struct kernfs_node *kn)
{
	if (kn) {
		WARN_ON(!atomic_read(&kn->count));
		atomic_inc(&kn->count);
	}
}
EXPORT_SYMBOL_GPL(kernfs_get);

/**
 * kernfs_put - put a reference count on a kernfs_node
 * @kn: the target kernfs_node
 *
 * Put a reference count of @kn and destroy it if it reached zero.
 * Frees ancestors iteratively (via the repeat label) rather than
 * recursively, and frees the kernfs_root itself when the root node's
 * last reference is dropped.
 */
void kernfs_put(struct kernfs_node *kn)
{
	struct kernfs_node *parent;
	struct kernfs_root *root;

	if (!kn || !atomic_dec_and_test(&kn->count))
		return;
	root = kernfs_root(kn);
 repeat:
	/*
	 * Moving/renaming is always done while holding reference.
	 * kn->parent won't change beneath us.
	 */
	parent = kn->parent;

	WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
		  "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
		  parent ? parent->name : "", kn->name, atomic_read(&kn->active));

	if (kernfs_type(kn) == KERNFS_LINK)
		kernfs_put(kn->symlink.target_kn);

	kfree_const(kn->name);

	if (kn->iattr) {
		simple_xattrs_free(&kn->iattr->xattrs);
		kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
	}
	spin_lock(&kernfs_idr_lock);
	idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
	spin_unlock(&kernfs_idr_lock);
	kmem_cache_free(kernfs_node_cache, kn);

	/* drop the reference this node held on its parent, iteratively */
	kn = parent;
	if (kn) {
		if (atomic_dec_and_test(&kn->count))
			goto repeat;
	} else {
		/* just released the root kn, free @root too */
		idr_destroy(&root->ino_idr);
		kfree(root);
	}
}
EXPORT_SYMBOL_GPL(kernfs_put);

/**
 * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
 * @dentry: the dentry in question
 *
 * Return the kernfs_node associated with @dentry. If @dentry is not a
 * kernfs one, %NULL is returned.
 *
 * While the returned kernfs_node will stay accessible as long as @dentry
 * is accessible, the returned node can be in any state and the caller is
 * fully responsible for determining what's accessible.
 */
struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
{
	/* kernfs superblocks are identified by their super_operations */
	if (dentry->d_sb->s_op == &kernfs_sops)
		return kernfs_dentry_node(dentry);
	return NULL;
}

/*
 * Allocate and initialize a new kernfs_node on @root with a fresh cyclic
 * ino, optional non-root ownership and, when @parent is given, a security
 * blob.  The node starts deactivated (KN_DEACTIVATED_BIAS) with a single
 * refcount; @kn->parent is NOT set here - see kernfs_new_node().
 * Returns the new node or NULL on allocation/idr/security failure.
 */
static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
					     struct kernfs_node *parent,
					     const char *name, umode_t mode,
					     kuid_t uid, kgid_t gid,
					     unsigned flags)
{
	struct kernfs_node *kn;
	u32 id_highbits;
	int ret;

	name = kstrdup_const(name, GFP_KERNEL);
	if (!name)
		return NULL;

	kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
	if (!kn)
		goto err_out1;

	/* preload outside the spinlock so the locked alloc can be atomic */
	idr_preload(GFP_KERNEL);
	spin_lock(&kernfs_idr_lock);
	ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
	/* cyclic alloc wrapped around - advance the generation high bits */
	if (ret >= 0 && ret < root->last_id_lowbits)
		root->id_highbits++;
	id_highbits = root->id_highbits;
	root->last_id_lowbits = ret;
	spin_unlock(&kernfs_idr_lock);
	idr_preload_end();
	if (ret < 0)
		goto err_out2;

	kn->id = (u64)id_highbits << 32 | ret;

	atomic_set(&kn->count, 1);
	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
	RB_CLEAR_NODE(&kn->rb);

	kn->name = name;
	kn->mode = mode;
	kn->flags = flags;

	/* only allocate iattrs when ownership differs from the default */
	if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
		struct iattr iattr = {
			.ia_valid = ATTR_UID | ATTR_GID,
			.ia_uid = uid,
			.ia_gid = gid,
		};

		ret = __kernfs_setattr(kn, &iattr);
		if (ret < 0)
			goto err_out3;
	}

	if (parent) {
		ret = security_kernfs_init_security(parent, kn);
		if (ret)
			goto err_out3;
	}

	return kn;

 err_out3:
	idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
 err_out2:
	kmem_cache_free(kernfs_node_cache, kn);
 err_out1:
	kfree_const(name);
	return NULL;
}

/*
 * Like __kernfs_new_node() but on @parent's root, and links the new node
 * to @parent (pinning it).  The node is not yet in the sibling rbtree;
 * that happens in kernfs_add_one().
 */
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
				    const char *name, umode_t mode,
				    kuid_t uid, kgid_t gid,
				    unsigned flags)
{
	struct kernfs_node *kn;

	kn = __kernfs_new_node(kernfs_root(parent), parent,
			       name, mode, uid, gid, flags);
	if (kn) {
		kernfs_get(parent);
		kn->parent = parent;
	}
	return kn;
}

/*
 * kernfs_find_and_get_node_by_id - get kernfs_node from node id
 * @root: the kernfs root
 * @id: the target node id
 *
 * @id's lower 32bits encode ino and upper gen. If the gen portion is
 * zero, all generations are matched.
 *
 * RETURNS:
 * NULL on failure. Return a kernfs node with reference counter incremented
 */
struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
						   u64 id)
{
	struct kernfs_node *kn;
	ino_t ino = kernfs_id_ino(id);
	u32 gen = kernfs_id_gen(id);

	spin_lock(&kernfs_idr_lock);

	kn = idr_find(&root->ino_idr, (u32)ino);
	if (!kn)
		goto err_unlock;

	if (sizeof(ino_t) >= sizeof(u64)) {
		/* we looked up with the low 32bits, compare the whole */
		if (kernfs_ino(kn) != ino)
			goto err_unlock;
	} else {
		/* 0 matches all generations */
		if (unlikely(gen && kernfs_gen(kn) != gen))
			goto err_unlock;
	}

	/*
	 * NOTE(review): kernfs_active() lockdep-asserts kernfs_rwsem but
	 * only kernfs_idr_lock is held here - confirm against the lockdep
	 * configuration in use.
	 */
	if (unlikely(!kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
		goto err_unlock;

	spin_unlock(&kernfs_idr_lock);
	return kn;
err_unlock:
	spin_unlock(&kernfs_idr_lock);
	return NULL;
}

/**
 * kernfs_add_one - add kernfs_node to parent without warning
 * @kn: kernfs_node to be added
 *
 * The caller must already have initialized @kn->parent. This
 * function increments nlink of the parent's inode if @kn is a
 * directory and link into the children list of the parent.
 *
 * RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
 */
int kernfs_add_one(struct kernfs_node *kn)
{
	struct kernfs_node *parent = kn->parent;
	struct kernfs_root *root = kernfs_root(parent);
	struct kernfs_iattrs *ps_iattr;
	bool has_ns;
	int ret;

	down_write(&root->kernfs_rwsem);

	ret = -EINVAL;
	/* a namespace tag is required iff the parent has namespacing on */
	has_ns = kernfs_ns_enabled(parent);
	if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
		 has_ns ? "required" : "invalid", parent->name, kn->name))
		goto out_unlock;

	if (kernfs_type(parent) != KERNFS_DIR)
		goto out_unlock;

	ret = -ENOENT;
	if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR))
		goto out_unlock;

	kn->hash = kernfs_name_hash(kn->name, kn->ns);

	ret = kernfs_link_sibling(kn);
	if (ret)
		goto out_unlock;

	/* Update timestamps on the parent */
	ps_iattr = parent->iattr;
	if (ps_iattr) {
		ktime_get_real_ts64(&ps_iattr->ia_ctime);
		ps_iattr->ia_mtime = ps_iattr->ia_ctime;
	}

	up_write(&root->kernfs_rwsem);

	/*
	 * Activate the new node unless CREATE_DEACTIVATED is requested.
	 * If not activated here, the kernfs user is responsible for
	 * activating the node with kernfs_activate(). A node which hasn't
	 * been activated is not visible to userland and its removal won't
	 * trigger deactivation.
	 */
	if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
		kernfs_activate(kn);
	return 0;

out_unlock:
	up_write(&root->kernfs_rwsem);
	return ret;
}

/**
 * kernfs_find_ns - find kernfs_node with the given name
 * @parent: kernfs_node to search under
 * @name: name to look for
 * @ns: the namespace tag to use
 *
 * Look for kernfs_node with name @name under @parent. Returns pointer to
 * the found kernfs_node on success, %NULL on failure.
 */
static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
					  const unsigned char *name,
					  const void *ns)
{
	struct rb_node *node = parent->dir.children.rb_node;
	bool has_ns = kernfs_ns_enabled(parent);
	unsigned int hash;

	lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem);

	if (has_ns != (bool)ns) {
		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
		     has_ns ? "required" : "invalid", parent->name, name);
		return NULL;
	}

	/* rbtree lookup keyed by (hash, ns, name) - see kernfs_name_compare() */
	hash = kernfs_name_hash(name, ns);
	while (node) {
		struct kernfs_node *kn;
		int result;

		kn = rb_to_kn(node);
		result = kernfs_name_compare(hash, name, ns, kn);
		if (result < 0)
			node = node->rb_left;
		else if (result > 0)
			node = node->rb_right;
		else
			return kn;
	}
	return NULL;
}

/*
 * Walk a '/'-separated @path down from @parent, resolving each component
 * with kernfs_find_ns().  Uses the shared kernfs_pr_cont_buf as scratch
 * space (hence the pr_cont_lock), so @path must fit in PATH_MAX or NULL
 * is returned.  Empty components (leading, trailing or doubled '/') are
 * skipped.
 */
static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
					  const unsigned char *path,
					  const void *ns)
{
	size_t len;
	char *p, *name;

	lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem);

	spin_lock_irq(&kernfs_pr_cont_lock);

	len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));

	if (len >= sizeof(kernfs_pr_cont_buf)) {
		spin_unlock_irq(&kernfs_pr_cont_lock);
		return NULL;
	}

	p = kernfs_pr_cont_buf;

	while ((name = strsep(&p, "/")) && parent) {
		if (*name == '\0')
			continue;
		parent = kernfs_find_ns(parent, name, ns);
	}

	spin_unlock_irq(&kernfs_pr_cont_lock);

	return parent;
}

/**
 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 * @parent: kernfs_node to search under
 * @name: name to look for
 * @ns: the namespace tag to use
 *
 * Look for kernfs_node with name @name under @parent and get a reference
 * if found. This function may sleep and returns pointer to the found
 * kernfs_node on success, %NULL on failure.
 */
struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
					   const char *name, const void *ns)
{
	struct kernfs_node *kn;
	struct kernfs_root *root = kernfs_root(parent);

	down_read(&root->kernfs_rwsem);
	kn = kernfs_find_ns(parent, name, ns);
	/* pin the result before dropping the lock; kernfs_get(NULL) is a noop */
	kernfs_get(kn);
	up_read(&root->kernfs_rwsem);

	return kn;
}
EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);

/**
 * kernfs_walk_and_get_ns - find and get kernfs_node with the given path
 * @parent: kernfs_node to search under
 * @path: path to look for
 * @ns: the namespace tag to use
 *
 * Look for kernfs_node with path @path under @parent and get a reference
 * if found. This function may sleep and returns pointer to the found
 * kernfs_node on success, %NULL on failure.
 */
struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
					   const char *path, const void *ns)
{
	struct kernfs_node *kn;
	struct kernfs_root *root = kernfs_root(parent);

	down_read(&root->kernfs_rwsem);
	kn = kernfs_walk_ns(parent, path, ns);
	kernfs_get(kn);
	up_read(&root->kernfs_rwsem);

	return kn;
}

/**
 * kernfs_create_root - create a new kernfs hierarchy
 * @scops: optional syscall operations for the hierarchy
 * @flags: KERNFS_ROOT_* flags
 * @priv: opaque data associated with the new directory
 *
 * Returns the root of the new hierarchy on success, ERR_PTR() value on
 * failure.
 */
struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
				       unsigned int flags, void *priv)
{
	struct kernfs_root *root;
	struct kernfs_node *kn;

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

	idr_init(&root->ino_idr);
	init_rwsem(&root->kernfs_rwsem);
	INIT_LIST_HEAD(&root->supers);

	/*
	 * On 64bit ino setups, id is ino. On 32bit, low 32bits are ino.
	 * High bits generation. The starting value for both ino and
	 * generation is 1. Initialize upper 32bit allocation
	 * accordingly.
	 */
	if (sizeof(ino_t) >= sizeof(u64))
		root->id_highbits = 0;
	else
		root->id_highbits = 1;

	/* the root node is an unnamed directory owned by GLOBAL_ROOT */
	kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO,
			       GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
			       KERNFS_DIR);
	if (!kn) {
		idr_destroy(&root->ino_idr);
		kfree(root);
		return ERR_PTR(-ENOMEM);
	}

	kn->priv = priv;
	kn->dir.root = root;

	root->syscall_ops = scops;
	root->flags = flags;
	root->kn = kn;
	init_waitqueue_head(&root->deactivate_waitq);

	if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
		kernfs_activate(kn);

	return root;
}

/**
 * kernfs_destroy_root - destroy a kernfs hierarchy
 * @root: root of the hierarchy to destroy
 *
 * Destroy the hierarchy anchored at @root by removing all existing
 * directories and destroying @root.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
	/*
	 * kernfs_remove holds kernfs_rwsem from the root so the root
	 * shouldn't be freed during the operation.
	 */
	kernfs_get(root->kn);
	kernfs_remove(root->kn);
	kernfs_put(root->kn); /* will also free @root */
}

/**
 * kernfs_root_to_node - return the kernfs_node associated with a kernfs_root
 * @root: root to use to lookup
 */
struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root)
{
	return root->kn;
}

/**
 * kernfs_create_dir_ns - create a directory
 * @parent: parent in which to create a new directory
 * @name: name of the new directory
 * @mode: mode of the new directory
 * @uid: uid of the new directory
 * @gid: gid of the new directory
 * @priv: opaque data associated with the new directory
 * @ns: optional namespace tag of the directory
 *
 * Returns the created node on success, ERR_PTR() value on failure.
1001 */ 1002 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, 1003 const char *name, umode_t mode, 1004 kuid_t uid, kgid_t gid, 1005 void *priv, const void *ns) 1006 { 1007 struct kernfs_node *kn; 1008 int rc; 1009 1010 /* allocate */ 1011 kn = kernfs_new_node(parent, name, mode | S_IFDIR, 1012 uid, gid, KERNFS_DIR); 1013 if (!kn) 1014 return ERR_PTR(-ENOMEM); 1015 1016 kn->dir.root = parent->dir.root; 1017 kn->ns = ns; 1018 kn->priv = priv; 1019 1020 /* link in */ 1021 rc = kernfs_add_one(kn); 1022 if (!rc) 1023 return kn; 1024 1025 kernfs_put(kn); 1026 return ERR_PTR(rc); 1027 } 1028 1029 /** 1030 * kernfs_create_empty_dir - create an always empty directory 1031 * @parent: parent in which to create a new directory 1032 * @name: name of the new directory 1033 * 1034 * Returns the created node on success, ERR_PTR() value on failure. 1035 */ 1036 struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, 1037 const char *name) 1038 { 1039 struct kernfs_node *kn; 1040 int rc; 1041 1042 /* allocate */ 1043 kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, 1044 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); 1045 if (!kn) 1046 return ERR_PTR(-ENOMEM); 1047 1048 kn->flags |= KERNFS_EMPTY_DIR; 1049 kn->dir.root = parent->dir.root; 1050 kn->ns = NULL; 1051 kn->priv = NULL; 1052 1053 /* link in */ 1054 rc = kernfs_add_one(kn); 1055 if (!rc) 1056 return kn; 1057 1058 kernfs_put(kn); 1059 return ERR_PTR(rc); 1060 } 1061 1062 static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) 1063 { 1064 struct kernfs_node *kn; 1065 struct kernfs_root *root; 1066 1067 if (flags & LOOKUP_RCU) 1068 return -ECHILD; 1069 1070 /* Negative hashed dentry? */ 1071 if (d_really_is_negative(dentry)) { 1072 struct kernfs_node *parent; 1073 1074 /* If the kernfs parent node has changed discard and 1075 * proceed to ->lookup. 
1076 */ 1077 spin_lock(&dentry->d_lock); 1078 parent = kernfs_dentry_node(dentry->d_parent); 1079 if (parent) { 1080 spin_unlock(&dentry->d_lock); 1081 root = kernfs_root(parent); 1082 down_read(&root->kernfs_rwsem); 1083 if (kernfs_dir_changed(parent, dentry)) { 1084 up_read(&root->kernfs_rwsem); 1085 return 0; 1086 } 1087 up_read(&root->kernfs_rwsem); 1088 } else 1089 spin_unlock(&dentry->d_lock); 1090 1091 /* The kernfs parent node hasn't changed, leave the 1092 * dentry negative and return success. 1093 */ 1094 return 1; 1095 } 1096 1097 kn = kernfs_dentry_node(dentry); 1098 root = kernfs_root(kn); 1099 down_read(&root->kernfs_rwsem); 1100 1101 /* The kernfs node has been deactivated */ 1102 if (!kernfs_active(kn)) 1103 goto out_bad; 1104 1105 /* The kernfs node has been moved? */ 1106 if (kernfs_dentry_node(dentry->d_parent) != kn->parent) 1107 goto out_bad; 1108 1109 /* The kernfs node has been renamed */ 1110 if (strcmp(dentry->d_name.name, kn->name) != 0) 1111 goto out_bad; 1112 1113 /* The kernfs node has been moved to a different namespace */ 1114 if (kn->parent && kernfs_ns_enabled(kn->parent) && 1115 kernfs_info(dentry->d_sb)->ns != kn->ns) 1116 goto out_bad; 1117 1118 up_read(&root->kernfs_rwsem); 1119 return 1; 1120 out_bad: 1121 up_read(&root->kernfs_rwsem); 1122 return 0; 1123 } 1124 1125 const struct dentry_operations kernfs_dops = { 1126 .d_revalidate = kernfs_dop_revalidate, 1127 }; 1128 1129 static struct dentry *kernfs_iop_lookup(struct inode *dir, 1130 struct dentry *dentry, 1131 unsigned int flags) 1132 { 1133 struct kernfs_node *parent = dir->i_private; 1134 struct kernfs_node *kn; 1135 struct kernfs_root *root; 1136 struct inode *inode = NULL; 1137 const void *ns = NULL; 1138 1139 root = kernfs_root(parent); 1140 down_read(&root->kernfs_rwsem); 1141 if (kernfs_ns_enabled(parent)) 1142 ns = kernfs_info(dir->i_sb)->ns; 1143 1144 kn = kernfs_find_ns(parent, dentry->d_name.name, ns); 1145 /* attach dentry and inode */ 1146 if (kn) { 1147 /* 
Inactive nodes are invisible to the VFS so don't 1148 * create a negative. 1149 */ 1150 if (!kernfs_active(kn)) { 1151 up_read(&root->kernfs_rwsem); 1152 return NULL; 1153 } 1154 inode = kernfs_get_inode(dir->i_sb, kn); 1155 if (!inode) 1156 inode = ERR_PTR(-ENOMEM); 1157 } 1158 /* 1159 * Needed for negative dentry validation. 1160 * The negative dentry can be created in kernfs_iop_lookup() 1161 * or transforms from positive dentry in dentry_unlink_inode() 1162 * called from vfs_rmdir(). 1163 */ 1164 if (!IS_ERR(inode)) 1165 kernfs_set_rev(parent, dentry); 1166 up_read(&root->kernfs_rwsem); 1167 1168 /* instantiate and hash (possibly negative) dentry */ 1169 return d_splice_alias(inode, dentry); 1170 } 1171 1172 static int kernfs_iop_mkdir(struct user_namespace *mnt_userns, 1173 struct inode *dir, struct dentry *dentry, 1174 umode_t mode) 1175 { 1176 struct kernfs_node *parent = dir->i_private; 1177 struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; 1178 int ret; 1179 1180 if (!scops || !scops->mkdir) 1181 return -EPERM; 1182 1183 if (!kernfs_get_active(parent)) 1184 return -ENODEV; 1185 1186 ret = scops->mkdir(parent, dentry->d_name.name, mode); 1187 1188 kernfs_put_active(parent); 1189 return ret; 1190 } 1191 1192 static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) 1193 { 1194 struct kernfs_node *kn = kernfs_dentry_node(dentry); 1195 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; 1196 int ret; 1197 1198 if (!scops || !scops->rmdir) 1199 return -EPERM; 1200 1201 if (!kernfs_get_active(kn)) 1202 return -ENODEV; 1203 1204 ret = scops->rmdir(kn); 1205 1206 kernfs_put_active(kn); 1207 return ret; 1208 } 1209 1210 static int kernfs_iop_rename(struct user_namespace *mnt_userns, 1211 struct inode *old_dir, struct dentry *old_dentry, 1212 struct inode *new_dir, struct dentry *new_dentry, 1213 unsigned int flags) 1214 { 1215 struct kernfs_node *kn = kernfs_dentry_node(old_dentry); 1216 struct kernfs_node *new_parent = 
			new_dir->i_private;
	struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
	int ret;

	/* kernfs supports no rename flags (RENAME_EXCHANGE etc.) */
	if (flags)
		return -EINVAL;

	if (!scops || !scops->rename)
		return -EPERM;

	/* pin both the node being renamed and the destination parent */
	if (!kernfs_get_active(kn))
		return -ENODEV;

	if (!kernfs_get_active(new_parent)) {
		kernfs_put_active(kn);
		return -ENODEV;
	}

	ret = scops->rename(kn, new_parent, new_dentry->d_name.name);

	kernfs_put_active(new_parent);
	kernfs_put_active(kn);
	return ret;
}

const struct inode_operations kernfs_dir_iops = {
	.lookup		= kernfs_iop_lookup,
	.permission	= kernfs_iop_permission,
	.setattr	= kernfs_iop_setattr,
	.getattr	= kernfs_iop_getattr,
	.listxattr	= kernfs_iop_listxattr,

	.mkdir		= kernfs_iop_mkdir,
	.rmdir		= kernfs_iop_rmdir,
	.rename		= kernfs_iop_rename,
};

/*
 * Follow the leftmost (rb_first) child chain from @pos until a
 * non-directory or childless node is reached.
 */
static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
{
	struct kernfs_node *last;

	while (true) {
		struct rb_node *rbn;

		last = pos;

		if (kernfs_type(pos) != KERNFS_DIR)
			break;

		rbn = rb_first(&pos->dir.children);
		if (!rbn)
			break;

		pos = rb_to_kn(rbn);
	}

	return last;
}

/**
 * kernfs_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: kernfs_node whose descendants to walk
 *
 * Find the next descendant to visit for post-order traversal of @root's
 * descendants. @root is included in the iteration and the last node to be
 * visited.
 */
static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
						       struct kernfs_node *root)
{
	struct rb_node *rbn;

	lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem);

	/* if first iteration, visit leftmost descendant which may be root */
	if (!pos)
		return kernfs_leftmost_descendant(root);

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
	rbn = rb_next(&pos->rb);
	if (rbn)
		return kernfs_leftmost_descendant(rb_to_kn(rbn));

	/* no sibling left, visit parent */
	return pos->parent;
}

/**
 * kernfs_activate - activate a node which started deactivated
 * @kn: kernfs_node whose subtree is to be activated
 *
 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
 * needs to be explicitly activated. A node which hasn't been activated
 * isn't visible to userland and deactivation is skipped during its
 * removal. This is useful to construct atomic init sequences where
 * creation of multiple nodes should either succeed or fail atomically.
 *
 * The caller is responsible for ensuring that this function is not called
 * after kernfs_remove*() is invoked on @kn.
 */
void kernfs_activate(struct kernfs_node *kn)
{
	struct kernfs_node *pos;
	struct kernfs_root *root = kernfs_root(kn);

	down_write(&root->kernfs_rwsem);

	/* post-order walk of the whole subtree, including @kn itself */
	pos = NULL;
	while ((pos = kernfs_next_descendant_post(pos, kn))) {
		if (kernfs_active(pos) || (pos->flags & KERNFS_REMOVING))
			continue;

		WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
		WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);

		/* cancel the deactivation bias to make the node visible */
		atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
		pos->flags |= KERNFS_ACTIVATED;
	}

	up_write(&root->kernfs_rwsem);
}

static void __kernfs_remove(struct kernfs_node *kn)
{
	struct kernfs_node *pos;

	/* Short-circuit if non-root @kn has already finished removal. */
	if (!kn)
		return;

	lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);

	/*
	 * This is for kernfs_remove_self() which plays with active ref
	 * after removal.
	 */
	if (kn->parent && RB_EMPTY_NODE(&kn->rb))
		return;

	pr_debug("kernfs %s: removing\n", kn->name);

	/* prevent new usage by marking all nodes removing and deactivating */
	pos = NULL;
	while ((pos = kernfs_next_descendant_post(pos, kn))) {
		pos->flags |= KERNFS_REMOVING;
		if (kernfs_active(pos))
			atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
	}

	/* deactivate and unlink the subtree node-by-node */
	do {
		pos = kernfs_leftmost_descendant(kn);

		/*
		 * kernfs_drain() may drop kernfs_rwsem temporarily and @pos's
		 * base ref could have been put by someone else by the time
		 * the function returns. Make sure it doesn't go away
		 * underneath us.
		 */
		kernfs_get(pos);

		kernfs_drain(pos);

		/*
		 * kernfs_unlink_sibling() succeeds once per node. Use it
		 * to decide who's responsible for cleanups.
		 */
		if (!pos->parent || kernfs_unlink_sibling(pos)) {
			struct kernfs_iattrs *ps_iattr =
				pos->parent ? pos->parent->iattr : NULL;

			/* update timestamps on the parent */
			if (ps_iattr) {
				ktime_get_real_ts64(&ps_iattr->ia_ctime);
				ps_iattr->ia_mtime = ps_iattr->ia_ctime;
			}

			/* drop the base ref on behalf of the unlinked node */
			kernfs_put(pos);
		}

		/* drop the temporary ref taken before kernfs_drain() */
		kernfs_put(pos);
	} while (pos != kn);
}

/**
 * kernfs_remove - remove a kernfs_node recursively
 * @kn: the kernfs_node to remove
 *
 * Remove @kn along with all its subdirectories and files.
 */
void kernfs_remove(struct kernfs_node *kn)
{
	struct kernfs_root *root;

	if (!kn)
		return;

	root = kernfs_root(kn);

	down_write(&root->kernfs_rwsem);
	__kernfs_remove(kn);
	up_write(&root->kernfs_rwsem);
}

/**
 * kernfs_break_active_protection - break out of active protection
 * @kn: the self kernfs_node
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops. Each invocation of
 * this function must also be matched with an invocation of
 * kernfs_unbreak_active_protection().
 *
 * This function releases the active reference of @kn the caller is
 * holding. Once this function is called, @kn may be removed at any point
 * and the caller is solely responsible for ensuring that the objects it
 * dereferences are accessible.
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
	/*
	 * Take out ourself out of the active ref dependency chain. If
	 * we're called without an active ref, lockdep will complain.
	 */
	kernfs_put_active(kn);
}

/**
 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
 * @kn: the self kernfs_node
 *
 * If kernfs_break_active_protection() was called, this function must be
 * invoked before finishing the kernfs operation. Note that while this
 * function restores the active reference, it doesn't and can't actually
 * restore the active protection - @kn may already or be in the process of
 * being removed. Once kernfs_break_active_protection() is invoked, that
 * protection is irreversibly gone for the kernfs operation instance.
 *
 * While this function may be called at any point after
 * kernfs_break_active_protection() is invoked, its most useful location
 * would be right before the enclosing kernfs operation returns.
 */
void kernfs_unbreak_active_protection(struct kernfs_node *kn)
{
	/*
	 * @kn->active could be in any state; however, the increment we do
	 * here will be undone as soon as the enclosing kernfs operation
	 * finishes and this temporary bump can't break anything. If @kn
	 * is alive, nothing changes. If @kn is being deactivated, the
	 * soon-to-follow put will either finish deactivation or restore
	 * deactivated state. If @kn is already removed, the temporary
	 * bump is guaranteed to be gone before @kn is released.
	 */
	atomic_inc(&kn->active);
	if (kernfs_lockdep(kn))
		rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
}

/**
 * kernfs_remove_self - remove a kernfs_node from its own method
 * @kn: the self kernfs_node to remove
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops. This can be used to
 * implement a file operation which deletes itself.
 *
 * For example, the "delete" file for a sysfs device directory can be
 * implemented by invoking kernfs_remove_self() on the "delete" file
 * itself. This function breaks the circular dependency of trying to
 * deactivate self while holding an active ref itself. It isn't necessary
 * to modify the usual removal path to use kernfs_remove_self(). The
 * "delete" implementation can simply invoke kernfs_remove_self() on self
 * before proceeding with the usual removal path. kernfs will ignore later
 * kernfs_remove() on self.
 *
 * kernfs_remove_self() can be called multiple times concurrently on the
 * same kernfs_node. Only the first one actually performs removal and
 * returns %true. All others will wait until the kernfs operation which
 * won self-removal finishes and return %false. Note that the losers wait
 * for the completion of not only the winning kernfs_remove_self() but also
 * the whole kernfs_ops which won the arbitration. This can be used to
 * guarantee, for example, all concurrent writes to a "delete" file to
 * finish only after the whole operation is complete.
 */
bool kernfs_remove_self(struct kernfs_node *kn)
{
	bool ret;
	struct kernfs_root *root = kernfs_root(kn);

	down_write(&root->kernfs_rwsem);
	kernfs_break_active_protection(kn);

	/*
	 * SUICIDAL is used to arbitrate among competing invocations. Only
	 * the first one will actually perform removal. When the removal
	 * is complete, SUICIDED is set and the active ref is restored
	 * while kernfs_rwsem is held exclusive. The ones which lost
	 * arbitration wait for SUICIDED && drained which can happen only
	 * after the enclosing kernfs operation which executed the winning
	 * instance of kernfs_remove_self() finished.
	 */
	if (!(kn->flags & KERNFS_SUICIDAL)) {
		/* we won the arbitration - perform the removal ourselves */
		kn->flags |= KERNFS_SUICIDAL;
		__kernfs_remove(kn);
		kn->flags |= KERNFS_SUICIDED;
		ret = true;
	} else {
		wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
		DEFINE_WAIT(wait);

		while (true) {
			prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);

			if ((kn->flags & KERNFS_SUICIDED) &&
			    atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
				break;

			/* drop the rwsem while sleeping so the winner can run */
			up_write(&root->kernfs_rwsem);
			schedule();
			down_write(&root->kernfs_rwsem);
		}
		finish_wait(waitq, &wait);
		WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
		ret = false;
	}

	/*
	 * This must be done while kernfs_rwsem held exclusive; otherwise,
	 * waiting for SUICIDED && deactivated could finish prematurely.
	 */
	kernfs_unbreak_active_protection(kn);

	up_write(&root->kernfs_rwsem);
	return ret;
}

/**
 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
 * @parent: parent of the target
 * @name: name of the kernfs_node to remove
 * @ns: namespace tag of the kernfs_node to remove
 *
 * Look for the kernfs_node with @name and @ns under @parent and remove it.
 * Returns 0 on success, -ENOENT if such entry doesn't exist.
 */
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
			     const void *ns)
{
	struct kernfs_node *kn;
	struct kernfs_root *root;

	if (!parent) {
		WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
			name);
		return -ENOENT;
	}

	root = kernfs_root(parent);
	down_write(&root->kernfs_rwsem);

	kn = kernfs_find_ns(parent, name, ns);
	if (kn)
		__kernfs_remove(kn);

	up_write(&root->kernfs_rwsem);

	if (kn)
		return 0;
	else
		return -ENOENT;
}

/**
 * kernfs_rename_ns - move and rename a kernfs_node
 * @kn: target node
 * @new_parent: new parent to put @kn under
 * @new_name: new name
 * @new_ns: new namespace tag
 *
 * Return: %0 on success, -errno on failure.
 */
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
		     const char *new_name, const void *new_ns)
{
	struct kernfs_node *old_parent;
	struct kernfs_root *root;
	const char *old_name = NULL;
	int error;

	/* can't move or rename root */
	if (!kn->parent)
		return -EINVAL;

	root = kernfs_root(kn);
	down_write(&root->kernfs_rwsem);

	error = -ENOENT;
	if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
	    (new_parent->flags & KERNFS_EMPTY_DIR))
		goto out;

	error = 0;
	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
	    (strcmp(kn->name, new_name) == 0))
		goto out;	/* nothing to rename */

	error = -EEXIST;
	if (kernfs_find_ns(new_parent, new_name, new_ns))
		goto out;

	/* rename kernfs_node */
	if (strcmp(kn->name, new_name) != 0) {
		error = -ENOMEM;
		new_name = kstrdup_const(new_name, GFP_KERNEL);
		if (!new_name)
			goto out;
	} else {
		new_name = NULL;
	}

	/*
	 * Move to the appropriate place in the appropriate directories rbtree.
	 */
	kernfs_unlink_sibling(kn);
	kernfs_get(new_parent);

	/* rename_lock protects ->parent and ->name accessors */
	spin_lock_irq(&kernfs_rename_lock);

	old_parent = kn->parent;
	kn->parent = new_parent;

	kn->ns = new_ns;
	if (new_name) {
		old_name = kn->name;
		kn->name = new_name;
	}

	spin_unlock_irq(&kernfs_rename_lock);

	/* re-hash and re-insert under the (possibly new) parent */
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
	kernfs_link_sibling(kn);

	kernfs_put(old_parent);
	kfree_const(old_name);

	error = 0;
 out:
	up_write(&root->kernfs_rwsem);
	return error;
}

/* Relationship between mode and the DT_xxx types */
static inline unsigned char dt_type(struct kernfs_node *kn)
{
	return (kn->mode >> 12) & 15;
}

static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
{
	/* drop the readdir cursor ref stashed by kernfs_fop_readdir() */
	kernfs_put(filp->private_data);
	return 0;
}

/*
 * Find the readdir position for @hash, reusing @pos if it is still a
 * valid child of @parent. Consumes the ref held on @pos. Positions 0
 * and 1 correspond to the dot entries emitted by dir_emit_dots().
 */
static struct kernfs_node *kernfs_dir_pos(const void *ns,
	struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
	if (pos) {
		int valid = kernfs_active(pos) &&
			pos->parent == parent && hash == pos->hash;
		kernfs_put(pos);
		if (!valid)
			pos = NULL;
	}
	if (!pos && (hash > 1) && (hash < INT_MAX)) {
		struct rb_node *node = parent->dir.children.rb_node;
		while (node) {
			pos = rb_to_kn(node);

			if (hash < pos->hash)
				node = node->rb_left;
			else if (hash > pos->hash)
				node = node->rb_right;
			else
				break;
		}
	}
	/* Skip over entries which are dying/dead or in the wrong namespace */
	while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
		struct rb_node *node = rb_next(&pos->rb);
		if (!node)
			pos = NULL;
		else
			pos = rb_to_kn(node);
	}
	return pos;
}

static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
	struct kernfs_node *parent, ino_t ino, struct
	kernfs_node *pos)
{
	pos = kernfs_dir_pos(ns, parent, ino, pos);
	if (pos) {
		/* advance past dying/dead or wrong-namespace entries */
		do {
			struct rb_node *node = rb_next(&pos->rb);
			if (!node)
				pos = NULL;
			else
				pos = rb_to_kn(node);
		} while (pos && (!kernfs_active(pos) || pos->ns != ns));
	}
	return pos;
}

static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct kernfs_node *parent = kernfs_dentry_node(dentry);
	struct kernfs_node *pos = file->private_data;
	struct kernfs_root *root;
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;

	root = kernfs_root(parent);
	down_read(&root->kernfs_rwsem);

	if (kernfs_ns_enabled(parent))
		ns = kernfs_info(dentry->d_sb)->ns;

	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
	     pos;
	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
		const char *name = pos->name;
		unsigned int type = dt_type(pos);
		int len = strlen(name);
		ino_t ino = kernfs_ino(pos);

		ctx->pos = pos->hash;
		file->private_data = pos;
		kernfs_get(pos);

		/*
		 * Drop the rwsem around dir_emit(); @pos stays valid
		 * because of the ref taken just above.
		 */
		up_read(&root->kernfs_rwsem);
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
		down_read(&root->kernfs_rwsem);
	}
	up_read(&root->kernfs_rwsem);
	file->private_data = NULL;
	ctx->pos = INT_MAX;
	return 0;
}

const struct file_operations kernfs_dir_fops = {
	.read		= generic_read_dir,
	.iterate_shared	= kernfs_fop_readdir,
	.release	= kernfs_dir_fop_release,
	.llseek		= generic_file_llseek,
};