1 /* 2 * fs/sysfs/dir.c - sysfs core and dir operation implementation 3 * 4 * Copyright (c) 2001-3 Patrick Mochel 5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de> 7 * 8 * This file is released under the GPLv2. 9 * 10 * Please see Documentation/filesystems/sysfs.txt for more information. 11 */ 12 13 #undef DEBUG 14 15 #include <linux/fs.h> 16 #include <linux/mount.h> 17 #include <linux/module.h> 18 #include <linux/kobject.h> 19 #include <linux/namei.h> 20 #include <linux/idr.h> 21 #include <linux/completion.h> 22 #include <linux/mutex.h> 23 #include <linux/slab.h> 24 #include <linux/security.h> 25 #include <linux/hash.h> 26 #include "sysfs.h" 27 28 DEFINE_MUTEX(sysfs_mutex); 29 DEFINE_SPINLOCK(sysfs_symlink_target_lock); 30 31 #define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb) 32 33 static DEFINE_SPINLOCK(sysfs_ino_lock); 34 static DEFINE_IDA(sysfs_ino_ida); 35 36 /** 37 * sysfs_name_hash 38 * @name: Null terminated string to hash 39 * @ns: Namespace tag to hash 40 * 41 * Returns 31 bit hash of ns + name (so it fits in an off_t ) 42 */ 43 static unsigned int sysfs_name_hash(const char *name, const void *ns) 44 { 45 unsigned long hash = init_name_hash(); 46 unsigned int len = strlen(name); 47 while (len--) 48 hash = partial_name_hash(*name++, hash); 49 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); 50 hash &= 0x7fffffffU; 51 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ 52 if (hash < 1) 53 hash += 2; 54 if (hash >= INT_MAX) 55 hash = INT_MAX - 1; 56 return hash; 57 } 58 59 static int sysfs_name_compare(unsigned int hash, const char *name, 60 const void *ns, const struct sysfs_dirent *sd) 61 { 62 if (hash != sd->s_hash) 63 return hash - sd->s_hash; 64 if (ns != sd->s_ns) 65 return ns - sd->s_ns; 66 return strcmp(name, sd->s_name); 67 } 68 69 static int sysfs_sd_compare(const struct sysfs_dirent *left, 70 const struct sysfs_dirent *right) 71 { 72 return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns, 73 right); 74 } 75 76 /** 77 * sysfs_link_sibling - link sysfs_dirent into sibling rbtree 78 * @sd: sysfs_dirent of interest 79 * 80 * Link @sd into its sibling rbtree which starts from 81 * sd->s_parent->s_dir.children. 82 * 83 * Locking: 84 * mutex_lock(sysfs_mutex) 85 * 86 * RETURNS: 87 * 0 on susccess -EEXIST on failure. 88 */ 89 static int sysfs_link_sibling(struct sysfs_dirent *sd) 90 { 91 struct rb_node **node = &sd->s_parent->s_dir.children.rb_node; 92 struct rb_node *parent = NULL; 93 94 if (sysfs_type(sd) == SYSFS_DIR) 95 sd->s_parent->s_dir.subdirs++; 96 97 while (*node) { 98 struct sysfs_dirent *pos; 99 int result; 100 101 pos = to_sysfs_dirent(*node); 102 parent = *node; 103 result = sysfs_sd_compare(sd, pos); 104 if (result < 0) 105 node = &pos->s_rb.rb_left; 106 else if (result > 0) 107 node = &pos->s_rb.rb_right; 108 else 109 return -EEXIST; 110 } 111 /* add new node and rebalance the tree */ 112 rb_link_node(&sd->s_rb, parent, node); 113 rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children); 114 return 0; 115 } 116 117 /** 118 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree 119 * @sd: sysfs_dirent of interest 120 * 121 * Unlink @sd from its sibling rbtree which starts from 122 * sd->s_parent->s_dir.children. 123 * 124 * Locking: 125 * mutex_lock(sysfs_mutex) 126 */ 127 static void sysfs_unlink_sibling(struct sysfs_dirent *sd) 128 { 129 if (sysfs_type(sd) == SYSFS_DIR) 130 sd->s_parent->s_dir.subdirs--; 131 132 rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); 133 } 134 135 /** 136 * sysfs_get_active - get an active reference to sysfs_dirent 137 * @sd: sysfs_dirent to get an active reference to 138 * 139 * Get an active reference of @sd. This function is noop if @sd 140 * is NULL. 141 * 142 * RETURNS: 143 * Pointer to @sd on success, NULL on failure. 144 */ 145 struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) 146 { 147 if (unlikely(!sd)) 148 return NULL; 149 150 if (!atomic_inc_unless_negative(&sd->s_active)) 151 return NULL; 152 153 if (likely(!sysfs_ignore_lockdep(sd))) 154 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_); 155 return sd; 156 } 157 158 /** 159 * sysfs_put_active - put an active reference to sysfs_dirent 160 * @sd: sysfs_dirent to put an active reference to 161 * 162 * Put an active reference to @sd. This function is noop if @sd 163 * is NULL. 164 */ 165 void sysfs_put_active(struct sysfs_dirent *sd) 166 { 167 int v; 168 169 if (unlikely(!sd)) 170 return; 171 172 if (likely(!sysfs_ignore_lockdep(sd))) 173 rwsem_release(&sd->dep_map, 1, _RET_IP_); 174 v = atomic_dec_return(&sd->s_active); 175 if (likely(v != SD_DEACTIVATED_BIAS)) 176 return; 177 178 /* atomic_dec_return() is a mb(), we'll always see the updated 179 * sd->u.completion. 180 */ 181 complete(sd->u.completion); 182 } 183 184 /** 185 * sysfs_deactivate - deactivate sysfs_dirent 186 * @sd: sysfs_dirent to deactivate 187 * 188 * Deny new active references and drain existing ones. 189 */ 190 static void sysfs_deactivate(struct sysfs_dirent *sd) 191 { 192 DECLARE_COMPLETION_ONSTACK(wait); 193 int v; 194 195 BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED)); 196 197 if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF)) 198 return; 199 200 sd->u.completion = (void *)&wait; 201 202 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_); 203 /* atomic_add_return() is a mb(), put_active() will always see 204 * the updated sd->u.completion. 205 */ 206 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); 207 208 if (v != SD_DEACTIVATED_BIAS) { 209 lock_contended(&sd->dep_map, _RET_IP_); 210 wait_for_completion(&wait); 211 } 212 213 lock_acquired(&sd->dep_map, _RET_IP_); 214 rwsem_release(&sd->dep_map, 1, _RET_IP_); 215 } 216 217 static int sysfs_alloc_ino(unsigned int *pino) 218 { 219 int ino, rc; 220 221 retry: 222 spin_lock(&sysfs_ino_lock); 223 rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino); 224 spin_unlock(&sysfs_ino_lock); 225 226 if (rc == -EAGAIN) { 227 if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL)) 228 goto retry; 229 rc = -ENOMEM; 230 } 231 232 *pino = ino; 233 return rc; 234 } 235 236 static void sysfs_free_ino(unsigned int ino) 237 { 238 spin_lock(&sysfs_ino_lock); 239 ida_remove(&sysfs_ino_ida, ino); 240 spin_unlock(&sysfs_ino_lock); 241 } 242 243 void release_sysfs_dirent(struct sysfs_dirent *sd) 244 { 245 struct sysfs_dirent *parent_sd; 246 247 repeat: 248 /* Moving/renaming is always done while holding reference. 249 * sd->s_parent won't change beneath us. 250 */ 251 parent_sd = sd->s_parent; 252 253 WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED), 254 "sysfs: free using entry: %s/%s\n", 255 parent_sd ? parent_sd->s_name : "", sd->s_name); 256 257 if (sysfs_type(sd) == SYSFS_KOBJ_LINK) 258 sysfs_put(sd->s_symlink.target_sd); 259 if (sysfs_type(sd) & SYSFS_COPY_NAME) 260 kfree(sd->s_name); 261 if (sd->s_iattr && sd->s_iattr->ia_secdata) 262 security_release_secctx(sd->s_iattr->ia_secdata, 263 sd->s_iattr->ia_secdata_len); 264 kfree(sd->s_iattr); 265 sysfs_free_ino(sd->s_ino); 266 kmem_cache_free(sysfs_dir_cachep, sd); 267 268 sd = parent_sd; 269 if (sd && atomic_dec_and_test(&sd->s_count)) 270 goto repeat; 271 } 272 273 static int sysfs_dentry_delete(const struct dentry *dentry) 274 { 275 struct sysfs_dirent *sd = dentry->d_fsdata; 276 return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED)); 277 } 278 279 static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) 280 { 281 struct sysfs_dirent *sd; 282 int type; 283 284 if (flags & LOOKUP_RCU) 285 return -ECHILD; 286 287 sd = dentry->d_fsdata; 288 mutex_lock(&sysfs_mutex); 289 290 /* The sysfs dirent has been deleted */ 291 if (sd->s_flags & SYSFS_FLAG_REMOVED) 292 goto out_bad; 293 294 /* The sysfs dirent has been moved? */ 295 if (dentry->d_parent->d_fsdata != sd->s_parent) 296 goto out_bad; 297 298 /* The sysfs dirent has been renamed */ 299 if (strcmp(dentry->d_name.name, sd->s_name) != 0) 300 goto out_bad; 301 302 /* The sysfs dirent has been moved to a different namespace */ 303 type = KOBJ_NS_TYPE_NONE; 304 if (sd->s_parent) { 305 type = sysfs_ns_type(sd->s_parent); 306 if (type != KOBJ_NS_TYPE_NONE && 307 sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns) 308 goto out_bad; 309 } 310 311 mutex_unlock(&sysfs_mutex); 312 out_valid: 313 return 1; 314 out_bad: 315 /* Remove the dentry from the dcache hashes. 316 * If this is a deleted dentry we use d_drop instead of d_delete 317 * so sysfs doesn't need to cope with negative dentries. 318 * 319 * If this is a dentry that has simply been renamed we 320 * use d_drop to remove it from the dcache lookup on its 321 * old parent. If this dentry persists later when a lookup 322 * is performed at its new name the dentry will be readded 323 * to the dcache hashes. 324 */ 325 mutex_unlock(&sysfs_mutex); 326 327 /* If we have submounts we must allow the vfs caches 328 * to lie about the state of the filesystem to prevent 329 * leaks and other nasty things. 330 */ 331 if (check_submounts_and_drop(dentry) != 0) 332 goto out_valid; 333 334 return 0; 335 } 336 337 static void sysfs_dentry_release(struct dentry *dentry) 338 { 339 sysfs_put(dentry->d_fsdata); 340 } 341 342 const struct dentry_operations sysfs_dentry_ops = { 343 .d_revalidate = sysfs_dentry_revalidate, 344 .d_delete = sysfs_dentry_delete, 345 .d_release = sysfs_dentry_release, 346 }; 347 348 struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) 349 { 350 char *dup_name = NULL; 351 struct sysfs_dirent *sd; 352 353 if (type & SYSFS_COPY_NAME) { 354 name = dup_name = kstrdup(name, GFP_KERNEL); 355 if (!name) 356 return NULL; 357 } 358 359 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); 360 if (!sd) 361 goto err_out1; 362 363 if (sysfs_alloc_ino(&sd->s_ino)) 364 goto err_out2; 365 366 atomic_set(&sd->s_count, 1); 367 atomic_set(&sd->s_active, 0); 368 369 sd->s_name = name; 370 sd->s_mode = mode; 371 sd->s_flags = type | SYSFS_FLAG_REMOVED; 372 373 return sd; 374 375 err_out2: 376 kmem_cache_free(sysfs_dir_cachep, sd); 377 err_out1: 378 kfree(dup_name); 379 return NULL; 380 } 381 382 /** 383 * sysfs_addrm_start - prepare for sysfs_dirent add/remove 384 * @acxt: pointer to sysfs_addrm_cxt to be used 385 * 386 * This function is called when the caller is about to add or remove 387 * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used 388 * to keep and pass context to other addrm functions. 389 * 390 * LOCKING: 391 * Kernel thread context (may sleep). sysfs_mutex is locked on 392 * return. 393 */ 394 void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt) 395 __acquires(sysfs_mutex) 396 { 397 memset(acxt, 0, sizeof(*acxt)); 398 399 mutex_lock(&sysfs_mutex); 400 } 401 402 /** 403 * __sysfs_add_one - add sysfs_dirent to parent without warning 404 * @acxt: addrm context to use 405 * @sd: sysfs_dirent to be added 406 * @parent_sd: the parent sysfs_dirent to add @sd to 407 * 408 * Get @parent_sd and set @sd->s_parent to it and increment nlink of 409 * the parent inode if @sd is a directory and link into the children 410 * list of the parent. 411 * 412 * This function should be called between calls to 413 * sysfs_addrm_start() and sysfs_addrm_finish() and should be 414 * passed the same @acxt as passed to sysfs_addrm_start(). 415 * 416 * LOCKING: 417 * Determined by sysfs_addrm_start(). 418 * 419 * RETURNS: 420 * 0 on success, -EEXIST if entry with the given name already 421 * exists. 422 */ 423 int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, 424 struct sysfs_dirent *parent_sd) 425 { 426 struct sysfs_inode_attrs *ps_iattr; 427 int ret; 428 429 if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) { 430 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", 431 sysfs_ns_type(parent_sd) ? "required" : "invalid", 432 parent_sd->s_name, sd->s_name); 433 return -EINVAL; 434 } 435 436 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); 437 sd->s_parent = sysfs_get(parent_sd); 438 439 ret = sysfs_link_sibling(sd); 440 if (ret) 441 return ret; 442 443 /* Update timestamps on the parent */ 444 ps_iattr = parent_sd->s_iattr; 445 if (ps_iattr) { 446 struct iattr *ps_iattrs = &ps_iattr->ia_iattr; 447 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; 448 } 449 450 /* Mark the entry added into directory tree */ 451 sd->s_flags &= ~SYSFS_FLAG_REMOVED; 452 453 return 0; 454 } 455 456 /** 457 * sysfs_pathname - return full path to sysfs dirent 458 * @sd: sysfs_dirent whose path we want 459 * @path: caller allocated buffer of size PATH_MAX 460 * 461 * Gives the name "/" to the sysfs_root entry; any path returned 462 * is relative to wherever sysfs is mounted. 463 */ 464 static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) 465 { 466 if (sd->s_parent) { 467 sysfs_pathname(sd->s_parent, path); 468 strlcat(path, "/", PATH_MAX); 469 } 470 strlcat(path, sd->s_name, PATH_MAX); 471 return path; 472 } 473 474 void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) 475 { 476 char *path; 477 478 path = kzalloc(PATH_MAX, GFP_KERNEL); 479 if (path) { 480 sysfs_pathname(parent, path); 481 strlcat(path, "/", PATH_MAX); 482 strlcat(path, name, PATH_MAX); 483 } 484 485 WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", 486 path ? path : name); 487 488 kfree(path); 489 } 490 491 /** 492 * sysfs_add_one - add sysfs_dirent to parent 493 * @acxt: addrm context to use 494 * @sd: sysfs_dirent to be added 495 * @parent_sd: the parent sysfs_dirent to add @sd to 496 * 497 * Get @parent_sd and set @sd->s_parent to it and increment nlink of 498 * the parent inode if @sd is a directory and link into the children 499 * list of the parent. 500 * 501 * This function should be called between calls to 502 * sysfs_addrm_start() and sysfs_addrm_finish() and should be 503 * passed the same @acxt as passed to sysfs_addrm_start(). 504 * 505 * LOCKING: 506 * Determined by sysfs_addrm_start(). 507 * 508 * RETURNS: 509 * 0 on success, -EEXIST if entry with the given name already 510 * exists. 511 */ 512 int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, 513 struct sysfs_dirent *parent_sd) 514 { 515 int ret; 516 517 ret = __sysfs_add_one(acxt, sd, parent_sd); 518 519 if (ret == -EEXIST) 520 sysfs_warn_dup(parent_sd, sd->s_name); 521 return ret; 522 } 523 524 /** 525 * sysfs_remove_one - remove sysfs_dirent from parent 526 * @acxt: addrm context to use 527 * @sd: sysfs_dirent to be removed 528 * 529 * Mark @sd removed and drop nlink of parent inode if @sd is a 530 * directory. @sd is unlinked from the children list. 531 * 532 * This function should be called between calls to 533 * sysfs_addrm_start() and sysfs_addrm_finish() and should be 534 * passed the same @acxt as passed to sysfs_addrm_start(). 535 * 536 * LOCKING: 537 * Determined by sysfs_addrm_start(). 538 */ 539 static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, 540 struct sysfs_dirent *sd) 541 { 542 struct sysfs_inode_attrs *ps_iattr; 543 544 /* 545 * Removal can be called multiple times on the same node. Only the 546 * first invocation is effective and puts the base ref. 547 */ 548 if (sd->s_flags & SYSFS_FLAG_REMOVED) 549 return; 550 551 sysfs_unlink_sibling(sd); 552 553 /* Update timestamps on the parent */ 554 ps_iattr = sd->s_parent->s_iattr; 555 if (ps_iattr) { 556 struct iattr *ps_iattrs = &ps_iattr->ia_iattr; 557 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; 558 } 559 560 sd->s_flags |= SYSFS_FLAG_REMOVED; 561 sd->u.removed_list = acxt->removed; 562 acxt->removed = sd; 563 } 564 565 /** 566 * sysfs_addrm_finish - finish up sysfs_dirent add/remove 567 * @acxt: addrm context to finish up 568 * 569 * Finish up sysfs_dirent add/remove. Resources acquired by 570 * sysfs_addrm_start() are released and removed sysfs_dirents are 571 * cleaned up. 572 * 573 * LOCKING: 574 * sysfs_mutex is released. 575 */ 576 void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) 577 __releases(sysfs_mutex) 578 { 579 /* release resources acquired by sysfs_addrm_start() */ 580 mutex_unlock(&sysfs_mutex); 581 582 /* kill removed sysfs_dirents */ 583 while (acxt->removed) { 584 struct sysfs_dirent *sd = acxt->removed; 585 586 acxt->removed = sd->u.removed_list; 587 588 sysfs_deactivate(sd); 589 sysfs_unmap_bin_file(sd); 590 sysfs_put(sd); 591 } 592 } 593 594 /** 595 * sysfs_find_dirent - find sysfs_dirent with the given name 596 * @parent_sd: sysfs_dirent to search under 597 * @name: name to look for 598 * @ns: the namespace tag to use 599 * 600 * Look for sysfs_dirent with name @name under @parent_sd. 601 * 602 * LOCKING: 603 * mutex_lock(sysfs_mutex) 604 * 605 * RETURNS: 606 * Pointer to sysfs_dirent if found, NULL if not. 607 */ 608 struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, 609 const unsigned char *name, 610 const void *ns) 611 { 612 struct rb_node *node = parent_sd->s_dir.children.rb_node; 613 unsigned int hash; 614 615 if (!!sysfs_ns_type(parent_sd) != !!ns) { 616 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", 617 sysfs_ns_type(parent_sd) ? "required" : "invalid", 618 parent_sd->s_name, name); 619 return NULL; 620 } 621 622 hash = sysfs_name_hash(name, ns); 623 while (node) { 624 struct sysfs_dirent *sd; 625 int result; 626 627 sd = to_sysfs_dirent(node); 628 result = sysfs_name_compare(hash, name, ns, sd); 629 if (result < 0) 630 node = node->rb_left; 631 else if (result > 0) 632 node = node->rb_right; 633 else 634 return sd; 635 } 636 return NULL; 637 } 638 639 /** 640 * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name 641 * @parent_sd: sysfs_dirent to search under 642 * @name: name to look for 643 * @ns: the namespace tag to use 644 * 645 * Look for sysfs_dirent with name @name under @parent_sd and get 646 * it if found. 647 * 648 * LOCKING: 649 * Kernel thread context (may sleep). Grabs sysfs_mutex. 650 * 651 * RETURNS: 652 * Pointer to sysfs_dirent if found, NULL if not. 653 */ 654 struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd, 655 const unsigned char *name, 656 const void *ns) 657 { 658 struct sysfs_dirent *sd; 659 660 mutex_lock(&sysfs_mutex); 661 sd = sysfs_find_dirent(parent_sd, name, ns); 662 sysfs_get(sd); 663 mutex_unlock(&sysfs_mutex); 664 665 return sd; 666 } 667 EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns); 668 669 static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, 670 enum kobj_ns_type type, 671 const char *name, const void *ns, 672 struct sysfs_dirent **p_sd) 673 { 674 umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 675 struct sysfs_addrm_cxt acxt; 676 struct sysfs_dirent *sd; 677 int rc; 678 679 /* allocate */ 680 sd = sysfs_new_dirent(name, mode, SYSFS_DIR); 681 if (!sd) 682 return -ENOMEM; 683 684 sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT); 685 sd->s_ns = ns; 686 sd->s_dir.kobj = kobj; 687 688 /* link in */ 689 sysfs_addrm_start(&acxt); 690 rc = sysfs_add_one(&acxt, sd, parent_sd); 691 sysfs_addrm_finish(&acxt); 692 693 if (rc == 0) 694 *p_sd = sd; 695 else 696 sysfs_put(sd); 697 698 return rc; 699 } 700 701 int sysfs_create_subdir(struct kobject *kobj, const char *name, 702 struct sysfs_dirent **p_sd) 703 { 704 return create_dir(kobj, kobj->sd, 705 KOBJ_NS_TYPE_NONE, name, NULL, p_sd); 706 } 707 708 /** 709 * sysfs_read_ns_type: return associated ns_type 710 * @kobj: the kobject being queried 711 * 712 * Each kobject can be tagged with exactly one namespace type 713 * (i.e. network or user). Return the ns_type associated with 714 * this object if any 715 */ 716 static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) 717 { 718 const struct kobj_ns_type_operations *ops; 719 enum kobj_ns_type type; 720 721 ops = kobj_child_ns_ops(kobj); 722 if (!ops) 723 return KOBJ_NS_TYPE_NONE; 724 725 type = ops->type; 726 BUG_ON(type <= KOBJ_NS_TYPE_NONE); 727 BUG_ON(type >= KOBJ_NS_TYPES); 728 BUG_ON(!kobj_ns_type_registered(type)); 729 730 return type; 731 } 732 733 /** 734 * sysfs_create_dir_ns - create a directory for an object with a namespace tag 735 * @kobj: object we're creating directory for 736 * @ns: the namespace tag to use 737 */ 738 int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) 739 { 740 enum kobj_ns_type type; 741 struct sysfs_dirent *parent_sd, *sd; 742 int error = 0; 743 744 BUG_ON(!kobj); 745 746 if (kobj->parent) 747 parent_sd = kobj->parent->sd; 748 else 749 parent_sd = &sysfs_root; 750 751 if (!parent_sd) 752 return -ENOENT; 753 754 type = sysfs_read_ns_type(kobj); 755 756 error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd); 757 if (!error) 758 kobj->sd = sd; 759 return error; 760 } 761 762 static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry, 763 unsigned int flags) 764 { 765 struct dentry *ret = NULL; 766 struct dentry *parent = dentry->d_parent; 767 struct sysfs_dirent *parent_sd = parent->d_fsdata; 768 struct sysfs_dirent *sd; 769 struct inode *inode; 770 enum kobj_ns_type type; 771 const void *ns; 772 773 mutex_lock(&sysfs_mutex); 774 775 type = sysfs_ns_type(parent_sd); 776 ns = sysfs_info(dir->i_sb)->ns[type]; 777 778 sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns); 779 780 /* no such entry */ 781 if (!sd) { 782 ret = ERR_PTR(-ENOENT); 783 goto out_unlock; 784 } 785 dentry->d_fsdata = sysfs_get(sd); 786 787 /* attach dentry and inode */ 788 inode = sysfs_get_inode(dir->i_sb, sd); 789 if (!inode) { 790 ret = ERR_PTR(-ENOMEM); 791 goto out_unlock; 792 } 793 794 /* instantiate and hash dentry */ 795 ret = d_materialise_unique(dentry, inode); 796 out_unlock: 797 mutex_unlock(&sysfs_mutex); 798 return ret; 799 } 800 801 const struct inode_operations sysfs_dir_inode_operations = { 802 .lookup = sysfs_lookup, 803 .permission = sysfs_permission, 804 .setattr = sysfs_setattr, 805 .getattr = sysfs_getattr, 806 .setxattr = sysfs_setxattr, 807 }; 808 809 static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos) 810 { 811 struct sysfs_dirent *last; 812 813 while (true) { 814 struct rb_node *rbn; 815 816 last = pos; 817 818 if (sysfs_type(pos) != SYSFS_DIR) 819 break; 820 821 rbn = rb_first(&pos->s_dir.children); 822 if (!rbn) 823 break; 824 825 pos = to_sysfs_dirent(rbn); 826 } 827 828 return last; 829 } 830 831 /** 832 * sysfs_next_descendant_post - find the next descendant for post-order walk 833 * @pos: the current position (%NULL to initiate traversal) 834 * @root: sysfs_dirent whose descendants to walk 835 * 836 * Find the next descendant to visit for post-order traversal of @root's 837 * descendants. @root is included in the iteration and the last node to be 838 * visited. 839 */ 840 static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos, 841 struct sysfs_dirent *root) 842 { 843 struct rb_node *rbn; 844 845 lockdep_assert_held(&sysfs_mutex); 846 847 /* if first iteration, visit leftmost descendant which may be root */ 848 if (!pos) 849 return sysfs_leftmost_descendant(root); 850 851 /* if we visited @root, we're done */ 852 if (pos == root) 853 return NULL; 854 855 /* if there's an unvisited sibling, visit its leftmost descendant */ 856 rbn = rb_next(&pos->s_rb); 857 if (rbn) 858 return sysfs_leftmost_descendant(to_sysfs_dirent(rbn)); 859 860 /* no sibling left, visit parent */ 861 return pos->s_parent; 862 } 863 864 static void __sysfs_remove(struct sysfs_addrm_cxt *acxt, 865 struct sysfs_dirent *sd) 866 { 867 struct sysfs_dirent *pos, *next; 868 869 if (!sd) 870 return; 871 872 pr_debug("sysfs %s: removing\n", sd->s_name); 873 874 next = NULL; 875 do { 876 pos = next; 877 next = sysfs_next_descendant_post(pos, sd); 878 if (pos) 879 sysfs_remove_one(acxt, pos); 880 } while (next); 881 } 882 883 /** 884 * sysfs_remove - remove a sysfs_dirent recursively 885 * @sd: the sysfs_dirent to remove 886 * 887 * Remove @sd along with all its subdirectories and files. 888 */ 889 void sysfs_remove(struct sysfs_dirent *sd) 890 { 891 struct sysfs_addrm_cxt acxt; 892 893 sysfs_addrm_start(&acxt); 894 __sysfs_remove(&acxt, sd); 895 sysfs_addrm_finish(&acxt); 896 } 897 898 /** 899 * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it 900 * @dir_sd: parent of the target 901 * @name: name of the sysfs_dirent to remove 902 * @ns: namespace tag of the sysfs_dirent to remove 903 * 904 * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove 905 * it. Returns 0 on success, -ENOENT if such entry doesn't exist. 906 */ 907 int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, 908 const void *ns) 909 { 910 struct sysfs_addrm_cxt acxt; 911 struct sysfs_dirent *sd; 912 913 if (!dir_sd) { 914 WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", 915 name); 916 return -ENOENT; 917 } 918 919 sysfs_addrm_start(&acxt); 920 921 sd = sysfs_find_dirent(dir_sd, name, ns); 922 if (sd) 923 __sysfs_remove(&acxt, sd); 924 925 sysfs_addrm_finish(&acxt); 926 927 if (sd) 928 return 0; 929 else 930 return -ENOENT; 931 } 932 933 /** 934 * sysfs_remove_dir - remove an object's directory. 935 * @kobj: object. 936 * 937 * The only thing special about this is that we remove any files in 938 * the directory before we remove the directory, and we've inlined 939 * what used to be sysfs_rmdir() below, instead of calling separately. 940 */ 941 void sysfs_remove_dir(struct kobject *kobj) 942 { 943 struct sysfs_dirent *sd = kobj->sd; 944 945 /* 946 * In general, kboject owner is responsible for ensuring removal 947 * doesn't race with other operations and sysfs doesn't provide any 948 * protection; however, when @kobj is used as a symlink target, the 949 * symlinking entity usually doesn't own @kobj and thus has no 950 * control over removal. @kobj->sd may be removed anytime and 951 * symlink code may end up dereferencing an already freed sd. 952 * 953 * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation 954 * against symlink operations so that symlink code can safely 955 * dereference @kobj->sd. 956 */ 957 spin_lock(&sysfs_symlink_target_lock); 958 kobj->sd = NULL; 959 spin_unlock(&sysfs_symlink_target_lock); 960 961 if (sd) { 962 WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); 963 sysfs_remove(sd); 964 } 965 } 966 967 int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, 968 const char *new_name, const void *new_ns) 969 { 970 int error; 971 972 mutex_lock(&sysfs_mutex); 973 974 error = 0; 975 if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) && 976 (strcmp(sd->s_name, new_name) == 0)) 977 goto out; /* nothing to rename */ 978 979 error = -EEXIST; 980 if (sysfs_find_dirent(new_parent_sd, new_name, new_ns)) 981 goto out; 982 983 /* rename sysfs_dirent */ 984 if (strcmp(sd->s_name, new_name) != 0) { 985 error = -ENOMEM; 986 new_name = kstrdup(new_name, GFP_KERNEL); 987 if (!new_name) 988 goto out; 989 990 kfree(sd->s_name); 991 sd->s_name = new_name; 992 } 993 994 /* 995 * Move to the appropriate place in the appropriate directories rbtree. 996 */ 997 sysfs_unlink_sibling(sd); 998 sysfs_get(new_parent_sd); 999 sysfs_put(sd->s_parent); 1000 sd->s_ns = new_ns; 1001 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); 1002 sd->s_parent = new_parent_sd; 1003 sysfs_link_sibling(sd); 1004 1005 error = 0; 1006 out: 1007 mutex_unlock(&sysfs_mutex); 1008 return error; 1009 } 1010 1011 int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, 1012 const void *new_ns) 1013 { 1014 struct sysfs_dirent *parent_sd = kobj->sd->s_parent; 1015 1016 return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns); 1017 } 1018 1019 int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, 1020 const void *new_ns) 1021 { 1022 struct sysfs_dirent *sd = kobj->sd; 1023 struct sysfs_dirent *new_parent_sd; 1024 1025 BUG_ON(!sd->s_parent); 1026 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? 1027 new_parent_kobj->sd : &sysfs_root; 1028 1029 return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns); 1030 } 1031 1032 /* Relationship between s_mode and the DT_xxx types */ 1033 static inline unsigned char dt_type(struct sysfs_dirent *sd) 1034 { 1035 return (sd->s_mode >> 12) & 15; 1036 } 1037 1038 static int sysfs_dir_release(struct inode *inode, struct file *filp) 1039 { 1040 sysfs_put(filp->private_data); 1041 return 0; 1042 } 1043 1044 static struct sysfs_dirent *sysfs_dir_pos(const void *ns, 1045 struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos) 1046 { 1047 if (pos) { 1048 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && 1049 pos->s_parent == parent_sd && 1050 hash == pos->s_hash; 1051 sysfs_put(pos); 1052 if (!valid) 1053 pos = NULL; 1054 } 1055 if (!pos && (hash > 1) && (hash < INT_MAX)) { 1056 struct rb_node *node = parent_sd->s_dir.children.rb_node; 1057 while (node) { 1058 pos = to_sysfs_dirent(node); 1059 1060 if (hash < pos->s_hash) 1061 node = node->rb_left; 1062 else if (hash > pos->s_hash) 1063 node = node->rb_right; 1064 else 1065 break; 1066 } 1067 } 1068 /* Skip over entries in the wrong namespace */ 1069 while (pos && pos->s_ns != ns) { 1070 struct rb_node *node = rb_next(&pos->s_rb); 1071 if (!node) 1072 pos = NULL; 1073 else 1074 pos = to_sysfs_dirent(node); 1075 } 1076 return pos; 1077 } 1078 1079 static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, 1080 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) 1081 { 1082 pos = sysfs_dir_pos(ns, parent_sd, ino, pos); 1083 if (pos) 1084 do { 1085 struct rb_node *node = rb_next(&pos->s_rb); 1086 if (!node) 1087 pos = NULL; 1088 else 1089 pos = to_sysfs_dirent(node); 1090 } while (pos && pos->s_ns != ns); 1091 return pos; 1092 } 1093 1094 static int sysfs_readdir(struct file *file, struct dir_context *ctx) 1095 { 1096 struct dentry *dentry = file->f_path.dentry; 1097 struct sysfs_dirent *parent_sd = dentry->d_fsdata; 1098 struct sysfs_dirent *pos = file->private_data; 1099 enum kobj_ns_type type; 1100 const void *ns; 1101 1102 type = sysfs_ns_type(parent_sd); 1103 ns = sysfs_info(dentry->d_sb)->ns[type]; 1104 1105 if (!dir_emit_dots(file, ctx)) 1106 return 0; 1107 mutex_lock(&sysfs_mutex); 1108 for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos); 1109 pos; 1110 pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) { 1111 const char *name = pos->s_name; 1112 unsigned int type = dt_type(pos); 1113 int len = strlen(name); 1114 ino_t ino = pos->s_ino; 1115 ctx->pos = pos->s_hash; 1116 file->private_data = sysfs_get(pos); 1117 1118 mutex_unlock(&sysfs_mutex); 1119 if (!dir_emit(ctx, name, len, ino, type)) 1120 return 0; 1121 mutex_lock(&sysfs_mutex); 1122 } 1123 mutex_unlock(&sysfs_mutex); 1124 file->private_data = NULL; 1125 ctx->pos = INT_MAX; 1126 return 0; 1127 } 1128 1129 static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) 1130 { 1131 struct inode *inode = file_inode(file); 1132 loff_t ret; 1133 1134 mutex_lock(&inode->i_mutex); 1135 ret = generic_file_llseek(file, offset, whence); 1136 mutex_unlock(&inode->i_mutex); 1137 1138 return ret; 1139 } 1140 1141 const struct file_operations sysfs_dir_operations = { 1142 .read = generic_read_dir, 1143 .iterate = sysfs_readdir, 1144 .release = sysfs_dir_release, 1145 .llseek = sysfs_dir_llseek, 1146 }; 1147