1 /* 2 * fs/dcache.c 3 * 4 * Complete reimplementation 5 * (C) 1997 Thomas Schoebel-Theuer, 6 * with heavy changes by Linus Torvalds 7 */ 8 9 /* 10 * Notes on the allocation strategy: 11 * 12 * The dcache is a master of the icache - whenever a dcache entry 13 * exists, the inode will always exist. "iput()" is done either when 14 * the dcache entry is deleted or garbage collected. 15 */ 16 17 #include <linux/config.h> 18 #include <linux/syscalls.h> 19 #include <linux/string.h> 20 #include <linux/mm.h> 21 #include <linux/fs.h> 22 #include <linux/fsnotify.h> 23 #include <linux/slab.h> 24 #include <linux/init.h> 25 #include <linux/smp_lock.h> 26 #include <linux/hash.h> 27 #include <linux/cache.h> 28 #include <linux/module.h> 29 #include <linux/mount.h> 30 #include <linux/file.h> 31 #include <asm/uaccess.h> 32 #include <linux/security.h> 33 #include <linux/seqlock.h> 34 #include <linux/swap.h> 35 #include <linux/bootmem.h> 36 37 38 int sysctl_vfs_cache_pressure __read_mostly = 100; 39 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 40 41 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); 42 static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; 43 44 EXPORT_SYMBOL(dcache_lock); 45 46 static kmem_cache_t *dentry_cache __read_mostly; 47 48 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) 49 50 /* 51 * This is the single most critical data structure when it comes 52 * to the dcache: the hashtable for lookups. Somebody should try 53 * to make this good - I've just made it work. 54 * 55 * This hash-function tries to avoid losing too many bits of hash 56 * information, yet avoid using a prime hash-size or similar. 57 */ 58 #define D_HASHBITS d_hash_shift 59 #define D_HASHMASK d_hash_mask 60 61 static unsigned int d_hash_mask __read_mostly; 62 static unsigned int d_hash_shift __read_mostly; 63 static struct hlist_head *dentry_hashtable __read_mostly; 64 static LIST_HEAD(dentry_unused); 65 66 /* Statistics gathering. 
*/
struct dentry_stat_t dentry_stat = {
        .age_limit = 45,
};

/*
 * RCU callback: performs the actual freeing of a dentry once a grace
 * period has elapsed, so concurrent lock-free (RCU) hash-chain walkers
 * can no longer be looking at it.
 */
static void d_callback(struct rcu_head *head)
{
        struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);

        /* Names too long for d_iname were kmalloc()ed separately */
        if (dname_external(dentry))
                kfree(dentry->d_name.name);
        kmem_cache_free(dentry_cache, dentry);
}

/*
 * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
 * inside dcache_lock.
 */
static void d_free(struct dentry *dentry)
{
        /* Give the filesystem a last look at the dentry, if it asked for one */
        if (dentry->d_op && dentry->d_op->d_release)
                dentry->d_op->d_release(dentry);
        /* Defer the actual free until after an RCU grace period */
        call_rcu(&dentry->d_u.d_rcu, d_callback);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined.
 * Called with dcache_lock and per dentry lock held, drops both.
 */
static void dentry_iput(struct dentry * dentry)
{
        struct inode *inode = dentry->d_inode;
        if (inode) {
                dentry->d_inode = NULL;
                list_del_init(&dentry->d_alias);
                /*
                 * Both locks must be dropped before calling out:
                 * d_iput()/iput() may block.
                 */
                spin_unlock(&dentry->d_lock);
                spin_unlock(&dcache_lock);
                if (!inode->i_nlink)
                        fsnotify_inoderemove(inode);
                if (dentry->d_op && dentry->d_op->d_iput)
                        dentry->d_op->d_iput(dentry, inode);
                else
                        iput(inode);
        } else {
                spin_unlock(&dentry->d_lock);
                spin_unlock(&dcache_lock);
        }
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
*/

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 *
 * no dcache lock, please.
 */

void dput(struct dentry *dentry)
{
        if (!dentry)
                return;

repeat:
        /* Dropping the last reference may end up sleeping in iput() */
        if (atomic_read(&dentry->d_count) == 1)
                might_sleep();
        if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
                return;

        spin_lock(&dentry->d_lock);
        /* Somebody regained a reference between the dec and the lock */
        if (atomic_read(&dentry->d_count)) {
                spin_unlock(&dentry->d_lock);
                spin_unlock(&dcache_lock);
                return;
        }

        /*
         * AV: ->d_delete() is _NOT_ allowed to block now.
         */
        if (dentry->d_op && dentry->d_op->d_delete) {
                if (dentry->d_op->d_delete(dentry))
                        goto unhash_it;
        }
        /* Unreachable? Get rid of it */
        if (d_unhashed(dentry))
                goto kill_it;
        /* Still hashed: park it on the LRU for possible reuse */
        if (list_empty(&dentry->d_lru)) {
                dentry->d_flags |= DCACHE_REFERENCED;
                list_add(&dentry->d_lru, &dentry_unused);
                dentry_stat.nr_unused++;
        }
        spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);
        return;

unhash_it:
        __d_drop(dentry);

kill_it: {
                struct dentry *parent;

                /* If dentry was on d_lru list
                 * delete it from there
                 */
                if (!list_empty(&dentry->d_lru)) {
                        list_del(&dentry->d_lru);
                        dentry_stat.nr_unused--;
                }
                list_del(&dentry->d_u.d_child);
                dentry_stat.nr_dentry--;        /* For d_free, below */
                /* drops the locks, at that point nobody can reach this dentry */
                dentry_iput(dentry);
                parent = dentry->d_parent;
                d_free(dentry);
                if (dentry == parent)
                        return;
                /* Hand-rolled tail recursion: release the parent next */
                dentry = parent;
                goto repeat;
        }
}

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to
invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry * dentry)
{
        /*
         * If it's already been dropped, return OK.
         */
        spin_lock(&dcache_lock);
        if (d_unhashed(dentry)) {
                spin_unlock(&dcache_lock);
                return 0;
        }
        /*
         * Check whether to do a partial shrink_dcache
         * to get rid of unused child entries.
         */
        if (!list_empty(&dentry->d_subdirs)) {
                /* shrink_dcache_parent() takes dcache_lock itself */
                spin_unlock(&dcache_lock);
                shrink_dcache_parent(dentry);
                spin_lock(&dcache_lock);
        }

        /*
         * Somebody else still using it?
         *
         * If it's a directory, we can't drop it
         * for fear of somebody re-populating it
         * with children (even though dropping it
         * would make it unreachable from the root,
         * we might still populate it if it was a
         * working directory or similar).
         */
        spin_lock(&dentry->d_lock);
        if (atomic_read(&dentry->d_count) > 1) {
                if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
                        spin_unlock(&dentry->d_lock);
                        spin_unlock(&dcache_lock);
                        return -EBUSY;
                }
        }

        __d_drop(dentry);
        spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);
        return 0;
}

/* This should be called _only_ with dcache_lock held */

static inline struct dentry * __dget_locked(struct dentry *dentry)
{
        atomic_inc(&dentry->d_count);
        /* A dentry with users must not sit on the unused (LRU) list */
        if (!list_empty(&dentry->d_lru)) {
                dentry_stat.nr_unused--;
                list_del_init(&dentry->d_lru);
        }
        return dentry;
}

/* Exported variant of __dget_locked(); same contract: dcache_lock held */
struct dentry * dget_locked(struct dentry *dentry)
{
        return __dget_locked(dentry);
}

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon:  flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has a DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return a DCACHE_DISCONNECTED alias.
*/

static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
{
        struct list_head *head, *next, *tmp;
        struct dentry *alias, *discon_alias=NULL;

        /* Open-coded walk of inode->i_dentry; caller holds dcache_lock */
        head = &inode->i_dentry;
        next = inode->i_dentry.next;
        while (next != head) {
                tmp = next;
                next = tmp->next;
                prefetch(next);
                alias = list_entry(tmp, struct dentry, d_alias);
                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
                        /* Remember a disconnected alias, but keep looking
                         * for a connected one to prefer (unless the caller
                         * asked for disconnected only). */
                        if (alias->d_flags & DCACHE_DISCONNECTED)
                                discon_alias = alias;
                        else if (!want_discon) {
                                __dget_locked(alias);
                                return alias;
                        }
                }
        }
        if (discon_alias)
                __dget_locked(discon_alias);
        return discon_alias;
}

/* Locked wrapper around __d_find_alias(inode, 0); may return NULL */
struct dentry * d_find_alias(struct inode *inode)
{
        struct dentry *de = NULL;

        if (!list_empty(&inode->i_dentry)) {
                spin_lock(&dcache_lock);
                de = __d_find_alias(inode, 0);
                spin_unlock(&dcache_lock);
        }
        return de;
}

/*
 *      Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
        struct dentry *dentry;
restart:
        spin_lock(&dcache_lock);
        list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
                spin_lock(&dentry->d_lock);
                if (!atomic_read(&dentry->d_count)) {
                        /* Pin it, unhash it, then drop locks and let dput()
                         * do the actual teardown; restart since the list
                         * may have changed while unlocked. */
                        __dget_locked(dentry);
                        __d_drop(dentry);
                        spin_unlock(&dentry->d_lock);
                        spin_unlock(&dcache_lock);
                        dput(dentry);
                        goto restart;
                }
                spin_unlock(&dentry->d_lock);
        }
        spin_unlock(&dcache_lock);
}

/*
 * Throw away a dentry - free the inode, dput the parent.  This requires that
 * the LRU list has already been removed.
 *
 * Called with dcache_lock, drops it and then regains.
 * Called with dentry->d_lock held, drops it.
*/
static void prune_one_dentry(struct dentry * dentry)
{
        struct dentry * parent;

        __d_drop(dentry);
        list_del(&dentry->d_u.d_child);
        dentry_stat.nr_dentry--;        /* For d_free, below */
        /* dentry_iput() drops both dcache_lock and dentry->d_lock */
        dentry_iput(dentry);
        parent = dentry->d_parent;
        d_free(dentry);
        if (parent != dentry)
                dput(parent);
        /* Re-acquire for the caller, per the contract above */
        spin_lock(&dcache_lock);
}

/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try and free
 * @sb: if given, ignore dentries for other superblocks
 *      which are being unmounted.
 *
 * Shrink the dcache. This is done when we need
 * more memory, or simply when we need to unmount
 * something (at which point we need to unuse
 * all dentries).
 *
 * This function may fail to free any resources if
 * all the dentries are in use.
 */

static void prune_dcache(int count, struct super_block *sb)
{
        spin_lock(&dcache_lock);
        for (; count ; count--) {
                struct dentry *dentry;
                struct list_head *tmp;
                struct rw_semaphore *s_umount;

                cond_resched_lock(&dcache_lock);

                tmp = dentry_unused.prev;
                if (unlikely(sb)) {
                        /* Try to find a dentry for this sb, but don't try
                         * too hard, if they aren't near the tail they will
                         * be moved down again soon
                         */
                        int skip = count;
                        while (skip && tmp != &dentry_unused &&
                            list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
                                skip--;
                                tmp = tmp->prev;
                        }
                }
                if (tmp == &dentry_unused)
                        break;
                list_del_init(tmp);
                prefetch(dentry_unused.prev);
                dentry_stat.nr_unused--;
                dentry = list_entry(tmp, struct dentry, d_lru);

                spin_lock(&dentry->d_lock);
                /*
                 * We found an inuse dentry which was not removed from
                 * dentry_unused because of laziness during lookup.  Do not free
                 * it - just keep it off the dentry_unused list.
                 */
                if (atomic_read(&dentry->d_count)) {
                        spin_unlock(&dentry->d_lock);
                        continue;
                }
                /* If the dentry was recently referenced, don't free it. */
                if (dentry->d_flags & DCACHE_REFERENCED) {
                        dentry->d_flags &= ~DCACHE_REFERENCED;
                        list_add(&dentry->d_lru, &dentry_unused);
                        dentry_stat.nr_unused++;
                        spin_unlock(&dentry->d_lock);
                        continue;
                }
                /*
                 * If the dentry is not DCACHED_REFERENCED, it is time
                 * to remove it from the dcache, provided the super block is
                 * NULL (which means we are trying to reclaim memory)
                 * or this dentry belongs to the same super block that
                 * we want to shrink.
                 */
                /*
                 * If this dentry is for "my" filesystem, then I can prune it
                 * without taking the s_umount lock (I already hold it).
                 */
                if (sb && dentry->d_sb == sb) {
                        prune_one_dentry(dentry);
                        continue;
                }
                /*
                 * ...otherwise we need to be sure this filesystem isn't being
                 * unmounted, otherwise we could race with
                 * generic_shutdown_super(), and end up holding a reference to
                 * an inode while the filesystem is unmounted.
                 * So we try to get s_umount, and make sure s_root isn't NULL.
                 * (Take a local copy of s_umount to avoid a use-after-free of
                 * `dentry').
                 */
                s_umount = &dentry->d_sb->s_umount;
                if (down_read_trylock(s_umount)) {
                        if (dentry->d_sb->s_root != NULL) {
                                prune_one_dentry(dentry);
                                up_read(s_umount);
                                continue;
                        }
                        up_read(s_umount);
                }
                spin_unlock(&dentry->d_lock);
                /* Cannot remove the first dentry, and it isn't appropriate
                 * to move it to the head of the list, so give up, and try
                 * later
                 */
                break;
        }
        spin_unlock(&dcache_lock);
}

/*
 * Shrink the dcache for the specified super block.
 * This allows us to unmount a device without disturbing
 * the dcache for the other devices.
 *
 * This implementation makes just two traversals of the
 * unused list.
On the first pass we move the selected
 * dentries to the most recent end, and on the second
 * pass we free them.  The second pass must restart after
 * each dput(), but since the target dentries are all at
 * the end, it's really just a single traversal.
 */

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This
 * is used to free the dcache before unmounting a file
 * system
 */

void shrink_dcache_sb(struct super_block * sb)
{
        struct list_head *tmp, *next;
        struct dentry *dentry;

        /*
         * Pass one ... move the dentries for the specified
         * superblock to the most recent end of the unused list.
         */
        spin_lock(&dcache_lock);
        list_for_each_safe(tmp, next, &dentry_unused) {
                dentry = list_entry(tmp, struct dentry, d_lru);
                if (dentry->d_sb != sb)
                        continue;
                /* Re-insert at the head (most recent end) */
                list_del(tmp);
                list_add(tmp, &dentry_unused);
        }

        /*
         * Pass two ... free the dentries for this superblock.
         */
repeat:
        list_for_each_safe(tmp, next, &dentry_unused) {
                dentry = list_entry(tmp, struct dentry, d_lru);
                if (dentry->d_sb != sb)
                        continue;
                dentry_stat.nr_unused--;
                list_del_init(tmp);
                spin_lock(&dentry->d_lock);
                /* Skip dentries that gained a user since pass one */
                if (atomic_read(&dentry->d_count)) {
                        spin_unlock(&dentry->d_lock);
                        continue;
                }
                /* prune_one_dentry() drops and retakes dcache_lock,
                 * so the list may have changed: restart the scan */
                prune_one_dentry(dentry);
                cond_resched_lock(&dcache_lock);
                goto repeat;
        }
        spin_unlock(&dcache_lock);
}

/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
*
 * Return true if the parent or its subdirectories contain
 * a mount point
 */

int have_submounts(struct dentry *parent)
{
        struct dentry *this_parent = parent;
        struct list_head *next;

        spin_lock(&dcache_lock);
        if (d_mountpoint(parent))
                goto positive;
repeat:
        /* Start (or restart) scanning the children of this_parent */
        next = this_parent->d_subdirs.next;
resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
                /* Have we found a mount point ? */
                if (d_mountpoint(dentry))
                        goto positive;
                /* Descend a level if this child has children of its own */
                if (!list_empty(&dentry->d_subdirs)) {
                        this_parent = dentry;
                        goto repeat;
                }
        }
        /*
         * All done at this level ... ascend and resume the search.
         */
        if (this_parent != parent) {
                next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                goto resume;
        }
        spin_unlock(&dcache_lock);
        return 0; /* No mount points found in tree */
positive:
        spin_unlock(&dcache_lock);
        return 1;
}

/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise  it  returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
*/
static int select_parent(struct dentry * parent)
{
        struct dentry *this_parent = parent;
        struct list_head *next;
        int found = 0;

        spin_lock(&dcache_lock);
repeat:
        next = this_parent->d_subdirs.next;
resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;

                /* Take it off the LRU for now; re-added below if unused */
                if (!list_empty(&dentry->d_lru)) {
                        dentry_stat.nr_unused--;
                        list_del_init(&dentry->d_lru);
                }
                /*
                 * move only zero ref count dentries to the end
                 * of the unused list for prune_dcache
                 */
                if (!atomic_read(&dentry->d_count)) {
                        list_add(&dentry->d_lru, dentry_unused.prev);
                        dentry_stat.nr_unused++;
                        found++;
                }

                /*
                 * We can return to the caller if we have found some (this
                 * ensures forward progress). We'll be coming back to find
                 * the rest.
                 */
                if (found && need_resched())
                        goto out;

                /*
                 * Descend a level if the d_subdirs list is non-empty.
                 */
                if (!list_empty(&dentry->d_subdirs)) {
                        this_parent = dentry;
                        goto repeat;
                }
        }
        /*
         * All done at this level ... ascend and resume the search.
         */
        if (this_parent != parent) {
                next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                goto resume;
        }
out:
        spin_unlock(&dcache_lock);
        return found;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */

void shrink_dcache_parent(struct dentry * parent)
{
        int found;

        /* Keep sweeping until no more unused children can be collected */
        while ((found = select_parent(parent)) != 0)
                prune_dcache(found, parent->d_sb);
}

/**
 * shrink_dcache_anon - further prune the cache
 * @head: head of d_hash list of dentries to prune
 *
 * Prune the dentries that are anonymous
 *
 * parsing d_hash list does not hlist_for_each_entry_rcu() as it
 * done under dcache_lock.
 *
 */
void shrink_dcache_anon(struct super_block *sb)
{
        struct hlist_node *lp;
        struct hlist_head *head = &sb->s_anon;
        int found;
        do {
                found = 0;
                spin_lock(&dcache_lock);
                hlist_for_each(lp, head) {
                        struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
                        /* Same lazy-LRU repair as select_parent() */
                        if (!list_empty(&this->d_lru)) {
                                dentry_stat.nr_unused--;
                                list_del_init(&this->d_lru);
                        }

                        /*
                         * move only zero ref count dentries to the end
                         * of the unused list for prune_dcache
                         */
                        if (!atomic_read(&this->d_count)) {
                                list_add_tail(&this->d_lru, &dentry_unused);
                                dentry_stat.nr_unused++;
                                found++;
                        }
                }
                spin_unlock(&dcache_lock);
                prune_dcache(found, sb);
        } while(found);
}

/*
 * Scan `nr' dentries and return the number which remain.
 *
 * We need to avoid reentering the filesystem if the caller is performing a
 * GFP_NOFS allocation attempt.  One example deadlock is:
 *
 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
 *
 * In this case we return -1 to tell the caller that we baled.
*/
static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
{
        if (nr) {
                /* Refuse to recurse into the fs on GFP_NOFS allocations */
                if (!(gfp_mask & __GFP_FS))
                        return -1;
                prune_dcache(nr, NULL);
        }
        /* Report remaining reclaimable entries, scaled by cache pressure */
        return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}

/**
 * d_alloc      -       allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */

struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
        struct dentry *dentry;
        char *dname;

        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
        if (!dentry)
                return NULL;

        /* Long names get a separate allocation; short ones live inline */
        if (name->len > DNAME_INLINE_LEN-1) {
                dname = kmalloc(name->len + 1, GFP_KERNEL);
                if (!dname) {
                        kmem_cache_free(dentry_cache, dentry);
                        return NULL;
                }
        } else  {
                dname = dentry->d_iname;
        }
        dentry->d_name.name = dname;

        dentry->d_name.len = name->len;
        dentry->d_name.hash = name->hash;
        memcpy(dname, name->name, name->len);
        dname[name->len] = 0;

        atomic_set(&dentry->d_count, 1);
        dentry->d_flags = DCACHE_UNHASHED;
        spin_lock_init(&dentry->d_lock);
        dentry->d_inode = NULL;
        dentry->d_parent = NULL;
        dentry->d_sb = NULL;
        dentry->d_op = NULL;
        dentry->d_fsdata = NULL;
        dentry->d_mounted = 0;
#ifdef CONFIG_PROFILING
        dentry->d_cookie = NULL;
#endif
        INIT_HLIST_NODE(&dentry->d_hash);
        INIT_LIST_HEAD(&dentry->d_lru);
        INIT_LIST_HEAD(&dentry->d_subdirs);
        INIT_LIST_HEAD(&dentry->d_alias);

        if (parent) {
                dentry->d_parent = dget(parent);
                dentry->d_sb = parent->d_sb;
        } else {
                INIT_LIST_HEAD(&dentry->d_u.d_child);
        }

        spin_lock(&dcache_lock);
        if (parent)
                list_add(&dentry->d_u.d_child, &parent->d_subdirs);
        dentry_stat.nr_dentry++;
        spin_unlock(&dcache_lock);

        return dentry;
}

/* Convenience wrapper: build the qstr (with hash) from a C string */
struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
        struct qstr q;

        q.name = name;
        q.len = strlen(name);
        q.hash = full_name_hash(q.name, q.len);
        return d_alloc(parent, &q);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */

void d_instantiate(struct dentry *entry, struct inode * inode)
{
        BUG_ON(!list_empty(&entry->d_alias));
        spin_lock(&dcache_lock);
        if (inode)
                list_add(&entry->d_alias, &inode->i_dentry);
        entry->d_inode = inode;
        fsnotify_d_instantiate(entry, inode);
        spin_unlock(&dcache_lock);
        security_d_instantiate(entry, inode);
}

/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
*/
struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
        struct dentry *alias;
        int len = entry->d_name.len;
        const char *name = entry->d_name.name;
        unsigned int hash = entry->d_name.hash;

        BUG_ON(!list_empty(&entry->d_alias));
        spin_lock(&dcache_lock);
        if (!inode)
                goto do_negative;
        /* Look for an existing alias with the same parent and name */
        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
                struct qstr *qstr = &alias->d_name;

                /* Cheap checks first: hash, parent, length, then memcmp */
                if (qstr->hash != hash)
                        continue;
                if (alias->d_parent != entry->d_parent)
                        continue;
                if (qstr->len != len)
                        continue;
                if (memcmp(qstr->name, name, len))
                        continue;
                dget_locked(alias);
                spin_unlock(&dcache_lock);
                BUG_ON(!d_unhashed(alias));
                /* Caller's inode reference is consumed by the alias */
                iput(inode);
                return alias;
        }
        list_add(&entry->d_alias, &inode->i_dentry);
do_negative:
        entry->d_inode = inode;
        fsnotify_d_instantiate(entry, inode);
        spin_unlock(&dcache_lock);
        security_d_instantiate(entry, inode);
        return NULL;
}
EXPORT_SYMBOL(d_instantiate_unique);

/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
918 */ 919 920 struct dentry * d_alloc_root(struct inode * root_inode) 921 { 922 struct dentry *res = NULL; 923 924 if (root_inode) { 925 static const struct qstr name = { .name = "/", .len = 1 }; 926 927 res = d_alloc(NULL, &name); 928 if (res) { 929 res->d_sb = root_inode->i_sb; 930 res->d_parent = res; 931 d_instantiate(res, root_inode); 932 } 933 } 934 return res; 935 } 936 937 static inline struct hlist_head *d_hash(struct dentry *parent, 938 unsigned long hash) 939 { 940 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 941 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); 942 return dentry_hashtable + (hash & D_HASHMASK); 943 } 944 945 /** 946 * d_alloc_anon - allocate an anonymous dentry 947 * @inode: inode to allocate the dentry for 948 * 949 * This is similar to d_alloc_root. It is used by filesystems when 950 * creating a dentry for a given inode, often in the process of 951 * mapping a filehandle to a dentry. The returned dentry may be 952 * anonymous, or may have a full name (if the inode was already 953 * in the cache). The file system may need to make further 954 * efforts to connect this dentry into the dcache properly. 955 * 956 * When called on a directory inode, we must ensure that 957 * the inode only ever has one dentry. If a dentry is 958 * found, that is returned instead of allocating a new one. 959 * 960 * On successful return, the reference to the inode has been transferred 961 * to the dentry. If %NULL is returned (indicating kmalloc failure), 962 * the reference on the inode has not been released. 
*/

struct dentry * d_alloc_anon(struct inode *inode)
{
        static const struct qstr anonstring = { .name = "" };
        struct dentry *tmp;
        struct dentry *res;

        /* Fast path: an alias already exists, hand back that one */
        if ((res = d_find_alias(inode))) {
                iput(inode);
                return res;
        }

        tmp = d_alloc(NULL, &anonstring);
        if (!tmp)
                return NULL;

        tmp->d_parent = tmp; /* make sure dput doesn't croak */

        spin_lock(&dcache_lock);
        /* Re-check under the lock: someone may have raced us in */
        res = __d_find_alias(inode, 0);
        if (!res) {
                /* attach a disconnected dentry */
                res = tmp;
                tmp = NULL;
                spin_lock(&res->d_lock);
                res->d_sb = inode->i_sb;
                res->d_parent = res;
                res->d_inode = inode;
                res->d_flags |= DCACHE_DISCONNECTED;
                res->d_flags &= ~DCACHE_UNHASHED;
                list_add(&res->d_alias, &inode->i_dentry);
                /* Disconnected dentries hang off the sb's s_anon list */
                hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
                spin_unlock(&res->d_lock);

                inode = NULL; /* don't drop reference */
        }
        spin_unlock(&dcache_lock);

        /* Drop whichever of the two we didn't end up using */
        if (inode)
                iput(inode);
        if (tmp)
                dput(tmp);
        return res;
}


/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode:  the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned.  Otherwise NULL
 * is returned.  This matches the expected return value of ->lookup.
 *
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
        struct dentry *new = NULL;

        if (inode) {
                spin_lock(&dcache_lock);
                new = __d_find_alias(inode, 1);
                if (new) {
                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
                        fsnotify_d_instantiate(new, inode);
                        spin_unlock(&dcache_lock);
                        security_d_instantiate(new, inode);
                        /* Hash the negative dentry, then move the found
                         * alias into its place */
                        d_rehash(dentry);
                        d_move(new, dentry);
                        iput(inode);
                } else {
                        /* d_instantiate takes dcache_lock, so we do it by hand */
                        list_add(&dentry->d_alias, &inode->i_dentry);
                        dentry->d_inode = inode;
                        fsnotify_d_instantiate(dentry, inode);
                        spin_unlock(&dcache_lock);
                        security_d_instantiate(dentry, inode);
                        d_rehash(dentry);
                }
        } else
                d_add(dentry, inode);
        return new;
}


/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 *
 * Searches the children of the parent dentry for the name in question. If
 * the dentry is found its reference count is incremented and the dentry
 * is returned. The caller must use d_put to free the entry when it has
 * finished using it. %NULL is returned on failure.
 *
 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
 * Memory barriers are used while updating and doing lockless traversal.
 * To avoid races with d_move while rename is happening, d_lock is used.
 *
 * Overflows in memcmp(), while d_move, are avoided by keeping the length
 * and name pointer in one structure pointed by d_qstr.
 *
 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
 * lookup is going on.
 *
 * dentry_unused list is not updated even if lookup finds the required dentry
 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
 * select_parent and __dget_locked.
This laziness saves lookup from dcache_lock
 * acquisition.
 *
 * d_lookup() is protected against the concurrent renames in some unrelated
 * directory using the seqlockt_t rename_lock.
 */

struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
        struct dentry * dentry = NULL;
        unsigned long seq;

        /* Retry the lockless lookup if a rename raced with us */
        do {
                seq = read_seqbegin(&rename_lock);
                dentry = __d_lookup(parent, name);
                if (dentry)
                        break;
        } while (read_seqretry(&rename_lock, seq));
        return dentry;
}

struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
        unsigned int len = name->len;
        unsigned int hash = name->hash;
        const unsigned char *str = name->name;
        struct hlist_head *head = d_hash(parent,hash);
        struct dentry *found = NULL;
        struct hlist_node *node;
        struct dentry *dentry;

        rcu_read_lock();

        hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
                struct qstr *qstr;

                /* Cheap lockless pre-checks before taking d_lock */
                if (dentry->d_name.hash != hash)
                        continue;
                if (dentry->d_parent != parent)
                        continue;

                spin_lock(&dentry->d_lock);

                /*
                 * Recheck the dentry after taking the lock - d_move may have
                 * changed things.  Don't bother checking the hash because we're
                 * about to compare the whole name anyway.
                 */
                if (dentry->d_parent != parent)
                        goto next;

                /*
                 * It is safe to compare names since d_move() cannot
                 * change the qstr (protected by d_lock).
                 */
                qstr = &dentry->d_name;
                if (parent->d_op && parent->d_op->d_compare) {
                        if (parent->d_op->d_compare(parent, qstr, name))
                                goto next;
                } else {
                        if (qstr->len != len)
                                goto next;
                        if (memcmp(qstr->name, str, len))
                                goto next;
                }

                /* Only hand out hashed (still-live) dentries */
                if (!d_unhashed(dentry)) {
                        atomic_inc(&dentry->d_count);
                        found = dentry;
                }
                spin_unlock(&dentry->d_lock);
                break;
next:
                spin_unlock(&dentry->d_lock);
        }
        rcu_read_unlock();

        return found;
}

/**
 * d_hash_and_lookup - hash the qstr then search for a dentry
 * @dir: Directory to search in
 * @name: qstr of name we wish to find
 *
 * On hash failure or on lookup failure NULL is returned.
 */
struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
{
        struct dentry *dentry = NULL;

        /*
         * Check for a fs-specific hash function. Note that we must
         * calculate the standard hash first, as the d_op->d_hash()
         * routine may choose to leave the hash value unchanged.
         */
        name->hash = full_name_hash(name->name, name->len);
        if (dir->d_op && dir->d_op->d_hash) {
                if (dir->d_op->d_hash(dir, name) < 0)
                        goto out;
        }
        dentry = d_lookup(dir, name);
out:
        return dentry;
}

/**
 * d_validate - verify dentry provided from insecure source
 * @dentry: The dentry alleged to be valid child of @dparent
 * @dparent: The parent dentry (known to be valid)
 * @hash: Hash of the dentry
 * @len: Length of the name
 *
 * An insecure source has sent us a dentry, here we verify it and dget() it.
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned if the dentry is invalid.
 */
int d_validate(struct dentry *dentry, struct dentry *dparent)
{
	struct hlist_head *base;
	struct hlist_node *lhp;

	/* Check whether the ptr might be valid at all.. */
	if (!kmem_ptr_validate(dentry_cache, dentry))
		goto out;

	/* Cheap parent check before taking the lock */
	if (dentry->d_parent != dparent)
		goto out;

	/* Confirm the dentry really is on the parent's hash chain */
	spin_lock(&dcache_lock);
	base = d_hash(dparent, dentry->d_name.hash);
	hlist_for_each(lhp,base) {
		/* hlist_for_each_entry_rcu() not required for d_hash list
		 * as it is parsed under dcache_lock
		 */
		if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
			__dget_locked(dentry);
			spin_unlock(&dcache_lock);
			return 1;
		}
	}
	spin_unlock(&dcache_lock);
out:
	return 0;
}

/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
 *
 * Usually, we want to just turn this into
 * a negative dentry, but if anybody else is
 * currently using the dentry or the inode
 * we can't do that and we fall back on removing
 * it from the hash queues and waiting for
 * it to be deleted later when it has no users
 */

/**
 * d_delete - delete a dentry
 * @dentry: The dentry to delete
 *
 * Turn the dentry into a negative dentry if possible, otherwise
 * remove it from the hash queues so it can be deleted later
 */
void d_delete(struct dentry * dentry)
{
	int isdir = 0;
	/*
	 * Are we the only user?
	 */
	spin_lock(&dcache_lock);
	spin_lock(&dentry->d_lock);
	isdir = S_ISDIR(dentry->d_inode->i_mode);
	if (atomic_read(&dentry->d_count) == 1) {
		/* Sole user: drop the inode now.
		 * dentry_iput() releases both dcache_lock and d_lock. */
		dentry_iput(dentry);
		fsnotify_nameremove(dentry, isdir);

		/* remove this and other inotify debug checks after 2.6.18 */
		dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
		return;
	}

	/* Still in use: just unhash; it is freed when the last user goes */
	if (!d_unhashed(dentry))
		__d_drop(dentry);

	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);

	fsnotify_nameremove(dentry, isdir);
}

/*
 * Put @entry on the hash chain @list and mark it hashed.
 * Both callers in this file hold dcache_lock and entry->d_lock.
 */
static void __d_rehash(struct dentry * entry, struct hlist_head *list)
{

	entry->d_flags &= ~DCACHE_UNHASHED;
	hlist_add_head_rcu(&entry->d_hash, list);
}

/**
 * d_rehash - add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name.
 */
void d_rehash(struct dentry * entry)
{
	struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);

	spin_lock(&dcache_lock);
	spin_lock(&entry->d_lock);
	__d_rehash(entry, list);
	spin_unlock(&entry->d_lock);
	spin_unlock(&dcache_lock);
}

/* Swap two lvalues of the same type */
#define do_switch(x,y) do { \
	__typeof__ (x) __tmp = x; \
	x = y; y = __tmp; } while (0)

/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway. As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on..
 */
static void switch_names(struct dentry *dentry, struct dentry *target)
{
	/* Names are either stored inline (d_iname) or externally allocated;
	 * handle each of the four combinations separately. */
	if (dname_external(target)) {
		if (dname_external(dentry)) {
			/*
			 * Both external: swap the pointers
			 */
			do_switch(target->d_name.name, dentry->d_name.name);
		} else {
			/*
			 * dentry:internal, target:external. Steal target's
			 * storage and make target internal.
			 */
			dentry->d_name.name = target->d_name.name;
			target->d_name.name = target->d_iname;
		}
	} else {
		if (dname_external(dentry)) {
			/*
			 * dentry:external, target:internal. Give dentry's
			 * storage to target and make dentry internal
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			target->d_name.name = dentry->d_name.name;
			dentry->d_name.name = dentry->d_iname;
		} else {
			/*
			 * Both are internal. Just copy target to dentry
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
		}
	}
}

/*
 * We cannibalize "target" when moving dentry on top of it,
 * because it's going to be thrown away anyway. We could be more
 * polite about it, though.
 *
 * This forceful removal will result in ugly /proc output if
 * somebody holds a file open that got deleted due to a rename.
 * We could be nicer about the deleted file, and let it show
 * up under the name it got deleted rather than the name that
 * deleted it.
 */

/**
 * d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way.
 */
void d_move(struct dentry * dentry, struct dentry * target)
{
	struct hlist_head *list;

	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	spin_lock(&dcache_lock);
	write_seqlock(&rename_lock);
	/*
	 * XXXX: do we really need to take target->d_lock?
	 */
	/* Lock both dentries in address order to avoid AB-BA deadlock */
	if (target < dentry) {
		spin_lock(&target->d_lock);
		spin_lock(&dentry->d_lock);
	} else {
		spin_lock(&dentry->d_lock);
		spin_lock(&target->d_lock);
	}

	/* Move the dentry to the target hash queue, if on different bucket */
	if (dentry->d_flags & DCACHE_UNHASHED)
		goto already_unhashed;

	hlist_del_rcu(&dentry->d_hash);

already_unhashed:
	list = d_hash(target->d_parent, target->d_name.hash);
	__d_rehash(dentry, list);

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	do_switch(dentry->d_name.len, target->d_name.len);
	do_switch(dentry->d_name.hash, target->d_name.hash);

	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		do_switch(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	spin_unlock(&target->d_lock);
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
	write_sequnlock(&rename_lock);
	spin_unlock(&dcache_lock);
}

/**
 * d_path - return the path of a dentry
 * @dentry: dentry to report
 * @vfsmnt: vfsmnt to which the dentry belongs
 * @root: root dentry
 * @rootmnt: vfsmnt to which the root dentry belongs
 * @buffer: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns the buffer or an error code if the path was too long.
 *
 * "buflen" should be positive. Caller holds the dcache_lock.
 */
static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
			struct dentry *root, struct vfsmount *rootmnt,
			char *buffer, int buflen)
{
	/* The path is built backwards, from the end of the buffer */
	char * end = buffer+buflen;
	char * retval;
	int namelen;

	*--end = '\0';
	buflen--;
	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
		buflen -= 10;
		end -= 10;
		if (buflen < 0)
			goto Elong;
		memcpy(end, " (deleted)", 10);
	}

	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	for (;;) {
		struct dentry * parent;

		if (dentry == root && vfsmnt == rootmnt)
			break;
		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root? */
			spin_lock(&vfsmount_lock);
			if (vfsmnt->mnt_parent == vfsmnt) {
				spin_unlock(&vfsmount_lock);
				goto global_root;
			}
			/* Cross the mount point into the parent mount */
			dentry = vfsmnt->mnt_mountpoint;
			vfsmnt = vfsmnt->mnt_parent;
			spin_unlock(&vfsmount_lock);
			continue;
		}
		parent = dentry->d_parent;
		prefetch(parent);
		namelen = dentry->d_name.len;
		buflen -= namelen + 1;
		if (buflen < 0)
			goto Elong;
		end -= namelen;
		memcpy(end, dentry->d_name.name, namelen);
		*--end = '/';
		retval = end;
		dentry = parent;
	}

	return retval;

global_root:
	namelen = dentry->d_name.len;
	buflen -= namelen;
	if (buflen < 0)
		goto Elong;
	retval -= namelen-1;	/* hit the slash */
	memcpy(retval, dentry->d_name.name, namelen);
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}

/* write full pathname into buffer and return start of pathname */
char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
		char *buf, int buflen)
{
	char *res;
	struct vfsmount *rootmnt;
	struct dentry *root;

	/* Pin the process's root so it cannot go away while we walk */
	read_lock(&current->fs->lock);
	rootmnt = mntget(current->fs->rootmnt);
	root = dget(current->fs->root);
	read_unlock(&current->fs->lock);
	spin_lock(&dcache_lock);
	res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
	spin_unlock(&dcache_lock);
	dput(root);
	mntput(rootmnt);
	return res;
}

/*
 * NOTE! The user-level library version returns a
 * character pointer. The kernel system call just
 * returns the length of the buffer filled (which
 * includes the ending '\0' character), or a negative
 * error value.
 * So libc would do something like
 *
 *	char *getcwd(char * buf, size_t size)
 *	{
 *		int retval;
 *
 *		retval = sys_getcwd(buf, size);
 *		if (retval >= 0)
 *			return buf;
 *		errno = -retval;
 *		return NULL;
 *	}
 */
asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
{
	int error;
	struct vfsmount *pwdmnt, *rootmnt;
	struct dentry *pwd, *root;
	char *page = (char *) __get_free_page(GFP_USER);

	if (!page)
		return -ENOMEM;

	/* Snapshot cwd and root under fs->lock, taking a reference on each */
	read_lock(&current->fs->lock);
	pwdmnt = mntget(current->fs->pwdmnt);
	pwd = dget(current->fs->pwd);
	rootmnt = mntget(current->fs->rootmnt);
	root = dget(current->fs->root);
	read_unlock(&current->fs->lock);

	error = -ENOENT;
	/* Has the current directory been unlinked? */
	spin_lock(&dcache_lock);
	if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
		unsigned long len;
		char * cwd;

		cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
		spin_unlock(&dcache_lock);

		error = PTR_ERR(cwd);
		if (IS_ERR(cwd))
			goto out;

		/* __d_path built the string at the END of the page */
		error = -ERANGE;
		len = PAGE_SIZE + page - cwd;
		if (len <= size) {
			error = len;
			if (copy_to_user(buf, cwd, len))
				error = -EFAULT;
		}
	} else
		spin_unlock(&dcache_lock);

out:
	dput(pwd);
	mntput(pwdmnt);
	dput(root);
	mntput(rootmnt);
	free_page((unsigned long) page);
	return error;
}

/*
 * Test whether new_dentry is a subdirectory of old_dentry.
 *
 * Trivially implemented using the dcache structure
 */

/**
 * is_subdir - is new dentry a subdirectory of old_dentry
 * @new_dentry: new dentry
 * @old_dentry: old dentry
 *
 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
 * Returns 0 otherwise.
 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
 */
int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
{
	int result;
	struct dentry * saved = new_dentry;
	unsigned long seq;

	/* need rcu_readlock to protect against the d_parent trashing due to
	 * d_move
	 */
	rcu_read_lock();
	do {
		/* for restarting inner loop in case of seq retry */
		new_dentry = saved;
		result = 0;
		seq = read_seqbegin(&rename_lock);
		/* Walk d_parent pointers up to the root (parent == self) */
		for (;;) {
			if (new_dentry != old_dentry) {
				struct dentry * parent = new_dentry->d_parent;
				if (parent == new_dentry)
					break;
				new_dentry = parent;
				continue;
			}
			result = 1;
			break;
		}
	} while (read_seqretry(&rename_lock, seq));
	rcu_read_unlock();

	return result;
}

/*
 * d_genocide - walk the subtree at @root and drop one reference
 * (d_count) from every hashed dentry that has an inode.
 * Runs entirely under dcache_lock.
 */
void d_genocide(struct dentry *root)
{
	struct dentry *this_parent = root;
	struct list_head *next;

	spin_lock(&dcache_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;
		/* Skip unhashed and negative dentries */
		if (d_unhashed(dentry)||!dentry->d_inode)
			continue;
		/* Descend into non-empty children first */
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
		atomic_dec(&dentry->d_count);
	}
	/* Ascend back towards root, dropping the parent's count on the way */
	if (this_parent != root) {
		next = this_parent->d_u.d_child.next;
		atomic_dec(&this_parent->d_count);
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
}

/**
 * find_inode_number - check for dentry with name
 * @dir: directory to check
 * @name: Name to find.
 *
 * Check whether a dentry already exists for the given name,
 * and return the inode number if it has an inode. Otherwise
 * 0 is returned.
 *
 * This routine is used to post-process directory listings for
 * filesystems using synthetic inode numbers, and is necessary
 * to keep getcwd() working.
 */
ino_t find_inode_number(struct dentry *dir, struct qstr *name)
{
	struct dentry * dentry;
	ino_t ino = 0;

	dentry = d_hash_and_lookup(dir, name);
	if (dentry) {
		if (dentry->d_inode)
			ino = dentry->d_inode->i_ino;
		dput(dentry);
	}
	return ino;
}

/* "dhash_entries=" boot parameter: requested dentry hash table size */
static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
	if (!str)
		return 0;
	dhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("dhash_entries=", set_dhash_entries);

static void __init dcache_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}

static void __init dcache_init(unsigned long mempages)
{
	int loop;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	dentry_cache = kmem_cache_create("dentry_cache",
					 sizeof(struct dentry),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD),
					 NULL, NULL);

	/* Register the dcache with the VM's shrinker machinery */
	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}

/* SLAB cache for __getname() consumers */
kmem_cache_t *names_cachep __read_mostly;

/* SLAB cache for file structures */
kmem_cache_t *filp_cachep __read_mostly;

EXPORT_SYMBOL(d_genocide);

extern void bdev_cache_init(void);
extern void chrdev_init(void);

void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}

void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */

	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);

	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);

	dcache_init(mempages);
	inode_init(mempages);
	files_init(mempages);
	mnt_init(mempages);
	bdev_cache_init();
	chrdev_init();
}

EXPORT_SYMBOL(d_alloc);
EXPORT_SYMBOL(d_alloc_anon);
EXPORT_SYMBOL(d_alloc_root);
EXPORT_SYMBOL(d_delete);
EXPORT_SYMBOL(d_find_alias);
EXPORT_SYMBOL(d_instantiate);
/* Remaining dcache exports for modular filesystems */
EXPORT_SYMBOL(d_invalidate);
EXPORT_SYMBOL(d_lookup);
EXPORT_SYMBOL(d_move);
EXPORT_SYMBOL(d_path);
EXPORT_SYMBOL(d_prune_aliases);
EXPORT_SYMBOL(d_rehash);
EXPORT_SYMBOL(d_splice_alias);
EXPORT_SYMBOL(d_validate);
EXPORT_SYMBOL(dget_locked);
EXPORT_SYMBOL(dput);
EXPORT_SYMBOL(find_inode_number);
EXPORT_SYMBOL(have_submounts);
EXPORT_SYMBOL(names_cachep);
EXPORT_SYMBOL(shrink_dcache_parent);
EXPORT_SYMBOL(shrink_dcache_sb);