/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include "internal.h"


int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(dcache_lock);

static struct kmem_cache *dentry_cache __read_mostly;

#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS	d_hash_shift
#define D_HASHMASK	d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
static struct hlist_head *dentry_hashtable __read_mostly;
static LIST_HEAD(dentry_unused);

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

static void __d_free(struct dentry *dentry)
{
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

static void d_callback(struct rcu_head *head)
{
	struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
	__d_free(dentry);
}

/*
 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
 * inside dcache_lock.
 */
static void d_free(struct dentry *dentry)
{
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);
	/* if dentry was never inserted into hash, immediate free is OK */
	if (hlist_unhashed(&dentry->d_hash))
		__d_free(dentry);
	else
		call_rcu(&dentry->d_u.d_rcu, d_callback);
}

static void dentry_lru_remove(struct dentry *dentry)
{
	if (!list_empty(&dentry->d_lru)) {
		list_del_init(&dentry->d_lru);
		dentry_stat.nr_unused--;
	}
}
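/*
 * Editor's note (illustrative, derived from __d_lookup() later in this
 * file): d_free() must defer the actual kfree of any dentry that was
 * ever hashed, because lockless readers may still be walking the hash
 * chain it was on:
 *
 *	rcu_read_lock();
 *	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
 *		... may still dereference a concurrently unhashed dentry ...
 *	}
 *	rcu_read_unlock();
 *
 * call_rcu() delays __d_free() until all such readers are guaranteed to
 * be gone. A dentry that was never hashed was never visible to them, so
 * it can be freed immediately.
 */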
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined.
 * Called with dcache_lock and per dentry lock held, drops both.
 */
static void dentry_iput(struct dentry * dentry)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
	}
}

/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 *
 * Called with dcache_lock and d_lock, releases both. The dentry must
 * already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 */
static struct dentry *d_kill(struct dentry *dentry)
{
	struct dentry *parent;

	list_del(&dentry->d_u.d_child);
	dentry_stat.nr_dentry--;	/* For d_free, below */
	/* drops the locks, at that point nobody can reach this dentry */
	dentry_iput(dentry);
	parent = dentry->d_parent;
	d_free(dentry);
	return dentry == parent ? NULL : parent;
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 *
 * no dcache lock, please.
 */

void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	if (atomic_read(&dentry->d_count) == 1)
		might_sleep();
	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
		return;

	spin_lock(&dentry->d_lock);
	if (atomic_read(&dentry->d_count)) {
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
		return;
	}

	/*
	 * AV: ->d_delete() is _NOT_ allowed to block now.
	 */
	if (dentry->d_op && dentry->d_op->d_delete) {
		if (dentry->d_op->d_delete(dentry))
			goto unhash_it;
	}
	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;
	if (list_empty(&dentry->d_lru)) {
		dentry->d_flags |= DCACHE_REFERENCED;
		list_add(&dentry->d_lru, &dentry_unused);
		dentry_stat.nr_unused++;
	}
	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);
	return;

unhash_it:
	__d_drop(dentry);
kill_it:
	dentry_lru_remove(dentry);
	dentry = d_kill(dentry);
	if (dentry)
		goto repeat;
}

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dcache_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dcache_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dcache_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dcache_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 */
	spin_lock(&dentry->d_lock);
	if (atomic_read(&dentry->d_count) > 1) {
		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
			spin_unlock(&dentry->d_lock);
			spin_unlock(&dcache_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);
	return 0;
}

/* This should be called _only_ with dcache_lock held */

static inline struct dentry * __dget_locked(struct dentry *dentry)
{
	atomic_inc(&dentry->d_count);
	dentry_lru_remove(dentry);
	return dentry;
}

struct dentry * dget_locked(struct dentry *dentry)
{
	return __dget_locked(dentry);
}

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon: flag, used by d_splice_alias, to request
 *	that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */

static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
{
	struct list_head *head, *next, *tmp;
	struct dentry *alias, *discon_alias=NULL;

	head = &inode->i_dentry;
	next = inode->i_dentry.next;
	while (next != head) {
		tmp = next;
		next = tmp->next;
		prefetch(next);
		alias = list_entry(tmp, struct dentry, d_alias);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED))
				discon_alias = alias;
			else if (!want_discon) {
				__dget_locked(alias);
				return alias;
			}
		}
	}
	if (discon_alias)
		__dget_locked(discon_alias);
	return discon_alias;
}

struct dentry * d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!list_empty(&inode->i_dentry)) {
		spin_lock(&dcache_lock);
		de = __d_find_alias(inode, 0);
		spin_unlock(&dcache_lock);
	}
	return de;
}
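/*
 * Editor's sketch (illustrative only): a typical d_find_alias() caller
 * is an exporting filesystem that wants an existing dentry for an inode
 * it has just decoded from a file handle:
 *
 *	struct dentry *dentry = d_find_alias(inode);
 *	if (dentry) {
 *		iput(inode);
 *		return dentry;
 *	}
 *
 * the iput() balancing the reference the caller already held, since
 * d_find_alias() returns a dentry that pins the inode on its own.
 * d_alloc_anon() later in this file follows exactly this pattern before
 * falling back to allocating a disconnected dentry.
 */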
/*
 * Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&dcache_lock);
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!atomic_read(&dentry->d_count)) {
			__dget_locked(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&dcache_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&dcache_lock);
}

/*
 * Throw away a dentry - free the inode, dput the parent. This requires that
 * the LRU list has already been removed.
 *
 * Try to prune ancestors as well. This is necessary to prevent
 * quadratic behavior of shrink_dcache_parent(), but is also expected
 * to be beneficial in reducing dentry cache fragmentation.
 *
 * Called with dcache_lock, drops it and then regains.
 * Called with dentry->d_lock held, drops it.
 */
static void prune_one_dentry(struct dentry * dentry)
{
	__d_drop(dentry);
	dentry = d_kill(dentry);

	/*
	 * Prune ancestors. Locking is simpler than in dput(),
	 * because dcache_lock needs to be taken anyway.
	 */
	spin_lock(&dcache_lock);
	while (dentry) {
		if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
			return;

		if (dentry->d_op && dentry->d_op->d_delete)
			dentry->d_op->d_delete(dentry);
		dentry_lru_remove(dentry);
		__d_drop(dentry);
		dentry = d_kill(dentry);
		spin_lock(&dcache_lock);
	}
}

/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try and free
 * @sb: if given, ignore dentries for other superblocks
 *	which are being unmounted.
 *
 * Shrink the dcache. This is done when we need
 * more memory, or simply when we need to unmount
 * something (at which point we need to unuse
 * all dentries).
 *
 * This function may fail to free any resources if
 * all the dentries are in use.
 */

static void prune_dcache(int count, struct super_block *sb)
{
	spin_lock(&dcache_lock);
	for (; count ; count--) {
		struct dentry *dentry;
		struct list_head *tmp;
		struct rw_semaphore *s_umount;

		cond_resched_lock(&dcache_lock);

		tmp = dentry_unused.prev;
		if (sb) {
			/* Try to find a dentry for this sb, but don't try
			 * too hard, if they aren't near the tail they will
			 * be moved down again soon
			 */
			int skip = count;
			while (skip && tmp != &dentry_unused &&
			    list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
				skip--;
				tmp = tmp->prev;
			}
		}
		if (tmp == &dentry_unused)
			break;
		list_del_init(tmp);
		prefetch(dentry_unused.prev);
		dentry_stat.nr_unused--;
		dentry = list_entry(tmp, struct dentry, d_lru);

		spin_lock(&dentry->d_lock);
		/*
		 * We found an inuse dentry which was not removed from
		 * dentry_unused because of laziness during lookup. Do not free
		 * it - just keep it off the dentry_unused list.
		 */
		if (atomic_read(&dentry->d_count)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		/* If the dentry was recently referenced, don't free it. */
		if (dentry->d_flags & DCACHE_REFERENCED) {
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_add(&dentry->d_lru, &dentry_unused);
			dentry_stat.nr_unused++;
			spin_unlock(&dentry->d_lock);
			continue;
		}
		/*
		 * If the dentry is not DCACHE_REFERENCED, it is time
		 * to remove it from the dcache, provided the super block is
		 * NULL (which means we are trying to reclaim memory)
		 * or this dentry belongs to the same super block that
		 * we want to shrink.
		 */
		/*
		 * If this dentry is for "my" filesystem, then I can prune it
		 * without taking the s_umount lock (I already hold it).
		 */
		if (sb && dentry->d_sb == sb) {
			prune_one_dentry(dentry);
			continue;
		}
		/*
		 * ...otherwise we need to be sure this filesystem isn't being
		 * unmounted, otherwise we could race with
		 * generic_shutdown_super(), and end up holding a reference to
		 * an inode while the filesystem is unmounted.
		 * So we try to get s_umount, and make sure s_root isn't NULL.
		 * (Take a local copy of s_umount to avoid a use-after-free of
		 * `dentry').
		 */
		s_umount = &dentry->d_sb->s_umount;
		if (down_read_trylock(s_umount)) {
			if (dentry->d_sb->s_root != NULL) {
				prune_one_dentry(dentry);
				up_read(s_umount);
				continue;
			}
			up_read(s_umount);
		}
		spin_unlock(&dentry->d_lock);
		/*
		 * Insert dentry at the head of the list as inserting at the
		 * tail leads to a cycle.
		 */
		list_add(&dentry->d_lru, &dentry_unused);
		dentry_stat.nr_unused++;
	}
	spin_unlock(&dcache_lock);
}

/*
 * Shrink the dcache for the specified super block.
 * This allows us to unmount a device without disturbing
 * the dcache for the other devices.
 *
 * This implementation makes just two traversals of the
 * unused list. On the first pass we move the selected
 * dentries to the most recent end, and on the second
 * pass we free them. The second pass must restart after
 * each dput(), but since the target dentries are all at
 * the end, it's really just a single traversal.
 */

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This
 * is used to free the dcache before unmounting a file
 * system
 */

void shrink_dcache_sb(struct super_block * sb)
{
	struct list_head *tmp, *next;
	struct dentry *dentry;

	/*
	 * Pass one ... move the dentries for the specified
	 * superblock to the most recent end of the unused list.
	 */
	spin_lock(&dcache_lock);
	list_for_each_prev_safe(tmp, next, &dentry_unused) {
		dentry = list_entry(tmp, struct dentry, d_lru);
		if (dentry->d_sb != sb)
			continue;
		list_move_tail(tmp, &dentry_unused);
	}

	/*
	 * Pass two ... free the dentries for this superblock.
	 */
repeat:
	list_for_each_prev_safe(tmp, next, &dentry_unused) {
		dentry = list_entry(tmp, struct dentry, d_lru);
		if (dentry->d_sb != sb)
			continue;
		dentry_stat.nr_unused--;
		list_del_init(tmp);
		spin_lock(&dentry->d_lock);
		if (atomic_read(&dentry->d_count)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		prune_one_dentry(dentry);
		cond_resched_lock(&dcache_lock);
		goto repeat;
	}
	spin_unlock(&dcache_lock);
}

/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 *   locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;
	unsigned detached = 0;

	BUG_ON(!IS_ROOT(dentry));

	/* detach this root from the system */
	spin_lock(&dcache_lock);
	dentry_lru_remove(dentry);
	__d_drop(dentry);
	spin_unlock(&dcache_lock);

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs)) {
			struct dentry *loop;

			/* this is a branch with children - detach all of them
			 * from the system in one go */
			spin_lock(&dcache_lock);
			list_for_each_entry(loop, &dentry->d_subdirs,
					    d_u.d_child) {
				dentry_lru_remove(loop);
				__d_drop(loop);
				cond_resched_lock(&dcache_lock);
			}
			spin_unlock(&dcache_lock);

			/* move to the first child */
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);
		}

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			if (atomic_read(&dentry->d_count) != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       atomic_read(&dentry->d_count),
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			parent = dentry->d_parent;
			if (parent == dentry)
				parent = NULL;
			else
				atomic_dec(&parent->d_count);

			list_del(&dentry->d_u.d_child);
			detached++;

			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				list_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				goto out;

			dentry = parent;

		} while (list_empty(&dentry->d_subdirs));

		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
out:
	/* several dentries were freed, need to correct nr_dentry */
	spin_lock(&dcache_lock);
	dentry_stat.nr_dentry -= detached;
	spin_unlock(&dcache_lock);
}

/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock, and only need dcache_lock when
 *   removing the dentry from the system lists and hashes because:
 *   - the superblock is detached from all mountings and open files, so the
 *     dentry trees will not be rearranged by the VFS
 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 *     any dentries belonging to this superblock that it comes across
 *   - the filesystem itself is no longer permitted to rearrange the dentries
 *     in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	atomic_dec(&dentry->d_count);
	shrink_dcache_for_umount_subtree(dentry);

	while (!hlist_empty(&sb->s_anon)) {
		dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}
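/*
 * Editor's sketch (illustrative; the surrounding code is elided): the
 * expected caller is the unmount path, which holds s_umount for writing
 * before tearing the tree down, roughly:
 *
 *	down_write(&sb->s_umount);
 *	...
 *	if (sb->s_root)
 *		shrink_dcache_for_umount(sb);
 *
 * generic_shutdown_super() is the in-tree example of this sequence; the
 * down_read_trylock() BUG check above enforces that s_umount really is
 * write-locked.
 */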
/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */

int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent = parent;
	struct list_head *next;

	spin_lock(&dcache_lock);
	if (d_mountpoint(parent))
		goto positive;
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;
		/* Have we found a mount point ? */
		if (d_mountpoint(dentry))
			goto positive;
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		next = this_parent->d_u.d_child.next;
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
	return 0; /* No mount points found in tree */
positive:
	spin_unlock(&dcache_lock);
	return 1;
}

/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry * parent)
{
	struct dentry *this_parent = parent;
	struct list_head *next;
	int found = 0;

	spin_lock(&dcache_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		dentry_lru_remove(dentry);
		/*
		 * move only zero ref count dentries to the end
		 * of the unused list for prune_dcache
		 */
		if (!atomic_read(&dentry->d_count)) {
			list_add_tail(&dentry->d_lru, &dentry_unused);
			dentry_stat.nr_unused++;
			found++;
		}

		/*
		 * We can return to the caller if we have found some (this
		 * ensures forward progress). We'll be coming back to find
		 * the rest.
		 */
		if (found && need_resched())
			goto out;

		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		next = this_parent->d_u.d_child.next;
		this_parent = this_parent->d_parent;
		goto resume;
	}
out:
	spin_unlock(&dcache_lock);
	return found;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */

void shrink_dcache_parent(struct dentry * parent)
{
	int found;

	while ((found = select_parent(parent)) != 0)
		prune_dcache(found, parent->d_sb);
}
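/*
 * Editor's sketch (illustrative): a filesystem that learns a whole
 * directory has gone stale on the server typically prunes the cached
 * subtree and then drops the directory itself:
 *
 *	shrink_dcache_parent(dentry);
 *	d_drop(dentry);
 *
 * d_invalidate() earlier in this file is the generic helper built from
 * the same two steps, with the extra busy-directory check.
 */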
/*
 * Scan `nr' dentries and return the number which remain.
 *
 * We need to avoid reentering the filesystem if the caller is performing a
 * GFP_NOFS allocation attempt. One example deadlock is:
 *
 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
 *
 * In this case we return -1 to tell the caller that we bailed.
 */
static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
{
	if (nr) {
		if (!(gfp_mask & __GFP_FS))
			return -1;
		prune_dcache(nr, NULL);
	}
	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker dcache_shrinker = {
	.shrink = shrink_dcache_memory,
	.seeks = DEFAULT_SEEKS,
};

/**
 * d_alloc - allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */

struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else {
		dname = dentry->d_iname;
	}
	dentry->d_name.name = dname;

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	atomic_set(&dentry->d_count, 1);
	dentry->d_flags = DCACHE_UNHASHED;
	spin_lock_init(&dentry->d_lock);
	dentry->d_inode = NULL;
	dentry->d_parent = NULL;
	dentry->d_sb = NULL;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	dentry->d_mounted = 0;
#ifdef CONFIG_PROFILING
	dentry->d_cookie = NULL;
#endif
	INIT_HLIST_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);

	if (parent) {
		dentry->d_parent = dget(parent);
		dentry->d_sb = parent->d_sb;
	} else {
		INIT_LIST_HEAD(&dentry->d_u.d_child);
	}

	spin_lock(&dcache_lock);
	if (parent)
		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
	dentry_stat.nr_dentry++;
	spin_unlock(&dcache_lock);

	return dentry;
}

struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
	struct qstr q;

	q.name = name;
	q.len = strlen(name);
	q.hash = full_name_hash(q.name, q.len);
	return d_alloc(parent, &q);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */

void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!list_empty(&entry->d_alias));
	spin_lock(&dcache_lock);
	if (inode)
		list_add(&entry->d_alias, &inode->i_dentry);
	entry->d_inode = inode;
	fsnotify_d_instantiate(entry, inode);
	spin_unlock(&dcache_lock);
	security_d_instantiate(entry, inode);
}
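/*
 * Editor's sketch (foofs_* names are hypothetical): the classic caller
 * of d_instantiate() is a filesystem ->create() method, which receives
 * the negative dentry the VFS allocated via d_alloc() and binds a fresh
 * inode to it:
 *
 *	static int foofs_create(struct inode *dir, struct dentry *dentry,
 *				int mode, struct nameidata *nd)
 *	{
 *		struct inode *inode = foofs_new_inode(dir->i_sb, mode);
 *		if (!inode)
 *			return -ENOSPC;
 *		d_instantiate(dentry, inode);
 *		return 0;
 *	}
 *
 * after which the inode reference is owned by the dcache, per the NOTE
 * above.
 */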
/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
					     struct inode *inode)
{
	struct dentry *alias;
	int len = entry->d_name.len;
	const char *name = entry->d_name.name;
	unsigned int hash = entry->d_name.hash;

	if (!inode) {
		entry->d_inode = NULL;
		return NULL;
	}

	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		struct qstr *qstr = &alias->d_name;

		if (qstr->hash != hash)
			continue;
		if (alias->d_parent != entry->d_parent)
			continue;
		if (qstr->len != len)
			continue;
		if (memcmp(qstr->name, name, len))
			continue;
		dget_locked(alias);
		return alias;
	}

	list_add(&entry->d_alias, &inode->i_dentry);
	entry->d_inode = inode;
	fsnotify_d_instantiate(entry, inode);
	return NULL;
}

struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *result;

	BUG_ON(!list_empty(&entry->d_alias));

	spin_lock(&dcache_lock);
	result = __d_instantiate_unique(entry, inode);
	spin_unlock(&dcache_lock);

	if (!result) {
		security_d_instantiate(entry, inode);
		return NULL;
	}

	BUG_ON(!d_unhashed(result));
	iput(inode);
	return result;
}

EXPORT_SYMBOL(d_instantiate_unique);

/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
 */

struct dentry * d_alloc_root(struct inode * root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = { .name = "/", .len = 1 };

		res = d_alloc(NULL, &name);
		if (res) {
			res->d_sb = root_inode->i_sb;
			res->d_parent = res;
			d_instantiate(res, root_inode);
		}
	}
	return res;
}

static inline struct hlist_head *d_hash(struct dentry *parent,
					unsigned long hash)
{
	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}
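/*
 * Editor's sketch (foofs_* is hypothetical): d_alloc_root() is normally
 * called once per mount, from ->fill_super():
 *
 *	inode = foofs_read_root_inode(sb);
 *	sb->s_root = d_alloc_root(inode);
 *	if (!sb->s_root) {
 *		iput(inode);
 *		return -ENOMEM;
 *	}
 *
 * Note the iput() on failure: when d_alloc_root() returns %NULL it has
 * not consumed the caller's inode reference.
 */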
/**
 * d_alloc_anon - allocate an anonymous dentry
 * @inode: inode to allocate the dentry for
 *
 * This is similar to d_alloc_root. It is used by filesystems when
 * creating a dentry for a given inode, often in the process of
 * mapping a filehandle to a dentry. The returned dentry may be
 * anonymous, or may have a full name (if the inode was already
 * in the cache). The file system may need to make further
 * efforts to connect this dentry into the dcache properly.
 *
 * When called on a directory inode, we must ensure that
 * the inode only ever has one dentry. If a dentry is
 * found, that is returned instead of allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry. If %NULL is returned (indicating kmalloc failure),
 * the reference on the inode has not been released.
 */

struct dentry * d_alloc_anon(struct inode *inode)
{
	static const struct qstr anonstring = { .name = "" };
	struct dentry *tmp;
	struct dentry *res;

	if ((res = d_find_alias(inode))) {
		iput(inode);
		return res;
	}

	tmp = d_alloc(NULL, &anonstring);
	if (!tmp)
		return NULL;

	tmp->d_parent = tmp; /* make sure dput doesn't croak */

	spin_lock(&dcache_lock);
	res = __d_find_alias(inode, 0);
	if (!res) {
		/* attach a disconnected dentry */
		res = tmp;
		tmp = NULL;
		spin_lock(&res->d_lock);
		res->d_sb = inode->i_sb;
		res->d_parent = res;
		res->d_inode = inode;
		res->d_flags |= DCACHE_DISCONNECTED;
		res->d_flags &= ~DCACHE_UNHASHED;
		list_add(&res->d_alias, &inode->i_dentry);
		hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
		spin_unlock(&res->d_lock);

		inode = NULL; /* don't drop reference */
	}
	spin_unlock(&dcache_lock);

	if (inode)
		iput(inode);
	if (tmp)
		dput(tmp);
	return res;
}


/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode:  the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned. Otherwise NULL
 * is returned. This matches the expected return value of ->lookup.
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
	struct dentry *new = NULL;

	if (inode && S_ISDIR(inode->i_mode)) {
		spin_lock(&dcache_lock);
		new = __d_find_alias(inode, 1);
		if (new) {
			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
			fsnotify_d_instantiate(new, inode);
			spin_unlock(&dcache_lock);
			security_d_instantiate(new, inode);
			d_rehash(dentry);
			d_move(new, dentry);
			iput(inode);
		} else {
			/* d_instantiate takes dcache_lock, so we do it by hand */
			list_add(&dentry->d_alias, &inode->i_dentry);
			dentry->d_inode = inode;
			fsnotify_d_instantiate(dentry, inode);
			spin_unlock(&dcache_lock);
			security_d_instantiate(dentry, inode);
			d_rehash(dentry);
		}
	} else
		d_add(dentry, inode);
	return new;
}


/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 *
 * Searches the children of the parent dentry for the name in question. If
 * the dentry is found its reference count is incremented and the dentry
 * is returned. The caller must use dput to free the entry when it has
 * finished using it. %NULL is returned on failure.
 *
 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
 * Memory barriers are used while updating and doing lockless traversal.
 * To avoid races with d_move() while a rename is happening, d_lock is used.
 *
 * Overflows in memcmp() during a concurrent d_move() are avoided by keeping
 * the length and name pointer in one structure pointed to by d_qstr.
 *
 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
 * lookup is going on.
 *
 * dentry_unused list is not updated even if lookup finds the required dentry
 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
 * acquisition.
 *
 * d_lookup() is protected against concurrent renames in some unrelated
 * directory using the seqlock_t rename_lock.
 */

struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
	struct dentry * dentry = NULL;
	unsigned long seq;

	do {
		seq = read_seqbegin(&rename_lock);
		dentry = __d_lookup(parent, name);
		if (dentry)
			break;
	} while (read_seqretry(&rename_lock, seq));
	return dentry;
}

struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_head *head = d_hash(parent,hash);
	struct dentry *found = NULL;
	struct hlist_node *node;
	struct dentry *dentry;

	rcu_read_lock();

	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
		struct qstr *qstr;

		if (dentry->d_name.hash != hash)
			continue;
		if (dentry->d_parent != parent)
			continue;

		spin_lock(&dentry->d_lock);

		/*
		 * Recheck the dentry after taking the lock - d_move may have
		 * changed things. Don't bother checking the hash because we're
		 * about to compare the whole name anyway.
		 */
		if (dentry->d_parent != parent)
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		qstr = &dentry->d_name;
		if (parent->d_op && parent->d_op->d_compare) {
			if (parent->d_op->d_compare(parent, qstr, name))
				goto next;
		} else {
			if (qstr->len != len)
				goto next;
			if (memcmp(qstr->name, str, len))
				goto next;
		}

		if (!d_unhashed(dentry)) {
			atomic_inc(&dentry->d_count);
			found = dentry;
		}
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
	}
	rcu_read_unlock();

	return found;
}
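/*
 * Editor's sketch (assumption: a case-insensitive filesystem; foofs_* is
 * hypothetical): the ->d_compare() hook consulted above returns 0 on a
 * match and non-zero otherwise, e.g.
 *
 *	static int foofs_d_compare(struct dentry *parent,
 *				   struct qstr *a, struct qstr *b)
 *	{
 *		if (a->len != b->len)
 *			return 1;
 *		return strnicmp(a->name, b->name, a->len) ? 1 : 0;
 *	}
 *
 * Such filesystems pair this with a ->d_hash() that hashes the
 * case-folded name, so all case variants land on the same hash chain.
 */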
/**
 * d_hash_and_lookup - hash the qstr then search for a dentry
 * @dir: Directory to search in
 * @name: qstr of name we wish to find
 *
 * On hash failure or on lookup failure NULL is returned.
 */
struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
{
	struct dentry *dentry = NULL;

	/*
	 * Check for a fs-specific hash function. Note that we must
	 * calculate the standard hash first, as the d_op->d_hash()
	 * routine may choose to leave the hash value unchanged.
	 */
	name->hash = full_name_hash(name->name, name->len);
	if (dir->d_op && dir->d_op->d_hash) {
		if (dir->d_op->d_hash(dir, name) < 0)
			goto out;
	}
	dentry = d_lookup(dir, name);
out:
	return dentry;
}

/**
 * d_validate - verify dentry provided from insecure source
 * @dentry: The dentry alleged to be valid child of @dparent
 * @dparent: The parent dentry (known to be valid)
 *
 * An insecure source has sent us a dentry, here we verify it and dget() it.
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned if the dentry is invalid.
 */

int d_validate(struct dentry *dentry, struct dentry *dparent)
{
	struct hlist_head *base;
	struct hlist_node *lhp;

	/* Check whether the ptr might be valid at all.. */
	if (!kmem_ptr_validate(dentry_cache, dentry))
		goto out;

	if (dentry->d_parent != dparent)
		goto out;

	spin_lock(&dcache_lock);
	base = d_hash(dparent, dentry->d_name.hash);
	hlist_for_each(lhp,base) {
		/* hlist_for_each_entry_rcu() not required for d_hash list
		 * as it is parsed under dcache_lock
		 */
		if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
			__dget_locked(dentry);
			spin_unlock(&dcache_lock);
			return 1;
		}
	}
	spin_unlock(&dcache_lock);
out:
	return 0;
}

/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
 *
 * Usually, we want to just turn this into
 * a negative dentry, but if anybody else is
 * currently using the dentry or the inode
 * we can't do that and we fall back on removing
 * it from the hash queues and waiting for
 * it to be deleted later when it has no users
 */

/**
 * d_delete - delete a dentry
 * @dentry: The dentry to delete
 *
 * Turn the dentry into a negative dentry if possible, otherwise
 * remove it from the hash queues so it can be deleted later
 */

void d_delete(struct dentry * dentry)
{
	int isdir = 0;
	/*
	 * Are we the only user?
	 */
	spin_lock(&dcache_lock);
	spin_lock(&dentry->d_lock);
	isdir = S_ISDIR(dentry->d_inode->i_mode);
	if (atomic_read(&dentry->d_count) == 1) {
		dentry_iput(dentry);
		fsnotify_nameremove(dentry, isdir);
		return;
	}

	if (!d_unhashed(dentry))
		__d_drop(dentry);

	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);

	fsnotify_nameremove(dentry, isdir);
}

static void __d_rehash(struct dentry * entry, struct hlist_head *list)
{

	entry->d_flags &= ~DCACHE_UNHASHED;
	hlist_add_head_rcu(&entry->d_hash, list);
}

static void _d_rehash(struct dentry * entry)
{
	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}
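/*
 * Editor's sketch (conventional use, not from this file): the VFS calls
 * d_delete() after a successful ->unlink(), roughly as vfs_unlink() does:
 *
 *	error = dir->i_op->unlink(dir, dentry);
 *	if (!error)
 *		d_delete(dentry);
 *
 * so an otherwise-unused name turns into a cached negative dentry, while
 * a busy one is merely unhashed and freed on the final dput().
 */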
/**
 * d_rehash - add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name.
 */

void d_rehash(struct dentry * entry)
{
	spin_lock(&dcache_lock);
	spin_lock(&entry->d_lock);
	_d_rehash(entry);
	spin_unlock(&entry->d_lock);
	spin_unlock(&dcache_lock);
}

#define do_switch(x,y) do { \
	__typeof__ (x) __tmp = x; \
	x = y; y = __tmp; } while (0)

/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway. As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on..
 */
static void switch_names(struct dentry *dentry, struct dentry *target)
{
	if (dname_external(target)) {
		if (dname_external(dentry)) {
			/*
			 * Both external: swap the pointers
			 */
			do_switch(target->d_name.name, dentry->d_name.name);
		} else {
			/*
			 * dentry:internal, target:external. Steal target's
			 * storage and make target internal.
			 */
			memcpy(target->d_iname, dentry->d_name.name,
					dentry->d_name.len + 1);
			dentry->d_name.name = target->d_name.name;
			target->d_name.name = target->d_iname;
		}
	} else {
		if (dname_external(dentry)) {
			/*
			 * dentry:external, target:internal. Give dentry's
			 * storage to target and make dentry internal
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			target->d_name.name = dentry->d_name.name;
			dentry->d_name.name = dentry->d_iname;
		} else {
			/*
			 * Both are internal. Just copy target to dentry
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
		}
	}
}

/*
 * We cannibalize "target" when moving dentry on top of it,
 * because it's going to be thrown away anyway. We could be more
 * polite about it, though.
 *
 * This forceful removal will result in ugly /proc output if
 * somebody holds a file open that got deleted due to a rename.
 * We could be nicer about the deleted file, and let it show
 * up under the name it had before it was deleted rather than
 * under the original name of the file that was moved on top of it.
 */

/*
 * d_move_locked - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way.
 */
static void d_move_locked(struct dentry * dentry, struct dentry * target)
{
	struct hlist_head *list;

	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	write_seqlock(&rename_lock);
	/*
	 * XXXX: do we really need to take target->d_lock?
	 */
	if (target < dentry) {
		spin_lock(&target->d_lock);
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	} else {
		spin_lock(&dentry->d_lock);
		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
	}

	/* Move the dentry to the target hash queue, if on different bucket */
	if (d_unhashed(dentry))
		goto already_unhashed;

	hlist_del_rcu(&dentry->d_hash);

already_unhashed:
	list = d_hash(target->d_parent, target->d_name.hash);
	__d_rehash(dentry, list);

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	do_switch(dentry->d_name.len, target->d_name.len);
	do_switch(dentry->d_name.hash, target->d_name.hash);

	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		do_switch(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	spin_unlock(&target->d_lock);
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
	write_sequnlock(&rename_lock);
}

/**
 * d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way.
 */

void d_move(struct dentry * dentry, struct dentry * target)
{
	spin_lock(&dcache_lock);
	d_move_locked(dentry, target);
	spin_unlock(&dcache_lock);
}

/*
 * Helper that returns 1 if p1 is a parent of p2, else 0
 */
static int d_isparent(struct dentry *p1, struct dentry *p2)
{
	struct dentry *p;

	for (p = p2; p->d_parent != p; p = p->d_parent) {
		if (p->d_parent == p1)
			return 1;
	}
	return 0;
}
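/*
 * Editor's note: the "if (target < dentry)" in d_move_locked() above is
 * the standard address-ordered idiom for taking two locks of the same
 * lock class without deadlocking: whichever dentry has the lower address
 * is locked first, and the second acquisition is annotated with
 * DENTRY_D_LOCK_NESTED so lockdep accepts the nesting. Generically:
 *
 *	if (a < b) {
 *		spin_lock(&a->lock);
 *		spin_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
 *	} else {
 *		spin_lock(&b->lock);
 *		spin_lock_nested(&a->lock, SINGLE_DEPTH_NESTING);
 *	}
 */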
/*
 * This helper attempts to cope with remotely renamed directories
 *
 * It assumes that the caller is already holding
 * dentry->d_parent->d_inode->i_mutex and the dcache_lock
 *
 * Note: If ever the locking in lock_rename() changes, then please
 * remember to update this too...
 *
 * On return, dcache_lock will have been unlocked.
 */
static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
{
	struct mutex *m1 = NULL, *m2 = NULL;
	struct dentry *ret;

	/* If alias and dentry share a parent, then no extra locks required */
	if (alias->d_parent == dentry->d_parent)
		goto out_unalias;

	/* Check for loops */
	ret = ERR_PTR(-ELOOP);
	if (d_isparent(alias, dentry))
		goto out_err;

	/* See lock_rename() */
	ret = ERR_PTR(-EBUSY);
	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
		goto out_err;
	m1 = &dentry->d_sb->s_vfs_rename_mutex;
	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
		goto out_err;
	m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
	d_move_locked(alias, dentry);
	ret = alias;
out_err:
	spin_unlock(&dcache_lock);
	if (m2)
		mutex_unlock(m2);
	if (m1)
		mutex_unlock(m1);
	return ret;
}

/*
 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
 * named dentry in place of the dentry to be replaced.
 */
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
	struct dentry *dparent, *aparent;

	switch_names(dentry, anon);
	do_switch(dentry->d_name.len, anon->d_name.len);
	do_switch(dentry->d_name.hash, anon->d_name.hash);

	dparent = dentry->d_parent;
	aparent = anon->d_parent;

	dentry->d_parent = (aparent == anon) ? dentry : aparent;
	list_del(&dentry->d_u.d_child);
	if (!IS_ROOT(dentry))
		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	else
		INIT_LIST_HEAD(&dentry->d_u.d_child);

	anon->d_parent = (dparent == dentry) ? anon : dparent;
	list_del(&anon->d_u.d_child);
	if (!IS_ROOT(anon))
		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
	else
		INIT_LIST_HEAD(&anon->d_u.d_child);

	anon->d_flags &= ~DCACHE_DISCONNECTED;
}

/**
 * d_materialise_unique - introduce an inode into the tree
 * @dentry: candidate dentry
 * @inode: inode to bind to the dentry, to which aliases may be attached
 *
 * Introduces a dentry into the tree, substituting an extant disconnected
 * root directory alias in its place if there is one
 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
	struct dentry *actual;

	BUG_ON(!d_unhashed(dentry));

	spin_lock(&dcache_lock);

	if (!inode) {
		actual = dentry;
		dentry->d_inode = NULL;
		goto found_lock;
	}

	if (S_ISDIR(inode->i_mode)) {
		struct dentry *alias;

		/* Does an aliased dentry already exist? */
		alias = __d_find_alias(inode, 0);
		if (alias) {
			actual = alias;
			/* Is this an anonymous mountpoint that we could splice
			 * into our tree? */
			if (IS_ROOT(alias)) {
				spin_lock(&alias->d_lock);
				__d_materialise_dentry(dentry, alias);
				__d_drop(alias);
				goto found;
			}
			/* Nope, but we must(!) avoid directory aliasing */
			actual = __d_unalias(dentry, alias);
			if (IS_ERR(actual))
				dput(alias);
			goto out_nolock;
		}
	}

	/* Add a unique reference */
	actual = __d_instantiate_unique(dentry, inode);
	if (!actual)
		actual = dentry;
	else if (unlikely(!d_unhashed(actual)))
		goto shouldnt_be_hashed;

found_lock:
	spin_lock(&actual->d_lock);
found:
	_d_rehash(actual);
	spin_unlock(&actual->d_lock);
	spin_unlock(&dcache_lock);
out_nolock:
	if (actual == dentry) {
		security_d_instantiate(dentry, inode);
		return NULL;
	}

	iput(inode);
	return actual;

shouldnt_be_hashed:
	spin_unlock(&dcache_lock);
	BUG();
	goto shouldnt_be_hashed;
}

/**
 * __d_path - return the path of a dentry
 * @dentry: dentry to report
 * @vfsmnt: vfsmnt to which the dentry belongs
 * @root: root vfsmnt/dentry pair at which to stop the walk
 * @buffer: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns the buffer or an error code if the path was too long.
 *
 * "buflen" should be positive. Caller holds the dcache_lock.
 */
static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
		      struct path *root, char *buffer, int buflen)
{
	char * end = buffer+buflen;
	char * retval;
	int namelen;

	*--end = '\0';
	buflen--;
	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
		buflen -= 10;
		end -= 10;
		if (buflen < 0)
			goto Elong;
		memcpy(end, " (deleted)", 10);
	}

	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	for (;;) {
		struct dentry * parent;

		if (dentry == root->dentry && vfsmnt == root->mnt)
			break;
		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root? */
			spin_lock(&vfsmount_lock);
			if (vfsmnt->mnt_parent == vfsmnt) {
				spin_unlock(&vfsmount_lock);
				goto global_root;
			}
			dentry = vfsmnt->mnt_mountpoint;
			vfsmnt = vfsmnt->mnt_parent;
			spin_unlock(&vfsmount_lock);
			continue;
		}
		parent = dentry->d_parent;
		prefetch(parent);
		namelen = dentry->d_name.len;
		buflen -= namelen + 1;
		if (buflen < 0)
			goto Elong;
		end -= namelen;
		memcpy(end, dentry->d_name.name, namelen);
		*--end = '/';
		retval = end;
		dentry = parent;
	}

	return retval;

global_root:
	namelen = dentry->d_name.len;
	buflen -= namelen;
	if (buflen < 0)
		goto Elong;
	retval -= namelen-1;	/* hit the slash */
	memcpy(retval, dentry->d_name.name, namelen);
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}
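/*
 * Editor's illustration of the algorithm above: the name is assembled
 * backwards from the end of the buffer. For a dentry "ls" living under
 * "/usr/bin", the tail of the buffer evolves as
 *
 *	[............/\0]	NUL, then the initial '/'
 *	[............/ls\0]	copy component, then *--end = '/'
 *	[......./bin/ls\0]
 *	[.../usr/bin/ls\0]
 *
 * and the returned pointer is the position of the leading '/', somewhere
 * inside the buffer rather than at its start.
 */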
/**
 * d_path - return the path of a dentry
 * @path: path to report
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns the buffer or an error code if the path was too long.
 *
 * "buflen" should be positive. The dcache_lock is taken internally.
 */
char *d_path(struct path *path, char *buf, int buflen)
{
	char *res;
	struct path root;

	/*
	 * We have various synthetic filesystems that never get mounted. On
	 * these filesystems dentries are never used for lookup purposes, and
	 * thus don't need to be hashed. They also don't need a name until a
	 * user wants to identify the object in /proc/pid/fd/. The little hack
	 * below allows us to generate a name for these objects on demand:
	 */
	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	read_lock(&current->fs->lock);
	root = current->fs->root;
	path_get(&current->fs->root);
	read_unlock(&current->fs->lock);
	spin_lock(&dcache_lock);
	res = __d_path(path->dentry, path->mnt, &root, buf, buflen);
	spin_unlock(&dcache_lock);
	path_put(&root);
	return res;
}

/*
 * Helper function for dentry_operations.d_dname() members
 */
char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
			const char *fmt, ...)
{
	va_list args;
	char temp[64];
	int sz;

	va_start(args, fmt);
	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
	va_end(args);

	if (sz > sizeof(temp) || sz > buflen)
		return ERR_PTR(-ENAMETOOLONG);

	buffer += buflen - sz;
	return memcpy(buffer, temp, sz);
}

/*
 * NOTE! The user-level library version returns a
 * character pointer. The kernel system call just
 * returns the length of the buffer filled (which
 * includes the ending '\0' character), or a negative
 * error value. So libc would do something like
 *
 *	char *getcwd(char * buf, size_t size)
 *	{
 *		int retval;
 *
 *		retval = sys_getcwd(buf, size);
 *		if (retval >= 0)
 *			return buf;
 *		errno = -retval;
 *		return NULL;
 *	}
 */
asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
{
	int error;
	struct path pwd, root;
	char *page = (char *) __get_free_page(GFP_USER);

	if (!page)
		return -ENOMEM;

	read_lock(&current->fs->lock);
	pwd = current->fs->pwd;
	path_get(&current->fs->pwd);
	root = current->fs->root;
	path_get(&current->fs->root);
	read_unlock(&current->fs->lock);

	error = -ENOENT;
	/* Has the current directory been unlinked? */
	spin_lock(&dcache_lock);
	if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) {
		unsigned long len;
		char * cwd;

		cwd = __d_path(pwd.dentry, pwd.mnt, &root, page, PAGE_SIZE);
		spin_unlock(&dcache_lock);

		error = PTR_ERR(cwd);
		if (IS_ERR(cwd))
			goto out;

		error = -ERANGE;
		len = PAGE_SIZE + page - cwd;
		if (len <= size) {
			error = len;
			if (copy_to_user(buf, cwd, len))
				error = -EFAULT;
		}
	} else
		spin_unlock(&dcache_lock);

out:
	path_put(&pwd);
	path_put(&root);
	free_page((unsigned long) page);
	return error;
}
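/*
 * Editor's sketch (illustrative): a typical in-kernel d_path() caller
 * hands it a page-sized scratch buffer, much as sys_getcwd() does above:
 *
 *	char *page = (char *)__get_free_page(GFP_KERNEL);
 *	char *p;
 *
 *	if (!page)
 *		return -ENOMEM;
 *	p = d_path(&file->f_path, page, PAGE_SIZE);
 *	if (!IS_ERR(p))
 *		... use the NUL-terminated path at p ...
 *	free_page((unsigned long)page);
 *
 * Remember that the result points into the buffer, not at its start,
 * because __d_path() builds the string from the end.
 */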
/*
 * Test whether new_dentry is a subdirectory of old_dentry.
 *
 * Trivially implemented using the dcache structure
 */

/**
 * is_subdir - is new dentry a subdirectory of old_dentry
 * @new_dentry: new dentry
 * @old_dentry: old dentry
 *
 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
 * Returns 0 otherwise.
 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
 */

int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
{
	int result;
	struct dentry * saved = new_dentry;
	unsigned long seq;

	/* need rcu_read_lock() to protect against the d_parent trashing
	 * due to d_move
	 */
	rcu_read_lock();
	do {
		/* for restarting inner loop in case of seq retry */
		new_dentry = saved;
		result = 0;
		seq = read_seqbegin(&rename_lock);
		for (;;) {
			if (new_dentry != old_dentry) {
				struct dentry * parent = new_dentry->d_parent;
				if (parent == new_dentry)
					break;
				new_dentry = parent;
				continue;
			}
			result = 1;
			break;
		}
	} while (read_seqretry(&rename_lock, seq));
	rcu_read_unlock();

	return result;
}

void d_genocide(struct dentry *root)
{
	struct dentry *this_parent = root;
	struct list_head *next;

	spin_lock(&dcache_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;
		if (d_unhashed(dentry) || !dentry->d_inode)
			continue;
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
		atomic_dec(&dentry->d_count);
	}
	if (this_parent != root) {
		next = this_parent->d_u.d_child.next;
		atomic_dec(&this_parent->d_count);
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
}

/**
 * find_inode_number - check for dentry with name
 * @dir: directory to check
 * @name: Name to find.
 *
 * Check whether a dentry already exists for the given name,
 * and return the inode number if it has an inode. Otherwise
 * 0 is returned.
 *
 * This routine is used to post-process directory listings for
 * filesystems using synthetic inode numbers, and is necessary
 * to keep getcwd() working.
 */

ino_t find_inode_number(struct dentry *dir, struct qstr *name)
{
	struct dentry * dentry;
	ino_t ino = 0;

	dentry = d_hash_and_lookup(dir, name);
	if (dentry) {
		if (dentry->d_inode)
			ino = dentry->d_inode->i_ino;
		dput(dentry);
	}
	return ino;
}

static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
	if (!str)
		return 0;
	dhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("dhash_entries=", set_dhash_entries);

static void __init dcache_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
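/*
 * Editor's note: "dhash_entries=" above is a kernel command-line
 * parameter. Booting with, say,
 *
 *	dhash_entries=1048576
 *
 * requests that many hash buckets instead of the automatic sizing that
 * alloc_large_system_hash() would otherwise derive from available
 * memory (the value may be rounded by that helper).
 */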
static void __init dcache_init(void)
{
	int loop;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);

	register_shrinker(&dcache_shrinker);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}

/* SLAB cache for __getname() consumers */
struct kmem_cache *names_cachep __read_mostly;

/* SLAB cache for file structures */
struct kmem_cache *filp_cachep __read_mostly;

EXPORT_SYMBOL(d_genocide);

void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}

void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */

	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init();
	inode_init();
	files_init(mempages);
	mnt_init();
	bdev_cache_init();
	chrdev_init();
}

EXPORT_SYMBOL(d_alloc);
EXPORT_SYMBOL(d_alloc_anon);
EXPORT_SYMBOL(d_alloc_root);
EXPORT_SYMBOL(d_delete);
EXPORT_SYMBOL(d_find_alias);
EXPORT_SYMBOL(d_instantiate);
EXPORT_SYMBOL(d_invalidate);
EXPORT_SYMBOL(d_lookup);
EXPORT_SYMBOL(d_move);
EXPORT_SYMBOL_GPL(d_materialise_unique);
EXPORT_SYMBOL(d_path);
EXPORT_SYMBOL(d_prune_aliases);
EXPORT_SYMBOL(d_rehash);
EXPORT_SYMBOL(d_splice_alias);
EXPORT_SYMBOL(d_validate);
EXPORT_SYMBOL(dget_locked);
EXPORT_SYMBOL(dput);
EXPORT_SYMBOL(find_inode_number);
EXPORT_SYMBOL(have_submounts);
EXPORT_SYMBOL(names_cachep);
EXPORT_SYMBOL(shrink_dcache_parent);
EXPORT_SYMBOL(shrink_dcache_sb);