/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include "internal.h"

/*
 * Usage:
 * dcache->d_inode->i_lock protects:
 *   - i_dentry, d_alias, d_inode of aliases
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
 * dcache_lru_lock protects:
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
 *   - d_count
 *   - d_unhashed()
 *   - d_parent and d_subdirs
 *   - children's d_child and d_parent
 *   - d_alias, d_inode
 *
 * Ordering:
 * dentry->d_inode->i_lock
 *   dentry->d_lock
 *     dcache_lru_lock
 *     dcache_hash_bucket lock
 *     s_anon lock
 *
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

static struct kmem_cache *dentry_cache __read_mostly;

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS	d_hash_shift
#define D_HASHMASK	d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

struct dcache_hash_bucket {
	struct hlist_bl_head head;
};
static struct dcache_hash_bucket *dentry_hashtable __read_mostly;

static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
					unsigned long hash)
{
	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}

static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
{
	bit_spin_lock(0, (unsigned long *)&b->head.first);
}

static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
{
	__bit_spin_unlock(0, (unsigned long *)&b->head.first);
}
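
/*
 * Editor's illustrative sketch (not part of the original file): the
 * "no ancestor relationship" rule from the ordering comment above, i.e.
 * two unrelated dentries are always locked in ascending address order.
 * The helper name is made up; dentry_lock_for_move() later in this file
 * implements the same idea with explicit nesting levels.
 */
static void __maybe_unused example_lock_unrelated_pair(struct dentry *d1,
						       struct dentry *d2)
{
	if (d1 < d2) {
		spin_lock(&d1->d_lock);
		spin_lock_nested(&d2->d_lock, DENTRY_D_LOCK_NESTED);
	} else {
		spin_lock(&d2->d_lock);
		spin_lock_nested(&d1->d_lock, DENTRY_D_LOCK_NESTED);
	}
	/* ... critical section touching both dentries ... */
	spin_unlock(&d2->d_lock);
	spin_unlock(&d1->d_lock);
}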
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

static DEFINE_PER_CPU(unsigned int, nr_dentry);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
static int get_nr_dentry(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	return sum < 0 ? 0 : sum;
}

int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif

static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	WARN_ON(!list_empty(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

/*
 * no locks, please.
 */
static void d_free(struct dentry *dentry)
{
	BUG_ON(dentry->d_count);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never inserted into hash, immediate free is OK */
	if (hlist_bl_unhashed(&dentry->d_hash))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}

/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: dentry to invalidate lookups for
 *
 * After this call, in-progress rcu-walk path lookups will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier */
	write_seqcount_barrier(&dentry->d_seq);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
	}
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	dentry->d_inode = NULL;
	list_del_init(&dentry->d_alias);
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}
/*
 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
 */
static void dentry_lru_add(struct dentry *dentry)
{
	if (list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
		spin_unlock(&dcache_lru_lock);
	}
}

static void __dentry_lru_del(struct dentry *dentry)
{
	list_del_init(&dentry->d_lru);
	dentry->d_sb->s_nr_dentry_unused--;
	dentry_stat.nr_unused--;
}

static void dentry_lru_del(struct dentry *dentry)
{
	if (!list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		__dentry_lru_del(dentry);
		spin_unlock(&dcache_lru_lock);
	}
}

static void dentry_lru_move_tail(struct dentry *dentry)
{
	spin_lock(&dcache_lru_lock);
	if (list_empty(&dentry->d_lru)) {
		list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
	} else {
		list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
	}
	spin_unlock(&dcache_lru_lock);
}

/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
	__releases(dentry->d_lock)
	__releases(parent->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	dentry->d_parent = NULL;
	list_del(&dentry->d_u.d_child);
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	d_free(dentry);
	return parent;
}

/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!(dentry->d_flags & DCACHE_UNHASHED)) {
		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
			bit_spin_lock(0,
				(unsigned long *)&dentry->d_sb->s_anon.first);
			dentry->d_flags |= DCACHE_UNHASHED;
			hlist_bl_del_init(&dentry->d_hash);
			__bit_spin_unlock(0,
				(unsigned long *)&dentry->d_sb->s_anon.first);
		} else {
			struct dcache_hash_bucket *b;
			b = d_hash(dentry->d_parent, dentry->d_name.hash);
			spin_lock_bucket(b);
			/*
			 * We may not actually need to put DCACHE_UNHASHED
			 * manipulations under the hash lock, but follow
			 * the principle of least surprise.
			 */
			dentry->d_flags |= DCACHE_UNHASHED;
			hlist_bl_del_rcu(&dentry->d_hash);
			spin_unlock_bucket(b);
			dentry_rcuwalk_barrier(dentry);
		}
	}
}
EXPORT_SYMBOL(__d_drop);

void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If ref is non-zero, then decrement the refcount too.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
	__releases(dentry->d_lock)
{
	struct inode *inode;
	struct dentry *parent;

	inode = dentry->d_inode;
	if (inode && !spin_trylock(&inode->i_lock)) {
relock:
		spin_unlock(&dentry->d_lock);
		cpu_relax();
		return dentry; /* try again with same dentry */
	}
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	if (parent && !spin_trylock(&parent->d_lock)) {
		if (inode)
			spin_unlock(&inode->i_lock);
		goto relock;
	}

	if (ref)
		dentry->d_count--;
	/* if dentry was on the d_lru list delete it from there */
	dentry_lru_del(dentry);
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	return d_kill(dentry, parent);
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	if (dentry->d_count == 1)
		might_sleep();
	spin_lock(&dentry->d_lock);
	BUG_ON(!dentry->d_count);
	if (dentry->d_count > 1) {
		dentry->d_count--;
		spin_unlock(&dentry->d_lock);
		return;
	}

	if (dentry->d_flags & DCACHE_OP_DELETE) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;

	/* Otherwise leave it cached and ensure it's on the LRU */
	dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	dentry = dentry_kill(dentry, 1);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */
int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dentry->d_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dentry->d_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dentry->d_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dentry->d_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 */
	if (dentry->d_count > 1) {
		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
			spin_unlock(&dentry->d_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);

/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_count++;
}

static inline void __dget(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__dget_dlock(dentry);
	spin_unlock(&dentry->d_lock);
}

struct dentry *dget_parent(struct dentry *dentry)
{
	struct dentry *ret;

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	if (!ret) {
		rcu_read_unlock();
		goto out;
	}
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	BUG_ON(!ret->d_count);
	ret->d_count++;
	spin_unlock(&ret->d_lock);
out:
	return ret;
}
EXPORT_SYMBOL(dget_parent);
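
/*
 * Editor's illustrative sketch (not in the original): dget_parent() hands
 * back a pinned parent that remains valid even if @dentry is concurrently
 * moved; the reference must always be balanced with dput(). The helper
 * name is hypothetical.
 */
static void __maybe_unused example_use_parent(struct dentry *dentry)
{
	struct dentry *parent = dget_parent(dentry);

	/* parent cannot be freed while we hold this reference */
	dput(parent);
}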
/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon: flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				discon_alias = alias;
			} else if (!want_discon) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!list_empty(&inode->i_dentry)) {
		spin_lock(&inode->i_lock);
		de = __d_find_alias(inode, 0);
		spin_unlock(&inode->i_lock);
	}
	return de;
}
EXPORT_SYMBOL(d_find_alias);

/*
 * Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_count) {
			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired; no problem, just try again.
 */
static void try_prune_one_dentry(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct dentry *parent;

	parent = dentry_kill(dentry, 0);
	/*
	 * If dentry_kill returns NULL, we have nothing more to do.
	 * if it returns the same dentry, trylocks failed. In either
	 * case, just loop again.
	 *
	 * Otherwise, we need to prune ancestors too. This is necessary
	 * to prevent quadratic behavior of shrink_dcache_parent(), but
	 * is also expected to be beneficial in reducing dentry cache
	 * fragmentation.
	 */
	if (!parent)
		return;
	if (parent == dentry)
		return;

	/* Prune ancestors. */
	dentry = parent;
	while (dentry) {
		spin_lock(&dentry->d_lock);
		if (dentry->d_count > 1) {
			dentry->d_count--;
			spin_unlock(&dentry->d_lock);
			return;
		}
		dentry = dentry_kill(dentry, 1);
	}
}

static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	rcu_read_lock();
	for (;;) {
		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
		if (&dentry->d_lru == list)
			break; /* empty */
		spin_lock(&dentry->d_lock);
		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup. Do not free
		 * it - just keep it off the LRU list.
		 */
		if (dentry->d_count) {
			dentry_lru_del(dentry);
			spin_unlock(&dentry->d_lock);
			continue;
		}

		rcu_read_unlock();

		try_prune_one_dentry(dentry);

		rcu_read_lock();
	}
	rcu_read_unlock();
}

/**
 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
 * @sb:		superblock to shrink dentry LRU.
 * @count:	number of entries to prune
 * @flags:	flags to control the dentry processing
 *
 * If flags contains DCACHE_REFERENCED, referenced dentries will not be pruned.
 */
static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
{
	/* called from prune_dcache() and shrink_dcache_parent() */
	struct dentry *dentry;
	LIST_HEAD(referenced);
	LIST_HEAD(tmp);
	int cnt = *count;

relock:
	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		dentry = list_entry(sb->s_dentry_lru.prev,
				struct dentry, d_lru);
		BUG_ON(dentry->d_sb != sb);

		if (!spin_trylock(&dentry->d_lock)) {
			spin_unlock(&dcache_lru_lock);
			cpu_relax();
			goto relock;
		}

		/*
		 * If we are honouring the DCACHE_REFERENCED flag and the
		 * dentry has this flag set, don't free it. Clear the flag
		 * and put it back on the LRU.
		 */
		if (flags & DCACHE_REFERENCED &&
				dentry->d_flags & DCACHE_REFERENCED) {
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_move(&dentry->d_lru, &referenced);
			spin_unlock(&dentry->d_lock);
		} else {
			list_move_tail(&dentry->d_lru, &tmp);
			spin_unlock(&dentry->d_lock);
			if (!--cnt)
				break;
		}
		cond_resched_lock(&dcache_lru_lock);
	}
	if (!list_empty(&referenced))
		list_splice(&referenced, &sb->s_dentry_lru);
	spin_unlock(&dcache_lru_lock);

	shrink_dentry_list(&tmp);

	*count = cnt;
}
/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try to free
 *
 * Shrink the dcache. This is done when we need more memory, or simply when we
 * need to unmount something (at which point we need to unuse all dentries).
 *
 * This function may fail to free any resources if all the dentries are in
 * use.
 */
static void prune_dcache(int count)
{
	struct super_block *sb, *p = NULL;
	int w_count;
	int unused = dentry_stat.nr_unused;
	int prune_ratio;
	int pruned;

	if (unused == 0 || count == 0)
		return;
	if (count >= unused)
		prune_ratio = 1;
	else
		prune_ratio = unused / count;
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (list_empty(&sb->s_instances))
			continue;
		if (sb->s_nr_dentry_unused == 0)
			continue;
		sb->s_count++;
		/* Now, we reclaim unused dentries with fairness.
		 * We reclaim the same percentage from each superblock.
		 * We calculate number of dentries to scan on this sb
		 * as follows, but the implementation is arranged to avoid
		 * overflows:
		 * number of dentries to scan on this sb =
		 * count * (number of dentries on this sb /
		 * number of dentries in the machine)
		 */
		spin_unlock(&sb_lock);
		if (prune_ratio != 1)
			w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
		else
			w_count = sb->s_nr_dentry_unused;
		pruned = w_count;
		/*
		 * We need to be sure this filesystem isn't being unmounted,
		 * otherwise we could race with generic_shutdown_super(), and
		 * end up holding a reference to an inode while the filesystem
		 * is unmounted. So we try to get s_umount, and make sure
		 * s_root isn't NULL.
		 */
		if (down_read_trylock(&sb->s_umount)) {
			if ((sb->s_root != NULL) &&
			    (!list_empty(&sb->s_dentry_lru))) {
				__shrink_dcache_sb(sb, &w_count,
						DCACHE_REFERENCED);
				pruned -= w_count;
			}
			up_read(&sb->s_umount);
		}
		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		count -= pruned;
		p = sb;
		/* more work left to do? */
		if (count <= 0)
			break;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}
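
/*
 * Editor's worked example of the fairness calculation above (illustrative
 * numbers, not from the source): with dentry_stat.nr_unused == 1000 and
 * count == 100, prune_ratio is 10; a superblock holding 300 of those
 * unused dentries gets w_count = 300 / 10 + 1 = 31 entries scanned, i.e.
 * roughly its 30% share of the total work, while a superblock holding
 * only 10 unused dentries gets w_count = 2.
 */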
/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	LIST_HEAD(tmp);

	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		list_splice_init(&sb->s_dentry_lru, &tmp);
		spin_unlock(&dcache_lru_lock);
		shrink_dentry_list(&tmp);
		spin_lock(&dcache_lru_lock);
	}
	spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);

/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 *   locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;
	unsigned detached = 0;

	BUG_ON(!IS_ROOT(dentry));

	/* detach this root from the system */
	spin_lock(&dentry->d_lock);
	dentry_lru_del(dentry);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs)) {
			struct dentry *loop;

			/* this is a branch with children - detach all of them
			 * from the system in one go */
			spin_lock(&dentry->d_lock);
			list_for_each_entry(loop, &dentry->d_subdirs,
					    d_u.d_child) {
				spin_lock_nested(&loop->d_lock,
						DENTRY_D_LOCK_NESTED);
				dentry_lru_del(loop);
				__d_drop(loop);
				spin_unlock(&loop->d_lock);
			}
			spin_unlock(&dentry->d_lock);

			/* move to the first child */
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);
		}

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			if (dentry->d_count != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       dentry->d_count,
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			if (IS_ROOT(dentry)) {
				parent = NULL;
				list_del(&dentry->d_u.d_child);
			} else {
				parent = dentry->d_parent;
				spin_lock(&parent->d_lock);
				parent->d_count--;
				list_del(&dentry->d_u.d_child);
				spin_unlock(&parent->d_lock);
			}

			detached++;

			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				list_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				return;
			dentry = parent;
		} while (list_empty(&dentry->d_subdirs));

		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
}

/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock because:
 *   - the superblock is detached from all mountings and open files, so the
 *     dentry trees will not be rearranged by the VFS
 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 *     any dentries belonging to this superblock that it comes across
 *   - the filesystem itself is no longer permitted to rearrange the dentries
 *     in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	spin_lock(&dentry->d_lock);
	dentry->d_count--;
	spin_unlock(&dentry->d_lock);
	shrink_dcache_for_umount_subtree(dentry);

	while (!hlist_bl_empty(&sb->s_anon)) {
		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon),
					struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}

/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */
int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;

	if (d_mountpoint(parent))
		goto positive;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		/* Have we found a mount point? */
		if (d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			spin_unlock(&this_parent->d_lock);
			goto positive;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *tmp;
		struct dentry *child;

		tmp = this_parent->d_parent;
		rcu_read_lock();
		spin_unlock(&this_parent->d_lock);
		child = this_parent;
		this_parent = tmp;
		spin_lock(&this_parent->d_lock);
		/* might go back up the wrong parent if we have had a rename
		 * or deletion */
		if (this_parent != child->d_parent ||
			 (!locked && read_seqretry(&rename_lock, seq))) {
			spin_unlock(&this_parent->d_lock);
			rcu_read_unlock();
			goto rename_retry;
		}
		rcu_read_unlock();
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 0; /* No mount points found in tree */
positive:
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 1;

rename_retry:
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}
EXPORT_SYMBOL(have_submounts);

/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry * parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int found = 0;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

		/*
		 * move only zero ref count dentries to the end
		 * of the unused list for prune_dcache
		 */
		if (!dentry->d_count) {
			dentry_lru_move_tail(dentry);
			found++;
		} else {
			dentry_lru_del(dentry);
		}

		/*
		 * We can return to the caller if we have found some (this
		 * ensures forward progress). We'll be coming back to find
		 * the rest.
		 */
		if (found && need_resched()) {
			spin_unlock(&dentry->d_lock);
			goto out;
		}

		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}

		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *tmp;
		struct dentry *child;

		tmp = this_parent->d_parent;
		rcu_read_lock();
		spin_unlock(&this_parent->d_lock);
		child = this_parent;
		this_parent = tmp;
		spin_lock(&this_parent->d_lock);
		/* might go back up the wrong parent if we have had a rename
		 * or deletion */
		if (this_parent != child->d_parent ||
			(!locked && read_seqretry(&rename_lock, seq))) {
			spin_unlock(&this_parent->d_lock);
			rcu_read_unlock();
			goto rename_retry;
		}
		rcu_read_unlock();
		next = child->d_u.d_child.next;
		goto resume;
	}
out:
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return found;

rename_retry:
	if (found)
		return found;
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */
void shrink_dcache_parent(struct dentry * parent)
{
	struct super_block *sb = parent->d_sb;
	int found;

	while ((found = select_parent(parent)) != 0)
		__shrink_dcache_sb(sb, &found, 0);
}
EXPORT_SYMBOL(shrink_dcache_parent);

/*
 * Scan `nr' dentries and return the number which remain.
 *
 * We need to avoid reentering the filesystem if the caller is performing a
 * GFP_NOFS allocation attempt. One example deadlock is:
 *
 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
 *
 * In this case we return -1 to tell the caller that we bailed.
 */
static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
	if (nr) {
		if (!(gfp_mask & __GFP_FS))
			return -1;
		prune_dcache(nr);
	}

	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker dcache_shrinker = {
	.shrink = shrink_dcache_memory,
	.seeks = DEFAULT_SEEKS,
};
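
/*
 * Editor's note with a worked example (numbers illustrative): the shrinker
 * is hooked into the VM core via register_shrinker(), e.g.
 *
 *	register_shrinker(&dcache_shrinker);
 *
 * When only asked for a count (nr == 0), shrink_dcache_memory() reports
 * (nr_unused / 100) * vfs_cache_pressure freeable objects: with 10000
 * unused dentries and the default pressure of 100 that is 10000, while
 * vfs_cache_pressure == 50 halves it to 5000, making the VM reclaim
 * dentries less aggressively relative to the page cache.
 */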
/**
 * d_alloc	-	allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */
struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else {
		dname = dentry->d_iname;
	}
	dentry->d_name.name = dname;

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	dentry->d_count = 1;
	dentry->d_flags = DCACHE_UNHASHED;
	spin_lock_init(&dentry->d_lock);
	seqcount_init(&dentry->d_seq);
	dentry->d_inode = NULL;
	dentry->d_parent = NULL;
	dentry->d_sb = NULL;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);
	INIT_LIST_HEAD(&dentry->d_u.d_child);

	if (parent) {
		spin_lock(&parent->d_lock);
		/*
		 * don't need child lock because it is not subject
		 * to concurrency here
		 */
		__dget_dlock(parent);
		dentry->d_parent = parent;
		dentry->d_sb = parent->d_sb;
		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
		spin_unlock(&parent->d_lock);
	}

	this_cpu_inc(nr_dentry);

	return dentry;
}
EXPORT_SYMBOL(d_alloc);

struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry = d_alloc(NULL, name);
	if (dentry) {
		dentry->d_sb = sb;
		dentry->d_parent = dentry;
		dentry->d_flags |= DCACHE_DISCONNECTED;
	}
	return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);

struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
	struct qstr q;

	q.name = name;
	q.len = strlen(name);
	q.hash = full_name_hash(q.name, q.len);
	return d_alloc(parent, &q);
}
EXPORT_SYMBOL(d_alloc_name);

void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
	BUG_ON(dentry->d_op);
	BUG_ON(dentry->d_flags & (DCACHE_OP_HASH	|
				DCACHE_OP_COMPARE	|
				DCACHE_OP_REVALIDATE	|
				DCACHE_OP_DELETE));
	dentry->d_op = op;
	if (!op)
		return;
	if (op->d_hash)
		dentry->d_flags |= DCACHE_OP_HASH;
	if (op->d_compare)
		dentry->d_flags |= DCACHE_OP_COMPARE;
	if (op->d_revalidate)
		dentry->d_flags |= DCACHE_OP_REVALIDATE;
	if (op->d_delete)
		dentry->d_flags |= DCACHE_OP_DELETE;
}
EXPORT_SYMBOL(d_set_d_op);
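
/*
 * Editor's illustrative sketch (hypothetical names, not part of this file):
 * a filesystem typically attaches its dentry_operations once, at lookup
 * time, before the dentry goes live; d_set_d_op() BUGs if ops are already
 * set. Here ->d_delete always returns 1, so such dentries are never kept
 * cached once the last reference is dropped.
 */
static int example_d_delete(const struct dentry *dentry)
{
	return 1;	/* always unhash on final dput */
}

static const struct dentry_operations example_dops = {
	.d_delete	= example_d_delete,
};

static void __maybe_unused example_attach_ops(struct dentry *dentry)
{
	d_set_d_op(dentry, &example_dops);
}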
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	spin_lock(&dentry->d_lock);
	if (inode)
		list_add(&dentry->d_alias, &inode->i_dentry);
	dentry->d_inode = inode;
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	fsnotify_d_instantiate(dentry, inode);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!list_empty(&entry->d_alias));
	if (inode)
		spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);
	security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);

/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
					     struct inode *inode)
{
	struct dentry *alias;
	int len = entry->d_name.len;
	const char *name = entry->d_name.name;
	unsigned int hash = entry->d_name.hash;

	if (!inode) {
		__d_instantiate(entry, NULL);
		return NULL;
	}

	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		struct qstr *qstr = &alias->d_name;

		/*
		 * Don't need alias->d_lock here, because aliases with
		 * d_parent == entry->d_parent are not subject to name or
		 * parent changes, because the parent inode i_mutex is held.
		 */
		if (qstr->hash != hash)
			continue;
		if (alias->d_parent != entry->d_parent)
			continue;
		if (dentry_cmp(qstr->name, qstr->len, name, len))
			continue;
		__dget(alias);
		return alias;
	}

	__d_instantiate(entry, inode);
	return NULL;
}

struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *result;

	BUG_ON(!list_empty(&entry->d_alias));

	if (inode)
		spin_lock(&inode->i_lock);
	result = __d_instantiate_unique(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);

	if (!result) {
		security_d_instantiate(entry, inode);
		return NULL;
	}

	BUG_ON(!d_unhashed(result));
	iput(inode);
	return result;
}
EXPORT_SYMBOL(d_instantiate_unique);
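
/*
 * Editor's illustrative sketch (hypothetical helper, not from this file):
 * the usual create-path pairing. The caller (e.g. a filesystem's ->create)
 * already holds a reference on @inode from new_inode() or similar;
 * d_instantiate() turns the negative @dentry positive and the dcache takes
 * that reference over.
 */
static void __maybe_unused example_create_finish(struct dentry *dentry,
						 struct inode *inode)
{
	d_instantiate(dentry, inode);
	/* from here on, the final dput(dentry) will eventually iput(inode) */
}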
/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
 */
struct dentry * d_alloc_root(struct inode * root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = { .name = "/", .len = 1 };

		res = d_alloc(NULL, &name);
		if (res) {
			res->d_sb = root_inode->i_sb;
			res->d_parent = res;
			d_instantiate(res, root_inode);
		}
	}
	return res;
}
EXPORT_SYMBOL(d_alloc_root);

/**
 * d_obtain_alias - find or allocate a dentry for a given inode
 * @inode: inode to allocate the dentry for
 *
 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
 * similar open by handle operations. The returned dentry may be anonymous,
 * or may have a full name (if the inode was already in the cache).
 *
 * When called on a directory inode, we must ensure that the inode only ever
 * has one dentry. If a dentry is found, that is returned instead of
 * allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry. In case of an error the reference on the inode is released.
 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in, and the error will be propagated to the return value,
 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
 */
struct dentry *d_obtain_alias(struct inode *inode)
{
	static const struct qstr anonstring = { .name = "" };
	struct dentry *tmp;
	struct dentry *res;

	if (!inode)
		return ERR_PTR(-ESTALE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	res = d_find_alias(inode);
	if (res)
		goto out_iput;

	tmp = d_alloc(NULL, &anonstring);
	if (!tmp) {
		res = ERR_PTR(-ENOMEM);
		goto out_iput;
	}
	tmp->d_parent = tmp; /* make sure dput doesn't croak */

	spin_lock(&inode->i_lock);
	res = __d_find_alias(inode, 0);
	if (res) {
		spin_unlock(&inode->i_lock);
		dput(tmp);
		goto out_iput;
	}

	/* attach a disconnected dentry */
	spin_lock(&tmp->d_lock);
	tmp->d_sb = inode->i_sb;
	tmp->d_inode = inode;
	tmp->d_flags |= DCACHE_DISCONNECTED;
	list_add(&tmp->d_alias, &inode->i_dentry);
	bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
	tmp->d_flags &= ~DCACHE_UNHASHED;
	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
	__bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
	spin_unlock(&tmp->d_lock);
	spin_unlock(&inode->i_lock);

	return tmp;

out_iput:
	iput(inode);
	return res;
}
EXPORT_SYMBOL(d_obtain_alias);
/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode:  the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned. Otherwise NULL
 * is returned. This matches the expected return value of ->lookup.
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
	struct dentry *new = NULL;

	if (inode && S_ISDIR(inode->i_mode)) {
		spin_lock(&inode->i_lock);
		new = __d_find_alias(inode, 1);
		if (new) {
			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
			spin_unlock(&inode->i_lock);
			security_d_instantiate(new, inode);
			d_move(new, dentry);
			iput(inode);
		} else {
			/* already taking inode->i_lock, so d_add() by hand */
			__d_instantiate(dentry, inode);
			spin_unlock(&inode->i_lock);
			security_d_instantiate(dentry, inode);
			d_rehash(dentry);
		}
	} else
		d_add(dentry, inode);
	return new;
}
EXPORT_SYMBOL(d_splice_alias);

/**
 * d_add_ci - lookup or allocate new dentry with case-exact name
 * @inode:  the inode case-insensitive lookup has found
 * @dentry: the negative dentry that was passed to the parent's lookup func
 * @name:   the case-exact name to be associated with the returned dentry
 *
 * This is to avoid filling the dcache with case-insensitive names to the
 * same inode, only the actual correct case is stored in the dcache for
 * case-insensitive filesystems.
 *
 * For a case-insensitive lookup match and if the case-exact dentry
 * already exists in the dcache, use it and return it.
 *
 * If no entry exists with the exact case name, allocate new dentry with
 * the exact case, and return the spliced entry.
 */
struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
			struct qstr *name)
{
	int error;
	struct dentry *found;
	struct dentry *new;

	/*
	 * First check if a dentry matching the name already exists,
	 * if not go ahead and create it now.
	 */
	found = d_hash_and_lookup(dentry->d_parent, name);
	if (!found) {
		new = d_alloc(dentry->d_parent, name);
		if (!new) {
			error = -ENOMEM;
			goto err_out;
		}

		found = d_splice_alias(inode, new);
		if (found) {
			dput(new);
			return found;
		}
		return new;
	}

	/*
	 * If a matching dentry exists, and it's not negative use it.
	 *
	 * Decrement the reference count to balance the iget() done
	 * earlier on.
	 */
	if (found->d_inode) {
		if (unlikely(found->d_inode != inode)) {
			/* This can't happen because bad inodes are unhashed. */
			BUG_ON(!is_bad_inode(inode));
			BUG_ON(!is_bad_inode(found->d_inode));
		}
		iput(inode);
		return found;
	}

	/*
	 * Negative dentry: instantiate it unless the inode is a directory and
	 * already has a dentry.
	 */
	spin_lock(&inode->i_lock);
	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
		__d_instantiate(found, inode);
		spin_unlock(&inode->i_lock);
		security_d_instantiate(found, inode);
		return found;
	}

	/*
	 * In case a directory already has a (disconnected) entry grab a
	 * reference to it, move it in place and use it.
	 */
	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
	__dget(new);
	spin_unlock(&inode->i_lock);
	security_d_instantiate(found, inode);
	d_move(new, found);
	iput(inode);
	dput(found);
	return new;

err_out:
	iput(inode);
	return ERR_PTR(error);
}
EXPORT_SYMBOL(d_add_ci);

/**
 * __d_lookup_rcu - search for a dentry (racy, store-free)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * @seq: returns d_seq value at the point where the dentry was found
 * @inode: returns dentry->d_inode when the inode was found valid.
 * Returns: dentry, or NULL
 *
 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
 * resolution (store-free path walking) design described in
 * Documentation/filesystems/path-lookup.txt.
 *
 * This is not to be used outside core vfs.
 *
 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
 * held, and rcu_read_lock held. The returned dentry must not be stored into
 * without taking d_lock and checking d_seq sequence count against @seq
 * returned here.
 *
 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
 * function.
 *
 * Alternatively, __d_lookup_rcu may be called again to look up the child of
 * the returned dentry, so long as its parent's seqlock is checked after the
 * child is looked up. Thus, an interlocking stepping of sequence lock checks
 * is formed, giving integrity down the path walk.
 */
struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
				unsigned *seq, struct inode **inode)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct dcache_hash_bucket *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Carefully use d_seq when comparing a candidate dentry, to avoid
	 * races with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/vfs/dcache-locking.txt for more details.
	 */
	hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
		struct inode *i;
		const char *tname;
		int tlen;

		if (dentry->d_name.hash != hash)
			continue;

seqretry:
		*seq = read_seqcount_begin(&dentry->d_seq);
		if (dentry->d_parent != parent)
			continue;
		if (d_unhashed(dentry))
			continue;
		tlen = dentry->d_name.len;
		tname = dentry->d_name.name;
		i = dentry->d_inode;
		prefetch(tname);
		if (i)
			prefetch(i);
		/*
		 * This seqcount check is required to ensure name and
		 * len are loaded atomically, so as not to walk off the
		 * edge of memory when walking. If we could load this
		 * atomically some other way, we could drop this check.
		 */
		if (read_seqcount_retry(&dentry->d_seq, *seq))
			goto seqretry;
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			if (parent->d_op->d_compare(parent, *inode,
						dentry, i,
						tlen, tname, name))
				continue;
		} else {
			if (dentry_cmp(tname, tlen, str, len))
				continue;
		}
		/*
		 * No extra seqcount check is required after the name
		 * compare. The caller must perform a seqcount check in
		 * order to do anything useful with the returned dentry
		 * anyway.
		 */
		*inode = i;
		return dentry;
	}
	return NULL;
}

/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * d_lookup searches the children of the parent dentry for the name in
 * question. If the dentry is found its reference count is incremented and the
 * dentry is returned. The caller must use dput to free the entry when it has
 * finished using it. %NULL is returned if the dentry does not exist.
 */
struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
{
	struct dentry *dentry;
	unsigned seq;

	do {
		seq = read_seqbegin(&rename_lock);
		dentry = __d_lookup(parent, name);
		if (dentry)
			break;
	} while (read_seqretry(&rename_lock, seq));
	return dentry;
}
EXPORT_SYMBOL(d_lookup);
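
/*
 * Editor's illustrative sketch (hypothetical helper, assumes no fs-specific
 * ->d_hash): the qstr setup that d_lookup() expects. d_hash_and_lookup()
 * below performs the same steps and additionally honours DCACHE_OP_HASH.
 */
static struct dentry * __maybe_unused example_lookup_child(struct dentry *dir,
							   const char *name)
{
	struct qstr q;

	q.name = name;
	q.len = strlen(name);
	q.hash = full_name_hash(q.name, q.len);
	return d_lookup(dir, &q);	/* caller must dput() a non-NULL result */
}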
/**
 * __d_lookup - search for a dentry (racy)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * __d_lookup is like d_lookup, however it may (rarely) return a
 * false-negative result due to unrelated rename activity.
 *
 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
 * however it must be used carefully, eg. with a following d_lookup in
 * the case of failure.
 *
 * __d_lookup callers must be commented.
 */
struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct dcache_hash_bucket *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *found = NULL;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup_rcu which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Take d_lock when comparing a candidate dentry, to avoid races
	 * with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/vfs/dcache-locking.txt for more details.
	 */
	rcu_read_lock();

	hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
		const char *tname;
		int tlen;

		if (dentry->d_name.hash != hash)
			continue;

		spin_lock(&dentry->d_lock);
		if (dentry->d_parent != parent)
			goto next;
		if (d_unhashed(dentry))
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		tlen = dentry->d_name.len;
		tname = dentry->d_name.name;
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			if (parent->d_op->d_compare(parent, parent->d_inode,
						dentry, dentry->d_inode,
						tlen, tname, name))
				goto next;
		} else {
			if (dentry_cmp(tname, tlen, str, len))
				goto next;
		}

		dentry->d_count++;
		found = dentry;
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
	}
	rcu_read_unlock();

	return found;
}

/**
 * d_hash_and_lookup - hash the qstr then search for a dentry
 * @dir: Directory to search in
 * @name: qstr of name we wish to find
 *
 * On hash failure or on lookup failure NULL is returned.
 */
struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
{
	struct dentry *dentry = NULL;

	/*
	 * Check for a fs-specific hash function. Note that we must
	 * calculate the standard hash first, as the d_op->d_hash()
	 * routine may choose to leave the hash value unchanged.
	 */
	name->hash = full_name_hash(name->name, name->len);
	if (dir->d_flags & DCACHE_OP_HASH) {
		if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
			goto out;
	}
	dentry = d_lookup(dir, name);
out:
	return dentry;
}

/**
 * d_validate - verify dentry provided from insecure source (deprecated)
 * @dentry: The dentry alleged to be valid child of @dparent
 * @dparent: The parent dentry (known to be valid)
 *
 * An insecure source has sent us a dentry, here we verify it and dget() it.
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned if the dentry is invalid.
 *
 * This function is slow for big directories, and deprecated, do not use it.
 */
int d_validate(struct dentry *dentry, struct dentry *dparent)
{
	struct dentry *child;

	spin_lock(&dparent->d_lock);
	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
		if (dentry == child) {
			spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
			__dget_dlock(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&dparent->d_lock);
			return 1;
		}
	}
	spin_unlock(&dparent->d_lock);

	return 0;
}
EXPORT_SYMBOL(d_validate);
static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
{
	BUG_ON(!d_unhashed(entry));
	spin_lock_bucket(b);
	entry->d_flags &= ~DCACHE_UNHASHED;
	hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
	spin_unlock_bucket(b);
}

static void _d_rehash(struct dentry * entry)
{
	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}

/**
 * d_rehash - add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name.
 */

void d_rehash(struct dentry * entry)
{
	spin_lock(&entry->d_lock);
	_d_rehash(entry);
	spin_unlock(&entry->d_lock);
}
EXPORT_SYMBOL(d_rehash);

/**
 * dentry_update_name_case - update case insensitive dentry with a new name
 * @dentry: dentry to be updated
 * @name: new name
 *
 * Update a case insensitive dentry with new case of name.
 *
 * dentry must have been returned by d_lookup with name @name. Old and new
 * name lengths must match (i.e. no d_compare which allows mismatched name
 * lengths).
 *
 * Parent inode i_mutex must be held over d_lookup and into this call (to
 * keep renames and concurrent inserts, and readdir(2) away).
 */
void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
{
	BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */

	spin_lock(&dentry->d_lock);
	write_seqcount_begin(&dentry->d_seq);
	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
	write_seqcount_end(&dentry->d_seq);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(dentry_update_name_case);
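
/*
 * Usage sketch (illustrative only; "disk_name" is a made-up variable,
 * and the replacement name must have the same length): a
 * case-preserving, case-insensitive filesystem that learns the on-disk
 * spelling of a name during lookup can canonicalise the cached dentry,
 * holding the parent's i_mutex across both calls:
 *
 *	struct dentry *dentry = d_lookup(dir, name);
 *
 *	if (dentry) {
 *		name->name = disk_name;
 *		dentry_update_name_case(dentry, name);
 *		dput(dentry);
 *	}
 */
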
static void switch_names(struct dentry *dentry, struct dentry *target)
{
	if (dname_external(target)) {
		if (dname_external(dentry)) {
			/*
			 * Both external: swap the pointers
			 */
			swap(target->d_name.name, dentry->d_name.name);
		} else {
			/*
			 * dentry:internal, target:external. Steal target's
			 * storage and make target internal.
			 */
			memcpy(target->d_iname, dentry->d_name.name,
					dentry->d_name.len + 1);
			dentry->d_name.name = target->d_name.name;
			target->d_name.name = target->d_iname;
		}
	} else {
		if (dname_external(dentry)) {
			/*
			 * dentry:external, target:internal. Give dentry's
			 * storage to target and make dentry internal
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			target->d_name.name = dentry->d_name.name;
			dentry->d_name.name = dentry->d_iname;
		} else {
			/*
			 * Both are internal. Just copy target to dentry
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			dentry->d_name.len = target->d_name.len;
			return;
		}
	}
	swap(dentry->d_name.len, target->d_name.len);
}

static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
{
	/*
	 * XXXX: do we really need to take target->d_lock?
	 */
	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
		spin_lock(&target->d_parent->d_lock);
	else {
		if (d_ancestor(dentry->d_parent, target->d_parent)) {
			spin_lock(&dentry->d_parent->d_lock);
			spin_lock_nested(&target->d_parent->d_lock,
						DENTRY_D_LOCK_NESTED);
		} else {
			spin_lock(&target->d_parent->d_lock);
			spin_lock_nested(&dentry->d_parent->d_lock,
						DENTRY_D_LOCK_NESTED);
		}
	}
	if (target < dentry) {
		spin_lock_nested(&target->d_lock, 2);
		spin_lock_nested(&dentry->d_lock, 3);
	} else {
		spin_lock_nested(&dentry->d_lock, 2);
		spin_lock_nested(&target->d_lock, 3);
	}
}

static void dentry_unlock_parents_for_move(struct dentry *dentry,
					struct dentry *target)
{
	if (target->d_parent != dentry->d_parent)
		spin_unlock(&dentry->d_parent->d_lock);
	if (target->d_parent != target)
		spin_unlock(&target->d_parent->d_lock);
}

/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway. As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on..
 */
/*
 * d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way.
 */
void d_move(struct dentry * dentry, struct dentry * target)
{
	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	BUG_ON(d_ancestor(dentry, target));
	BUG_ON(d_ancestor(target, dentry));

	write_seqlock(&rename_lock);

	dentry_lock_for_move(dentry, target);

	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin(&target->d_seq);

	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */

	/*
	 * Move the dentry to the target hash queue. Don't bother checking
	 * for the same hash queue because of how unlikely it is.
	 */
	__d_drop(dentry);
	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	swap(dentry->d_name.hash, target->d_name.hash);

	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		swap(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);

	write_seqcount_end(&target->d_seq);
	write_seqcount_end(&dentry->d_seq);

	dentry_unlock_parents_for_move(dentry, target);
	spin_unlock(&target->d_lock);
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
	write_sequnlock(&rename_lock);
}
EXPORT_SYMBOL(d_move);
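
/*
 * Usage sketch (illustrative only; locks and error paths elided):
 * after a filesystem's ->rename() succeeds, the VFS moves the old name
 * onto the new one, roughly as vfs_rename() does:
 *
 *	error = old_dir->i_op->rename(old_dir, old_dentry,
 *				      new_dir, new_dentry);
 *	if (!error)
 *		d_move(old_dentry, new_dentry);
 */
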
/**
 * d_ancestor - search for an ancestor
 * @p1: ancestor dentry
 * @p2: child dentry
 *
 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
 * an ancestor of p2, else NULL.
 */
struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
{
	struct dentry *p;

	for (p = p2; !IS_ROOT(p); p = p->d_parent) {
		if (p->d_parent == p1)
			return p;
	}
	return NULL;
}

/*
 * This helper attempts to cope with remotely renamed directories
 *
 * It assumes that the caller is already holding
 * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
 *
 * Note: If ever the locking in lock_rename() changes, then please
 * remember to update this too...
 */
static struct dentry *__d_unalias(struct inode *inode,
		struct dentry *dentry, struct dentry *alias)
{
	struct mutex *m1 = NULL, *m2 = NULL;
	struct dentry *ret;

	/* If alias and dentry share a parent, then no extra locks required */
	if (alias->d_parent == dentry->d_parent)
		goto out_unalias;

	/* Check for loops */
	ret = ERR_PTR(-ELOOP);
	if (d_ancestor(alias, dentry))
		goto out_err;

	/* See lock_rename() */
	ret = ERR_PTR(-EBUSY);
	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
		goto out_err;
	m1 = &dentry->d_sb->s_vfs_rename_mutex;
	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
		goto out_err;
	m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
	d_move(alias, dentry);
	ret = alias;
out_err:
	spin_unlock(&inode->i_lock);
	if (m2)
		mutex_unlock(m2);
	if (m1)
		mutex_unlock(m1);
	return ret;
}

/*
 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
 * named dentry in place of the dentry to be replaced.
 * returns with anon->d_lock held!
 */
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
	struct dentry *dparent, *aparent;

	dentry_lock_for_move(anon, dentry);

	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin(&anon->d_seq);

	dparent = dentry->d_parent;
	aparent = anon->d_parent;

	switch_names(dentry, anon);
	swap(dentry->d_name.hash, anon->d_name.hash);

	dentry->d_parent = (aparent == anon) ? dentry : aparent;
	list_del(&dentry->d_u.d_child);
	if (!IS_ROOT(dentry))
		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	else
		INIT_LIST_HEAD(&dentry->d_u.d_child);

	anon->d_parent = (dparent == dentry) ? anon : dparent;
	list_del(&anon->d_u.d_child);
	if (!IS_ROOT(anon))
		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
	else
		INIT_LIST_HEAD(&anon->d_u.d_child);

	write_seqcount_end(&dentry->d_seq);
	write_seqcount_end(&anon->d_seq);

	dentry_unlock_parents_for_move(anon, dentry);
	spin_unlock(&dentry->d_lock);

	/* anon->d_lock still locked, returns locked */
	anon->d_flags &= ~DCACHE_DISCONNECTED;
}

/**
 * d_materialise_unique - introduce an inode into the tree
 * @dentry: candidate dentry
 * @inode: inode to bind to the dentry, to which aliases may be attached
 *
 * Introduces a dentry into the tree, substituting an extant disconnected
 * root directory alias in its place if there is one.
 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
	struct dentry *actual;

	BUG_ON(!d_unhashed(dentry));

	if (!inode) {
		actual = dentry;
		__d_instantiate(dentry, NULL);
		d_rehash(actual);
		goto out_nolock;
	}

	spin_lock(&inode->i_lock);

	if (S_ISDIR(inode->i_mode)) {
		struct dentry *alias;

		/* Does an aliased dentry already exist? */
		alias = __d_find_alias(inode, 0);
		if (alias) {
			actual = alias;
			/* Is this an anonymous mountpoint that we could splice
			 * into our tree? */
			if (IS_ROOT(alias)) {
				__d_materialise_dentry(dentry, alias);
				__d_drop(alias);
				goto found;
			}
			/* Nope, but we must(!) avoid directory aliasing */
			actual = __d_unalias(inode, dentry, alias);
			if (IS_ERR(actual))
				dput(alias);
			goto out_nolock;
		}
	}

	/* Add a unique reference */
	actual = __d_instantiate_unique(dentry, inode);
	if (!actual)
		actual = dentry;
	else
		BUG_ON(!d_unhashed(actual));

	spin_lock(&actual->d_lock);
found:
	_d_rehash(actual);
	spin_unlock(&actual->d_lock);
	spin_unlock(&inode->i_lock);
out_nolock:
	if (actual == dentry) {
		security_d_instantiate(dentry, inode);
		return NULL;
	}

	iput(inode);
	return actual;
}
EXPORT_SYMBOL_GPL(d_materialise_unique);
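
/*
 * Usage sketch (illustrative only): a network filesystem's ->lookup()
 * typically finishes with d_materialise_unique() (cf. NFS). A NULL
 * return means the passed-in dentry itself was used; a non-NULL,
 * non-error return is a pre-existing alias that was spliced in and
 * must be handed back to the VFS instead:
 *
 *	res = d_materialise_unique(dentry, inode);
 *	if (IS_ERR(res))
 *		goto out_err;
 *	return res;
 */
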
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
	*buflen -= namelen;
	if (*buflen < 0)
		return -ENAMETOOLONG;
	*buffer -= namelen;
	memcpy(*buffer, str, namelen);
	return 0;
}

static int prepend_name(char **buffer, int *buflen, struct qstr *name)
{
	return prepend(buffer, buflen, name->name, name->len);
}
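
/*
 * Worked example (values illustrative): prepend() fills the buffer
 * backwards, from the end towards the start, which is why the
 * path-building code below passes around a pointer to the *end* of
 * the buffer. With a 16-byte buffer, building "/usr" proceeds:
 *
 *	buffer = buf + 16, buflen = 16
 *	prepend(&buffer, &buflen, "\0", 1);	buflen = 15
 *	prepend(&buffer, &buflen, "usr", 3);	buflen = 12
 *	prepend(&buffer, &buflen, "/", 1);	buflen = 11
 *
 * leaving "/usr\0" in buf[11..15] and buffer pointing at the '/'.
 */
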
/**
 * prepend_path - prepend the path string of a dentry/vfsmount pair to a buffer
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry (may be modified by this function)
 * @buffer: pointer to the end of the buffer
 * @buflen: pointer to buffer length
 *
 * Caller holds the rename_lock.
 *
 * If path is not reachable from the supplied root, then the value of
 * root is changed (without modifying refcounts).
 */
static int prepend_path(const struct path *path, struct path *root,
			char **buffer, int *buflen)
{
	struct dentry *dentry = path->dentry;
	struct vfsmount *vfsmnt = path->mnt;
	bool slash = false;
	int error = 0;

	br_read_lock(vfsmount_lock);
	while (dentry != root->dentry || vfsmnt != root->mnt) {
		struct dentry * parent;

		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root? */
			if (vfsmnt->mnt_parent == vfsmnt) {
				goto global_root;
			}
			dentry = vfsmnt->mnt_mountpoint;
			vfsmnt = vfsmnt->mnt_parent;
			continue;
		}
		parent = dentry->d_parent;
		prefetch(parent);
		spin_lock(&dentry->d_lock);
		error = prepend_name(buffer, buflen, &dentry->d_name);
		spin_unlock(&dentry->d_lock);
		if (!error)
			error = prepend(buffer, buflen, "/", 1);
		if (error)
			break;

		slash = true;
		dentry = parent;
	}

out:
	if (!error && !slash)
		error = prepend(buffer, buflen, "/", 1);

	br_read_unlock(vfsmount_lock);
	return error;

global_root:
	/*
	 * Filesystems needing to implement special "root names"
	 * should do so with ->d_dname()
	 */
	if (IS_ROOT(dentry) &&
	    (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
		WARN(1, "Root dentry has weird name <%.*s>\n",
		     (int) dentry->d_name.len, dentry->d_name.name);
	}
	root->mnt = vfsmnt;
	root->dentry = dentry;
	goto out;
}

/**
 * __d_path - return the path of a dentry
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry (may be modified by this function)
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name.
 *
 * Returns a pointer into the buffer or an error code if the
 * path was too long.
 *
 * "buflen" should be positive.
 *
 * If path is not reachable from the supplied root, then the value of
 * root is changed (without modifying refcounts).
 */
char *__d_path(const struct path *path, struct path *root,
	       char *buf, int buflen)
{
	char *res = buf + buflen;
	int error;

	prepend(&res, &buflen, "\0", 1);
	write_seqlock(&rename_lock);
	error = prepend_path(path, root, &res, &buflen);
	write_sequnlock(&rename_lock);

	if (error)
		return ERR_PTR(error);
	return res;
}

/*
 * Same as __d_path but appends " (deleted)" for unlinked files.
 */
static int path_with_deleted(const struct path *path, struct path *root,
			     char **buf, int *buflen)
{
	prepend(buf, buflen, "\0", 1);
	if (d_unlinked(path->dentry)) {
		int error = prepend(buf, buflen, " (deleted)", 10);
		if (error)
			return error;
	}

	return prepend_path(path, root, buf, buflen);
}

static int prepend_unreachable(char **buffer, int *buflen)
{
	return prepend(buffer, buflen, "(unreachable)", 13);
}

/**
 * d_path - return the path of a dentry
 * @path: path to report
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns a pointer into the buffer or an error code if the path was
 * too long. Note: Callers should use the returned pointer, not the passed
 * in buffer, to use the name! The implementation often starts at an offset
 * into the buffer, and may leave 0 bytes at the start.
 *
 * "buflen" should be positive.
 */
char *d_path(const struct path *path, char *buf, int buflen)
{
	char *res = buf + buflen;
	struct path root;
	struct path tmp;
	int error;

	/*
	 * We have various synthetic filesystems that never get mounted. On
	 * these filesystems dentries are never used for lookup purposes, and
	 * thus don't need to be hashed. They also don't need a name until a
	 * user wants to identify the object in /proc/pid/fd/. The little hack
	 * below allows us to generate a name for these objects on demand:
	 */
	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	get_fs_root(current->fs, &root);
	write_seqlock(&rename_lock);
	tmp = root;
	error = path_with_deleted(path, &tmp, &res, &buflen);
	if (error)
		res = ERR_PTR(error);
	write_sequnlock(&rename_lock);
	path_put(&root);
	return res;
}
EXPORT_SYMBOL(d_path);
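
/*
 * Usage sketch (illustrative only): the usual calling pattern, e.g.
 * for logging the path of an open file. Only the returned pointer is
 * meaningful; the name is built at the end of the buffer:
 *
 *	char *tmp = (char *)__get_free_page(GFP_KERNEL);
 *	char *p;
 *
 *	if (!tmp)
 *		return -ENOMEM;
 *	p = d_path(&file->f_path, tmp, PAGE_SIZE);
 *	if (!IS_ERR(p))
 *		printk(KERN_DEBUG "name: %s\n", p);
 *	free_page((unsigned long)tmp);
 */
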
/**
 * d_path_with_unreachable - return the path of a dentry
 * @path: path to report
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * The difference from d_path() is that this prepends "(unreachable)"
 * to paths which are unreachable from the current process' root.
 */
char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
{
	char *res = buf + buflen;
	struct path root;
	struct path tmp;
	int error;

	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	get_fs_root(current->fs, &root);
	write_seqlock(&rename_lock);
	tmp = root;
	error = path_with_deleted(path, &tmp, &res, &buflen);
	if (!error && !path_equal(&tmp, &root))
		error = prepend_unreachable(&res, &buflen);
	write_sequnlock(&rename_lock);
	path_put(&root);
	if (error)
		res = ERR_PTR(error);

	return res;
}

/*
 * Helper function for dentry_operations.d_dname() members
 */
char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
			const char *fmt, ...)
{
	va_list args;
	char temp[64];
	int sz;

	va_start(args, fmt);
	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
	va_end(args);

	if (sz > sizeof(temp) || sz > buflen)
		return ERR_PTR(-ENAMETOOLONG);

	buffer += buflen - sz;
	return memcpy(buffer, temp, sz);
}
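
/*
 * Usage sketch: a ->d_dname() implementation for a synthetic
 * filesystem, in the style of pipefs (cf. fs/pipe.c):
 *
 *	static char *pipefs_dname(struct dentry *dentry, char *buffer,
 *				  int buflen)
 *	{
 *		return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
 *				dentry->d_inode->i_ino);
 *	}
 */
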
/*
 * Write full pathname from the root of the filesystem into the buffer.
 */
static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *end = buf + buflen;
	char *retval;

	prepend(&end, &buflen, "\0", 1);
	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	while (!IS_ROOT(dentry)) {
		struct dentry *parent = dentry->d_parent;
		int error;

		prefetch(parent);
		spin_lock(&dentry->d_lock);
		error = prepend_name(&end, &buflen, &dentry->d_name);
		spin_unlock(&dentry->d_lock);
		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
			goto Elong;

		retval = end;
		dentry = parent;
	}
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}

char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
{
	char *retval;

	write_seqlock(&rename_lock);
	retval = __dentry_path(dentry, buf, buflen);
	write_sequnlock(&rename_lock);

	return retval;
}
EXPORT_SYMBOL(dentry_path_raw);

char *dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *p = NULL;
	char *retval;

	write_seqlock(&rename_lock);
	if (d_unlinked(dentry)) {
		p = buf + buflen;
		if (prepend(&p, &buflen, "//deleted", 10) != 0)
			goto Elong;
		buflen++;
	}
	retval = __dentry_path(dentry, buf, buflen);
	write_sequnlock(&rename_lock);
	if (!IS_ERR(retval) && p)
		*p = '/';	/* restore '/' overridden with '\0' */
	return retval;
Elong:
	/* don't leak rename_lock on the error path */
	write_sequnlock(&rename_lock);
	return ERR_PTR(-ENAMETOOLONG);
}

/*
 * NOTE! The user-level library version returns a
 * character pointer. The kernel system call just
 * returns the length of the buffer filled (which
 * includes the ending '\0' character), or a negative
 * error value. So libc would do something like
 *
 *	char *getcwd(char * buf, size_t size)
 *	{
 *		int retval;
 *
 *		retval = sys_getcwd(buf, size);
 *		if (retval >= 0)
 *			return buf;
 *		errno = -retval;
 *		return NULL;
 *	}
 */
SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
{
	int error;
	struct path pwd, root;
	char *page = (char *) __get_free_page(GFP_USER);

	if (!page)
		return -ENOMEM;

	get_fs_root_and_pwd(current->fs, &root, &pwd);

	error = -ENOENT;
	write_seqlock(&rename_lock);
	if (!d_unlinked(pwd.dentry)) {
		unsigned long len;
		struct path tmp = root;
		char *cwd = page + PAGE_SIZE;
		int buflen = PAGE_SIZE;

		prepend(&cwd, &buflen, "\0", 1);
		error = prepend_path(&pwd, &tmp, &cwd, &buflen);
		write_sequnlock(&rename_lock);

		if (error)
			goto out;

		/* Unreachable from current root */
		if (!path_equal(&tmp, &root)) {
			error = prepend_unreachable(&cwd, &buflen);
			if (error)
				goto out;
		}

		error = -ERANGE;
		len = PAGE_SIZE + page - cwd;
		if (len <= size) {
			error = len;
			if (copy_to_user(buf, cwd, len))
				error = -EFAULT;
		}
	} else {
		write_sequnlock(&rename_lock);
	}

out:
	path_put(&pwd);
	path_put(&root);
	free_page((unsigned long) page);
	return error;
}

/*
 * Test whether new_dentry is a subdirectory of old_dentry.
 *
 * Trivially implemented using the dcache structure
 */

/**
 * is_subdir - is new dentry a subdirectory of old_dentry
 * @new_dentry: new dentry
 * @old_dentry: old dentry
 *
 * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth).
 * Returns 0 otherwise.
 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
 */

int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
	int result;
	unsigned seq;

	if (new_dentry == old_dentry)
		return 1;

	do {
		/* restart the whole check in case of a seqlock retry */
		seq = read_seqbegin(&rename_lock);
		/*
		 * Need rcu_read_lock() to protect against the d_parent
		 * pointers being changed under us by d_move()
		 */
		rcu_read_lock();
		if (d_ancestor(old_dentry, new_dentry))
			result = 1;
		else
			result = 0;
		rcu_read_unlock();
	} while (read_seqretry(&rename_lock, seq));

	return result;
}

int path_is_under(struct path *path1, struct path *path2)
{
	struct vfsmount *mnt = path1->mnt;
	struct dentry *dentry = path1->dentry;
	int res;

	br_read_lock(vfsmount_lock);
	if (mnt != path2->mnt) {
		for (;;) {
			if (mnt->mnt_parent == mnt) {
				br_read_unlock(vfsmount_lock);
				return 0;
			}
			if (mnt->mnt_parent == path2->mnt)
				break;
			mnt = mnt->mnt_parent;
		}
		dentry = mnt->mnt_mountpoint;
	}
	res = is_subdir(dentry, path2->dentry);
	br_read_unlock(vfsmount_lock);
	return res;
}
EXPORT_SYMBOL(path_is_under);

void d_genocide(struct dentry *root)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = root;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		if (d_unhashed(dentry) || !dentry->d_inode) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
			dentry->d_flags |= DCACHE_GENOCIDE;
			dentry->d_count--;
		}
		spin_unlock(&dentry->d_lock);
	}
	if (this_parent != root) {
		struct dentry *tmp;
		struct dentry *child;

		tmp = this_parent->d_parent;
		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
			this_parent->d_flags |= DCACHE_GENOCIDE;
			this_parent->d_count--;
		}
		rcu_read_lock();
		spin_unlock(&this_parent->d_lock);
		child = this_parent;
		this_parent = tmp;
		spin_lock(&this_parent->d_lock);
		/* might go back up the wrong parent if we have had a rename
		 * or deletion */
		if (this_parent != child->d_parent ||
		    (!locked && read_seqretry(&rename_lock, seq))) {
			spin_unlock(&this_parent->d_lock);
			rcu_read_unlock();
			goto rename_retry;
		}
		rcu_read_unlock();
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return;

rename_retry:
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}

/**
 * find_inode_number - check for dentry with name
 * @dir: directory to check
 * @name: Name to find.
 *
 * Check whether a dentry already exists for the given name,
 * and return the inode number if it has an inode. Otherwise
 * 0 is returned.
 *
 * This routine is used to post-process directory listings for
 * filesystems using synthetic inode numbers, and is necessary
 * to keep getcwd() working.
 */
ino_t find_inode_number(struct dentry *dir, struct qstr *name)
{
	struct dentry * dentry;
	ino_t ino = 0;

	dentry = d_hash_and_lookup(dir, name);
	if (dentry) {
		if (dentry->d_inode)
			ino = dentry->d_inode->i_ino;
		dput(dentry);
	}
	return ino;
}
EXPORT_SYMBOL(find_inode_number);

static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
	if (!str)
		return 0;
	dhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("dhash_entries=", set_dhash_entries);
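
/*
 * Example: the dcache hash size can be forced from the kernel command
 * line, e.g. booting with
 *
 *	dhash_entries=65536
 *
 * feeds the value through set_dhash_entries() above into the
 * alloc_large_system_hash() calls below; when it is left at 0 (the
 * default), the hash is sized automatically from available memory.
 */
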
static void __init dcache_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct dcache_hash_bucket),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}

static void __init dcache_init(void)
{
	int loop;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);

	register_shrinker(&dcache_shrinker);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct dcache_hash_bucket),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}

/* SLAB cache for __getname() consumers */
struct kmem_cache *names_cachep __read_mostly;
EXPORT_SYMBOL(names_cachep);

EXPORT_SYMBOL(d_genocide);

void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}

void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */

	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init();
	inode_init();
	files_init(mempages);
	mnt_init();
	bdev_cache_init();
	chrdev_init();
}