1 /* 2 * fs/dcache.c 3 * 4 * Complete reimplementation 5 * (C) 1997 Thomas Schoebel-Theuer, 6 * with heavy changes by Linus Torvalds 7 */ 8 9 /* 10 * Notes on the allocation strategy: 11 * 12 * The dcache is a master of the icache - whenever a dcache entry 13 * exists, the inode will always exist. "iput()" is done either when 14 * the dcache entry is deleted or garbage collected. 15 */ 16 17 #include <linux/syscalls.h> 18 #include <linux/string.h> 19 #include <linux/mm.h> 20 #include <linux/fs.h> 21 #include <linux/fsnotify.h> 22 #include <linux/slab.h> 23 #include <linux/init.h> 24 #include <linux/hash.h> 25 #include <linux/cache.h> 26 #include <linux/export.h> 27 #include <linux/mount.h> 28 #include <linux/file.h> 29 #include <asm/uaccess.h> 30 #include <linux/security.h> 31 #include <linux/seqlock.h> 32 #include <linux/swap.h> 33 #include <linux/bootmem.h> 34 #include <linux/fs_struct.h> 35 #include <linux/hardirq.h> 36 #include <linux/bit_spinlock.h> 37 #include <linux/rculist_bl.h> 38 #include <linux/prefetch.h> 39 #include <linux/ratelimit.h> 40 #include "internal.h" 41 #include "mount.h" 42 43 /* 44 * Usage: 45 * dcache->d_inode->i_lock protects: 46 * - i_dentry, d_alias, d_inode of aliases 47 * dcache_hash_bucket lock protects: 48 * - the dcache hash table 49 * s_anon bl list spinlock protects: 50 * - the s_anon list (see __d_drop) 51 * dcache_lru_lock protects: 52 * - the dcache lru lists and counters 53 * d_lock protects: 54 * - d_flags 55 * - d_name 56 * - d_lru 57 * - d_count 58 * - d_unhashed() 59 * - d_parent and d_subdirs 60 * - childrens' d_child and d_parent 61 * - d_alias, d_inode 62 * 63 * Ordering: 64 * dentry->d_inode->i_lock 65 * dentry->d_lock 66 * dcache_lru_lock 67 * dcache_hash_bucket lock 68 * s_anon lock 69 * 70 * If there is an ancestor relationship: 71 * dentry->d_parent->...->d_parent->d_lock 72 * ... 
 * dentry->d_parent->d_lock
 * dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */

/*
 * /proc/sys/vm/vfs_cache_pressure knob: scales how aggressively dentries
 * and inodes are reclaimed relative to pagecache (100 = default balance).
 */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

static struct kmem_cache *dentry_cache __read_mostly;

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS	d_hash_shift
#define D_HASHMASK	d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

static struct hlist_bl_head *dentry_hashtable __read_mostly;

/*
 * Map (parent, name-hash) to a hash chain head.  The parent pointer is
 * folded into the name hash so that same-named entries under different
 * directories land in different buckets.
 */
static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
					unsigned int hash)
{
	hash += (unsigned long) parent / L1_CACHE_BYTES;
	hash = hash + (hash >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

static DEFINE_PER_CPU(unsigned int, nr_dentry);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
/*
 * Sum the per-cpu dentry counters.  The sum can transiently go negative
 * when a dentry is allocated on one cpu and freed on another, so clamp
 * to zero rather than report nonsense to userspace.
 */
static int get_nr_dentry(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	return sum < 0 ? 0 : sum;
}

/* sysctl handler for fs.dentry-state: refresh the count, then report. */
int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif

/*
 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 * The strings are both count bytes long, and count is non-zero.
 */
#ifdef CONFIG_DCACHE_WORD_ACCESS

#include <asm/word-at-a-time.h>
/*
 * NOTE! 'cs' and 'scount' come from a dentry, so it has a
 * aligned allocation for this particular component. We don't
 * strictly need the load_unaligned_zeropad() safety, but it
 * doesn't hurt either.
 *
 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 * need the careful unaligned handling.
 */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	unsigned long a,b,mask;

	for (;;) {
		a = *(unsigned long *)cs;
		b = load_unaligned_zeropad(ct);
		if (tcount < sizeof(unsigned long))
			break;
		if (unlikely(a != b))
			return 1;
		cs += sizeof(unsigned long);
		ct += sizeof(unsigned long);
		tcount -= sizeof(unsigned long);
		if (!tcount)
			return 0;
	}
	/* Final partial word: compare only the low 'tcount' bytes. */
	mask = ~(~0ul << tcount*8);
	return unlikely(!!((a ^ b) & mask));
}

#else

/* Byte-at-a-time fallback when word-at-a-time access isn't available. */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	do {
		if (*cs != *ct)
			return 1;
		cs++;
		ct++;
		tcount--;
	} while (tcount);
	return 0;
}

#endif

/*
 * Compare a dentry's name against a (ct, tcount) candidate name.
 * Safe to call under rcu-walk; see the note below on why a racing
 * rename cannot make us overrun either buffer.
 */
static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
{
	const unsigned char *cs;
	/*
	 * Be careful about RCU walk racing with rename:
	 * use ACCESS_ONCE to fetch the name pointer.
	 *
	 * NOTE! Even if a rename will mean that the length
	 * was not loaded atomically, we don't care. The
	 * RCU walk will check the sequence count eventually,
	 * and catch it. And we won't overrun the buffer,
	 * because we're reading the name pointer atomically,
	 * and a dentry name is guaranteed to be properly
	 * terminated with a NUL byte.
	 *
	 * End result: even if 'len' is wrong, we'll exit
	 * early because the data cannot match (there can
	 * be no NUL in the ct/tcount data)
	 */
	cs = ACCESS_ONCE(dentry->d_name.name);
	smp_read_barrier_depends();
	return dentry_string_cmp(cs, ct, tcount);
}

/*
 * Final teardown, possibly deferred via RCU: free an externally
 * allocated name (if any), then the dentry itself.  The dentry must
 * already be off its inode's alias list.
 */
static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	WARN_ON(!hlist_unhashed(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

/*
 * no locks, please.
 */
static void d_free(struct dentry *dentry)
{
	BUG_ON(dentry->d_lockref.count);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never visible to RCU, immediate free is OK */
	if (!(dentry->d_flags & DCACHE_RCUACCESS))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}

/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier */
	write_seqcount_barrier(&dentry->d_seq);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		hlist_del_init(&dentry->d_alias);
		/* drop both locks before calling out to fsnotify/d_iput */
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
	}
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	dentry->d_inode = NULL;
	hlist_del_init(&dentry->d_alias);
	/* dentry stays visible, so rcu-walkers must be told d_inode changed */
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}

/*
 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
 */
static void dentry_lru_add(struct dentry *dentry)
{
	if (list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
		spin_unlock(&dcache_lru_lock);
	}
}

/* Unlink from the LRU and fix up counters; dcache_lru_lock held. */
static void __dentry_lru_del(struct dentry *dentry)
{
	list_del_init(&dentry->d_lru);
	dentry->d_flags &= ~DCACHE_SHRINK_LIST;
	dentry->d_sb->s_nr_dentry_unused--;
	dentry_stat.nr_unused--;
}

/*
 * Remove a dentry with references from the LRU.
 */
static void dentry_lru_del(struct dentry *dentry)
{
	if (!list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		__dentry_lru_del(dentry);
		spin_unlock(&dcache_lru_lock);
	}
}

/*
 * Move the dentry to the tail of the given (shrink) list, adding it to
 * the LRU accounting first if it was not on the LRU at all.
 */
static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
{
	spin_lock(&dcache_lru_lock);
	if (list_empty(&dentry->d_lru)) {
		list_add_tail(&dentry->d_lru, list);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
	} else {
		list_move_tail(&dentry->d_lru, list);
	}
	spin_unlock(&dcache_lru_lock);
}

/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
	__releases(dentry->d_lock)
	__releases(parent->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	list_del(&dentry->d_u.d_child);
	/*
	 * Inform try_to_ascend() that we are no longer attached to the
	 * dentry tree
	 */
	dentry->d_flags |= DCACHE_DENTRY_KILLED;
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	d_free(dentry);
	return parent;
}

/*
 * Unhash a dentry without inserting an RCU walk barrier or checking that
 * dentry->d_lock is locked. The caller must take care of that, if
 * appropriate.
 */
static void __d_shrink(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		struct hlist_bl_head *b;
		/* disconnected (anonymous) dentries live on s_anon, not the hash */
		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
			b = &dentry->d_sb->s_anon;
		else
			b = d_hash(dentry->d_parent, dentry->d_name.hash);

		hlist_bl_lock(b);
		__hlist_bl_del(&dentry->d_hash);
		dentry->d_hash.pprev = NULL;
		hlist_bl_unlock(b);
	}
}

/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		__d_shrink(dentry);
		dentry_rcuwalk_barrier(dentry);
	}
}
EXPORT_SYMBOL(__d_drop);

void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If ref is non-zero, then decrement the refcount too.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
	__releases(dentry->d_lock)
{
	struct inode *inode;
	struct dentry *parent;

	/*
	 * We need i_lock and the parent's d_lock, but they rank above
	 * d_lock, so only trylock is safe here; on failure drop
	 * everything and let the caller retry from scratch.
	 */
	inode = dentry->d_inode;
	if (inode && !spin_trylock(&inode->i_lock)) {
relock:
		spin_unlock(&dentry->d_lock);
		cpu_relax();
		return dentry; /* try again with same dentry */
	}
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	if (parent && !spin_trylock(&parent->d_lock)) {
		if (inode)
			spin_unlock(&inode->i_lock);
		goto relock;
	}

	if (ref)
		dentry->d_lockref.count--;
	/*
	 * inform the fs via d_prune that this dentry is about to be
	 * unhashed and destroyed.
	 */
	if (dentry->d_flags & DCACHE_OP_PRUNE)
		dentry->d_op->d_prune(dentry);

	dentry_lru_del(dentry);
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	return d_kill(dentry, parent);
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	if (dentry->d_lockref.count == 1)
		might_sleep();
	/* fast path: drop a reference without taking d_lock */
	if (lockref_put_or_lock(&dentry->d_lockref))
		return;

	/* we now hold d_lock with count == 1 */
	if (dentry->d_flags & DCACHE_OP_DELETE) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;

	dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_lockref.count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	/* dentry_kill() returns the parent (or the same dentry on
	 * trylock failure), which still needs its own reference dropped */
	dentry = dentry_kill(dentry, 1);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dentry->d_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dentry->d_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dentry->d_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dentry->d_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 * We also need to leave mountpoints alone,
	 * directory or not.
	 */
	if (dentry->d_lockref.count > 1 && dentry->d_inode) {
		if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);

/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_lockref.count++;
}

/* Grab a reference; takes the lockref (cmpxchg fast path) itself. */
static inline void __dget(struct dentry *dentry)
{
	lockref_get(&dentry->d_lockref);
}

/*
 * Pin and return the parent of @dentry, retrying if a concurrent
 * rename moves it while we take the parent's d_lock.
 */
struct dentry *dget_parent(struct dentry *dentry)
{
	struct dentry *ret;

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	BUG_ON(!ret->d_lockref.count);
	ret->d_lockref.count++;
	spin_unlock(&ret->d_lock);
	return ret;
}
EXPORT_SYMBOL(dget_parent);

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon:  flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
 		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				/* remember, but keep looking for a better one */
				discon_alias = alias;
			} else if (!want_discon) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		/* re-check: it was unlocked while we scanned the rest */
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

/* Locked wrapper around __d_find_alias(); see kerneldoc above. */
struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!hlist_empty(&inode->i_dentry)) {
		spin_lock(&inode->i_lock);
		de = __d_find_alias(inode, 0);
		spin_unlock(&inode->i_lock);
	}
	return de;
}
EXPORT_SYMBOL(d_find_alias);

/*
 *	Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_lockref.count) {
			/* pin it, unhash it, then let dput() reap it */
			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);

/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired no problem, just try again.
 */
static void try_prune_one_dentry(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct dentry *parent;

	parent = dentry_kill(dentry, 0);
	/*
	 * If dentry_kill returns NULL, we have nothing more to do.
	 * if it returns the same dentry, trylocks failed. In either
	 * case, just loop again.
	 *
	 * Otherwise, we need to prune ancestors too. This is necessary
	 * to prevent quadratic behavior of shrink_dcache_parent(), but
	 * is also expected to be beneficial in reducing dentry cache
	 * fragmentation.
	 */
	if (!parent)
		return;
	if (parent == dentry)
		return;

	/* Prune ancestors. */
	dentry = parent;
	while (dentry) {
		if (lockref_put_or_lock(&dentry->d_lockref))
			return;
		dentry = dentry_kill(dentry, 1);
	}
}

/*
 * Kill every zero-refcount dentry on @list.  RCU protects the list
 * walk itself: try_prune_one_dentry() drops all locks, so the list
 * head must be re-read each iteration.
 */
static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	rcu_read_lock();
	for (;;) {
		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
		if (&dentry->d_lru == list)
			break; /* empty */
		spin_lock(&dentry->d_lock);
		/* raced with another thread repositioning the entry? restart */
		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup.  Do not free
		 * it - just keep it off the LRU list.
		 */
		if (dentry->d_lockref.count) {
			dentry_lru_del(dentry);
			spin_unlock(&dentry->d_lock);
			continue;
		}

		rcu_read_unlock();

		try_prune_one_dentry(dentry);

		rcu_read_lock();
	}
	rcu_read_unlock();
}

/**
 * prune_dcache_sb - shrink the dcache
 * @sb: superblock
 * @count: number of entries to try to free
 *
 * Attempt to shrink the superblock dcache LRU by @count entries. This is
 * done when we need more memory an called from the superblock shrinker
 * function.
 *
 * This function may fail to free any resources if all the dentries are in
 * use.
 */
void prune_dcache_sb(struct super_block *sb, int count)
{
	struct dentry *dentry;
	LIST_HEAD(referenced);
	LIST_HEAD(tmp);

relock:
	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		dentry = list_entry(sb->s_dentry_lru.prev,
				struct dentry, d_lru);
		BUG_ON(dentry->d_sb != sb);

		/* d_lock ranks above dcache_lru_lock here, so only trylock */
		if (!spin_trylock(&dentry->d_lock)) {
			spin_unlock(&dcache_lru_lock);
			cpu_relax();
			goto relock;
		}

		if (dentry->d_flags & DCACHE_REFERENCED) {
			/* recently used: give it a second chance on the LRU */
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_move(&dentry->d_lru, &referenced);
			spin_unlock(&dentry->d_lock);
		} else {
			list_move_tail(&dentry->d_lru, &tmp);
			dentry->d_flags |= DCACHE_SHRINK_LIST;
			spin_unlock(&dentry->d_lock);
			if (!--count)
				break;
		}
		cond_resched_lock(&dcache_lru_lock);
	}
	if (!list_empty(&referenced))
		list_splice(&referenced, &sb->s_dentry_lru);
	spin_unlock(&dcache_lru_lock);

	shrink_dentry_list(&tmp);
}

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	LIST_HEAD(tmp);

	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		/* take the whole LRU at once; re-check as shrinking can re-add */
		list_splice_init(&sb->s_dentry_lru, &tmp);
		spin_unlock(&dcache_lru_lock);
		shrink_dentry_list(&tmp);
		spin_lock(&dcache_lru_lock);
	}
	spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);

/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 *   locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;

	BUG_ON(!IS_ROOT(dentry));

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs))
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			/*
			 * inform the fs that this dentry is about to be
			 * unhashed and destroyed.
			 */
			if (dentry->d_flags & DCACHE_OP_PRUNE)
				dentry->d_op->d_prune(dentry);

			dentry_lru_del(dentry);
			__d_shrink(dentry);

			/* anything still referenced at umount is a bug */
			if (dentry->d_lockref.count != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       dentry->d_lockref.count,
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			if (IS_ROOT(dentry)) {
				parent = NULL;
				list_del(&dentry->d_u.d_child);
			} else {
				parent = dentry->d_parent;
				/* drop the implicit child->parent reference */
				parent->d_lockref.count--;
				list_del(&dentry->d_u.d_child);
			}

			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				hlist_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				return;
			dentry = parent;
		} while (list_empty(&dentry->d_subdirs));

		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
}

/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock because:
 *   - the superblock is detached from all mountings and open files, so the
 *     dentry trees will not be rearranged by the VFS
 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 *     any dentries belonging to this superblock that it comes across
 *   - the filesystem itself is no longer permitted to rearrange the dentries
 *     in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	/* s_umount must already be write-locked by the caller */
	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	dentry->d_lockref.count--;
	shrink_dcache_for_umount_subtree(dentry);

	/* disconnected (NFS-export) roots live on s_anon */
	while (!hlist_bl_empty(&sb->s_anon)) {
		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}

/*
 * This tries to ascend one level
 * of parenthood, but
 * we can race with renaming, so we need to re-check
 * the parenthood after dropping the lock and check
 * that the sequence number still matches.
 */
static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
{
	struct dentry *new = old->d_parent;

	rcu_read_lock();
	spin_unlock(&old->d_lock);
	spin_lock(&new->d_lock);

	/*
	 * might go back up the wrong parent if we have had a rename
	 * or deletion
	 */
	if (new != old->d_parent ||
		 (old->d_flags & DCACHE_DENTRY_KILLED) ||
		 (!locked && read_seqretry(&rename_lock, seq))) {
		spin_unlock(&new->d_lock);
		new = NULL;
	}
	rcu_read_unlock();
	return new;
}


/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */
int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;

	/* optimistic seqlock read; fall back to write-locking on retry */
	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;

	if (d_mountpoint(parent))
		goto positive;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		/* Have we found a mount point ? */
		if (d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			spin_unlock(&this_parent->d_lock);
			goto positive;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			/* hand the nested-class lock over to lockdep as the
			 * new top-level lock before descending */
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *child = this_parent;
		this_parent = try_to_ascend(this_parent, locked, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 0; /* No mount points found in tree */
positive:
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 1;

rename_retry:
	if (locked)
		goto again;
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}
EXPORT_SYMBOL(have_submounts);

/*
 * Search the dentry child list of the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry *parent, struct list_head *dispose)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int found = 0;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

		/*
		 * move only zero ref count dentries to the dispose list.
		 *
		 * Those which are presently on the shrink list, being processed
		 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
		 * loop in shrink_dcache_parent() might not make any progress
		 * and loop forever.
		 */
		if (dentry->d_lockref.count) {
			dentry_lru_del(dentry);
		} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
			dentry_lru_move_list(dentry, dispose);
			dentry->d_flags |= DCACHE_SHRINK_LIST;
			found++;
		}
		/*
		 * We can return to the caller if we have found some (this
		 * ensures forward progress). We'll be coming back to find
		 * the rest.
		 */
		if (found && need_resched()) {
			spin_unlock(&dentry->d_lock);
			goto out;
		}

		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 *
		 * Hand-over-hand: the child's d_lock becomes the new
		 * "parent" lock.  The spin_release/spin_acquire pair only
		 * re-annotates the already-held child lock for lockdep
		 * (from nested subclass to outer level); no lock is
		 * actually dropped or retaken for the child.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}

		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *child = this_parent;
		this_parent = try_to_ascend(this_parent, locked, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
out:
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return found;

rename_retry:
	/*
	 * A concurrent rename invalidated our walk.  If we already moved
	 * some dentries to @dispose, report them so the caller makes
	 * progress; otherwise retry, escalating to the rename_lock write
	 * side on the second pass to exclude renames entirely.
	 */
	if (found)
		return found;
	if (locked)
		goto again;
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */
void shrink_dcache_parent(struct dentry * parent)
{
	LIST_HEAD(dispose);
	int found;

	/* Repeat until select_parent() finds no more unused children. */
	while ((found = select_parent(parent, &dispose)) != 0) {
		shrink_dentry_list(&dispose);
		cond_resched();
	}
}
EXPORT_SYMBOL(shrink_dcache_parent);

/**
 * __d_alloc	-	allocate a dcache entry
 * @sb: filesystem it will belong to
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */

struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	/*
	 * We guarantee that the inline name is always NUL-terminated.
	 * This way the memcpy() done by the name switching in rename
	 * will still always have a NUL at the end, even if we might
	 * be overwriting an internal NUL character
	 */
	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
	if (name->len > DNAME_INLINE_LEN-1) {
		/* Name too long for the inline buffer: allocate externally. */
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else {
		dname = dentry->d_iname;
	}

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	/*
	 * Make sure we always see the terminating NUL character: the
	 * barrier orders the name-buffer stores above against publishing
	 * the pointer below, for lockless (RCU) readers of d_name.name.
	 */
	smp_wmb();
	dentry->d_name.name = dname;

	dentry->d_lockref.count = 1;
	dentry->d_flags = 0;
	spin_lock_init(&dentry->d_lock);
	seqcount_init(&dentry->d_seq);
	dentry->d_inode = NULL;
	dentry->d_parent = dentry;	/* root of its own tree until d_alloc() reparents it */
	dentry->d_sb = sb;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_HLIST_NODE(&dentry->d_alias);
	INIT_LIST_HEAD(&dentry->d_u.d_child);
	d_set_d_op(dentry, dentry->d_sb->s_d_op);

	this_cpu_inc(nr_dentry);

	return dentry;
}

/**
 * d_alloc	-	allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */
struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry = __d_alloc(parent->d_sb, name);
	if (!dentry)
		return NULL;

	spin_lock(&parent->d_lock);
	/*
	 * don't need child lock because it is not subject
	 * to concurrency here
	 */
	__dget_dlock(parent);
	dentry->d_parent = parent;
	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
	spin_unlock(&parent->d_lock);

	return dentry;
}
EXPORT_SYMBOL(d_alloc);

/*
 * Allocate a parentless dentry marked DCACHE_DISCONNECTED, for
 * filesystems that need a dentry not (yet) attached to the tree.
 * Returns %NULL on allocation failure.
 */
struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry = __d_alloc(sb, name);
	if (dentry)
		dentry->d_flags |= DCACHE_DISCONNECTED;
	return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);

/*
 * Convenience wrapper around d_alloc() for a NUL-terminated C-string
 * name: builds the qstr (length + default name hash) and allocates.
 */
struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
	struct qstr q;

	q.name = name;
	q.len = strlen(name);
	q.hash = full_name_hash(q.name, q.len);
	return d_alloc(parent, &q);
}
EXPORT_SYMBOL(d_alloc_name);

/*
 * Install the dentry_operations for @dentry and cache the presence of
 * each op as a DCACHE_OP_* flag so hot paths can test d_flags instead
 * of chasing the d_op pointer.  May be set at most once per dentry
 * (hence the WARN_ON_ONCE checks).
 */
void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
	WARN_ON_ONCE(dentry->d_op);
	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
				DCACHE_OP_COMPARE	|
				DCACHE_OP_REVALIDATE	|
				DCACHE_OP_WEAK_REVALIDATE	|
				DCACHE_OP_DELETE ));
	dentry->d_op = op;
	if (!op)
		return;
	if (op->d_hash)
		dentry->d_flags |= DCACHE_OP_HASH;
	if (op->d_compare)
		dentry->d_flags |= DCACHE_OP_COMPARE;
	if (op->d_revalidate)
		dentry->d_flags |= DCACHE_OP_REVALIDATE;
	if (op->d_weak_revalidate)
		dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
	if (op->d_delete)
		dentry->d_flags |= DCACHE_OP_DELETE;
	if (op->d_prune)
		dentry->d_flags |= DCACHE_OP_PRUNE;

}
EXPORT_SYMBOL(d_set_d_op);

/*
 * Core of d_instantiate(): attach @inode to @dentry under d_lock.
 * Caller holds inode->i_lock when @inode is non-NULL (protects the
 * i_dentry alias list).  The rcuwalk barrier bumps d_seq so lockless
 * RCU-walk lookups notice the d_inode change.
 */
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	spin_lock(&dentry->d_lock);
	if (inode) {
		if (unlikely(IS_AUTOMOUNT(inode)))
			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
		hlist_add_head(&dentry->d_alias, &inode->i_dentry);
	}
	dentry->d_inode = inode;
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	fsnotify_d_instantiate(dentry, inode);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */

void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!hlist_unhashed(&entry->d_alias));
	if (inode)
		spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);
	security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);

/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
					     struct inode *inode)
{
	struct dentry *alias;
	int len = entry->d_name.len;
	const char *name = entry->d_name.name;
	unsigned int hash = entry->d_name.hash;

	if (!inode) {
		/* Negative dentry: nothing to alias against. */
		__d_instantiate(entry, NULL);
		return NULL;
	}

	/* Look for an existing alias with the same parent and name. */
	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
		/*
		 * Don't need alias->d_lock here, because aliases with
		 * d_parent == entry->d_parent are not subject to name or
		 * parent changes, because the parent inode i_mutex is held.
		 */
		if (alias->d_name.hash != hash)
			continue;
		if (alias->d_parent != entry->d_parent)
			continue;
		if (alias->d_name.len != len)
			continue;
		if (dentry_cmp(alias, name, len))
			continue;
		__dget(alias);
		return alias;
	}

	__d_instantiate(entry, inode);
	return NULL;
}

/* See the kerneldoc above __d_instantiate_unique() for the contract. */
struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *result;

	BUG_ON(!hlist_unhashed(&entry->d_alias));

	if (inode)
		spin_lock(&inode->i_lock);
	result = __d_instantiate_unique(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);

	if (!result) {
		security_d_instantiate(entry, inode);
		return NULL;
	}

	/* An alias was found: drop the caller's inode reference. */
	BUG_ON(!d_unhashed(result));
	iput(inode);
	return result;
}

EXPORT_SYMBOL(d_instantiate_unique);

/*
 * Allocate and instantiate a root ("/") dentry for @root_inode.
 * Consumes the inode reference in all cases: on allocation failure the
 * inode is iput() and %NULL is returned.
 */
struct dentry *d_make_root(struct inode *root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = QSTR_INIT("/", 1);

		res = __d_alloc(root_inode->i_sb, &name);
		if (res)
			d_instantiate(res, root_inode);
		else
			iput(root_inode);
	}
	return res;
}
EXPORT_SYMBOL(d_make_root);

/*
 * Grab a reference to the first alias on inode->i_dentry, or return
 * %NULL if there is none.  Caller holds inode->i_lock.
 */
static struct dentry * __d_find_any_alias(struct inode *inode)
{
	struct dentry *alias;

	if (hlist_empty(&inode->i_dentry))
		return NULL;
	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
	__dget(alias);
	return alias;
}

/**
 * d_find_any_alias - find any alias for a given inode
 * @inode: inode to find an alias for
 *
 * If any aliases exist for the given inode, take and return a
 * reference for one of them.  If no aliases exist, return %NULL.
 */
struct dentry *d_find_any_alias(struct inode *inode)
{
	struct dentry *de;

	spin_lock(&inode->i_lock);
	de = __d_find_any_alias(inode);
	spin_unlock(&inode->i_lock);
	return de;
}
EXPORT_SYMBOL(d_find_any_alias);

/**
 * d_obtain_alias - find or allocate a dentry for a given inode
 * @inode: inode to allocate the dentry for
 *
 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
 * similar open by handle operations.  The returned dentry may be anonymous,
 * or may have a full name (if the inode was already in the cache).
 *
 * When called on a directory inode, we must ensure that the inode only ever
 * has one dentry.  If a dentry is found, that is returned instead of
 * allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry.  In case of an error the reference on the inode is released.
 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in and the error will be propagated to the return value,
 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1537 */ 1538 struct dentry *d_obtain_alias(struct inode *inode) 1539 { 1540 static const struct qstr anonstring = QSTR_INIT("/", 1); 1541 struct dentry *tmp; 1542 struct dentry *res; 1543 1544 if (!inode) 1545 return ERR_PTR(-ESTALE); 1546 if (IS_ERR(inode)) 1547 return ERR_CAST(inode); 1548 1549 res = d_find_any_alias(inode); 1550 if (res) 1551 goto out_iput; 1552 1553 tmp = __d_alloc(inode->i_sb, &anonstring); 1554 if (!tmp) { 1555 res = ERR_PTR(-ENOMEM); 1556 goto out_iput; 1557 } 1558 1559 spin_lock(&inode->i_lock); 1560 res = __d_find_any_alias(inode); 1561 if (res) { 1562 spin_unlock(&inode->i_lock); 1563 dput(tmp); 1564 goto out_iput; 1565 } 1566 1567 /* attach a disconnected dentry */ 1568 spin_lock(&tmp->d_lock); 1569 tmp->d_inode = inode; 1570 tmp->d_flags |= DCACHE_DISCONNECTED; 1571 hlist_add_head(&tmp->d_alias, &inode->i_dentry); 1572 hlist_bl_lock(&tmp->d_sb->s_anon); 1573 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); 1574 hlist_bl_unlock(&tmp->d_sb->s_anon); 1575 spin_unlock(&tmp->d_lock); 1576 spin_unlock(&inode->i_lock); 1577 security_d_instantiate(tmp, inode); 1578 1579 return tmp; 1580 1581 out_iput: 1582 if (res && !IS_ERR(res)) 1583 security_d_instantiate(res, inode); 1584 iput(inode); 1585 return res; 1586 } 1587 EXPORT_SYMBOL(d_obtain_alias); 1588 1589 /** 1590 * d_splice_alias - splice a disconnected dentry into the tree if one exists 1591 * @inode: the inode which may have a disconnected dentry 1592 * @dentry: a negative dentry which we want to point to the inode. 1593 * 1594 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and 1595 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry 1596 * and return it, else simply d_add the inode to the dentry and return NULL. 1597 * 1598 * This is needed in the lookup routine of any filesystem that is exportable 1599 * (via knfsd) so that we can build dcache paths to directories effectively. 
1600 * 1601 * If a dentry was found and moved, then it is returned. Otherwise NULL 1602 * is returned. This matches the expected return value of ->lookup. 1603 * 1604 * Cluster filesystems may call this function with a negative, hashed dentry. 1605 * In that case, we know that the inode will be a regular file, and also this 1606 * will only occur during atomic_open. So we need to check for the dentry 1607 * being already hashed only in the final case. 1608 */ 1609 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) 1610 { 1611 struct dentry *new = NULL; 1612 1613 if (IS_ERR(inode)) 1614 return ERR_CAST(inode); 1615 1616 if (inode && S_ISDIR(inode->i_mode)) { 1617 spin_lock(&inode->i_lock); 1618 new = __d_find_alias(inode, 1); 1619 if (new) { 1620 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1621 spin_unlock(&inode->i_lock); 1622 security_d_instantiate(new, inode); 1623 d_move(new, dentry); 1624 iput(inode); 1625 } else { 1626 /* already taking inode->i_lock, so d_add() by hand */ 1627 __d_instantiate(dentry, inode); 1628 spin_unlock(&inode->i_lock); 1629 security_d_instantiate(dentry, inode); 1630 d_rehash(dentry); 1631 } 1632 } else { 1633 d_instantiate(dentry, inode); 1634 if (d_unhashed(dentry)) 1635 d_rehash(dentry); 1636 } 1637 return new; 1638 } 1639 EXPORT_SYMBOL(d_splice_alias); 1640 1641 /** 1642 * d_add_ci - lookup or allocate new dentry with case-exact name 1643 * @inode: the inode case-insensitive lookup has found 1644 * @dentry: the negative dentry that was passed to the parent's lookup func 1645 * @name: the case-exact name to be associated with the returned dentry 1646 * 1647 * This is to avoid filling the dcache with case-insensitive names to the 1648 * same inode, only the actual correct case is stored in the dcache for 1649 * case-insensitive filesystems. 1650 * 1651 * For a case-insensitive lookup match and if the the case-exact dentry 1652 * already exists in in the dcache, use it and return it. 
1653 * 1654 * If no entry exists with the exact case name, allocate new dentry with 1655 * the exact case, and return the spliced entry. 1656 */ 1657 struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, 1658 struct qstr *name) 1659 { 1660 struct dentry *found; 1661 struct dentry *new; 1662 1663 /* 1664 * First check if a dentry matching the name already exists, 1665 * if not go ahead and create it now. 1666 */ 1667 found = d_hash_and_lookup(dentry->d_parent, name); 1668 if (unlikely(IS_ERR(found))) 1669 goto err_out; 1670 if (!found) { 1671 new = d_alloc(dentry->d_parent, name); 1672 if (!new) { 1673 found = ERR_PTR(-ENOMEM); 1674 goto err_out; 1675 } 1676 1677 found = d_splice_alias(inode, new); 1678 if (found) { 1679 dput(new); 1680 return found; 1681 } 1682 return new; 1683 } 1684 1685 /* 1686 * If a matching dentry exists, and it's not negative use it. 1687 * 1688 * Decrement the reference count to balance the iget() done 1689 * earlier on. 1690 */ 1691 if (found->d_inode) { 1692 if (unlikely(found->d_inode != inode)) { 1693 /* This can't happen because bad inodes are unhashed. */ 1694 BUG_ON(!is_bad_inode(inode)); 1695 BUG_ON(!is_bad_inode(found->d_inode)); 1696 } 1697 iput(inode); 1698 return found; 1699 } 1700 1701 /* 1702 * Negative dentry: instantiate it unless the inode is a directory and 1703 * already has a dentry. 1704 */ 1705 new = d_splice_alias(inode, found); 1706 if (new) { 1707 dput(found); 1708 found = new; 1709 } 1710 return found; 1711 1712 err_out: 1713 iput(inode); 1714 return found; 1715 } 1716 EXPORT_SYMBOL(d_add_ci); 1717 1718 /* 1719 * Do the slow-case of the dentry name compare. 1720 * 1721 * Unlike the dentry_cmp() function, we need to atomically 1722 * load the name and length information, so that the 1723 * filesystem can rely on them, and can use the 'name' and 1724 * 'len' information without worrying about walking off the 1725 * end of memory etc. 
1726 * 1727 * Thus the read_seqcount_retry() and the "duplicate" info 1728 * in arguments (the low-level filesystem should not look 1729 * at the dentry inode or name contents directly, since 1730 * rename can change them while we're in RCU mode). 1731 */ 1732 enum slow_d_compare { 1733 D_COMP_OK, 1734 D_COMP_NOMATCH, 1735 D_COMP_SEQRETRY, 1736 }; 1737 1738 static noinline enum slow_d_compare slow_dentry_cmp( 1739 const struct dentry *parent, 1740 struct dentry *dentry, 1741 unsigned int seq, 1742 const struct qstr *name) 1743 { 1744 int tlen = dentry->d_name.len; 1745 const char *tname = dentry->d_name.name; 1746 1747 if (read_seqcount_retry(&dentry->d_seq, seq)) { 1748 cpu_relax(); 1749 return D_COMP_SEQRETRY; 1750 } 1751 if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) 1752 return D_COMP_NOMATCH; 1753 return D_COMP_OK; 1754 } 1755 1756 /** 1757 * __d_lookup_rcu - search for a dentry (racy, store-free) 1758 * @parent: parent dentry 1759 * @name: qstr of name we wish to find 1760 * @seqp: returns d_seq value at the point where the dentry was found 1761 * Returns: dentry, or NULL 1762 * 1763 * __d_lookup_rcu is the dcache lookup function for rcu-walk name 1764 * resolution (store-free path walking) design described in 1765 * Documentation/filesystems/path-lookup.txt. 1766 * 1767 * This is not to be used outside core vfs. 1768 * 1769 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock 1770 * held, and rcu_read_lock held. The returned dentry must not be stored into 1771 * without taking d_lock and checking d_seq sequence count against @seq 1772 * returned here. 1773 * 1774 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount 1775 * function. 1776 * 1777 * Alternatively, __d_lookup_rcu may be called again to look up the child of 1778 * the returned dentry, so long as its parent's seqlock is checked after the 1779 * child is looked up. 
 * Thus, an interlocking stepping of sequence lock checks
 * is formed, giving integrity down the path walk.
 *
 * NOTE! The caller *has* to check the resulting dentry against the sequence
 * number we've returned before using any of the resulting dentry state!
 */
struct dentry *__d_lookup_rcu(const struct dentry *parent,
				const struct qstr *name,
				unsigned *seqp)
{
	u64 hashlen = name->hash_len;
	const unsigned char *str = name->name;
	struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
	struct hlist_bl_node *node;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Carefully use d_seq when comparing a candidate dentry, to avoid
	 * races with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result.  d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/filesystems/path-lookup.txt for more details.
	 */
	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
		unsigned seq;

seqretry:
		/*
		 * The dentry sequence count protects us from concurrent
		 * renames, and thus protects parent and name fields.
		 *
		 * The caller must perform a seqcount check in order
		 * to do anything useful with the returned dentry.
		 *
		 * NOTE! We do a "raw" seqcount_begin here.  That means that
		 * we don't wait for the sequence count to stabilize if it
		 * is in the middle of a sequence change.  If we do the slow
		 * dentry compare, we will do seqretries until it is stable,
		 * and if we end up with a successful lookup, we actually
		 * want to exit RCU lookup anyway.
		 */
		seq = raw_seqcount_begin(&dentry->d_seq);
		if (dentry->d_parent != parent)
			continue;
		if (d_unhashed(dentry))
			continue;

		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
			/* Filesystem-specific compare: take the slow path. */
			if (dentry->d_name.hash != hashlen_hash(hashlen))
				continue;
			*seqp = seq;
			switch (slow_dentry_cmp(parent, dentry, seq, name)) {
			case D_COMP_OK:
				return dentry;
			case D_COMP_NOMATCH:
				continue;
			default:
				goto seqretry;
			}
		}

		/* Fast path: compare hash+len in one go, then the bytes. */
		if (dentry->d_name.hash_len != hashlen)
			continue;
		*seqp = seq;
		if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
			return dentry;
	}
	return NULL;
}

/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * d_lookup searches the children of the parent dentry for the name in
 * question.  If the dentry is found its reference count is incremented and the
 * dentry is returned.  The caller must use dput to free the entry when it has
 * finished using it.  %NULL is returned if the dentry does not exist.
 */
struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
{
	struct dentry *dentry;
	unsigned seq;

	/* Retry the racy __d_lookup() until no rename intervened. */
	do {
		seq = read_seqbegin(&rename_lock);
		dentry = __d_lookup(parent, name);
		if (dentry)
			break;
	} while (read_seqretry(&rename_lock, seq));
	return dentry;
}
EXPORT_SYMBOL(d_lookup);

/**
 * __d_lookup - search for a dentry (racy)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * __d_lookup is like d_lookup, however it may (rarely) return a
 * false-negative result due to unrelated rename activity.
 *
 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
 * however it must be used carefully, eg. with a following d_lookup in
 * the case of failure.
 *
 * __d_lookup callers must be commented.
 */
struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_bl_head *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *found = NULL;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup_rcu which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Take d_lock when comparing a candidate dentry, to avoid races
	 * with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result.  d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/filesystems/path-lookup.txt for more details.
	 */
	rcu_read_lock();

	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {

		if (dentry->d_name.hash != hash)
			continue;

		spin_lock(&dentry->d_lock);
		if (dentry->d_parent != parent)
			goto next;
		if (d_unhashed(dentry))
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			int tlen = dentry->d_name.len;
			const char *tname = dentry->d_name.name;
			if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
				goto next;
		} else {
			if (dentry->d_name.len != len)
				continue;
			if (dentry_cmp(dentry, str, len))
				goto next;
		}

		/* Match: take a reference under d_lock before returning. */
		dentry->d_lockref.count++;
		found = dentry;
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
 	}
 	rcu_read_unlock();

 	return found;
}

/**
 * d_hash_and_lookup - hash the qstr then search for a dentry
 * @dir: Directory to search in
 * @name: qstr of name we wish to find
 *
 * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
 */
struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
{
	/*
	 * Check for a fs-specific hash function.  Note that we must
	 * calculate the standard hash first, as the d_op->d_hash()
	 * routine may choose to leave the hash value unchanged.
	 */
	name->hash = full_name_hash(name->name, name->len);
	if (dir->d_flags & DCACHE_OP_HASH) {
		int err = dir->d_op->d_hash(dir, name);
		if (unlikely(err < 0))
			return ERR_PTR(err);
	}
	return d_lookup(dir, name);
}
EXPORT_SYMBOL(d_hash_and_lookup);

/**
 * d_validate - verify dentry provided from insecure source (deprecated)
 * @dentry: The dentry alleged to be valid child of @dparent
 * @dparent: The parent dentry (known to be valid)
 *
 * An insecure source has sent us a dentry, here we verify it and dget() it.
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned if the dentry is invalid.
 *
 * This function is slow for big directories, and deprecated, do not use it.
 */
int d_validate(struct dentry *dentry, struct dentry *dparent)
{
	struct dentry *child;

	/* Linear scan of the parent's children looking for @dentry. */
	spin_lock(&dparent->d_lock);
	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
		if (dentry == child) {
			spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
			__dget_dlock(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&dparent->d_lock);
			return 1;
		}
	}
	spin_unlock(&dparent->d_lock);

	return 0;
}
EXPORT_SYMBOL(d_validate);

/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
2033 * 2034 * Usually, we want to just turn this into 2035 * a negative dentry, but if anybody else is 2036 * currently using the dentry or the inode 2037 * we can't do that and we fall back on removing 2038 * it from the hash queues and waiting for 2039 * it to be deleted later when it has no users 2040 */ 2041 2042 /** 2043 * d_delete - delete a dentry 2044 * @dentry: The dentry to delete 2045 * 2046 * Turn the dentry into a negative dentry if possible, otherwise 2047 * remove it from the hash queues so it can be deleted later 2048 */ 2049 2050 void d_delete(struct dentry * dentry) 2051 { 2052 struct inode *inode; 2053 int isdir = 0; 2054 /* 2055 * Are we the only user? 2056 */ 2057 again: 2058 spin_lock(&dentry->d_lock); 2059 inode = dentry->d_inode; 2060 isdir = S_ISDIR(inode->i_mode); 2061 if (dentry->d_lockref.count == 1) { 2062 if (!spin_trylock(&inode->i_lock)) { 2063 spin_unlock(&dentry->d_lock); 2064 cpu_relax(); 2065 goto again; 2066 } 2067 dentry->d_flags &= ~DCACHE_CANT_MOUNT; 2068 dentry_unlink_inode(dentry); 2069 fsnotify_nameremove(dentry, isdir); 2070 return; 2071 } 2072 2073 if (!d_unhashed(dentry)) 2074 __d_drop(dentry); 2075 2076 spin_unlock(&dentry->d_lock); 2077 2078 fsnotify_nameremove(dentry, isdir); 2079 } 2080 EXPORT_SYMBOL(d_delete); 2081 2082 static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) 2083 { 2084 BUG_ON(!d_unhashed(entry)); 2085 hlist_bl_lock(b); 2086 entry->d_flags |= DCACHE_RCUACCESS; 2087 hlist_bl_add_head_rcu(&entry->d_hash, b); 2088 hlist_bl_unlock(b); 2089 } 2090 2091 static void _d_rehash(struct dentry * entry) 2092 { 2093 __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash)); 2094 } 2095 2096 /** 2097 * d_rehash - add an entry back to the hash 2098 * @entry: dentry to add to the hash 2099 * 2100 * Adds a dentry to the hash according to its name. 
 */

void d_rehash(struct dentry * entry)
{
	spin_lock(&entry->d_lock);
	_d_rehash(entry);
	spin_unlock(&entry->d_lock);
}
EXPORT_SYMBOL(d_rehash);

/**
 * dentry_update_name_case - update case insensitive dentry with a new name
 * @dentry: dentry to be updated
 * @name: new name
 *
 * Update a case insensitive dentry with new case of name.
 *
 * dentry must have been returned by d_lookup with name @name.  Old and new
 * name lengths must match (ie. no d_compare which allows mismatched name
 * lengths).
 *
 * Parent inode i_mutex must be held over d_lookup and into this call (to
 * keep renames and concurrent inserts, and readdir(2) away).
 */
void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
{
	BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */

	spin_lock(&dentry->d_lock);
	/* d_seq write section lets RCU-walk readers detect the change. */
	write_seqcount_begin(&dentry->d_seq);
	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
	write_seqcount_end(&dentry->d_seq);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(dentry_update_name_case);

/*
 * Exchange the names of @dentry and @target for d_move(), handling all
 * four combinations of inline (d_iname) vs external (kmalloc'd) name
 * storage.  @target's name need only survive well enough to keep its
 * hash chain consistent — it is about to be dropped.
 */
static void switch_names(struct dentry *dentry, struct dentry *target)
{
	if (dname_external(target)) {
		if (dname_external(dentry)) {
			/*
			 * Both external: swap the pointers
			 */
			swap(target->d_name.name, dentry->d_name.name);
		} else {
			/*
			 * dentry:internal, target:external.  Steal target's
			 * storage and make target internal.
			 */
			memcpy(target->d_iname, dentry->d_name.name,
					dentry->d_name.len + 1);
			dentry->d_name.name = target->d_name.name;
			target->d_name.name = target->d_iname;
		}
	} else {
		if (dname_external(dentry)) {
			/*
			 * dentry:external, target:internal.  Give dentry's
			 * storage to target and make dentry internal
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			target->d_name.name = dentry->d_name.name;
			dentry->d_name.name = dentry->d_iname;
		} else {
			/*
			 * Both are internal.  Just copy target to dentry
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			dentry->d_name.len = target->d_name.len;
			return;
		}
	}
	swap(dentry->d_name.len, target->d_name.len);
}

/*
 * Take the locks needed by __d_move(): both parents' d_lock (ancestor
 * first when one parent is an ancestor of the other), then both
 * dentries' d_lock in address order.  Nesting levels 2/3 keep lockdep
 * happy about the four-deep d_lock nesting.
 */
static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
{
	/*
	 * XXXX: do we really need to take target->d_lock?
	 */
	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
		spin_lock(&target->d_parent->d_lock);
	else {
		if (d_ancestor(dentry->d_parent, target->d_parent)) {
			spin_lock(&dentry->d_parent->d_lock);
			spin_lock_nested(&target->d_parent->d_lock,
						DENTRY_D_LOCK_NESTED);
		} else {
			spin_lock(&target->d_parent->d_lock);
			spin_lock_nested(&dentry->d_parent->d_lock,
						DENTRY_D_LOCK_NESTED);
		}
	}
	if (target < dentry) {
		spin_lock_nested(&target->d_lock, 2);
		spin_lock_nested(&dentry->d_lock, 3);
	} else {
		spin_lock_nested(&dentry->d_lock, 2);
		spin_lock_nested(&target->d_lock, 3);
	}
}

/*
 * Undo the parent locking of dentry_lock_for_move(), skipping locks
 * that were never taken (shared parent, or target being its own
 * parent).
 */
static void dentry_unlock_parents_for_move(struct dentry *dentry,
					struct dentry *target)
{
	if (target->d_parent != dentry->d_parent)
		spin_unlock(&dentry->d_parent->d_lock);
	if (target->d_parent != target)
		spin_unlock(&target->d_parent->d_lock);
}

/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway.  As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on..
 */
/*
 * __d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way. Caller must hold
 * rename_lock, the i_mutex of the source and target directories,
 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
 */
static void __d_move(struct dentry * dentry, struct dentry * target)
{
	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	BUG_ON(d_ancestor(dentry, target));
	BUG_ON(d_ancestor(target, dentry));

	dentry_lock_for_move(dentry, target);

	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin(&target->d_seq);

	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */

	/*
	 * Move the dentry to the target hash queue. Don't bother checking
	 * for the same hash queue because of how unlikely it is.
	 */
	__d_drop(dentry);
	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	swap(dentry->d_name.hash, target->d_name.hash);

	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		/* target keeps only itself; it is about to be dropped */
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		swap(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);

	write_seqcount_end(&target->d_seq);
	write_seqcount_end(&dentry->d_seq);

	dentry_unlock_parents_for_move(dentry, target);
	spin_unlock(&target->d_lock);
	/* fsnotify needs dentry->d_lock still held; unlock it last */
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
}

/*
 * d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way. See the locking
 * requirements for __d_move.
 */
void d_move(struct dentry *dentry, struct dentry *target)
{
	write_seqlock(&rename_lock);
	__d_move(dentry, target);
	write_sequnlock(&rename_lock);
}
EXPORT_SYMBOL(d_move);

/**
 * d_ancestor - search for an ancestor
 * @p1: ancestor dentry
 * @p2: child dentry
 *
 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
 * an ancestor of p2, else NULL.
 */
struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
{
	struct dentry *p;

	for (p = p2; !IS_ROOT(p); p = p->d_parent) {
		if (p->d_parent == p1)
			return p;
	}
	return NULL;
}

/*
 * This helper attempts to cope with remotely renamed directories
 *
 * It assumes that the caller is already holding
 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
 *
 * Note: If ever the locking in lock_rename() changes, then please
 * remember to update this too...
2335 */ 2336 static struct dentry *__d_unalias(struct inode *inode, 2337 struct dentry *dentry, struct dentry *alias) 2338 { 2339 struct mutex *m1 = NULL, *m2 = NULL; 2340 struct dentry *ret = ERR_PTR(-EBUSY); 2341 2342 /* If alias and dentry share a parent, then no extra locks required */ 2343 if (alias->d_parent == dentry->d_parent) 2344 goto out_unalias; 2345 2346 /* See lock_rename() */ 2347 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) 2348 goto out_err; 2349 m1 = &dentry->d_sb->s_vfs_rename_mutex; 2350 if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex)) 2351 goto out_err; 2352 m2 = &alias->d_parent->d_inode->i_mutex; 2353 out_unalias: 2354 if (likely(!d_mountpoint(alias))) { 2355 __d_move(alias, dentry); 2356 ret = alias; 2357 } 2358 out_err: 2359 spin_unlock(&inode->i_lock); 2360 if (m2) 2361 mutex_unlock(m2); 2362 if (m1) 2363 mutex_unlock(m1); 2364 return ret; 2365 } 2366 2367 /* 2368 * Prepare an anonymous dentry for life in the superblock's dentry tree as a 2369 * named dentry in place of the dentry to be replaced. 2370 * returns with anon->d_lock held! 
 */
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
	struct dentry *dparent;

	dentry_lock_for_move(anon, dentry);

	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin(&anon->d_seq);

	dparent = dentry->d_parent;

	/* swap names/hashes, then splice anon in under dentry's old parent */
	switch_names(dentry, anon);
	swap(dentry->d_name.hash, anon->d_name.hash);

	dentry->d_parent = dentry;
	list_del_init(&dentry->d_u.d_child);
	anon->d_parent = dparent;
	list_move(&anon->d_u.d_child, &dparent->d_subdirs);

	write_seqcount_end(&dentry->d_seq);
	write_seqcount_end(&anon->d_seq);

	dentry_unlock_parents_for_move(anon, dentry);
	spin_unlock(&dentry->d_lock);

	/* anon->d_lock still locked, returns locked */
	anon->d_flags &= ~DCACHE_DISCONNECTED;
}

/**
 * d_materialise_unique - introduce an inode into the tree
 * @dentry: candidate dentry
 * @inode: inode to bind to the dentry, to which aliases may be attached
 *
 * Introduces an dentry into the tree, substituting an extant disconnected
 * root directory alias in its place if there is one. Caller must hold the
 * i_mutex of the parent directory.
 *
 * Returns NULL when @dentry itself was used (caller keeps its reference),
 * the alias that ended up in the tree otherwise (with @inode's reference
 * consumed), or an ERR_PTR (-ELOOP on a detected directory loop).
 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
	struct dentry *actual;

	BUG_ON(!d_unhashed(dentry));

	if (!inode) {
		/* negative dentry: just instantiate and hash it */
		actual = dentry;
		__d_instantiate(dentry, NULL);
		d_rehash(actual);
		goto out_nolock;
	}

	spin_lock(&inode->i_lock);

	if (S_ISDIR(inode->i_mode)) {
		struct dentry *alias;

		/* Does an aliased dentry already exist? */
		alias = __d_find_alias(inode, 0);
		if (alias) {
			actual = alias;
			write_seqlock(&rename_lock);

			if (d_ancestor(alias, dentry)) {
				/* Check for loops */
				actual = ERR_PTR(-ELOOP);
				spin_unlock(&inode->i_lock);
			} else if (IS_ROOT(alias)) {
				/* Is this an anonymous mountpoint that we
				 * could splice into our tree? */
				__d_materialise_dentry(dentry, alias);
				write_sequnlock(&rename_lock);
				__d_drop(alias);
				goto found;
			} else {
				/* Nope, but we must(!) avoid directory
				 * aliasing. This drops inode->i_lock */
				actual = __d_unalias(inode, dentry, alias);
			}
			write_sequnlock(&rename_lock);
			if (IS_ERR(actual)) {
				if (PTR_ERR(actual) == -ELOOP)
					pr_warn_ratelimited(
						"VFS: Lookup of '%s' in %s %s"
						" would have caused loop\n",
						dentry->d_name.name,
						inode->i_sb->s_type->name,
						inode->i_sb->s_id);
				/* drop the reference __d_find_alias took */
				dput(alias);
			}
			goto out_nolock;
		}
	}

	/* Add a unique reference */
	actual = __d_instantiate_unique(dentry, inode);
	if (!actual)
		actual = dentry;
	else
		BUG_ON(!d_unhashed(actual));

	spin_lock(&actual->d_lock);
found:
	_d_rehash(actual);
	spin_unlock(&actual->d_lock);
	spin_unlock(&inode->i_lock);
out_nolock:
	if (actual == dentry) {
		security_d_instantiate(dentry, inode);
		return NULL;
	}

	/* an existing alias was used instead; drop the inode reference */
	iput(inode);
	return actual;
}
EXPORT_SYMBOL_GPL(d_materialise_unique);

/*
 * Copy @namelen bytes of @str in front of *@buffer, moving the buffer
 * pointer and the remaining length down.  Returns -ENAMETOOLONG if the
 * string does not fit.
 */
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
	*buflen -= namelen;
	if (*buflen < 0)
		return -ENAMETOOLONG;
	*buffer -= namelen;
	memcpy(*buffer, str, namelen);
	return 0;
}

/* prepend() a qstr (not NUL-terminated; length comes from the qstr) */
static int prepend_name(char **buffer, int *buflen, struct qstr *name)
{
	return prepend(buffer, buflen, name->name, name->len);
}

/**
 * prepend_path - Prepend path string to a buffer
 *
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry
 * @buffer: pointer to the end of the buffer
 * @buflen: pointer to buffer length
 *
 * Caller holds the rename_lock.
 *
 * Returns 0 on success, a negative errno on failure, or a positive
 * value when the walk hit a global root other than @root: 1 if that
 * root's mount is still mounted, 2 if it is detached.
 */
static int prepend_path(const struct path *path,
			const struct path *root,
			char **buffer, int *buflen)
{
	struct dentry *dentry = path->dentry;
	struct vfsmount *vfsmnt = path->mnt;
	struct mount *mnt = real_mount(vfsmnt);
	bool slash = false;
	int error = 0;

	while (dentry != root->dentry || vfsmnt != root->mnt) {
		struct dentry * parent;

		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root? */
			if (!mnt_has_parent(mnt))
				goto global_root;
			/* cross the mountpoint into the parent mount */
			dentry = mnt->mnt_mountpoint;
			mnt = mnt->mnt_parent;
			vfsmnt = &mnt->mnt;
			continue;
		}
		parent = dentry->d_parent;
		prefetch(parent);
		/* d_lock stabilizes d_name against a concurrent rename */
		spin_lock(&dentry->d_lock);
		error = prepend_name(buffer, buflen, &dentry->d_name);
		spin_unlock(&dentry->d_lock);
		if (!error)
			error = prepend(buffer, buflen, "/", 1);
		if (error)
			break;

		slash = true;
		dentry = parent;
	}

	if (!error && !slash)
		error = prepend(buffer, buflen, "/", 1);

	return error;

global_root:
	/*
	 * Filesystems needing to implement special "root names"
	 * should do so with ->d_dname()
	 */
	if (IS_ROOT(dentry) &&
	    (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
		WARN(1, "Root dentry has weird name <%.*s>\n",
		     (int) dentry->d_name.len, dentry->d_name.name);
	}
	if (!slash)
		error = prepend(buffer, buflen, "/", 1);
	if (!error)
		error = is_mounted(vfsmnt) ? 1 : 2;
	return error;
}

/**
 * __d_path - return the path of a dentry
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name.
 *
 * Returns a pointer into the buffer or an error code if the
 * path was too long.
 *
 * "buflen" should be positive.
 *
 * If the path is not reachable from the supplied root, return %NULL.
 */
char *__d_path(const struct path *path,
	       const struct path *root,
	       char *buf, int buflen)
{
	char *res = buf + buflen;
	int error;

	prepend(&res, &buflen, "\0", 1);
	br_read_lock(&vfsmount_lock);
	write_seqlock(&rename_lock);
	error = prepend_path(path, root, &res, &buflen);
	write_sequnlock(&rename_lock);
	br_read_unlock(&vfsmount_lock);

	if (error < 0)
		return ERR_PTR(error);
	/* positive means we hit a global root != @root: unreachable */
	if (error > 0)
		return NULL;
	return res;
}

char *d_absolute_path(const struct path *path,
	       char *buf, int buflen)
{
	/* empty root so the walk always terminates at a global root */
	struct path root = {};
	char *res = buf + buflen;
	int error;

	prepend(&res, &buflen, "\0", 1);
	br_read_lock(&vfsmount_lock);
	write_seqlock(&rename_lock);
	error = prepend_path(path, &root, &res, &buflen);
	write_sequnlock(&rename_lock);
	br_read_unlock(&vfsmount_lock);

	/* 2 == detached from the mount tree: no absolute path exists */
	if (error > 1)
		error = -EINVAL;
	if (error < 0)
		return ERR_PTR(error);
	return res;
}

/*
 * same as __d_path but appends "(deleted)" for unlinked files.
 */
static int path_with_deleted(const struct path *path,
			     const struct path *root,
			     char **buf, int *buflen)
{
	prepend(buf, buflen, "\0", 1);
	if (d_unlinked(path->dentry)) {
		/* 10 chars, no NUL: the terminator was prepended above */
		int error = prepend(buf, buflen, " (deleted)", 10);
		if (error)
			return error;
	}

	return prepend_path(path, root, buf, buflen);
}

/* prefix used when the path is not reachable from the process root */
static int prepend_unreachable(char **buffer, int *buflen)
{
	return prepend(buffer, buflen, "(unreachable)", 13);
}

/**
 * d_path - return the path of a dentry
 * @path: path to report
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns a pointer into the buffer or an error code if the path was
 * too long. Note: Callers should use the returned pointer, not the passed
 * in buffer, to use the name! The implementation often starts at an offset
 * into the buffer, and may leave 0 bytes at the start.
 *
 * "buflen" should be positive.
 */
char *d_path(const struct path *path, char *buf, int buflen)
{
	char *res = buf + buflen;
	struct path root;
	int error;

	/*
	 * We have various synthetic filesystems that never get mounted.  On
	 * these filesystems dentries are never used for lookup purposes, and
	 * thus don't need to be hashed.  They also don't need a name until a
	 * user wants to identify the object in /proc/pid/fd/.  The little hack
	 * below allows us to generate a name for these objects on demand:
	 */
	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	get_fs_root(current->fs, &root);
	br_read_lock(&vfsmount_lock);
	write_seqlock(&rename_lock);
	error = path_with_deleted(path, &root, &res, &buflen);
	write_sequnlock(&rename_lock);
	br_read_unlock(&vfsmount_lock);
	if (error < 0)
		res = ERR_PTR(error);
	path_put(&root);
	return res;
}
EXPORT_SYMBOL(d_path);

/*
 * Helper function for dentry_operations.d_dname() members
 */
char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
			const char *fmt, ...)
{
	va_list args;
	char temp[64];
	int sz;

	va_start(args, fmt);
	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;	/* +1 for NUL */
	va_end(args);

	/*
	 * NOTE(review): sz is int vs. size_t sizeof(temp); a negative
	 * vsnprintf() return converts to a huge unsigned value here and
	 * is rejected, apparently by accident — confirm intent upstream.
	 */
	if (sz > sizeof(temp) || sz > buflen)
		return ERR_PTR(-ENAMETOOLONG);

	buffer += buflen - sz;
	return memcpy(buffer, temp, sz);
}

char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
{
	char *end = buffer + buflen;
	/* these dentries are never renamed, so d_lock is not needed */
	/* 11 = " (deleted)" plus its terminating NUL */
	if (prepend(&end, &buflen, " (deleted)", 11) ||
	    prepend_name(&end, &buflen, &dentry->d_name) ||
	    prepend(&end, &buflen, "/", 1))
		end = ERR_PTR(-ENAMETOOLONG);
	return end;
}

/*
 * Write full pathname from the root of the filesystem into the buffer.
2729 */ 2730 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) 2731 { 2732 char *end = buf + buflen; 2733 char *retval; 2734 2735 prepend(&end, &buflen, "\0", 1); 2736 if (buflen < 1) 2737 goto Elong; 2738 /* Get '/' right */ 2739 retval = end-1; 2740 *retval = '/'; 2741 2742 while (!IS_ROOT(dentry)) { 2743 struct dentry *parent = dentry->d_parent; 2744 int error; 2745 2746 prefetch(parent); 2747 spin_lock(&dentry->d_lock); 2748 error = prepend_name(&end, &buflen, &dentry->d_name); 2749 spin_unlock(&dentry->d_lock); 2750 if (error != 0 || prepend(&end, &buflen, "/", 1) != 0) 2751 goto Elong; 2752 2753 retval = end; 2754 dentry = parent; 2755 } 2756 return retval; 2757 Elong: 2758 return ERR_PTR(-ENAMETOOLONG); 2759 } 2760 2761 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) 2762 { 2763 char *retval; 2764 2765 write_seqlock(&rename_lock); 2766 retval = __dentry_path(dentry, buf, buflen); 2767 write_sequnlock(&rename_lock); 2768 2769 return retval; 2770 } 2771 EXPORT_SYMBOL(dentry_path_raw); 2772 2773 char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2774 { 2775 char *p = NULL; 2776 char *retval; 2777 2778 write_seqlock(&rename_lock); 2779 if (d_unlinked(dentry)) { 2780 p = buf + buflen; 2781 if (prepend(&p, &buflen, "//deleted", 10) != 0) 2782 goto Elong; 2783 buflen++; 2784 } 2785 retval = __dentry_path(dentry, buf, buflen); 2786 write_sequnlock(&rename_lock); 2787 if (!IS_ERR(retval) && p) 2788 *p = '/'; /* restore '/' overriden with '\0' */ 2789 return retval; 2790 Elong: 2791 return ERR_PTR(-ENAMETOOLONG); 2792 } 2793 2794 /* 2795 * NOTE! The user-level library version returns a 2796 * character pointer. The kernel system call just 2797 * returns the length of the buffer filled (which 2798 * includes the ending '\0' character), or a negative 2799 * error value. 
So libc would do something like 2800 * 2801 * char *getcwd(char * buf, size_t size) 2802 * { 2803 * int retval; 2804 * 2805 * retval = sys_getcwd(buf, size); 2806 * if (retval >= 0) 2807 * return buf; 2808 * errno = -retval; 2809 * return NULL; 2810 * } 2811 */ 2812 SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) 2813 { 2814 int error; 2815 struct path pwd, root; 2816 char *page = (char *) __get_free_page(GFP_USER); 2817 2818 if (!page) 2819 return -ENOMEM; 2820 2821 get_fs_root_and_pwd(current->fs, &root, &pwd); 2822 2823 error = -ENOENT; 2824 br_read_lock(&vfsmount_lock); 2825 write_seqlock(&rename_lock); 2826 if (!d_unlinked(pwd.dentry)) { 2827 unsigned long len; 2828 char *cwd = page + PAGE_SIZE; 2829 int buflen = PAGE_SIZE; 2830 2831 prepend(&cwd, &buflen, "\0", 1); 2832 error = prepend_path(&pwd, &root, &cwd, &buflen); 2833 write_sequnlock(&rename_lock); 2834 br_read_unlock(&vfsmount_lock); 2835 2836 if (error < 0) 2837 goto out; 2838 2839 /* Unreachable from current root */ 2840 if (error > 0) { 2841 error = prepend_unreachable(&cwd, &buflen); 2842 if (error) 2843 goto out; 2844 } 2845 2846 error = -ERANGE; 2847 len = PAGE_SIZE + page - cwd; 2848 if (len <= size) { 2849 error = len; 2850 if (copy_to_user(buf, cwd, len)) 2851 error = -EFAULT; 2852 } 2853 } else { 2854 write_sequnlock(&rename_lock); 2855 br_read_unlock(&vfsmount_lock); 2856 } 2857 2858 out: 2859 path_put(&pwd); 2860 path_put(&root); 2861 free_page((unsigned long) page); 2862 return error; 2863 } 2864 2865 /* 2866 * Test whether new_dentry is a subdirectory of old_dentry. 2867 * 2868 * Trivially implemented using the dcache structure 2869 */ 2870 2871 /** 2872 * is_subdir - is new dentry a subdirectory of old_dentry 2873 * @new_dentry: new dentry 2874 * @old_dentry: old dentry 2875 * 2876 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth). 2877 * Returns 0 otherwise. 
 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
 */

int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
	int result;
	unsigned seq;

	if (new_dentry == old_dentry)
		return 1;

	do {
		/* for restarting inner loop in case of seq retry */
		seq = read_seqbegin(&rename_lock);
		/*
		 * Need rcu_readlock to protect against the d_parent trashing
		 * due to d_move
		 */
		rcu_read_lock();
		if (d_ancestor(old_dentry, new_dentry))
			result = 1;
		else
			result = 0;
		rcu_read_unlock();
	} while (read_seqretry(&rename_lock, seq));

	return result;
}

/*
 * Walk the subtree under @root and drop one reference from every
 * positive, hashed dentry found.  DCACHE_GENOCIDE marks dentries
 * already processed so a rename_lock retry does not drop twice.
 * A seqcount retry falls back to taking rename_lock exclusively.
 */
void d_genocide(struct dentry *root)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = root;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		if (d_unhashed(dentry) || !dentry->d_inode) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			/*
			 * Descend: hand the child's lock over as the new
			 * parent lock (lockdep annotations keep the
			 * hand-over honest).
			 */
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
			dentry->d_flags |= DCACHE_GENOCIDE;
			dentry->d_lockref.count--;
		}
		spin_unlock(&dentry->d_lock);
	}
	if (this_parent != root) {
		struct dentry *child = this_parent;
		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
			this_parent->d_flags |= DCACHE_GENOCIDE;
			this_parent->d_lockref.count--;
		}
		/* ascend back to the parent; NULL means a rename raced us */
		this_parent = try_to_ascend(this_parent, locked, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return;

rename_retry:
	if (locked)
		goto again;
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}

void d_tmpfile(struct dentry *dentry, struct inode *inode)
{
	inode_dec_link_count(inode);
	BUG_ON(dentry->d_name.name != dentry->d_iname ||
		!hlist_unhashed(&dentry->d_alias) ||
		!d_unlinked(dentry));
	spin_lock(&dentry->d_parent->d_lock);
	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	/* give the tmpfile dentry a synthetic "#<ino>" name */
	dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
				(unsigned long long)inode->i_ino);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&dentry->d_parent->d_lock);
	d_instantiate(dentry, inode);
}
EXPORT_SYMBOL(d_tmpfile);

/* "dhash_entries=" boot parameter: requested dentry hash table size */
static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
	if (!str)
		return 0;
	dhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("dhash_entries=", set_dhash_entries);

static void __init dcache_init_early(void)
{
	unsigned int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_bl_head),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0,
					0);

	for (loop = 0; loop < (1U << d_hash_shift); loop++)
		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
}

static void __init dcache_init(void)
{
	unsigned int loop;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_bl_head),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0,
					0);

	for (loop = 0; loop < (1U << d_hash_shift); loop++)
		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
}

/* SLAB cache for __getname() consumers */
struct kmem_cache *names_cachep __read_mostly;
EXPORT_SYMBOL(names_cachep);

EXPORT_SYMBOL(d_genocide);

void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}

void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */

	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init();
	inode_init();
	files_init(mempages);
	mnt_init();
	bdev_cache_init();
	chrdev_init();
}