1 /*
2  * fs/dcache.c
3  *
4  * Complete reimplementation
5  * (C) 1997 Thomas Schoebel-Theuer,
6  * with heavy changes by Linus Torvalds
7  */
8 
9 /*
10  * Notes on the allocation strategy:
11  *
12  * The dcache is a master of the icache - whenever a dcache entry
13  * exists, the inode will always exist. "iput()" is done either when
14  * the dcache entry is deleted or garbage collected.
15  */
16 
17 #include <linux/syscalls.h>
18 #include <linux/string.h>
19 #include <linux/mm.h>
20 #include <linux/fs.h>
21 #include <linux/fsnotify.h>
22 #include <linux/slab.h>
23 #include <linux/init.h>
24 #include <linux/hash.h>
25 #include <linux/cache.h>
26 #include <linux/module.h>
27 #include <linux/mount.h>
28 #include <linux/file.h>
29 #include <asm/uaccess.h>
30 #include <linux/security.h>
31 #include <linux/seqlock.h>
32 #include <linux/swap.h>
33 #include <linux/bootmem.h>
34 #include <linux/fs_struct.h>
35 #include <linux/hardirq.h>
36 #include <linux/bit_spinlock.h>
37 #include <linux/rculist_bl.h>
38 #include <linux/prefetch.h>
39 #include <linux/ratelimit.h>
40 #include "internal.h"
41 #include "mount.h"
42 
43 /*
44  * Usage:
45  * dentry->d_inode->i_lock protects:
46  *   - i_dentry, d_alias, d_inode of aliases
47  * dcache_hash_bucket lock protects:
48  *   - the dcache hash table
49  * s_anon bl list spinlock protects:
50  *   - the s_anon list (see __d_drop)
51  * dcache_lru_lock protects:
52  *   - the dcache lru lists and counters
53  * d_lock protects:
54  *   - d_flags
55  *   - d_name
56  *   - d_lru
57  *   - d_count
58  *   - d_unhashed()
59  *   - d_parent and d_subdirs
60  *   - children's d_child and d_parent
61  *   - d_alias, d_inode
62  *
63  * Ordering:
64  * dentry->d_inode->i_lock
65  *   dentry->d_lock
66  *     dcache_lru_lock
67  *     dcache_hash_bucket lock
68  *     s_anon lock
69  *
70  * If there is an ancestor relationship:
71  * dentry->d_parent->...->d_parent->d_lock
72  *   ...
73  *     dentry->d_parent->d_lock
74  *       dentry->d_lock
75  *
76  * If no ancestor relationship:
77  * if (dentry1 < dentry2)
78  *   dentry1->d_lock
79  *     dentry2->d_lock
80  */
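
/*
 * Illustrative sketch of the "no ancestor relationship" rule above
 * (hypothetical helper, not part of this file): lock the dentry with the
 * lower address first, and take the second lock with the nesting
 * annotation so lockdep accepts the ordering.
 */
static inline void example_lock_pair(struct dentry *d1, struct dentry *d2)
{
	if (d1 < d2) {
		spin_lock(&d1->d_lock);
		spin_lock_nested(&d2->d_lock, DENTRY_D_LOCK_NESTED);
	} else {
		spin_lock(&d2->d_lock);
		spin_lock_nested(&d1->d_lock, DENTRY_D_LOCK_NESTED);
	}
}
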
81 int sysctl_vfs_cache_pressure __read_mostly = 100;
82 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
83 
84 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
85 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
86 
87 EXPORT_SYMBOL(rename_lock);
88 
89 static struct kmem_cache *dentry_cache __read_mostly;
90 
91 /*
92  * This is the single most critical data structure when it comes
93  * to the dcache: the hashtable for lookups. Somebody should try
94  * to make this good - I've just made it work.
95  *
96  * This hash-function tries to avoid losing too many bits of hash
97  * information, yet avoid using a prime hash-size or similar.
98  */
99 #define D_HASHBITS     d_hash_shift
100 #define D_HASHMASK     d_hash_mask
101 
102 static unsigned int d_hash_mask __read_mostly;
103 static unsigned int d_hash_shift __read_mostly;
104 
105 static struct hlist_bl_head *dentry_hashtable __read_mostly;
106 
107 static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
108 					unsigned long hash)
109 {
110 	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
111 	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
112 	return dentry_hashtable + (hash & D_HASHMASK);
113 }
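
/*
 * Usage sketch (hypothetical helper): the qstr hash is precomputed with
 * full_name_hash(), then mixed with the parent pointer above, so a given
 * child can only ever live on the one bucket returned here.
 */
static inline struct hlist_bl_head *example_bucket(struct dentry *parent,
						   const struct qstr *name)
{
	return d_hash(parent, name->hash);
}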
114 
115 /* Statistics gathering. */
116 struct dentry_stat_t dentry_stat = {
117 	.age_limit = 45,
118 };
119 
120 static DEFINE_PER_CPU(unsigned int, nr_dentry);
121 
122 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
123 static int get_nr_dentry(void)
124 {
125 	int i;
126 	int sum = 0;
127 	for_each_possible_cpu(i)
128 		sum += per_cpu(nr_dentry, i);
129 	return sum < 0 ? 0 : sum;
130 }
131 
132 int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
133 		   size_t *lenp, loff_t *ppos)
134 {
135 	dentry_stat.nr_dentry = get_nr_dentry();
136 	return proc_dointvec(table, write, buffer, lenp, ppos);
137 }
138 #endif
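
/*
 * The same per-CPU pattern in miniature (hypothetical names): increments
 * stay cache-local on the fast path; only the rare sysctl read above pays
 * for summing across all possible CPUs.
 */
static DEFINE_PER_CPU(unsigned int, example_counter);

static inline void example_counter_inc(void)
{
	this_cpu_inc(example_counter);	/* no shared-cacheline bouncing */
}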
139 
140 /*
141  * Compare 2 name strings, return 0 if they match, otherwise non-zero.
142  * The strings are both count bytes long, and count is non-zero.
143  */
144 static inline int dentry_cmp(const unsigned char *cs, size_t scount,
145 				const unsigned char *ct, size_t tcount)
146 {
147 	if (scount != tcount)
148 		return 1;
149 
150 	do {
151 		if (*cs != *ct)
152 			return 1;
153 		cs++;
154 		ct++;
155 		tcount--;
156 	} while (tcount);
157 	return 0;
158 }
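
/*
 * Minimal usage sketch (hypothetical helper): dentry_cmp() returns 0 only
 * on an exact match, so the negation reads as "the names are equal".
 */
static inline int example_name_equal(const struct dentry *dentry,
				     const struct qstr *name)
{
	return !dentry_cmp(dentry->d_name.name, dentry->d_name.len,
			   name->name, name->len);
}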
159 
160 static void __d_free(struct rcu_head *head)
161 {
162 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
163 
164 	WARN_ON(!list_empty(&dentry->d_alias));
165 	if (dname_external(dentry))
166 		kfree(dentry->d_name.name);
167 	kmem_cache_free(dentry_cache, dentry);
168 }
169 
170 /*
171  * no locks, please.
172  */
173 static void d_free(struct dentry *dentry)
174 {
175 	BUG_ON(dentry->d_count);
176 	this_cpu_dec(nr_dentry);
177 	if (dentry->d_op && dentry->d_op->d_release)
178 		dentry->d_op->d_release(dentry);
179 
180 	/* if dentry was never visible to RCU, immediate free is OK */
181 	if (!(dentry->d_flags & DCACHE_RCUACCESS))
182 		__d_free(&dentry->d_u.d_rcu);
183 	else
184 		call_rcu(&dentry->d_u.d_rcu, __d_free);
185 }
186 
187 /**
188  * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
189  * @dentry: the target dentry
190  * After this call, in-progress rcu-walk path lookup will fail. This
191  * should be called after unhashing, and after changing d_inode (if
192  * the dentry has not already been unhashed).
193  */
194 static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
195 {
196 	assert_spin_locked(&dentry->d_lock);
197 	/* Go through a barrier */
198 	write_seqcount_barrier(&dentry->d_seq);
199 }
200 
201 /*
202  * Release the dentry's inode, using the filesystem
203  * d_iput() operation if defined. Dentry has no refcount
204  * and is unhashed.
205  */
206 static void dentry_iput(struct dentry * dentry)
207 	__releases(dentry->d_lock)
208 	__releases(dentry->d_inode->i_lock)
209 {
210 	struct inode *inode = dentry->d_inode;
211 	if (inode) {
212 		dentry->d_inode = NULL;
213 		list_del_init(&dentry->d_alias);
214 		spin_unlock(&dentry->d_lock);
215 		spin_unlock(&inode->i_lock);
216 		if (!inode->i_nlink)
217 			fsnotify_inoderemove(inode);
218 		if (dentry->d_op && dentry->d_op->d_iput)
219 			dentry->d_op->d_iput(dentry, inode);
220 		else
221 			iput(inode);
222 	} else {
223 		spin_unlock(&dentry->d_lock);
224 	}
225 }
226 
227 /*
228  * Release the dentry's inode, using the filesystem
229  * d_iput() operation if defined. dentry remains in-use.
230  */
231 static void dentry_unlink_inode(struct dentry * dentry)
232 	__releases(dentry->d_lock)
233 	__releases(dentry->d_inode->i_lock)
234 {
235 	struct inode *inode = dentry->d_inode;
236 	dentry->d_inode = NULL;
237 	list_del_init(&dentry->d_alias);
238 	dentry_rcuwalk_barrier(dentry);
239 	spin_unlock(&dentry->d_lock);
240 	spin_unlock(&inode->i_lock);
241 	if (!inode->i_nlink)
242 		fsnotify_inoderemove(inode);
243 	if (dentry->d_op && dentry->d_op->d_iput)
244 		dentry->d_op->d_iput(dentry, inode);
245 	else
246 		iput(inode);
247 }
248 
249 /*
250  * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
251  */
252 static void dentry_lru_add(struct dentry *dentry)
253 {
254 	if (list_empty(&dentry->d_lru)) {
255 		spin_lock(&dcache_lru_lock);
256 		list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
257 		dentry->d_sb->s_nr_dentry_unused++;
258 		dentry_stat.nr_unused++;
259 		spin_unlock(&dcache_lru_lock);
260 	}
261 }
262 
263 static void __dentry_lru_del(struct dentry *dentry)
264 {
265 	list_del_init(&dentry->d_lru);
266 	dentry->d_flags &= ~DCACHE_SHRINK_LIST;
267 	dentry->d_sb->s_nr_dentry_unused--;
268 	dentry_stat.nr_unused--;
269 }
270 
271 /*
272  * Remove a dentry with references from the LRU.
273  */
274 static void dentry_lru_del(struct dentry *dentry)
275 {
276 	if (!list_empty(&dentry->d_lru)) {
277 		spin_lock(&dcache_lru_lock);
278 		__dentry_lru_del(dentry);
279 		spin_unlock(&dcache_lru_lock);
280 	}
281 }
282 
283 /*
284  * Remove a dentry that is unreferenced and about to be pruned
285  * (unhashed and destroyed) from the LRU, and inform the file system.
286  * This wrapper should be called _prior_ to unhashing a victim dentry.
287  */
288 static void dentry_lru_prune(struct dentry *dentry)
289 {
290 	if (!list_empty(&dentry->d_lru)) {
291 		if (dentry->d_flags & DCACHE_OP_PRUNE)
292 			dentry->d_op->d_prune(dentry);
293 
294 		spin_lock(&dcache_lru_lock);
295 		__dentry_lru_del(dentry);
296 		spin_unlock(&dcache_lru_lock);
297 	}
298 }
299 
300 static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
301 {
302 	spin_lock(&dcache_lru_lock);
303 	if (list_empty(&dentry->d_lru)) {
304 		list_add_tail(&dentry->d_lru, list);
305 		dentry->d_sb->s_nr_dentry_unused++;
306 		dentry_stat.nr_unused++;
307 	} else {
308 		list_move_tail(&dentry->d_lru, list);
309 	}
310 	spin_unlock(&dcache_lru_lock);
311 }
312 
313 /**
314  * d_kill - kill dentry and return parent
315  * @dentry: dentry to kill
316  * @parent: parent dentry
317  *
318  * The dentry must already be unhashed and removed from the LRU.
319  *
320  * If this is the root of the dentry tree, return NULL.
321  *
322  * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
323  * d_kill.
324  */
325 static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
326 	__releases(dentry->d_lock)
327 	__releases(parent->d_lock)
328 	__releases(dentry->d_inode->i_lock)
329 {
330 	list_del(&dentry->d_u.d_child);
331 	/*
332 	 * Inform try_to_ascend() that we are no longer attached to the
333 	 * dentry tree
334 	 */
335 	dentry->d_flags |= DCACHE_DISCONNECTED;
336 	if (parent)
337 		spin_unlock(&parent->d_lock);
338 	dentry_iput(dentry);
339 	/*
340 	 * dentry_iput drops the locks, at which point nobody (except
341 	 * transient RCU lookups) can reach this dentry.
342 	 */
343 	d_free(dentry);
344 	return parent;
345 }
346 
347 /*
348  * Unhash a dentry without inserting an RCU walk barrier or checking that
349  * dentry->d_lock is locked.  The caller must take care of that, if
350  * appropriate.
351  */
352 static void __d_shrink(struct dentry *dentry)
353 {
354 	if (!d_unhashed(dentry)) {
355 		struct hlist_bl_head *b;
356 		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
357 			b = &dentry->d_sb->s_anon;
358 		else
359 			b = d_hash(dentry->d_parent, dentry->d_name.hash);
360 
361 		hlist_bl_lock(b);
362 		__hlist_bl_del(&dentry->d_hash);
363 		dentry->d_hash.pprev = NULL;
364 		hlist_bl_unlock(b);
365 	}
366 }
367 
368 /**
369  * d_drop - drop a dentry
370  * @dentry: dentry to drop
371  *
372  * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
373  * be found through a VFS lookup any more. Note that this is different from
374  * deleting the dentry - d_delete will try to mark the dentry negative if
375  * possible, giving a successful _negative_ lookup, while d_drop will
376  * just make the cache lookup fail.
377  *
378  * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
379  * reason (NFS timeouts or autofs deletes).
380  *
381  * __d_drop requires dentry->d_lock.
382  */
383 void __d_drop(struct dentry *dentry)
384 {
385 	if (!d_unhashed(dentry)) {
386 		__d_shrink(dentry);
387 		dentry_rcuwalk_barrier(dentry);
388 	}
389 }
390 EXPORT_SYMBOL(__d_drop);
391 
392 void d_drop(struct dentry *dentry)
393 {
394 	spin_lock(&dentry->d_lock);
395 	__d_drop(dentry);
396 	spin_unlock(&dentry->d_lock);
397 }
398 EXPORT_SYMBOL(d_drop);
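
/*
 * Hedged sketch of a typical d_drop() caller (hypothetical filesystem
 * code, assuming the 3.x ->d_revalidate() signature): a revalidate method
 * that finds the entry stale unhashes it, so the next lookup misses the
 * cache and asks the filesystem again.
 */
static int example_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
	int stale = 0;	/* stands in for an fs-specific staleness check */

	if (stale) {
		d_drop(dentry);
		return 0;	/* invalid: force a fresh lookup */
	}
	return 1;		/* still valid */
}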
399 
400 /*
401  * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
402  * @dentry: dentry to drop
403  *
404  * This is called when we do a lookup on a placeholder dentry that needed to be
405  * looked up.  The dentry should have been hashed in order for it to be found by
406  * the lookup code, but now needs to be unhashed while we do the actual lookup
407  * and clear the DCACHE_NEED_LOOKUP flag.
408  */
409 void d_clear_need_lookup(struct dentry *dentry)
410 {
411 	spin_lock(&dentry->d_lock);
412 	__d_drop(dentry);
413 	dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
414 	spin_unlock(&dentry->d_lock);
415 }
416 EXPORT_SYMBOL(d_clear_need_lookup);
417 
418 /*
419  * Finish off a dentry we've decided to kill.
420  * dentry->d_lock must be held, returns with it unlocked.
421  * If ref is non-zero, then decrement the refcount too.
422  * Returns dentry requiring refcount drop, or NULL if we're done.
423  */
424 static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
425 	__releases(dentry->d_lock)
426 {
427 	struct inode *inode;
428 	struct dentry *parent;
429 
430 	inode = dentry->d_inode;
431 	if (inode && !spin_trylock(&inode->i_lock)) {
432 relock:
433 		spin_unlock(&dentry->d_lock);
434 		cpu_relax();
435 		return dentry; /* try again with same dentry */
436 	}
437 	if (IS_ROOT(dentry))
438 		parent = NULL;
439 	else
440 		parent = dentry->d_parent;
441 	if (parent && !spin_trylock(&parent->d_lock)) {
442 		if (inode)
443 			spin_unlock(&inode->i_lock);
444 		goto relock;
445 	}
446 
447 	if (ref)
448 		dentry->d_count--;
449 	/*
450 	 * if dentry was on the d_lru list delete it from there.
451 	 * inform the fs via d_prune that this dentry is about to be
452 	 * unhashed and destroyed.
453 	 */
454 	dentry_lru_prune(dentry);
455 	/* if it was on the hash then remove it */
456 	__d_drop(dentry);
457 	return d_kill(dentry, parent);
458 }
459 
460 /*
461  * This is dput
462  *
463  * This is complicated by the fact that we do not want to put
464  * dentries that are no longer on any hash chain on the unused
465  * list: we'd much rather just get rid of them immediately.
466  *
467  * However, that implies that we have to traverse the dentry
468  * tree upwards to the parents which might _also_ now be
469  * scheduled for deletion (it may have been only waiting for
470  * its last child to go away).
471  *
472  * This tail recursion is done by hand as we don't want to depend
473  * on the compiler to always get this right (gcc generally doesn't).
474  * Real recursion would eat up our stack space.
475  */
476 
477 /*
478  * dput - release a dentry
479  * @dentry: dentry to release
480  *
481  * Release a dentry. This will drop the usage count and if appropriate
482  * call the dentry unlink method as well as removing it from the queues and
483  * releasing its resources. If the parent dentries were scheduled for release
484  * they too may now get deleted.
485  */
486 void dput(struct dentry *dentry)
487 {
488 	if (!dentry)
489 		return;
490 
491 repeat:
492 	if (dentry->d_count == 1)
493 		might_sleep();
494 	spin_lock(&dentry->d_lock);
495 	BUG_ON(!dentry->d_count);
496 	if (dentry->d_count > 1) {
497 		dentry->d_count--;
498 		spin_unlock(&dentry->d_lock);
499 		return;
500 	}
501 
502 	if (dentry->d_flags & DCACHE_OP_DELETE) {
503 		if (dentry->d_op->d_delete(dentry))
504 			goto kill_it;
505 	}
506 
507 	/* Unreachable? Get rid of it */
508  	if (d_unhashed(dentry))
509 		goto kill_it;
510 
511 	/*
512 	 * If this dentry needs lookup, don't set the referenced flag so that it
513 	 * is more likely to be cleaned up by the dcache shrinker in case of
514 	 * memory pressure.
515 	 */
516 	if (!d_need_lookup(dentry))
517 		dentry->d_flags |= DCACHE_REFERENCED;
518 	dentry_lru_add(dentry);
519 
520 	dentry->d_count--;
521 	spin_unlock(&dentry->d_lock);
522 	return;
523 
524 kill_it:
525 	dentry = dentry_kill(dentry, 1);
526 	if (dentry)
527 		goto repeat;
528 }
529 EXPORT_SYMBOL(dput);
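
/*
 * Usage sketch (hypothetical): every reference taken via dget(),
 * dget_parent() or a successful d_lookup() must be balanced by dput().
 */
static void example_use_child(struct dentry *parent, struct qstr *name)
{
	struct dentry *child = d_lookup(parent, name);	/* takes a ref */

	if (child) {
		/* ... inspect child->d_inode, child->d_name, ... */
		dput(child);	/* drop the ref; may free the dentry */
	}
}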
530 
531 /**
532  * d_invalidate - invalidate a dentry
533  * @dentry: dentry to invalidate
534  *
535  * Try to invalidate the dentry if it turns out to be
536  * possible. If there are other dentries that can be
537  * reached through this one we can't delete it and we
538  * return -EBUSY. On success we return 0.
539  *
540  * no dcache lock.
541  */
542 
543 int d_invalidate(struct dentry * dentry)
544 {
545 	/*
546 	 * If it's already been dropped, return OK.
547 	 */
548 	spin_lock(&dentry->d_lock);
549 	if (d_unhashed(dentry)) {
550 		spin_unlock(&dentry->d_lock);
551 		return 0;
552 	}
553 	/*
554 	 * Check whether to do a partial shrink_dcache
555 	 * to get rid of unused child entries.
556 	 */
557 	if (!list_empty(&dentry->d_subdirs)) {
558 		spin_unlock(&dentry->d_lock);
559 		shrink_dcache_parent(dentry);
560 		spin_lock(&dentry->d_lock);
561 	}
562 
563 	/*
564 	 * Somebody else still using it?
565 	 *
566 	 * If it's a directory, we can't drop it
567 	 * for fear of somebody re-populating it
568 	 * with children (even though dropping it
569 	 * would make it unreachable from the root,
570 	 * we might still populate it if it was a
571 	 * working directory or similar).
572 	 * We also need to leave mountpoints alone,
573 	 * directory or not.
574 	 */
575 	if (dentry->d_count > 1 && dentry->d_inode) {
576 		if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
577 			spin_unlock(&dentry->d_lock);
578 			return -EBUSY;
579 		}
580 	}
581 
582 	__d_drop(dentry);
583 	spin_unlock(&dentry->d_lock);
584 	return 0;
585 }
586 EXPORT_SYMBOL(d_invalidate);
587 
588 /* This must be called with d_lock held */
589 static inline void __dget_dlock(struct dentry *dentry)
590 {
591 	dentry->d_count++;
592 }
593 
594 static inline void __dget(struct dentry *dentry)
595 {
596 	spin_lock(&dentry->d_lock);
597 	__dget_dlock(dentry);
598 	spin_unlock(&dentry->d_lock);
599 }
600 
601 struct dentry *dget_parent(struct dentry *dentry)
602 {
603 	struct dentry *ret;
604 
605 repeat:
606 	/*
607 	 * Don't need rcu_dereference because we re-check it was correct under
608 	 * the lock.
609 	 */
610 	rcu_read_lock();
611 	ret = dentry->d_parent;
612 	spin_lock(&ret->d_lock);
613 	if (unlikely(ret != dentry->d_parent)) {
614 		spin_unlock(&ret->d_lock);
615 		rcu_read_unlock();
616 		goto repeat;
617 	}
618 	rcu_read_unlock();
619 	BUG_ON(!ret->d_count);
620 	ret->d_count++;
621 	spin_unlock(&ret->d_lock);
622 	return ret;
623 }
624 EXPORT_SYMBOL(dget_parent);
625 
626 /**
627  * d_find_alias - grab a hashed alias of inode
628  * @inode: inode in question
629  * @want_discon:  flag, used by d_splice_alias, to request
630  *          that only a DISCONNECTED alias be returned.
631  *
632  * If inode has a hashed alias, or is a directory and has any alias,
633  * acquire the reference to alias and return it. Otherwise return NULL.
634  * Notice that if inode is a directory there can be only one alias and
635  * it can be unhashed only if it has no children, or if it is the root
636  * of a filesystem.
637  *
638  * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
639  * any other hashed alias over that one unless @want_discon is set,
640  * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
641  */
642 static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
643 {
644 	struct dentry *alias, *discon_alias;
645 
646 again:
647 	discon_alias = NULL;
648 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
649 		spin_lock(&alias->d_lock);
650  		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
651 			if (IS_ROOT(alias) &&
652 			    (alias->d_flags & DCACHE_DISCONNECTED)) {
653 				discon_alias = alias;
654 			} else if (!want_discon) {
655 				__dget_dlock(alias);
656 				spin_unlock(&alias->d_lock);
657 				return alias;
658 			}
659 		}
660 		spin_unlock(&alias->d_lock);
661 	}
662 	if (discon_alias) {
663 		alias = discon_alias;
664 		spin_lock(&alias->d_lock);
665 		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
666 			if (IS_ROOT(alias) &&
667 			    (alias->d_flags & DCACHE_DISCONNECTED)) {
668 				__dget_dlock(alias);
669 				spin_unlock(&alias->d_lock);
670 				return alias;
671 			}
672 		}
673 		spin_unlock(&alias->d_lock);
674 		goto again;
675 	}
676 	return NULL;
677 }
678 
679 struct dentry *d_find_alias(struct inode *inode)
680 {
681 	struct dentry *de = NULL;
682 
683 	if (!list_empty(&inode->i_dentry)) {
684 		spin_lock(&inode->i_lock);
685 		de = __d_find_alias(inode, 0);
686 		spin_unlock(&inode->i_lock);
687 	}
688 	return de;
689 }
690 EXPORT_SYMBOL(d_find_alias);
691 
692 /*
693  *	Try to kill dentries associated with this inode.
694  * WARNING: you must own a reference to inode.
695  */
696 void d_prune_aliases(struct inode *inode)
697 {
698 	struct dentry *dentry;
699 restart:
700 	spin_lock(&inode->i_lock);
701 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
702 		spin_lock(&dentry->d_lock);
703 		if (!dentry->d_count) {
704 			__dget_dlock(dentry);
705 			__d_drop(dentry);
706 			spin_unlock(&dentry->d_lock);
707 			spin_unlock(&inode->i_lock);
708 			dput(dentry);
709 			goto restart;
710 		}
711 		spin_unlock(&dentry->d_lock);
712 	}
713 	spin_unlock(&inode->i_lock);
714 }
715 EXPORT_SYMBOL(d_prune_aliases);
716 
717 /*
718  * Try to throw away a dentry - free the inode, dput the parent.
719  * Requires dentry->d_lock is held, and dentry->d_count == 0.
720  * Releases dentry->d_lock.
721  *
722  * This may fail if locks cannot be acquired; no problem, just try again.
723  */
724 static void try_prune_one_dentry(struct dentry *dentry)
725 	__releases(dentry->d_lock)
726 {
727 	struct dentry *parent;
728 
729 	parent = dentry_kill(dentry, 0);
730 	/*
731 	 * If dentry_kill returns NULL, we have nothing more to do.
732 	 * if it returns the same dentry, trylocks failed. In either
733 	 * case, just loop again.
734 	 *
735 	 * Otherwise, we need to prune ancestors too. This is necessary
736 	 * to prevent quadratic behavior of shrink_dcache_parent(), but
737 	 * is also expected to be beneficial in reducing dentry cache
738 	 * fragmentation.
739 	 */
740 	if (!parent)
741 		return;
742 	if (parent == dentry)
743 		return;
744 
745 	/* Prune ancestors. */
746 	dentry = parent;
747 	while (dentry) {
748 		spin_lock(&dentry->d_lock);
749 		if (dentry->d_count > 1) {
750 			dentry->d_count--;
751 			spin_unlock(&dentry->d_lock);
752 			return;
753 		}
754 		dentry = dentry_kill(dentry, 1);
755 	}
756 }
757 
758 static void shrink_dentry_list(struct list_head *list)
759 {
760 	struct dentry *dentry;
761 
762 	rcu_read_lock();
763 	for (;;) {
764 		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
765 		if (&dentry->d_lru == list)
766 			break; /* empty */
767 		spin_lock(&dentry->d_lock);
768 		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
769 			spin_unlock(&dentry->d_lock);
770 			continue;
771 		}
772 
773 		/*
774 		 * We found an inuse dentry which was not removed from
775 		 * the LRU because of laziness during lookup.  Do not free
776 		 * it - just keep it off the LRU list.
777 		 */
778 		if (dentry->d_count) {
779 			dentry_lru_del(dentry);
780 			spin_unlock(&dentry->d_lock);
781 			continue;
782 		}
783 
784 		rcu_read_unlock();
785 
786 		try_prune_one_dentry(dentry);
787 
788 		rcu_read_lock();
789 	}
790 	rcu_read_unlock();
791 }
792 
793 /**
794  * prune_dcache_sb - shrink the dcache
795  * @sb: superblock
796  * @count: number of entries to try to free
797  *
798  * Attempt to shrink the superblock dcache LRU by @count entries. This is
799  * done when we need more memory and is called from the superblock shrinker
800  * function.
801  *
802  * This function may fail to free any resources if all the dentries are in
803  * use.
804  */
805 void prune_dcache_sb(struct super_block *sb, int count)
806 {
807 	struct dentry *dentry;
808 	LIST_HEAD(referenced);
809 	LIST_HEAD(tmp);
810 
811 relock:
812 	spin_lock(&dcache_lru_lock);
813 	while (!list_empty(&sb->s_dentry_lru)) {
814 		dentry = list_entry(sb->s_dentry_lru.prev,
815 				struct dentry, d_lru);
816 		BUG_ON(dentry->d_sb != sb);
817 
818 		if (!spin_trylock(&dentry->d_lock)) {
819 			spin_unlock(&dcache_lru_lock);
820 			cpu_relax();
821 			goto relock;
822 		}
823 
824 		if (dentry->d_flags & DCACHE_REFERENCED) {
825 			dentry->d_flags &= ~DCACHE_REFERENCED;
826 			list_move(&dentry->d_lru, &referenced);
827 			spin_unlock(&dentry->d_lock);
828 		} else {
829 			list_move_tail(&dentry->d_lru, &tmp);
830 			dentry->d_flags |= DCACHE_SHRINK_LIST;
831 			spin_unlock(&dentry->d_lock);
832 			if (!--count)
833 				break;
834 		}
835 		cond_resched_lock(&dcache_lru_lock);
836 	}
837 	if (!list_empty(&referenced))
838 		list_splice(&referenced, &sb->s_dentry_lru);
839 	spin_unlock(&dcache_lru_lock);
840 
841 	shrink_dentry_list(&tmp);
842 }
843 
844 /**
845  * shrink_dcache_sb - shrink dcache for a superblock
846  * @sb: superblock
847  *
848  * Shrink the dcache for the specified super block. This is used to free
849  * the dcache before unmounting a file system.
850  */
851 void shrink_dcache_sb(struct super_block *sb)
852 {
853 	LIST_HEAD(tmp);
854 
855 	spin_lock(&dcache_lru_lock);
856 	while (!list_empty(&sb->s_dentry_lru)) {
857 		list_splice_init(&sb->s_dentry_lru, &tmp);
858 		spin_unlock(&dcache_lru_lock);
859 		shrink_dentry_list(&tmp);
860 		spin_lock(&dcache_lru_lock);
861 	}
862 	spin_unlock(&dcache_lru_lock);
863 }
864 EXPORT_SYMBOL(shrink_dcache_sb);
865 
866 /*
867  * destroy a single subtree of dentries for unmount
868  * - see the comments on shrink_dcache_for_umount() for a description of the
869  *   locking
870  */
871 static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
872 {
873 	struct dentry *parent;
874 
875 	BUG_ON(!IS_ROOT(dentry));
876 
877 	for (;;) {
878 		/* descend to the first leaf in the current subtree */
879 		while (!list_empty(&dentry->d_subdirs))
880 			dentry = list_entry(dentry->d_subdirs.next,
881 					    struct dentry, d_u.d_child);
882 
883 		/* consume the dentries from this leaf up through its parents
884 		 * until we find one with children or run out altogether */
885 		do {
886 			struct inode *inode;
887 
888 			/*
889 			 * remove the dentry from the lru, and inform
890 			 * the fs that this dentry is about to be
891 			 * unhashed and destroyed.
892 			 */
893 			dentry_lru_prune(dentry);
894 			__d_shrink(dentry);
895 
896 			if (dentry->d_count != 0) {
897 				printk(KERN_ERR
898 				       "BUG: Dentry %p{i=%lx,n=%s}"
899 				       " still in use (%d)"
900 				       " [unmount of %s %s]\n",
901 				       dentry,
902 				       dentry->d_inode ?
903 				       dentry->d_inode->i_ino : 0UL,
904 				       dentry->d_name.name,
905 				       dentry->d_count,
906 				       dentry->d_sb->s_type->name,
907 				       dentry->d_sb->s_id);
908 				BUG();
909 			}
910 
911 			if (IS_ROOT(dentry)) {
912 				parent = NULL;
913 				list_del(&dentry->d_u.d_child);
914 			} else {
915 				parent = dentry->d_parent;
916 				parent->d_count--;
917 				list_del(&dentry->d_u.d_child);
918 			}
919 
920 			inode = dentry->d_inode;
921 			if (inode) {
922 				dentry->d_inode = NULL;
923 				list_del_init(&dentry->d_alias);
924 				if (dentry->d_op && dentry->d_op->d_iput)
925 					dentry->d_op->d_iput(dentry, inode);
926 				else
927 					iput(inode);
928 			}
929 
930 			d_free(dentry);
931 
932 			/* finished when we fall off the top of the tree,
933 			 * otherwise we ascend to the parent and move to the
934 			 * next sibling if there is one */
935 			if (!parent)
936 				return;
937 			dentry = parent;
938 		} while (list_empty(&dentry->d_subdirs));
939 
940 		dentry = list_entry(dentry->d_subdirs.next,
941 				    struct dentry, d_u.d_child);
942 	}
943 }
944 
945 /*
946  * destroy the dentries attached to a superblock on unmounting
947  * - we don't need to use dentry->d_lock because:
948  *   - the superblock is detached from all mountings and open files, so the
949  *     dentry trees will not be rearranged by the VFS
950  *   - s_umount is write-locked, so the memory pressure shrinker will ignore
951  *     any dentries belonging to this superblock that it comes across
952  *   - the filesystem itself is no longer permitted to rearrange the dentries
953  *     in this superblock
954  */
955 void shrink_dcache_for_umount(struct super_block *sb)
956 {
957 	struct dentry *dentry;
958 
959 	if (down_read_trylock(&sb->s_umount))
960 		BUG();
961 
962 	dentry = sb->s_root;
963 	sb->s_root = NULL;
964 	dentry->d_count--;
965 	shrink_dcache_for_umount_subtree(dentry);
966 
967 	while (!hlist_bl_empty(&sb->s_anon)) {
968 		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
969 		shrink_dcache_for_umount_subtree(dentry);
970 	}
971 }
972 
973 /*
974  * This tries to ascend one level of parenthood, but
975  * we can race with renaming, so we need to re-check
976  * the parenthood after dropping the lock and check
977  * that the sequence number still matches.
978  */
979 static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
980 {
981 	struct dentry *new = old->d_parent;
982 
983 	rcu_read_lock();
984 	spin_unlock(&old->d_lock);
985 	spin_lock(&new->d_lock);
986 
987 	/*
988 	 * might go back up the wrong parent if we have had a rename
989 	 * or deletion
990 	 */
991 	if (new != old->d_parent ||
992 		 (old->d_flags & DCACHE_DISCONNECTED) ||
993 		 (!locked && read_seqretry(&rename_lock, seq))) {
994 		spin_unlock(&new->d_lock);
995 		new = NULL;
996 	}
997 	rcu_read_unlock();
998 	return new;
999 }
1000 
1001 
1002 /*
1003  * Search for at least 1 mount point in the dentry's subdirs.
1004  * We descend to the next level whenever the d_subdirs
1005  * list is non-empty and continue searching.
1006  */
1007 
1008 /**
1009  * have_submounts - check for mounts over a dentry
1010  * @parent: dentry to check.
1011  *
1012  * Return true if the parent or its subdirectories contain
1013  * a mount point
1014  */
1015 int have_submounts(struct dentry *parent)
1016 {
1017 	struct dentry *this_parent;
1018 	struct list_head *next;
1019 	unsigned seq;
1020 	int locked = 0;
1021 
1022 	seq = read_seqbegin(&rename_lock);
1023 again:
1024 	this_parent = parent;
1025 
1026 	if (d_mountpoint(parent))
1027 		goto positive;
1028 	spin_lock(&this_parent->d_lock);
1029 repeat:
1030 	next = this_parent->d_subdirs.next;
1031 resume:
1032 	while (next != &this_parent->d_subdirs) {
1033 		struct list_head *tmp = next;
1034 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1035 		next = tmp->next;
1036 
1037 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1038 		/* Have we found a mount point ? */
1039 		if (d_mountpoint(dentry)) {
1040 			spin_unlock(&dentry->d_lock);
1041 			spin_unlock(&this_parent->d_lock);
1042 			goto positive;
1043 		}
1044 		if (!list_empty(&dentry->d_subdirs)) {
1045 			spin_unlock(&this_parent->d_lock);
1046 			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1047 			this_parent = dentry;
1048 			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1049 			goto repeat;
1050 		}
1051 		spin_unlock(&dentry->d_lock);
1052 	}
1053 	/*
1054 	 * All done at this level ... ascend and resume the search.
1055 	 */
1056 	if (this_parent != parent) {
1057 		struct dentry *child = this_parent;
1058 		this_parent = try_to_ascend(this_parent, locked, seq);
1059 		if (!this_parent)
1060 			goto rename_retry;
1061 		next = child->d_u.d_child.next;
1062 		goto resume;
1063 	}
1064 	spin_unlock(&this_parent->d_lock);
1065 	if (!locked && read_seqretry(&rename_lock, seq))
1066 		goto rename_retry;
1067 	if (locked)
1068 		write_sequnlock(&rename_lock);
1069 	return 0; /* No mount points found in tree */
1070 positive:
1071 	if (!locked && read_seqretry(&rename_lock, seq))
1072 		goto rename_retry;
1073 	if (locked)
1074 		write_sequnlock(&rename_lock);
1075 	return 1;
1076 
1077 rename_retry:
1078 	locked = 1;
1079 	write_seqlock(&rename_lock);
1080 	goto again;
1081 }
1082 EXPORT_SYMBOL(have_submounts);
1083 
1084 /*
1085  * Search the dentry child list for the specified parent,
1086  * and move any unused dentries to the end of the unused
1087  * list for prune_dcache(). We descend to the next level
1088  * whenever the d_subdirs list is non-empty and continue
1089  * searching.
1090  *
1091  * It returns zero iff there are no unused children,
1092  * otherwise it returns the number of children moved to
1093  * the end of the unused list. This may not be the total
1094  * number of unused children, because select_parent can
1095  * drop the lock and return early due to latency
1096  * constraints.
1097  */
1098 static int select_parent(struct dentry *parent, struct list_head *dispose)
1099 {
1100 	struct dentry *this_parent;
1101 	struct list_head *next;
1102 	unsigned seq;
1103 	int found = 0;
1104 	int locked = 0;
1105 
1106 	seq = read_seqbegin(&rename_lock);
1107 again:
1108 	this_parent = parent;
1109 	spin_lock(&this_parent->d_lock);
1110 repeat:
1111 	next = this_parent->d_subdirs.next;
1112 resume:
1113 	while (next != &this_parent->d_subdirs) {
1114 		struct list_head *tmp = next;
1115 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1116 		next = tmp->next;
1117 
1118 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1119 
1120 		/*
1121 		 * move only zero ref count dentries to the dispose list.
1122 		 *
1123 		 * Those which are presently on the shrink list, being processed
1124 		 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
1125 		 * loop in shrink_dcache_parent() might not make any progress
1126 		 * and loop forever.
1127 		 */
1128 		if (dentry->d_count) {
1129 			dentry_lru_del(dentry);
1130 		} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1131 			dentry_lru_move_list(dentry, dispose);
1132 			dentry->d_flags |= DCACHE_SHRINK_LIST;
1133 			found++;
1134 		}
1135 		/*
1136 		 * We can return to the caller if we have found some (this
1137 		 * ensures forward progress). We'll be coming back to find
1138 		 * the rest.
1139 		 */
1140 		if (found && need_resched()) {
1141 			spin_unlock(&dentry->d_lock);
1142 			goto out;
1143 		}
1144 
1145 		/*
1146 		 * Descend a level if the d_subdirs list is non-empty.
1147 		 */
1148 		if (!list_empty(&dentry->d_subdirs)) {
1149 			spin_unlock(&this_parent->d_lock);
1150 			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1151 			this_parent = dentry;
1152 			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1153 			goto repeat;
1154 		}
1155 
1156 		spin_unlock(&dentry->d_lock);
1157 	}
1158 	/*
1159 	 * All done at this level ... ascend and resume the search.
1160 	 */
1161 	if (this_parent != parent) {
1162 		struct dentry *child = this_parent;
1163 		this_parent = try_to_ascend(this_parent, locked, seq);
1164 		if (!this_parent)
1165 			goto rename_retry;
1166 		next = child->d_u.d_child.next;
1167 		goto resume;
1168 	}
1169 out:
1170 	spin_unlock(&this_parent->d_lock);
1171 	if (!locked && read_seqretry(&rename_lock, seq))
1172 		goto rename_retry;
1173 	if (locked)
1174 		write_sequnlock(&rename_lock);
1175 	return found;
1176 
1177 rename_retry:
1178 	if (found)
1179 		return found;
1180 	locked = 1;
1181 	write_seqlock(&rename_lock);
1182 	goto again;
1183 }
1184 
1185 /**
1186  * shrink_dcache_parent - prune dcache
1187  * @parent: parent of entries to prune
1188  *
1189  * Prune the dcache to remove unused children of the parent dentry.
1190  */
1191 void shrink_dcache_parent(struct dentry * parent)
1192 {
1193 	LIST_HEAD(dispose);
1194 	int found;
1195 
1196 	while ((found = select_parent(parent, &dispose)) != 0)
1197 		shrink_dentry_list(&dispose);
1198 }
1199 EXPORT_SYMBOL(shrink_dcache_parent);
1200 
1201 /**
1202  * __d_alloc	-	allocate a dcache entry
1203  * @sb: filesystem it will belong to
1204  * @name: qstr of the name
1205  *
1206  * Allocates a dentry. It returns %NULL if there is insufficient memory
1207  * available. On success the dentry is returned. The name passed in is
1208  * copied, so the caller's copy may be reused after this call.
1209  */
1210 
1211 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1212 {
1213 	struct dentry *dentry;
1214 	char *dname;
1215 
1216 	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1217 	if (!dentry)
1218 		return NULL;
1219 
1220 	if (name->len > DNAME_INLINE_LEN-1) {
1221 		dname = kmalloc(name->len + 1, GFP_KERNEL);
1222 		if (!dname) {
1223 			kmem_cache_free(dentry_cache, dentry);
1224 			return NULL;
1225 		}
1226 	} else  {
1227 		dname = dentry->d_iname;
1228 	}
1229 	dentry->d_name.name = dname;
1230 
1231 	dentry->d_name.len = name->len;
1232 	dentry->d_name.hash = name->hash;
1233 	memcpy(dname, name->name, name->len);
1234 	dname[name->len] = 0;
1235 
1236 	dentry->d_count = 1;
1237 	dentry->d_flags = 0;
1238 	spin_lock_init(&dentry->d_lock);
1239 	seqcount_init(&dentry->d_seq);
1240 	dentry->d_inode = NULL;
1241 	dentry->d_parent = dentry;
1242 	dentry->d_sb = sb;
1243 	dentry->d_op = NULL;
1244 	dentry->d_fsdata = NULL;
1245 	INIT_HLIST_BL_NODE(&dentry->d_hash);
1246 	INIT_LIST_HEAD(&dentry->d_lru);
1247 	INIT_LIST_HEAD(&dentry->d_subdirs);
1248 	INIT_LIST_HEAD(&dentry->d_alias);
1249 	INIT_LIST_HEAD(&dentry->d_u.d_child);
1250 	d_set_d_op(dentry, dentry->d_sb->s_d_op);
1251 
1252 	this_cpu_inc(nr_dentry);
1253 
1254 	return dentry;
1255 }
1256 
1257 /**
1258  * d_alloc	-	allocate a dcache entry
1259  * @parent: parent of entry to allocate
1260  * @name: qstr of the name
1261  *
1262  * Allocates a dentry. It returns %NULL if there is insufficient memory
1263  * available. On success the dentry is returned. The name passed in is
1264  * copied, so the caller's copy may be reused after this call.
1265  */
1266 struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1267 {
1268 	struct dentry *dentry = __d_alloc(parent->d_sb, name);
1269 	if (!dentry)
1270 		return NULL;
1271 
1272 	spin_lock(&parent->d_lock);
1273 	/*
1274 	 * don't need child lock because it is not subject
1275 	 * to concurrency here
1276 	 */
1277 	__dget_dlock(parent);
1278 	dentry->d_parent = parent;
1279 	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1280 	spin_unlock(&parent->d_lock);
1281 
1282 	return dentry;
1283 }
1284 EXPORT_SYMBOL(d_alloc);
1285 
1286 struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1287 {
1288 	struct dentry *dentry = __d_alloc(sb, name);
1289 	if (dentry)
1290 		dentry->d_flags |= DCACHE_DISCONNECTED;
1291 	return dentry;
1292 }
1293 EXPORT_SYMBOL(d_alloc_pseudo);
1294 
1295 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1296 {
1297 	struct qstr q;
1298 
1299 	q.name = name;
1300 	q.len = strlen(name);
1301 	q.hash = full_name_hash(q.name, q.len);
1302 	return d_alloc(parent, &q);
1303 }
1304 EXPORT_SYMBOL(d_alloc_name);
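
/*
 * Sketch in the style of simple in-kernel filesystems (hypothetical):
 * d_alloc_name() followed by d_add() is the usual way a pseudo-filesystem
 * populates a directory it owns with an already-set-up inode.
 */
static struct dentry *example_add_child(struct dentry *dir, const char *name,
					struct inode *inode)
{
	struct dentry *dentry = d_alloc_name(dir, name);

	if (!dentry)
		return NULL;
	d_add(dentry, inode);	/* instantiate and hash in one go */
	return dentry;
}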
1305 
1306 void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1307 {
1308 	WARN_ON_ONCE(dentry->d_op);
1309 	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
1310 				DCACHE_OP_COMPARE	|
1311 				DCACHE_OP_REVALIDATE	|
1312 				DCACHE_OP_DELETE ));
1313 	dentry->d_op = op;
1314 	if (!op)
1315 		return;
1316 	if (op->d_hash)
1317 		dentry->d_flags |= DCACHE_OP_HASH;
1318 	if (op->d_compare)
1319 		dentry->d_flags |= DCACHE_OP_COMPARE;
1320 	if (op->d_revalidate)
1321 		dentry->d_flags |= DCACHE_OP_REVALIDATE;
1322 	if (op->d_delete)
1323 		dentry->d_flags |= DCACHE_OP_DELETE;
1324 	if (op->d_prune)
1325 		dentry->d_flags |= DCACHE_OP_PRUNE;
1326 
1327 }
1328 EXPORT_SYMBOL(d_set_d_op);
1329 
1330 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1331 {
1332 	spin_lock(&dentry->d_lock);
1333 	if (inode) {
1334 		if (unlikely(IS_AUTOMOUNT(inode)))
1335 			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
1336 		list_add(&dentry->d_alias, &inode->i_dentry);
1337 	}
1338 	dentry->d_inode = inode;
1339 	dentry_rcuwalk_barrier(dentry);
1340 	spin_unlock(&dentry->d_lock);
1341 	fsnotify_d_instantiate(dentry, inode);
1342 }
1343 
1344 /**
1345  * d_instantiate - fill in inode information for a dentry
1346  * @entry: dentry to complete
1347  * @inode: inode to attach to this dentry
1348  *
1349  * Fill in inode information in the entry.
1350  *
1351  * This turns negative dentries into productive full members
1352  * of society.
1353  *
1354  * NOTE! This assumes that the inode count has been incremented
1355  * (or otherwise set) by the caller to indicate that it is now
1356  * in use by the dcache.
1357  */
1358 
1359 void d_instantiate(struct dentry *entry, struct inode * inode)
1360 {
1361 	BUG_ON(!list_empty(&entry->d_alias));
1362 	if (inode)
1363 		spin_lock(&inode->i_lock);
1364 	__d_instantiate(entry, inode);
1365 	if (inode)
1366 		spin_unlock(&inode->i_lock);
1367 	security_d_instantiate(entry, inode);
1368 }
1369 EXPORT_SYMBOL(d_instantiate);
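
/*
 * Hedged sketch of the classic ->create() tail (hypothetical, with the
 * inode setup elided): the filesystem allocates and fills an inode, then
 * binds it to the negative dentry handed in by the VFS.
 */
static int example_create(struct inode *dir, struct dentry *dentry, int mode)
{
	struct inode *inode = new_inode(dir->i_sb);

	if (!inode)
		return -ENOMEM;
	inode->i_mode = mode;
	d_instantiate(dentry, inode);	/* dentry is now positive */
	return 0;
}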
1370 
1371 /**
1372  * d_instantiate_unique - instantiate a non-aliased dentry
1373  * @entry: dentry to instantiate
1374  * @inode: inode to attach to this dentry
1375  *
1376  * Fill in inode information in the entry. On success, it returns NULL.
1377  * If an unhashed alias of "entry" already exists, then we return the
1378  * aliased dentry instead and drop one reference to inode.
1379  *
1380  * Note that in order to avoid conflicts with rename() etc, the caller
1381  * had better be holding the parent directory semaphore.
1382  *
1383  * This also assumes that the inode count has been incremented
1384  * (or otherwise set) by the caller to indicate that it is now
1385  * in use by the dcache.
1386  */
1387 static struct dentry *__d_instantiate_unique(struct dentry *entry,
1388 					     struct inode *inode)
1389 {
1390 	struct dentry *alias;
1391 	int len = entry->d_name.len;
1392 	const char *name = entry->d_name.name;
1393 	unsigned int hash = entry->d_name.hash;
1394 
1395 	if (!inode) {
1396 		__d_instantiate(entry, NULL);
1397 		return NULL;
1398 	}
1399 
1400 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1401 		struct qstr *qstr = &alias->d_name;
1402 
1403 		/*
1404 		 * Don't need alias->d_lock here, because aliases with
1405 		 * d_parent == entry->d_parent are not subject to name or
1406 		 * parent changes, because the parent inode i_mutex is held.
1407 		 */
1408 		if (qstr->hash != hash)
1409 			continue;
1410 		if (alias->d_parent != entry->d_parent)
1411 			continue;
1412 		if (dentry_cmp(qstr->name, qstr->len, name, len))
1413 			continue;
1414 		__dget(alias);
1415 		return alias;
1416 	}
1417 
1418 	__d_instantiate(entry, inode);
1419 	return NULL;
1420 }
1421 
1422 struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1423 {
1424 	struct dentry *result;
1425 
1426 	BUG_ON(!list_empty(&entry->d_alias));
1427 
1428 	if (inode)
1429 		spin_lock(&inode->i_lock);
1430 	result = __d_instantiate_unique(entry, inode);
1431 	if (inode)
1432 		spin_unlock(&inode->i_lock);
1433 
1434 	if (!result) {
1435 		security_d_instantiate(entry, inode);
1436 		return NULL;
1437 	}
1438 
1439 	BUG_ON(!d_unhashed(result));
1440 	iput(inode);
1441 	return result;
1442 }
1443 
1444 EXPORT_SYMBOL(d_instantiate_unique);
1445 
1446 /**
1447  * d_alloc_root - allocate root dentry
1448  * @root_inode: inode to allocate the root for
1449  *
1450  * Allocate a root ("/") dentry for the inode given. The inode is
1451  * instantiated and returned. %NULL is returned if there is insufficient
1452  * memory or the inode passed is %NULL.
1453  */
1454 
1455 struct dentry * d_alloc_root(struct inode * root_inode)
1456 {
1457 	struct dentry *res = NULL;
1458 
1459 	if (root_inode) {
1460 		static const struct qstr name = { .name = "/", .len = 1 };
1461 
1462 		res = __d_alloc(root_inode->i_sb, &name);
1463 		if (res)
1464 			d_instantiate(res, root_inode);
1465 	}
1466 	return res;
1467 }
1468 EXPORT_SYMBOL(d_alloc_root);
1469 
1470 struct dentry *d_make_root(struct inode *root_inode)
1471 {
1472 	struct dentry *res = NULL;
1473 
1474 	if (root_inode) {
1475 		static const struct qstr name = { .name = "/", .len = 1 };
1476 
1477 		res = __d_alloc(root_inode->i_sb, &name);
1478 		if (res)
1479 			d_instantiate(res, root_inode);
1480 		else
1481 			iput(root_inode);
1482 	}
1483 	return res;
1484 }
1485 EXPORT_SYMBOL(d_make_root);
1486 
1487 static struct dentry * __d_find_any_alias(struct inode *inode)
1488 {
1489 	struct dentry *alias;
1490 
1491 	if (list_empty(&inode->i_dentry))
1492 		return NULL;
1493 	alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1494 	__dget(alias);
1495 	return alias;
1496 }
1497 
1498 /**
1499  * d_find_any_alias - find any alias for a given inode
1500  * @inode: inode to find an alias for
1501  *
1502  * If any aliases exist for the given inode, take and return a
1503  * reference for one of them.  If no aliases exist, return %NULL.
1504  */
1505 struct dentry *d_find_any_alias(struct inode *inode)
1506 {
1507 	struct dentry *de;
1508 
1509 	spin_lock(&inode->i_lock);
1510 	de = __d_find_any_alias(inode);
1511 	spin_unlock(&inode->i_lock);
1512 	return de;
1513 }
1514 EXPORT_SYMBOL(d_find_any_alias);
1515 
1516 /**
1517  * d_obtain_alias - find or allocate a dentry for a given inode
1518  * @inode: inode to allocate the dentry for
1519  *
1520  * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1521  * similar open by handle operations.  The returned dentry may be anonymous,
1522  * or may have a full name (if the inode was already in the cache).
1523  *
1524  * When called on a directory inode, we must ensure that the inode only ever
1525  * has one dentry.  If a dentry is found, that is returned instead of
1526  * allocating a new one.
1527  *
1528  * On successful return, the reference to the inode has been transferred
1529  * to the dentry.  In case of an error the reference on the inode is released.
1530  * To make it easier to use in export operations a %NULL or IS_ERR inode may
1531  * be passed in and the error will be propagated to the return value,
1532  * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1533  */
1534 struct dentry *d_obtain_alias(struct inode *inode)
1535 {
1536 	static const struct qstr anonstring = { .name = "" };
1537 	struct dentry *tmp;
1538 	struct dentry *res;
1539 
1540 	if (!inode)
1541 		return ERR_PTR(-ESTALE);
1542 	if (IS_ERR(inode))
1543 		return ERR_CAST(inode);
1544 
1545 	res = d_find_any_alias(inode);
1546 	if (res)
1547 		goto out_iput;
1548 
1549 	tmp = __d_alloc(inode->i_sb, &anonstring);
1550 	if (!tmp) {
1551 		res = ERR_PTR(-ENOMEM);
1552 		goto out_iput;
1553 	}
1554 
1555 	spin_lock(&inode->i_lock);
1556 	res = __d_find_any_alias(inode);
1557 	if (res) {
1558 		spin_unlock(&inode->i_lock);
1559 		dput(tmp);
1560 		goto out_iput;
1561 	}
1562 
1563 	/* attach a disconnected dentry */
1564 	spin_lock(&tmp->d_lock);
1565 	tmp->d_inode = inode;
1566 	tmp->d_flags |= DCACHE_DISCONNECTED;
1567 	list_add(&tmp->d_alias, &inode->i_dentry);
1568 	hlist_bl_lock(&tmp->d_sb->s_anon);
1569 	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1570 	hlist_bl_unlock(&tmp->d_sb->s_anon);
1571 	spin_unlock(&tmp->d_lock);
1572 	spin_unlock(&inode->i_lock);
1573 	security_d_instantiate(tmp, inode);
1574 
1575 	return tmp;
1576 
1577  out_iput:
1578 	if (res && !IS_ERR(res))
1579 		security_d_instantiate(res, inode);
1580 	iput(inode);
1581 	return res;
1582 }
1583 EXPORT_SYMBOL(d_obtain_alias);
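
/*
 * Sketch of the export-operations pattern that motivates d_obtain_alias()
 * (hypothetical helper example_iget_by_ino is assumed): decode the handle
 * to an inode and let d_obtain_alias() find or create a dentry; NULL and
 * IS_ERR inodes are propagated as errors, per the comment above.
 */
static struct inode *example_iget_by_ino(struct super_block *sb, u64 ino);

static struct dentry *example_fh_to_dentry(struct super_block *sb, u64 ino)
{
	return d_obtain_alias(example_iget_by_ino(sb, ino));
}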
1584 
1585 /**
1586  * d_splice_alias - splice a disconnected dentry into the tree if one exists
1587  * @inode:  the inode which may have a disconnected dentry
1588  * @dentry: a negative dentry which we want to point to the inode.
1589  *
1590  * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1591  * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1592  * and return it, else simply d_add the inode to the dentry and return NULL.
1593  *
1594  * This is needed in the lookup routine of any filesystem that is exportable
1595  * (via knfsd) so that we can build dcache paths to directories effectively.
1596  *
1597  * If a dentry was found and moved, then it is returned.  Otherwise NULL
1598  * is returned.  This matches the expected return value of ->lookup.
1599  *
1600  */
1601 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1602 {
1603 	struct dentry *new = NULL;
1604 
1605 	if (IS_ERR(inode))
1606 		return ERR_CAST(inode);
1607 
1608 	if (inode && S_ISDIR(inode->i_mode)) {
1609 		spin_lock(&inode->i_lock);
1610 		new = __d_find_alias(inode, 1);
1611 		if (new) {
1612 			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1613 			spin_unlock(&inode->i_lock);
1614 			security_d_instantiate(new, inode);
1615 			d_move(new, dentry);
1616 			iput(inode);
1617 		} else {
1618 			/* already taking inode->i_lock, so d_add() by hand */
1619 			__d_instantiate(dentry, inode);
1620 			spin_unlock(&inode->i_lock);
1621 			security_d_instantiate(dentry, inode);
1622 			d_rehash(dentry);
1623 		}
1624 	} else
1625 		d_add(dentry, inode);
1626 	return new;
1627 }
1628 EXPORT_SYMBOL(d_splice_alias);
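
/*
 * Usage sketch (hypothetical; example_iget is an assumed fs-specific
 * helper): an exportable filesystem's ->lookup() typically ends this way,
 * so a directory already known under a disconnected alias is reconnected
 * rather than given a second dentry.
 */
static struct inode *example_iget(struct inode *dir, const struct qstr *name);

static struct dentry *example_lookup(struct inode *dir, struct dentry *dentry,
				     struct nameidata *nd)
{
	struct inode *inode = example_iget(dir, &dentry->d_name);

	if (IS_ERR(inode))
		return ERR_CAST(inode);
	return d_splice_alias(inode, dentry);	/* NULL or the moved alias */
}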
1629 
1630 /**
1631  * d_add_ci - lookup or allocate new dentry with case-exact name
1632  * @inode:  the inode case-insensitive lookup has found
1633  * @dentry: the negative dentry that was passed to the parent's lookup func
1634  * @name:   the case-exact name to be associated with the returned dentry
1635  *
1636  * This is to avoid filling the dcache with case-insensitive names to the
1637  * same inode, only the actual correct case is stored in the dcache for
1638  * case-insensitive filesystems.
1639  *
1640  * For a case-insensitive lookup match, if the case-exact dentry
1641  * already exists in the dcache, use it and return it.
1642  *
1643  * If no entry exists with the exact case name, allocate new dentry with
1644  * the exact case, and return the spliced entry.
1645  */
1646 struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1647 			struct qstr *name)
1648 {
1649 	int error;
1650 	struct dentry *found;
1651 	struct dentry *new;
1652 
1653 	/*
1654 	 * First check if a dentry matching the name already exists,
1655 	 * if not go ahead and create it now.
1656 	 */
1657 	found = d_hash_and_lookup(dentry->d_parent, name);
1658 	if (!found) {
1659 		new = d_alloc(dentry->d_parent, name);
1660 		if (!new) {
1661 			error = -ENOMEM;
1662 			goto err_out;
1663 		}
1664 
1665 		found = d_splice_alias(inode, new);
1666 		if (found) {
1667 			dput(new);
1668 			return found;
1669 		}
1670 		return new;
1671 	}
1672 
1673 	/*
1674 	 * If a matching dentry exists, and it's not negative use it.
1675 	 *
1676 	 * Decrement the reference count to balance the iget() done
1677 	 * earlier on.
1678 	 */
1679 	if (found->d_inode) {
1680 		if (unlikely(found->d_inode != inode)) {
1681 			/* This can't happen because bad inodes are unhashed. */
1682 			BUG_ON(!is_bad_inode(inode));
1683 			BUG_ON(!is_bad_inode(found->d_inode));
1684 		}
1685 		iput(inode);
1686 		return found;
1687 	}
1688 
1689 	/*
1690 	 * We are going to instantiate this dentry, unhash it and clear the
1691 	 * lookup flag so we can do that.
1692 	 */
1693 	if (unlikely(d_need_lookup(found)))
1694 		d_clear_need_lookup(found);
1695 
1696 	/*
1697 	 * Negative dentry: instantiate it unless the inode is a directory and
1698 	 * already has a dentry.
1699 	 */
1700 	new = d_splice_alias(inode, found);
1701 	if (new) {
1702 		dput(found);
1703 		found = new;
1704 	}
1705 	return found;
1706 
1707 err_out:
1708 	iput(inode);
1709 	return ERR_PTR(error);
1710 }
1711 EXPORT_SYMBOL(d_add_ci);
1712 
1713 /**
1714  * __d_lookup_rcu - search for a dentry (racy, store-free)
1715  * @parent: parent dentry
1716  * @name: qstr of name we wish to find
1717  * @seq: returns d_seq value at the point where the dentry was found
1718  * @inode: returns dentry->d_inode when the inode was found valid.
1719  * Returns: dentry, or NULL
1720  *
1721  * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1722  * resolution (store-free path walking) design described in
1723  * Documentation/filesystems/path-lookup.txt.
1724  *
1725  * This is not to be used outside core vfs.
1726  *
1727  * __d_lookup_rcu must only be used in rcu-walk mode, i.e. with vfsmount lock
1728  * held, and rcu_read_lock held. The returned dentry must not be stored
1729  * without taking d_lock and checking d_seq sequence count against @seq
1730  * returned here.
1731  *
1732  * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1733  * function.
1734  *
1735  * Alternatively, __d_lookup_rcu may be called again to look up the child of
1736  * the returned dentry, so long as its parent's seqlock is checked after the
1737  * child is looked up. Thus, an interlocking stepping of sequence lock checks
1738  * is formed, giving integrity down the path walk.
1739  */
1740 struct dentry *__d_lookup_rcu(const struct dentry *parent,
1741 				const struct qstr *name,
1742 				unsigned *seqp, struct inode **inode)
1743 {
1744 	unsigned int len = name->len;
1745 	unsigned int hash = name->hash;
1746 	const unsigned char *str = name->name;
1747 	struct hlist_bl_head *b = d_hash(parent, hash);
1748 	struct hlist_bl_node *node;
1749 	struct dentry *dentry;
1750 
1751 	/*
1752	 * Note: There is significant duplication with __d_lookup which is
1753	 * required to prevent single threaded performance regressions
1754	 * especially on architectures where smp_rmb (in seqcounts) is costly.
1755 	 * Keep the two functions in sync.
1756 	 */
1757 
1758 	/*
1759 	 * The hash list is protected using RCU.
1760 	 *
1761 	 * Carefully use d_seq when comparing a candidate dentry, to avoid
1762 	 * races with d_move().
1763 	 *
1764 	 * It is possible that concurrent renames can mess up our list
1765 	 * walk here and result in missing our dentry, resulting in the
1766 	 * false-negative result. d_lookup() protects against concurrent
1767 	 * renames using rename_lock seqlock.
1768 	 *
1769 	 * See Documentation/filesystems/path-lookup.txt for more details.
1770 	 */
1771 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1772 		unsigned seq;
1773 		struct inode *i;
1774 		const char *tname;
1775 		int tlen;
1776 
1777 		if (dentry->d_name.hash != hash)
1778 			continue;
1779 
1780 seqretry:
1781 		seq = read_seqcount_begin(&dentry->d_seq);
1782 		if (dentry->d_parent != parent)
1783 			continue;
1784 		if (d_unhashed(dentry))
1785 			continue;
1786 		tlen = dentry->d_name.len;
1787 		tname = dentry->d_name.name;
1788 		i = dentry->d_inode;
1789 		prefetch(tname);
1790 		/*
1791 		 * This seqcount check is required to ensure name and
1792 		 * len are loaded atomically, so as not to walk off the
1793 		 * edge of memory when walking. If we could load this
1794 		 * atomically some other way, we could drop this check.
1795 		 */
1796 		if (read_seqcount_retry(&dentry->d_seq, seq))
1797 			goto seqretry;
1798 		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1799 			if (parent->d_op->d_compare(parent, *inode,
1800 						dentry, i,
1801 						tlen, tname, name))
1802 				continue;
1803 		} else {
1804 			if (dentry_cmp(tname, tlen, str, len))
1805 				continue;
1806 		}
1807 		/*
1808 		 * No extra seqcount check is required after the name
1809 		 * compare. The caller must perform a seqcount check in
1810 		 * order to do anything useful with the returned dentry
1811 		 * anyway.
1812 		 */
1813 		*seqp = seq;
1814 		*inode = i;
1815 		return dentry;
1816 	}
1817 	return NULL;
1818 }
1819 
1820 /**
1821  * d_lookup - search for a dentry
1822  * @parent: parent dentry
1823  * @name: qstr of name we wish to find
1824  * Returns: dentry, or NULL
1825  *
1826  * d_lookup searches the children of the parent dentry for the name in
1827  * question. If the dentry is found its reference count is incremented and the
1828  * dentry is returned. The caller must use dput to free the entry when it has
1829  * finished using it. %NULL is returned if the dentry does not exist.
1830  */
1831 struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1832 {
1833 	struct dentry *dentry;
1834 	unsigned seq;
1835 
1836	do {
1837		seq = read_seqbegin(&rename_lock);
1838		dentry = __d_lookup(parent, name);
1839		if (dentry)
1840 			break;
1841 	} while (read_seqretry(&rename_lock, seq));
1842 	return dentry;
1843 }
1844 EXPORT_SYMBOL(d_lookup);
1845 
1846 /**
1847  * __d_lookup - search for a dentry (racy)
1848  * @parent: parent dentry
1849  * @name: qstr of name we wish to find
1850  * Returns: dentry, or NULL
1851  *
1852  * __d_lookup is like d_lookup, however it may (rarely) return a
1853  * false-negative result due to unrelated rename activity.
1854  *
1855  * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1856  * however it must be used carefully, eg. with a following d_lookup in
1857  * the case of failure.
1858  *
1859  * __d_lookup callers must be commented.
1860  */
1861 struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1862 {
1863 	unsigned int len = name->len;
1864 	unsigned int hash = name->hash;
1865 	const unsigned char *str = name->name;
1866 	struct hlist_bl_head *b = d_hash(parent, hash);
1867 	struct hlist_bl_node *node;
1868 	struct dentry *found = NULL;
1869 	struct dentry *dentry;
1870 
1871 	/*
1872 	 * Note: There is significant duplication with __d_lookup_rcu which is
1873 	 * required to prevent single threaded performance regressions
1874 	 * especially on architectures where smp_rmb (in seqcounts) is costly.
1875 	 * Keep the two functions in sync.
1876 	 */
1877 
1878 	/*
1879 	 * The hash list is protected using RCU.
1880 	 *
1881 	 * Take d_lock when comparing a candidate dentry, to avoid races
1882 	 * with d_move().
1883 	 *
1884 	 * It is possible that concurrent renames can mess up our list
1885 	 * walk here and result in missing our dentry, resulting in the
1886 	 * false-negative result. d_lookup() protects against concurrent
1887 	 * renames using rename_lock seqlock.
1888 	 *
1889 	 * See Documentation/filesystems/path-lookup.txt for more details.
1890 	 */
1891 	rcu_read_lock();
1892 
1893 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1894 		const char *tname;
1895 		int tlen;
1896 
1897 		if (dentry->d_name.hash != hash)
1898 			continue;
1899 
1900 		spin_lock(&dentry->d_lock);
1901 		if (dentry->d_parent != parent)
1902 			goto next;
1903 		if (d_unhashed(dentry))
1904 			goto next;
1905 
1906 		/*
1907 		 * It is safe to compare names since d_move() cannot
1908 		 * change the qstr (protected by d_lock).
1909 		 */
1910 		tlen = dentry->d_name.len;
1911 		tname = dentry->d_name.name;
1912 		if (parent->d_flags & DCACHE_OP_COMPARE) {
1913 			if (parent->d_op->d_compare(parent, parent->d_inode,
1914 						dentry, dentry->d_inode,
1915 						tlen, tname, name))
1916 				goto next;
1917 		} else {
1918 			if (dentry_cmp(tname, tlen, str, len))
1919 				goto next;
1920 		}
1921 
1922 		dentry->d_count++;
1923 		found = dentry;
1924 		spin_unlock(&dentry->d_lock);
1925 		break;
1926 next:
1927 		spin_unlock(&dentry->d_lock);
1928 	}
1929 	rcu_read_unlock();
1930 
1931 	return found;
1932 }
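
/*
 * The fallback the comment above asks for, sketched (the caller is
 * hypothetical): a __d_lookup() miss may be a false negative caused by
 * a concurrent rename, so confirm it via the rename_lock-protected
 * d_lookup() before trusting it.
 *
 *	dentry = __d_lookup(parent, name);
 *	if (!dentry)
 *		dentry = d_lookup(parent, name);
 */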
1933 
1934 /**
1935  * d_hash_and_lookup - hash the qstr then search for a dentry
1936  * @dir: Directory to search in
1937  * @name: qstr of name we wish to find
1938  *
1939  * On hash failure or on lookup failure NULL is returned.
1940  */
1941 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1942 {
1943 	struct dentry *dentry = NULL;
1944 
1945 	/*
1946 	 * Check for a fs-specific hash function. Note that we must
1947 	 * calculate the standard hash first, as the d_op->d_hash()
1948 	 * routine may choose to leave the hash value unchanged.
1949 	 */
1950 	name->hash = full_name_hash(name->name, name->len);
1951 	if (dir->d_flags & DCACHE_OP_HASH) {
1952 		if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1953 			goto out;
1954 	}
1955 	dentry = d_lookup(dir, name);
1956 out:
1957 	return dentry;
1958 }
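
/*
 * Sketch of a typical caller (hypothetical): unlike d_lookup(), no
 * pre-computed hash is needed, and filesystems with DCACHE_OP_HASH get
 * their d_hash() applied transparently.
 *
 *	struct qstr q = { .name = "log", .len = 3 };
 *	struct dentry *child = d_hash_and_lookup(dir, &q);
 *
 *	if (child)
 *		dput(child);
 */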
1959 
1960 /**
1961  * d_validate - verify dentry provided from insecure source (deprecated)
1962  * @dentry: The dentry alleged to be valid child of @dparent
1963  * @dparent: The parent dentry (known to be valid)
1964  *
1965  * An insecure source has sent us a dentry; here we verify it and dget() it.
1966  * This is used by ncpfs in its readdir implementation.
1967  * Zero is returned if the dentry is invalid.
1968  *
1969  * This function is slow for big directories, and is deprecated; do not use it.
1970  */
1971 int d_validate(struct dentry *dentry, struct dentry *dparent)
1972 {
1973 	struct dentry *child;
1974 
1975 	spin_lock(&dparent->d_lock);
1976 	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1977 		if (dentry == child) {
1978 			spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1979 			__dget_dlock(dentry);
1980 			spin_unlock(&dentry->d_lock);
1981 			spin_unlock(&dparent->d_lock);
1982 			return 1;
1983 		}
1984 	}
1985 	spin_unlock(&dparent->d_lock);
1986 
1987 	return 0;
1988 }
1989 EXPORT_SYMBOL(d_validate);
1990 
1991 /*
1992  * When a file is deleted, we have two options:
1993  * - turn this dentry into a negative dentry
1994  * - unhash this dentry and free it.
1995  *
1996  * Usually, we want to just turn this into
1997  * a negative dentry, but if anybody else is
1998  * currently using the dentry or the inode
1999  * we can't do that and we fall back on removing
2000  * it from the hash queues and waiting for
2001  * it to be deleted later when it has no users
2002  */
2003 
2004 /**
2005  * d_delete - delete a dentry
2006  * @dentry: The dentry to delete
2007  *
2008  * Turn the dentry into a negative dentry if possible, otherwise
2009  * remove it from the hash queues so it can be deleted later
2010  */
2011 
2012 void d_delete(struct dentry * dentry)
2013 {
2014 	struct inode *inode;
2015 	int isdir = 0;
2016 	/*
2017 	 * Are we the only user?
2018 	 */
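	/*
	 * d_delete() is expected to be called only for positive dentries
	 * (the name being deleted had an inode), so the unconditional
	 * S_ISDIR() dereference below should be safe; the NULL test on
	 * the trylock path is purely defensive.
	 */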
2019 again:
2020 	spin_lock(&dentry->d_lock);
2021 	inode = dentry->d_inode;
2022 	isdir = S_ISDIR(inode->i_mode);
2023 	if (dentry->d_count == 1) {
2024 		if (inode && !spin_trylock(&inode->i_lock)) {
2025 			spin_unlock(&dentry->d_lock);
2026 			cpu_relax();
2027 			goto again;
2028 		}
2029 		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2030 		dentry_unlink_inode(dentry);
2031 		fsnotify_nameremove(dentry, isdir);
2032 		return;
2033 	}
2034 
2035 	if (!d_unhashed(dentry))
2036 		__d_drop(dentry);
2037 
2038 	spin_unlock(&dentry->d_lock);
2039 
2040 	fsnotify_nameremove(dentry, isdir);
2041 }
2042 EXPORT_SYMBOL(d_delete);
2043 
2044 static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2045 {
2046 	BUG_ON(!d_unhashed(entry));
2047 	hlist_bl_lock(b);
2048 	entry->d_flags |= DCACHE_RCUACCESS;
2049 	hlist_bl_add_head_rcu(&entry->d_hash, b);
2050 	hlist_bl_unlock(b);
2051 }
2052 
2053 static void _d_rehash(struct dentry * entry)
2054 {
2055 	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2056 }
2057 
2058 /**
2059  * d_rehash	- add an entry back to the hash
2060  * @entry: dentry to add to the hash
2061  *
2062  * Adds a dentry to the hash according to its name.
2063  */
2064 
2065 void d_rehash(struct dentry * entry)
2066 {
2067 	spin_lock(&entry->d_lock);
2068 	_d_rehash(entry);
2069 	spin_unlock(&entry->d_lock);
2070 }
2071 EXPORT_SYMBOL(d_rehash);
2072 
2073 /**
2074  * dentry_update_name_case - update case insensitive dentry with a new name
2075  * @dentry: dentry to be updated
2076  * @name: new name
2077  *
2078  * Update a case insensitive dentry with new case of name.
2079  *
2080  * dentry must have been returned by d_lookup with name @name. Old and new
2081  * name lengths must match (ie. no d_compare which allows mismatched name
2082  * lengths).
2083  *
2084  * Parent inode i_mutex must be held over d_lookup and into this call (to
2085  * keep out concurrent renames, inserts, and readdir(2)).
2086  */
2087 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2088 {
2089 	BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2090 	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2091 
2092 	spin_lock(&dentry->d_lock);
2093 	write_seqcount_begin(&dentry->d_seq);
2094 	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2095 	write_seqcount_end(&dentry->d_seq);
2096 	spin_unlock(&dentry->d_lock);
2097 }
2098 EXPORT_SYMBOL(dentry_update_name_case);
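
/*
 * The locking pattern this requires, sketched (the filesystem code below
 * is hypothetical): the parent's i_mutex spans both the d_lookup() and
 * the name update, keeping renames and readdir(2) out.
 *
 *	mutex_lock(&dir->d_inode->i_mutex);
 *	dentry = d_lookup(dir, name);
 *	if (dentry) {
 *		dentry_update_name_case(dentry, name);
 *		dput(dentry);
 *	}
 *	mutex_unlock(&dir->d_inode->i_mutex);
 */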
2099 
2100 static void switch_names(struct dentry *dentry, struct dentry *target)
2101 {
2102 	if (dname_external(target)) {
2103 		if (dname_external(dentry)) {
2104 			/*
2105 			 * Both external: swap the pointers
2106 			 */
2107 			swap(target->d_name.name, dentry->d_name.name);
2108 		} else {
2109 			/*
2110 			 * dentry:internal, target:external.  Steal target's
2111 			 * storage and make target internal.
2112 			 */
2113 			memcpy(target->d_iname, dentry->d_name.name,
2114 					dentry->d_name.len + 1);
2115 			dentry->d_name.name = target->d_name.name;
2116 			target->d_name.name = target->d_iname;
2117 		}
2118 	} else {
2119 		if (dname_external(dentry)) {
2120 			/*
2121 			 * dentry:external, target:internal.  Give dentry's
2122 			 * storage to target and make dentry internal
2123 			 */
2124 			memcpy(dentry->d_iname, target->d_name.name,
2125 					target->d_name.len + 1);
2126 			target->d_name.name = dentry->d_name.name;
2127 			dentry->d_name.name = dentry->d_iname;
2128 		} else {
2129 			/*
2130 			 * Both are internal.  Just copy target to dentry
2131 			 */
2132 			memcpy(dentry->d_iname, target->d_name.name,
2133 					target->d_name.len + 1);
2134 			dentry->d_name.len = target->d_name.len;
2135 			return;
2136 		}
2137 	}
2138 	swap(dentry->d_name.len, target->d_name.len);
2139 }
2140 
2141 static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2142 {
2143 	/*
2144 	 * XXXX: do we really need to take target->d_lock?
2145 	 */
2146 	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2147 		spin_lock(&target->d_parent->d_lock);
2148 	else {
2149 		if (d_ancestor(dentry->d_parent, target->d_parent)) {
2150 			spin_lock(&dentry->d_parent->d_lock);
2151 			spin_lock_nested(&target->d_parent->d_lock,
2152 						DENTRY_D_LOCK_NESTED);
2153 		} else {
2154 			spin_lock(&target->d_parent->d_lock);
2155 			spin_lock_nested(&dentry->d_parent->d_lock,
2156 						DENTRY_D_LOCK_NESTED);
2157 		}
2158 	}
2159 	if (target < dentry) {
2160 		spin_lock_nested(&target->d_lock, 2);
2161 		spin_lock_nested(&dentry->d_lock, 3);
2162 	} else {
2163 		spin_lock_nested(&dentry->d_lock, 2);
2164 		spin_lock_nested(&target->d_lock, 3);
2165 	}
2166 }
2167 
2168 static void dentry_unlock_parents_for_move(struct dentry *dentry,
2169 					struct dentry *target)
2170 {
2171 	if (target->d_parent != dentry->d_parent)
2172 		spin_unlock(&dentry->d_parent->d_lock);
2173 	if (target->d_parent != target)
2174 		spin_unlock(&target->d_parent->d_lock);
2175 }
2176 
2177 /*
2178  * When switching names, the actual string doesn't strictly have to
2179  * be preserved in the target - because we're dropping the target
2180  * anyway. As such, we can just do a simple memcpy() to copy over
2181  * the new name before we switch.
2182  *
2183  * Note that we have to be a lot more careful about getting the hash
2184  * switched - we have to switch the hash value properly even if it
2185  * then no longer matches the actual (corrupted) string of the target.
2186  * The hash value has to match the hash queue that the dentry is on..
2187  */
2188 /*
2189  * __d_move - move a dentry
2190  * @dentry: entry to move
2191  * @target: new dentry
2192  *
2193  * Update the dcache to reflect the move of a file name. Negative
2194  * dcache entries should not be moved in this way. Caller must hold
2195  * rename_lock, the i_mutex of the source and target directories,
2196  * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2197  */
2198 static void __d_move(struct dentry * dentry, struct dentry * target)
2199 {
2200 	if (!dentry->d_inode)
2201 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2202 
2203 	BUG_ON(d_ancestor(dentry, target));
2204 	BUG_ON(d_ancestor(target, dentry));
2205 
2206 	dentry_lock_for_move(dentry, target);
2207 
2208 	write_seqcount_begin(&dentry->d_seq);
2209 	write_seqcount_begin(&target->d_seq);
2210 
2211 	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2212 
2213 	/*
2214 	 * Move the dentry to the target hash queue. Don't bother checking
2215 	 * for the same hash queue because of how unlikely it is.
2216 	 */
2217 	__d_drop(dentry);
2218 	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2219 
2220 	/* Unhash the target: dput() will then get rid of it */
2221 	__d_drop(target);
2222 
2223 	list_del(&dentry->d_u.d_child);
2224 	list_del(&target->d_u.d_child);
2225 
2226 	/* Switch the names.. */
2227 	switch_names(dentry, target);
2228 	swap(dentry->d_name.hash, target->d_name.hash);
2229 
2230 	/* ... and switch the parents */
2231 	if (IS_ROOT(dentry)) {
2232 		dentry->d_parent = target->d_parent;
2233 		target->d_parent = target;
2234 		INIT_LIST_HEAD(&target->d_u.d_child);
2235 	} else {
2236 		swap(dentry->d_parent, target->d_parent);
2237 
2238 		/* And add them back to the (new) parent lists */
2239 		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2240 	}
2241 
2242 	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2243 
2244 	write_seqcount_end(&target->d_seq);
2245 	write_seqcount_end(&dentry->d_seq);
2246 
2247 	dentry_unlock_parents_for_move(dentry, target);
2248 	spin_unlock(&target->d_lock);
2249 	fsnotify_d_move(dentry);
2250 	spin_unlock(&dentry->d_lock);
2251 }
2252 
2253 /*
2254  * d_move - move a dentry
2255  * @dentry: entry to move
2256  * @target: new dentry
2257  *
2258  * Update the dcache to reflect the move of a file name. Negative
2259  * dcache entries should not be moved in this way. See the locking
2260  * requirements for __d_move.
2261  */
2262 void d_move(struct dentry *dentry, struct dentry *target)
2263 {
2264 	write_seqlock(&rename_lock);
2265 	__d_move(dentry, target);
2266 	write_sequnlock(&rename_lock);
2267 }
2268 EXPORT_SYMBOL(d_move);
2269 
2270 /**
2271  * d_ancestor - search for an ancestor
2272  * @p1: ancestor dentry
2273  * @p2: child dentry
2274  *
2275  * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2276  * an ancestor of p2, else NULL.
2277  */
2278 struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2279 {
2280 	struct dentry *p;
2281 
2282 	for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2283 		if (p->d_parent == p1)
2284 			return p;
2285 	}
2286 	return NULL;
2287 }
2288 
2289 /*
2290  * This helper attempts to cope with remotely renamed directories
2291  *
2292  * It assumes that the caller is already holding
2293  * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2294  *
2295  * Note: If ever the locking in lock_rename() changes, then please
2296  * remember to update this too...
2297  */
2298 static struct dentry *__d_unalias(struct inode *inode,
2299 		struct dentry *dentry, struct dentry *alias)
2300 {
2301 	struct mutex *m1 = NULL, *m2 = NULL;
2302 	struct dentry *ret;
2303 
2304 	/* If alias and dentry share a parent, then no extra locks required */
2305 	if (alias->d_parent == dentry->d_parent)
2306 		goto out_unalias;
2307 
2308 	/* See lock_rename() */
2309 	ret = ERR_PTR(-EBUSY);
2310 	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2311 		goto out_err;
2312 	m1 = &dentry->d_sb->s_vfs_rename_mutex;
2313 	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2314 		goto out_err;
2315 	m2 = &alias->d_parent->d_inode->i_mutex;
2316 out_unalias:
2317 	__d_move(alias, dentry);
2318 	ret = alias;
2319 out_err:
2320 	spin_unlock(&inode->i_lock);
2321 	if (m2)
2322 		mutex_unlock(m2);
2323 	if (m1)
2324 		mutex_unlock(m1);
2325 	return ret;
2326 }
2327 
2328 /*
2329  * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2330  * named dentry in place of the dentry to be replaced.
2331  * returns with anon->d_lock held!
2332  */
2333 static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2334 {
2335 	struct dentry *dparent, *aparent;
2336 
2337 	dentry_lock_for_move(anon, dentry);
2338 
2339 	write_seqcount_begin(&dentry->d_seq);
2340 	write_seqcount_begin(&anon->d_seq);
2341 
2342 	dparent = dentry->d_parent;
2343 	aparent = anon->d_parent;
2344 
2345 	switch_names(dentry, anon);
2346 	swap(dentry->d_name.hash, anon->d_name.hash);
2347 
2348 	dentry->d_parent = (aparent == anon) ? dentry : aparent;
2349 	list_del(&dentry->d_u.d_child);
2350 	if (!IS_ROOT(dentry))
2351 		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2352 	else
2353 		INIT_LIST_HEAD(&dentry->d_u.d_child);
2354 
2355 	anon->d_parent = (dparent == dentry) ? anon : dparent;
2356 	list_del(&anon->d_u.d_child);
2357 	if (!IS_ROOT(anon))
2358 		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2359 	else
2360 		INIT_LIST_HEAD(&anon->d_u.d_child);
2361 
2362 	write_seqcount_end(&dentry->d_seq);
2363 	write_seqcount_end(&anon->d_seq);
2364 
2365 	dentry_unlock_parents_for_move(anon, dentry);
2366 	spin_unlock(&dentry->d_lock);
2367 
2368 	/* anon->d_lock still locked, returns locked */
2369 	anon->d_flags &= ~DCACHE_DISCONNECTED;
2370 }
2371 
2372 /**
2373  * d_materialise_unique - introduce an inode into the tree
2374  * @dentry: candidate dentry
2375  * @inode: inode to bind to the dentry, to which aliases may be attached
2376  *
2377  * Introduces a dentry into the tree, substituting an extant disconnected
2378  * root directory alias in its place if there is one. Caller must hold the
2379  * i_mutex of the parent directory.
2380  */
2381 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2382 {
2383 	struct dentry *actual;
2384 
2385 	BUG_ON(!d_unhashed(dentry));
2386 
2387 	if (!inode) {
2388 		actual = dentry;
2389 		__d_instantiate(dentry, NULL);
2390 		d_rehash(actual);
2391 		goto out_nolock;
2392 	}
2393 
2394 	spin_lock(&inode->i_lock);
2395 
2396 	if (S_ISDIR(inode->i_mode)) {
2397 		struct dentry *alias;
2398 
2399 		/* Does an aliased dentry already exist? */
2400 		alias = __d_find_alias(inode, 0);
2401 		if (alias) {
2402 			actual = alias;
2403 			write_seqlock(&rename_lock);
2404 
2405 			if (d_ancestor(alias, dentry)) {
2406 				/* Check for loops */
2407 				actual = ERR_PTR(-ELOOP);
2408 			} else if (IS_ROOT(alias)) {
2409 				/* Is this an anonymous mountpoint that we
2410 				 * could splice into our tree? */
2411 				__d_materialise_dentry(dentry, alias);
2412 				write_sequnlock(&rename_lock);
2413 				__d_drop(alias);
2414 				goto found;
2415 			} else {
2416 				/* Nope, but we must(!) avoid directory
2417 				 * aliasing */
2418 				actual = __d_unalias(inode, dentry, alias);
2419 			}
2420 			write_sequnlock(&rename_lock);
2421 			if (IS_ERR(actual)) {
2422 				if (PTR_ERR(actual) == -ELOOP)
2423 					pr_warn_ratelimited(
2424 						"VFS: Lookup of '%s' in %s %s"
2425 						" would have caused loop\n",
2426 						dentry->d_name.name,
2427 						inode->i_sb->s_type->name,
2428 						inode->i_sb->s_id);
2429 				dput(alias);
2430 			}
2431 			goto out_nolock;
2432 		}
2433 	}
2434 
2435 	/* Add a unique reference */
2436 	actual = __d_instantiate_unique(dentry, inode);
2437 	if (!actual)
2438 		actual = dentry;
2439 	else
2440 		BUG_ON(!d_unhashed(actual));
2441 
2442 	spin_lock(&actual->d_lock);
2443 found:
2444 	_d_rehash(actual);
2445 	spin_unlock(&actual->d_lock);
2446 	spin_unlock(&inode->i_lock);
2447 out_nolock:
2448 	if (actual == dentry) {
2449 		security_d_instantiate(dentry, inode);
2450 		return NULL;
2451 	}
2452 
2453 	iput(inode);
2454 	return actual;
2455 }
2456 EXPORT_SYMBOL_GPL(d_materialise_unique);
2457 
2458 static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2459 {
2460 	*buflen -= namelen;
2461 	if (*buflen < 0)
2462 		return -ENAMETOOLONG;
2463 	*buffer -= namelen;
2464 	memcpy(*buffer, str, namelen);
2465 	return 0;
2466 }
2467 
2468 static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2469 {
2470 	return prepend(buffer, buflen, name->name, name->len);
2471 }
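
/*
 * prepend() fills a buffer right to left, which is why every caller
 * starts with *buffer pointing at the end and prepends "\0" first.
 * Sketch with a hypothetical 8-byte buffer:
 *
 *	char buf[8], *p = buf + sizeof(buf);
 *	int len = sizeof(buf);
 *
 *	prepend(&p, &len, "\0", 1);	(p now at buf[7])
 *	prepend(&p, &len, "bar", 3);	(p now at buf[4], "bar\0")
 *	prepend(&p, &len, "/", 1);	(p now at buf[3], "/bar\0")
 *
 * A call returns -ENAMETOOLONG instead of writing once the buffer is
 * exhausted.
 */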
2472 
2473 /**
2474  * prepend_path - Prepend path string to a buffer
2475  * @path: the dentry/vfsmount to report
2476  * @root: root vfsmnt/dentry
2477  * @buffer: pointer to the end of the buffer
2478  * @buflen: pointer to buffer length
2479  *
2480  * Caller holds the rename_lock.
2481  */
2482 static int prepend_path(const struct path *path,
2483 			const struct path *root,
2484 			char **buffer, int *buflen)
2485 {
2486 	struct dentry *dentry = path->dentry;
2487 	struct vfsmount *vfsmnt = path->mnt;
2488 	struct mount *mnt = real_mount(vfsmnt);
2489 	bool slash = false;
2490 	int error = 0;
2491 
2492 	br_read_lock(vfsmount_lock);
2493 	while (dentry != root->dentry || vfsmnt != root->mnt) {
2494 		struct dentry * parent;
2495 
2496 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2497 			/* Global root? */
2498 			if (!mnt_has_parent(mnt))
2499 				goto global_root;
2500 			dentry = mnt->mnt_mountpoint;
2501 			mnt = mnt->mnt_parent;
2502 			vfsmnt = &mnt->mnt;
2503 			continue;
2504 		}
2505 		parent = dentry->d_parent;
2506 		prefetch(parent);
2507 		spin_lock(&dentry->d_lock);
2508 		error = prepend_name(buffer, buflen, &dentry->d_name);
2509 		spin_unlock(&dentry->d_lock);
2510 		if (!error)
2511 			error = prepend(buffer, buflen, "/", 1);
2512 		if (error)
2513 			break;
2514 
2515 		slash = true;
2516 		dentry = parent;
2517 	}
2518 
2519 	if (!error && !slash)
2520 		error = prepend(buffer, buflen, "/", 1);
2521 
2522 out:
2523 	br_read_unlock(vfsmount_lock);
2524 	return error;
2525 
2526 global_root:
2527 	/*
2528 	 * Filesystems needing to implement special "root names"
2529 	 * should do so with ->d_dname()
2530 	 */
2531 	if (IS_ROOT(dentry) &&
2532 	    (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2533 		WARN(1, "Root dentry has weird name <%.*s>\n",
2534 		     (int) dentry->d_name.len, dentry->d_name.name);
2535 	}
2536 	if (!slash)
2537 		error = prepend(buffer, buflen, "/", 1);
2538 	if (!error)
2539 		error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
2540 	goto out;
2541 }
2542 
2543 /**
2544  * __d_path - return the path of a dentry
2545  * @path: the dentry/vfsmount to report
2546  * @root: root vfsmnt/dentry
2547  * @buf: buffer to return value in
2548  * @buflen: buffer length
2549  *
2550  * Convert a dentry into an ASCII path name.
2551  *
2552  * Returns a pointer into the buffer or an error code if the
2553  * path was too long.
2554  *
2555  * "buflen" should be positive.
2556  *
2557  * If the path is not reachable from the supplied root, return %NULL.
2558  */
2559 char *__d_path(const struct path *path,
2560 	       const struct path *root,
2561 	       char *buf, int buflen)
2562 {
2563 	char *res = buf + buflen;
2564 	int error;
2565 
2566 	prepend(&res, &buflen, "\0", 1);
2567 	write_seqlock(&rename_lock);
2568 	error = prepend_path(path, root, &res, &buflen);
2569 	write_sequnlock(&rename_lock);
2570 
2571 	if (error < 0)
2572 		return ERR_PTR(error);
2573 	if (error > 0)
2574 		return NULL;
2575 	return res;
2576 }
2577 
2578 char *d_absolute_path(const struct path *path,
2579 	       char *buf, int buflen)
2580 {
2581 	struct path root = {};
2582 	char *res = buf + buflen;
2583 	int error;
2584 
2585 	prepend(&res, &buflen, "\0", 1);
2586 	write_seqlock(&rename_lock);
2587 	error = prepend_path(path, &root, &res, &buflen);
2588 	write_sequnlock(&rename_lock);
2589 
2590 	if (error > 1)
2591 		error = -EINVAL;
2592 	if (error < 0)
2593 		return ERR_PTR(error);
2594 	return res;
2595 }
2596 
2597 /*
2598  * same as __d_path but appends "(deleted)" for unlinked files.
2599  */
2600 static int path_with_deleted(const struct path *path,
2601 			     const struct path *root,
2602 			     char **buf, int *buflen)
2603 {
2604 	prepend(buf, buflen, "\0", 1);
2605 	if (d_unlinked(path->dentry)) {
2606 		int error = prepend(buf, buflen, " (deleted)", 10);
2607 		if (error)
2608 			return error;
2609 	}
2610 
2611 	return prepend_path(path, root, buf, buflen);
2612 }
2613 
2614 static int prepend_unreachable(char **buffer, int *buflen)
2615 {
2616 	return prepend(buffer, buflen, "(unreachable)", 13);
2617 }
2618 
2619 /**
2620  * d_path - return the path of a dentry
2621  * @path: path to report
2622  * @buf: buffer to return value in
2623  * @buflen: buffer length
2624  *
2625  * Convert a dentry into an ASCII path name. If the entry has been deleted
2626  * the string " (deleted)" is appended. Note that this is ambiguous: a filename could genuinely end in " (deleted)".
2627  *
2628  * Returns a pointer into the buffer or an error code if the path was
2629  * too long. Note: Callers should use the returned pointer, not the passed
2630  * in buffer, to use the name! The implementation often starts at an offset
2631  * into the buffer, and may leave 0 bytes at the start.
2632  *
2633  * "buflen" should be positive.
2634  */
2635 char *d_path(const struct path *path, char *buf, int buflen)
2636 {
2637 	char *res = buf + buflen;
2638 	struct path root;
2639 	int error;
2640 
2641 	/*
2642 	 * We have various synthetic filesystems that never get mounted.  On
2643 	 * these filesystems dentries are never used for lookup purposes, and
2644 	 * thus don't need to be hashed.  They also don't need a name until a
2645 	 * user wants to identify the object in /proc/pid/fd/.  The little hack
2646 	 * below allows us to generate a name for these objects on demand:
2647 	 */
2648 	if (path->dentry->d_op && path->dentry->d_op->d_dname)
2649 		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2650 
2651 	get_fs_root(current->fs, &root);
2652 	write_seqlock(&rename_lock);
2653 	error = path_with_deleted(path, &root, &res, &buflen);
2654 	if (error < 0)
2655 		res = ERR_PTR(error);
2656 	write_sequnlock(&rename_lock);
2657 	path_put(&root);
2658 	return res;
2659 }
2660 EXPORT_SYMBOL(d_path);
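
/*
 * Typical in-kernel usage, sketched (the caller is hypothetical): give
 * d_path() a whole page and use the pointer it returns, since the name
 * is built at the end of the buffer, not at its start.
 *
 *	char *page = (char *)__get_free_page(GFP_KERNEL);
 *
 *	if (page) {
 *		char *p = d_path(&file->f_path, page, PAGE_SIZE);
 *		if (!IS_ERR(p))
 *			printk(KERN_DEBUG "name: %s\n", p);
 *		free_page((unsigned long)page);
 *	}
 */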
2661 
2662 /**
2663  * d_path_with_unreachable - return the path of a dentry
2664  * @path: path to report
2665  * @buf: buffer to return value in
2666  * @buflen: buffer length
2667  *
2668  * The difference from d_path() is that this prepends "(unreachable)"
2669  * to paths which are unreachable from the current process' root.
2670  */
2671 char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2672 {
2673 	char *res = buf + buflen;
2674 	struct path root;
2675 	int error;
2676 
2677 	if (path->dentry->d_op && path->dentry->d_op->d_dname)
2678 		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2679 
2680 	get_fs_root(current->fs, &root);
2681 	write_seqlock(&rename_lock);
2682 	error = path_with_deleted(path, &root, &res, &buflen);
2683 	if (error > 0)
2684 		error = prepend_unreachable(&res, &buflen);
2685 	write_sequnlock(&rename_lock);
2686 	path_put(&root);
2687 	if (error)
2688 		res = ERR_PTR(error);
2689 
2690 	return res;
2691 }
2692 
2693 /*
2694  * Helper function for dentry_operations.d_dname() members
2695  */
2696 char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2697 			const char *fmt, ...)
2698 {
2699 	va_list args;
2700 	char temp[64];
2701 	int sz;
2702 
2703 	va_start(args, fmt);
2704 	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2705 	va_end(args);
2706 
2707 	if (sz > sizeof(temp) || sz > buflen)
2708 		return ERR_PTR(-ENAMETOOLONG);
2709 
2710 	buffer += buflen - sz;
2711 	return memcpy(buffer, temp, sz);
2712 }
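
/*
 * A d_dname instance, sketched after the pipefs one (the names here are
 * illustrative, not code from this file):
 *
 *	static char *example_dname(struct dentry *dentry, char *buffer,
 *				   int buflen)
 *	{
 *		return dynamic_dname(dentry, buffer, buflen, "example:[%lu]",
 *				dentry->d_inode->i_ino);
 *	}
 *
 * hooked up via a dentry_operations with .d_dname = example_dname.
 */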
2713 
2714 /*
2715  * Write full pathname from the root of the filesystem into the buffer.
2716  */
2717 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2718 {
2719 	char *end = buf + buflen;
2720 	char *retval;
2721 
2722 	prepend(&end, &buflen, "\0", 1);
2723 	if (buflen < 1)
2724 		goto Elong;
2725 	/* Get '/' right */
2726 	retval = end-1;
2727 	*retval = '/';
2728 
2729 	while (!IS_ROOT(dentry)) {
2730 		struct dentry *parent = dentry->d_parent;
2731 		int error;
2732 
2733 		prefetch(parent);
2734 		spin_lock(&dentry->d_lock);
2735 		error = prepend_name(&end, &buflen, &dentry->d_name);
2736 		spin_unlock(&dentry->d_lock);
2737 		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2738 			goto Elong;
2739 
2740 		retval = end;
2741 		dentry = parent;
2742 	}
2743 	return retval;
2744 Elong:
2745 	return ERR_PTR(-ENAMETOOLONG);
2746 }
2747 
2748 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2749 {
2750 	char *retval;
2751 
2752 	write_seqlock(&rename_lock);
2753 	retval = __dentry_path(dentry, buf, buflen);
2754 	write_sequnlock(&rename_lock);
2755 
2756 	return retval;
2757 }
2758 EXPORT_SYMBOL(dentry_path_raw);
2759 
2760 char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2761 {
2762 	char *p = NULL;
2763 	char *retval;
2764 
2765 	write_seqlock(&rename_lock);
2766 	if (d_unlinked(dentry)) {
2767 		p = buf + buflen;
2768 		if (prepend(&p, &buflen, "//deleted", 10) != 0)
2769 			goto Elong;
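		/*
		 * prepend() also copied the trailing NUL of "//deleted";
		 * give that byte back so __dentry_path()'s own NUL lands
		 * on the first '/' of "//deleted" (restored below).
		 */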
2770 		buflen++;
2771 	}
2772 	retval = __dentry_path(dentry, buf, buflen);
2773 	write_sequnlock(&rename_lock);
2774 	if (!IS_ERR(retval) && p)
2775 		*p = '/';	/* restore '/' overridden with '\0' */
2776 	return retval;
2777 Elong:
2778 	return ERR_PTR(-ENAMETOOLONG);
2779 }
2780 
2781 /*
2782  * NOTE! The user-level library version returns a
2783  * character pointer. The kernel system call just
2784  * returns the length of the buffer filled (which
2785  * includes the ending '\0' character), or a negative
2786  * error value. So libc would do something like
2787  *
2788  *	char *getcwd(char * buf, size_t size)
2789  *	{
2790  *		int retval;
2791  *
2792  *		retval = sys_getcwd(buf, size);
2793  *		if (retval >= 0)
2794  *			return buf;
2795  *		errno = -retval;
2796  *		return NULL;
2797  *	}
2798  */
2799 SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2800 {
2801 	int error;
2802 	struct path pwd, root;
2803 	char *page = (char *) __get_free_page(GFP_USER);
2804 
2805 	if (!page)
2806 		return -ENOMEM;
2807 
2808 	get_fs_root_and_pwd(current->fs, &root, &pwd);
2809 
2810 	error = -ENOENT;
2811 	write_seqlock(&rename_lock);
2812 	if (!d_unlinked(pwd.dentry)) {
2813 		unsigned long len;
2814 		char *cwd = page + PAGE_SIZE;
2815 		int buflen = PAGE_SIZE;
2816 
2817 		prepend(&cwd, &buflen, "\0", 1);
2818 		error = prepend_path(&pwd, &root, &cwd, &buflen);
2819 		write_sequnlock(&rename_lock);
2820 
2821 		if (error < 0)
2822 			goto out;
2823 
2824 		/* Unreachable from current root */
2825 		if (error > 0) {
2826 			error = prepend_unreachable(&cwd, &buflen);
2827 			if (error)
2828 				goto out;
2829 		}
2830 
2831 		error = -ERANGE;
2832 		len = PAGE_SIZE + page - cwd;
2833 		if (len <= size) {
2834 			error = len;
2835 			if (copy_to_user(buf, cwd, len))
2836 				error = -EFAULT;
2837 		}
2838 	} else {
2839 		write_sequnlock(&rename_lock);
2840 	}
2841 
2842 out:
2843 	path_put(&pwd);
2844 	path_put(&root);
2845 	free_page((unsigned long) page);
2846 	return error;
2847 }
2848 
2849 /*
2850  * Test whether new_dentry is a subdirectory of old_dentry.
2851  *
2852  * Trivially implemented using the dcache structure
2853  */
2854 
2855 /**
2856  * is_subdir - is new dentry a subdirectory of old_dentry
2857  * @new_dentry: new dentry
2858  * @old_dentry: old dentry
2859  *
2860  * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth),
2861  * or if the two dentries are the same. Returns 0 otherwise.
2862  * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2863  */
2864 
2865 int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2866 {
2867 	int result;
2868 	unsigned seq;
2869 
2870 	if (new_dentry == old_dentry)
2871 		return 1;
2872 
2873 	do {
2874 		/* retry the check if a concurrent rename invalidated it */
2875 		seq = read_seqbegin(&rename_lock);
2876 		/*
2877 		 * Need rcu_read_lock() to protect against d_parent being
2878 		 * changed under us by a concurrent d_move()
2879 		 */
2880 		rcu_read_lock();
2881 		if (d_ancestor(old_dentry, new_dentry))
2882 			result = 1;
2883 		else
2884 			result = 0;
2885 		rcu_read_unlock();
2886 	} while (read_seqretry(&rename_lock, seq));
2887 
2888 	return result;
2889 }
2890 
2891 void d_genocide(struct dentry *root)
2892 {
2893 	struct dentry *this_parent;
2894 	struct list_head *next;
2895 	unsigned seq;
2896 	int locked = 0;
2897 
2898 	seq = read_seqbegin(&rename_lock);
2899 again:
2900 	this_parent = root;
2901 	spin_lock(&this_parent->d_lock);
2902 repeat:
2903 	next = this_parent->d_subdirs.next;
2904 resume:
2905 	while (next != &this_parent->d_subdirs) {
2906 		struct list_head *tmp = next;
2907 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2908 		next = tmp->next;
2909 
2910 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2911 		if (d_unhashed(dentry) || !dentry->d_inode) {
2912 			spin_unlock(&dentry->d_lock);
2913 			continue;
2914 		}
2915 		if (!list_empty(&dentry->d_subdirs)) {
2916 			spin_unlock(&this_parent->d_lock);
2917 			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2918 			this_parent = dentry;
2919 			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2920 			goto repeat;
2921 		}
2922 		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2923 			dentry->d_flags |= DCACHE_GENOCIDE;
2924 			dentry->d_count--;
2925 		}
2926 		spin_unlock(&dentry->d_lock);
2927 	}
2928 	if (this_parent != root) {
2929 		struct dentry *child = this_parent;
2930 		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2931 			this_parent->d_flags |= DCACHE_GENOCIDE;
2932 			this_parent->d_count--;
2933 		}
2934 		this_parent = try_to_ascend(this_parent, locked, seq);
2935 		if (!this_parent)
2936 			goto rename_retry;
2937 		next = child->d_u.d_child.next;
2938 		goto resume;
2939 	}
2940 	spin_unlock(&this_parent->d_lock);
2941 	if (!locked && read_seqretry(&rename_lock, seq))
2942 		goto rename_retry;
2943 	if (locked)
2944 		write_sequnlock(&rename_lock);
2945 	return;
2946 
2947 rename_retry:
2948 	locked = 1;
2949 	write_seqlock(&rename_lock);
2950 	goto again;
2951 }
2952 
2953 /**
2954  * find_inode_number - check for dentry with name
2955  * @dir: directory to check
2956  * @name: Name to find.
2957  *
2958  * Check whether a dentry already exists for the given name,
2959  * and return the inode number if it has an inode. Otherwise
2960  * 0 is returned.
2961  *
2962  * This routine is used to post-process directory listings for
2963  * filesystems using synthetic inode numbers, and is necessary
2964  * to keep getcwd() working.
2965  */
2966 
2967 ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2968 {
2969 	struct dentry * dentry;
2970 	ino_t ino = 0;
2971 
2972 	dentry = d_hash_and_lookup(dir, name);
2973 	if (dentry) {
2974 		if (dentry->d_inode)
2975 			ino = dentry->d_inode->i_ino;
2976 		dput(dentry);
2977 	}
2978 	return ino;
2979 }
2980 EXPORT_SYMBOL(find_inode_number);
2981 
2982 static __initdata unsigned long dhash_entries;
2983 static int __init set_dhash_entries(char *str)
2984 {
2985 	if (!str)
2986 		return 0;
2987 	dhash_entries = simple_strtoul(str, &str, 0);
2988 	return 1;
2989 }
2990 __setup("dhash_entries=", set_dhash_entries);
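
/*
 * "dhash_entries=N" on the kernel command line overrides the automatic
 * sizing that alloc_large_system_hash() would otherwise do below, e.g.
 * booting with:
 *
 *	dhash_entries=65536
 */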
2991 
2992 static void __init dcache_init_early(void)
2993 {
2994 	unsigned int loop;
2995 
2996 	/* If hashes are distributed across NUMA nodes, defer
2997 	 * hash allocation until vmalloc space is available.
2998 	 */
2999 	if (hashdist)
3000 		return;
3001 
3002 	dentry_hashtable =
3003 		alloc_large_system_hash("Dentry cache",
3004 					sizeof(struct hlist_bl_head),
3005 					dhash_entries,
3006 					13,
3007 					HASH_EARLY,
3008 					&d_hash_shift,
3009 					&d_hash_mask,
3010 					0);
3011 
3012 	for (loop = 0; loop < (1U << d_hash_shift); loop++)
3013 		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3014 }
3015 
3016 static void __init dcache_init(void)
3017 {
3018 	unsigned int loop;
3019 
3020 	/*
3021 	 * A constructor could be added for stable state like the lists,
3022 	 * but it is probably not worth it because of the cache nature
3023 	 * of the dcache.
3024 	 */
3025 	dentry_cache = KMEM_CACHE(dentry,
3026 		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3027 
3028 	/* Hash may have been set up in dcache_init_early */
3029 	if (!hashdist)
3030 		return;
3031 
3032 	dentry_hashtable =
3033 		alloc_large_system_hash("Dentry cache",
3034 					sizeof(struct hlist_bl_head),
3035 					dhash_entries,
3036 					13,
3037 					0,
3038 					&d_hash_shift,
3039 					&d_hash_mask,
3040 					0);
3041 
3042 	for (loop = 0; loop < (1U << d_hash_shift); loop++)
3043 		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3044 }
3045 
3046 /* SLAB cache for __getname() consumers */
3047 struct kmem_cache *names_cachep __read_mostly;
3048 EXPORT_SYMBOL(names_cachep);
3049 
3050 EXPORT_SYMBOL(d_genocide);
3051 
3052 void __init vfs_caches_init_early(void)
3053 {
3054 	dcache_init_early();
3055 	inode_init_early();
3056 }
3057 
3058 void __init vfs_caches_init(unsigned long mempages)
3059 {
3060 	unsigned long reserve;
3061 
3062 	/* Base hash sizes on available memory, with a reserve equal to
3063 	 * 150% of current kernel size */
3064 
3065 	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
3066 	mempages -= reserve;
3067 
3068 	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3069 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3070 
3071 	dcache_init();
3072 	inode_init();
3073 	files_init(mempages);
3074 	mnt_init();
3075 	bdev_cache_init();
3076 	chrdev_init();
3077 }
3078