/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include <linux/prefetch.h>
#include <linux/ratelimit.h>
#include "internal.h"
#include "mount.h"

/*
 * Usage:
 * dcache->d_inode->i_lock protects:
 *   - i_dentry, d_alias, d_inode of aliases
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
 * dcache_lru_lock protects:
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
 *   - d_count
 *   - d_unhashed()
 *   - d_parent and d_subdirs
 *   - children's d_child and d_parent
 *   - d_alias, d_inode
 *
 * Ordering:
 * dentry->d_inode->i_lock
 *   dentry->d_lock
 *     dcache_lru_lock
 *     dcache_hash_bucket lock
 *     s_anon lock
 *
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
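/*
 * Illustrative sketch, compiled out: taking d_lock on two dentries with
 * no ancestor relationship, per the ordering rules above - the dentry
 * with the lower address is always locked first. The helper name is
 * hypothetical and not used elsewhere in this file.
 */
#if 0
static void example_lock_unrelated_pair(struct dentry *dentry1,
					struct dentry *dentry2)
{
	if (dentry1 < dentry2) {
		spin_lock(&dentry1->d_lock);
		spin_lock_nested(&dentry2->d_lock, DENTRY_D_LOCK_NESTED);
	} else {
		spin_lock(&dentry2->d_lock);
		spin_lock_nested(&dentry1->d_lock, DENTRY_D_LOCK_NESTED);
	}
}
#endif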
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

static struct kmem_cache *dentry_cache __read_mostly;

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS     d_hash_shift
#define D_HASHMASK     d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

static struct hlist_bl_head *dentry_hashtable __read_mostly;

static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
					unsigned int hash)
{
	hash += (unsigned long) parent / L1_CACHE_BYTES;
	hash = hash + (hash >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}
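/*
 * Illustrative note on the mixing above: the parent pointer is folded
 * into the name hash so that identical names under different parents
 * land in different buckets, and dividing by L1_CACHE_BYTES discards
 * the low pointer bits, which carry almost no entropy for
 * slab-allocated dentries.
 */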
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

static DEFINE_PER_CPU(unsigned int, nr_dentry);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
static int get_nr_dentry(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	return sum < 0 ? 0 : sum;
}

int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif

/*
 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 * The strings are both count bytes long, and count is non-zero.
 */
#ifdef CONFIG_DCACHE_WORD_ACCESS

#include <asm/word-at-a-time.h>
/*
 * NOTE! 'cs' comes from a dentry, so it has an aligned allocation
 * for this particular component. We don't strictly need the
 * load_unaligned_zeropad() safety, but it doesn't hurt either.
 *
 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 * need the careful unaligned handling.
 */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	unsigned long a, b, mask;

	for (;;) {
		a = *(unsigned long *)cs;
		b = load_unaligned_zeropad(ct);
		if (tcount < sizeof(unsigned long))
			break;
		if (unlikely(a != b))
			return 1;
		cs += sizeof(unsigned long);
		ct += sizeof(unsigned long);
		tcount -= sizeof(unsigned long);
		if (!tcount)
			return 0;
	}
	mask = ~(~0ul << tcount*8);
	return unlikely(!!((a ^ b) & mask));
}
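/*
 * Worked example of the tail mask above (illustrative): with 64-bit
 * longs and tcount == 3 remaining bytes, mask = ~(~0ul << 24) =
 * 0x0000000000ffffff, so on a little-endian machine only the first
 * three bytes of the final pair of words take part in the (a ^ b)
 * comparison.
 */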

#else

static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	do {
		if (*cs != *ct)
			return 1;
		cs++;
		ct++;
		tcount--;
	} while (tcount);
	return 0;
}

#endif

static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
{
	const unsigned char *cs;
	/*
	 * Be careful about RCU walk racing with rename:
	 * use ACCESS_ONCE to fetch the name pointer.
	 *
	 * NOTE! Even if a rename will mean that the length
	 * was not loaded atomically, we don't care. The
	 * RCU walk will check the sequence count eventually,
	 * and catch it. And we won't overrun the buffer,
	 * because we're reading the name pointer atomically,
	 * and a dentry name is guaranteed to be properly
	 * terminated with a NUL byte.
	 *
	 * End result: even if 'len' is wrong, we'll exit
	 * early because the data cannot match (there can
	 * be no NUL in the ct/tcount data)
	 */
	cs = ACCESS_ONCE(dentry->d_name.name);
	smp_read_barrier_depends();
	return dentry_string_cmp(cs, ct, tcount);
}

static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	WARN_ON(!hlist_unhashed(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

/*
 * no locks, please.
 */
static void d_free(struct dentry *dentry)
{
	BUG_ON(dentry->d_lockref.count);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never visible to RCU, immediate free is OK */
	if (!(dentry->d_flags & DCACHE_RCUACCESS))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}

/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 *
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier */
	write_seqcount_barrier(&dentry->d_seq);
}
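/*
 * Illustrative sketch, compiled out: the reader side that the barrier
 * above invalidates. An rcu-walk lookup samples d_seq first and
 * re-checks it after speculatively using the dentry; an intervening
 * write_seqcount_barrier() forces the retry. The helper name is
 * hypothetical.
 */
#if 0
static bool example_rcuwalk_read(struct dentry *dentry)
{
	unsigned seq = raw_seqcount_begin(&dentry->d_seq);

	/* ... speculatively use dentry->d_inode, d_name, etc. ... */
	return !read_seqcount_retry(&dentry->d_seq, seq);
}
#endif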

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 */
static void dentry_iput(struct dentry *dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		hlist_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
	}
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 */
static void dentry_unlink_inode(struct dentry *dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	dentry->d_inode = NULL;
	hlist_del_init(&dentry->d_alias);
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}

/*
 * dentry_lru_(add|del|move_list) must be called with d_lock held.
 */
static void dentry_lru_add(struct dentry *dentry)
{
	if (list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
		spin_unlock(&dcache_lru_lock);
	}
}

static void __dentry_lru_del(struct dentry *dentry)
{
	list_del_init(&dentry->d_lru);
	dentry->d_flags &= ~DCACHE_SHRINK_LIST;
	dentry->d_sb->s_nr_dentry_unused--;
	dentry_stat.nr_unused--;
}

/*
 * Remove a dentry with references from the LRU.
 */
static void dentry_lru_del(struct dentry *dentry)
{
	if (!list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		__dentry_lru_del(dentry);
		spin_unlock(&dcache_lru_lock);
	}
}

static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
{
	spin_lock(&dcache_lru_lock);
	if (list_empty(&dentry->d_lru)) {
		list_add_tail(&dentry->d_lru, list);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
	} else {
		list_move_tail(&dentry->d_lru, list);
	}
	spin_unlock(&dcache_lru_lock);
}

/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
	__releases(dentry->d_lock)
	__releases(parent->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	list_del(&dentry->d_u.d_child);
	/*
	 * Inform try_to_ascend() that we are no longer attached to the
	 * dentry tree
	 */
	dentry->d_flags |= DCACHE_DENTRY_KILLED;
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	d_free(dentry);
	return parent;
}

/*
 * Unhash a dentry without inserting an RCU walk barrier or checking that
 * dentry->d_lock is locked.  The caller must take care of that, if
 * appropriate.
 */
static void __d_shrink(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		struct hlist_bl_head *b;
		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
			b = &dentry->d_sb->s_anon;
		else
			b = d_hash(dentry->d_parent, dentry->d_name.hash);

		hlist_bl_lock(b);
		__hlist_bl_del(&dentry->d_hash);
		dentry->d_hash.pprev = NULL;
		hlist_bl_unlock(b);
	}
}

/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		__d_shrink(dentry);
		dentry_rcuwalk_barrier(dentry);
	}
}
EXPORT_SYMBOL(__d_drop);

void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);
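/*
 * Illustrative sketch, compiled out: the typical indirect d_drop()
 * caller named above - a ->d_revalidate() method that finds a cached
 * entry stale. Returning 0 makes the VFS invalidate (and thus drop)
 * the dentry; example_entry_is_stale() is a hypothetical helper.
 */
#if 0
static int example_d_revalidate(struct dentry *dentry, unsigned int flags)
{
	if (flags & LOOKUP_RCU)
		return -ECHILD;	/* drop out of rcu-walk first */
	if (example_entry_is_stale(dentry))
		return 0;	/* VFS will d_invalidate() it */
	return 1;
}
#endif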

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If ref is non-zero, then decrement the refcount too.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
	__releases(dentry->d_lock)
{
	struct inode *inode;
	struct dentry *parent;

	inode = dentry->d_inode;
	if (inode && !spin_trylock(&inode->i_lock)) {
relock:
		spin_unlock(&dentry->d_lock);
		cpu_relax();
		return dentry; /* try again with same dentry */
	}
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	if (parent && !spin_trylock(&parent->d_lock)) {
		if (inode)
			spin_unlock(&inode->i_lock);
		goto relock;
	}

	if (ref)
		dentry->d_lockref.count--;
	/*
	 * inform the fs via d_prune that this dentry is about to be
	 * unhashed and destroyed.
	 */
	if (dentry->d_flags & DCACHE_OP_PRUNE)
		dentry->d_op->d_prune(dentry);

	dentry_lru_del(dentry);
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	return d_kill(dentry, parent);
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/**
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	if (dentry->d_lockref.count == 1)
		might_sleep();
	if (lockref_put_or_lock(&dentry->d_lockref))
		return;

	if (dentry->d_flags & DCACHE_OP_DELETE) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;

	dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_lockref.count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	dentry = dentry_kill(dentry, 1);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry *dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dentry->d_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dentry->d_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dentry->d_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dentry->d_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 * We also need to leave mountpoints alone,
	 * directory or not.
	 */
	if (dentry->d_lockref.count > 1 && dentry->d_inode) {
		if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);

/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_lockref.count++;
}

static inline void __dget(struct dentry *dentry)
{
	lockref_get(&dentry->d_lockref);
}

struct dentry *dget_parent(struct dentry *dentry)
{
	struct dentry *ret;

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	BUG_ON(!ret->d_lockref.count);
	ret->d_lockref.count++;
	spin_unlock(&ret->d_lock);
	return ret;
}
EXPORT_SYMBOL(dget_parent);
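/*
 * Illustrative sketch, compiled out: dget_parent() pairs with dput()
 * and is the safe way to get a stable reference to ->d_parent without
 * holding any d_lock. example_use() is a hypothetical consumer.
 */
#if 0
static void example_visit_parent(struct dentry *dentry)
{
	struct dentry *parent = dget_parent(dentry);

	example_use(parent);	/* parent stays pinned here */
	dput(parent);
}
#endif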

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon:  flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				discon_alias = alias;
			} else if (!want_discon) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!hlist_empty(&inode->i_dentry)) {
		spin_lock(&inode->i_lock);
		de = __d_find_alias(inode, 0);
		spin_unlock(&inode->i_lock);
	}
	return de;
}
EXPORT_SYMBOL(d_find_alias);

/*
 * Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_lockref.count) {
			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);

/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired - no problem, just try again.
 */
static void try_prune_one_dentry(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct dentry *parent;

	parent = dentry_kill(dentry, 0);
	/*
	 * If dentry_kill returns NULL, we have nothing more to do.
	 * if it returns the same dentry, trylocks failed. In either
	 * case, just loop again.
	 *
	 * Otherwise, we need to prune ancestors too. This is necessary
	 * to prevent quadratic behavior of shrink_dcache_parent(), but
	 * is also expected to be beneficial in reducing dentry cache
	 * fragmentation.
	 */
	if (!parent)
		return;
	if (parent == dentry)
		return;

	/* Prune ancestors. */
	dentry = parent;
	while (dentry) {
		if (lockref_put_or_lock(&dentry->d_lockref))
			return;
		dentry = dentry_kill(dentry, 1);
	}
}

static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	rcu_read_lock();
	for (;;) {
		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
		if (&dentry->d_lru == list)
			break; /* empty */
		spin_lock(&dentry->d_lock);
		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup.  Do not free
		 * it - just keep it off the LRU list.
		 */
		if (dentry->d_lockref.count) {
			dentry_lru_del(dentry);
			spin_unlock(&dentry->d_lock);
			continue;
		}

		rcu_read_unlock();

		try_prune_one_dentry(dentry);

		rcu_read_lock();
	}
	rcu_read_unlock();
}

/**
 * prune_dcache_sb - shrink the dcache
 * @sb: superblock
 * @count: number of entries to try to free
 *
 * Attempt to shrink the superblock dcache LRU by @count entries. This is
 * done when we need more memory and is called from the superblock shrinker
 * function.
 *
 * This function may fail to free any resources if all the dentries are in
 * use.
 */
void prune_dcache_sb(struct super_block *sb, int count)
{
	struct dentry *dentry;
	LIST_HEAD(referenced);
	LIST_HEAD(tmp);

relock:
	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		dentry = list_entry(sb->s_dentry_lru.prev,
				struct dentry, d_lru);
		BUG_ON(dentry->d_sb != sb);

		if (!spin_trylock(&dentry->d_lock)) {
			spin_unlock(&dcache_lru_lock);
			cpu_relax();
			goto relock;
		}

		if (dentry->d_flags & DCACHE_REFERENCED) {
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_move(&dentry->d_lru, &referenced);
			spin_unlock(&dentry->d_lock);
		} else {
			list_move_tail(&dentry->d_lru, &tmp);
			dentry->d_flags |= DCACHE_SHRINK_LIST;
			spin_unlock(&dentry->d_lock);
			if (!--count)
				break;
		}
		cond_resched_lock(&dcache_lru_lock);
	}
	if (!list_empty(&referenced))
		list_splice(&referenced, &sb->s_dentry_lru);
	spin_unlock(&dcache_lru_lock);

	shrink_dentry_list(&tmp);
}

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	LIST_HEAD(tmp);

	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		list_splice_init(&sb->s_dentry_lru, &tmp);
		spin_unlock(&dcache_lru_lock);
		shrink_dentry_list(&tmp);
		spin_lock(&dcache_lru_lock);
	}
	spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);

/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 *   locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;

	BUG_ON(!IS_ROOT(dentry));

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs))
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			/*
			 * inform the fs that this dentry is about to be
			 * unhashed and destroyed.
			 */
			if (dentry->d_flags & DCACHE_OP_PRUNE)
				dentry->d_op->d_prune(dentry);

			dentry_lru_del(dentry);
			__d_shrink(dentry);

			if (dentry->d_lockref.count != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       dentry->d_lockref.count,
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			if (IS_ROOT(dentry)) {
				parent = NULL;
				list_del(&dentry->d_u.d_child);
			} else {
				parent = dentry->d_parent;
				parent->d_lockref.count--;
				list_del(&dentry->d_u.d_child);
			}

			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				hlist_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				return;
			dentry = parent;
		} while (list_empty(&dentry->d_subdirs));

		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
}

/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock because:
 *   - the superblock is detached from all mountings and open files, so the
 *     dentry trees will not be rearranged by the VFS
 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 *     any dentries belonging to this superblock that it comes across
 *   - the filesystem itself is no longer permitted to rearrange the dentries
 *     in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	dentry->d_lockref.count--;
	shrink_dcache_for_umount_subtree(dentry);

	while (!hlist_bl_empty(&sb->s_anon)) {
		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}

/*
 * This tries to ascend one level of parenthood, but
 * we can race with renaming, so we need to re-check
 * the parenthood after dropping the lock and check
 * that the sequence number still matches.
 */
static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
{
	struct dentry *new = old->d_parent;

	rcu_read_lock();
	spin_unlock(&old->d_lock);
	spin_lock(&new->d_lock);

	/*
	 * might go back up the wrong parent if we have had a rename
	 * or deletion
	 */
	if (new != old->d_parent ||
		 (old->d_flags & DCACHE_DENTRY_KILLED) ||
		 (!locked && read_seqretry(&rename_lock, seq))) {
		spin_unlock(&new->d_lock);
		new = NULL;
	}
	rcu_read_unlock();
	return new;
}

/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */
int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;

	if (d_mountpoint(parent))
		goto positive;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		/* Have we found a mount point ? */
		if (d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			spin_unlock(&this_parent->d_lock);
			goto positive;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *child = this_parent;
		this_parent = try_to_ascend(this_parent, locked, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 0; /* No mount points found in tree */
positive:
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 1;

rename_retry:
	if (locked)
		goto again;
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}
EXPORT_SYMBOL(have_submounts);

/*
 * Search the dentry child list of the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry *parent, struct list_head *dispose)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int found = 0;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

		/*
		 * move only zero ref count dentries to the dispose list.
		 *
		 * Those which are presently on the shrink list, being processed
		 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
		 * loop in shrink_dcache_parent() might not make any progress
		 * and loop forever.
		 */
		if (dentry->d_lockref.count) {
			dentry_lru_del(dentry);
		} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
			dentry_lru_move_list(dentry, dispose);
			dentry->d_flags |= DCACHE_SHRINK_LIST;
			found++;
		}
		/*
		 * We can return to the caller if we have found some (this
		 * ensures forward progress). We'll be coming back to find
		 * the rest.
		 */
		if (found && need_resched()) {
			spin_unlock(&dentry->d_lock);
			goto out;
		}

		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}

		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *child = this_parent;
		this_parent = try_to_ascend(this_parent, locked, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
out:
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return found;

rename_retry:
	if (found)
		return found;
	if (locked)
		goto again;
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */
void shrink_dcache_parent(struct dentry *parent)
{
	LIST_HEAD(dispose);
	int found;

	while ((found = select_parent(parent, &dispose)) != 0) {
		shrink_dentry_list(&dispose);
		cond_resched();
	}
}
EXPORT_SYMBOL(shrink_dcache_parent);

/**
 * __d_alloc	-	allocate a dcache entry
 * @sb: filesystem it will belong to
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and may be reused after this call.
 */

struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	/*
	 * We guarantee that the inline name is always NUL-terminated.
	 * This way the memcpy() done by the name switching in rename
	 * will still always have a NUL at the end, even if we might
	 * be overwriting an internal NUL character
	 */
	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else {
		dname = dentry->d_iname;
	}

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	/* Make sure we always see the terminating NUL character */
	smp_wmb();
	dentry->d_name.name = dname;

	dentry->d_lockref.count = 1;
	dentry->d_flags = 0;
	spin_lock_init(&dentry->d_lock);
	seqcount_init(&dentry->d_seq);
	dentry->d_inode = NULL;
	dentry->d_parent = dentry;
	dentry->d_sb = sb;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_HLIST_NODE(&dentry->d_alias);
	INIT_LIST_HEAD(&dentry->d_u.d_child);
	d_set_d_op(dentry, dentry->d_sb->s_d_op);

	this_cpu_inc(nr_dentry);

	return dentry;
}

/**
 * d_alloc	-	allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and may be reused after this call.
 */
struct dentry *d_alloc(struct dentry *parent, const struct qstr *name)
{
	struct dentry *dentry = __d_alloc(parent->d_sb, name);
	if (!dentry)
		return NULL;

	spin_lock(&parent->d_lock);
	/*
	 * don't need child lock because it is not subject
	 * to concurrency here
	 */
	__dget_dlock(parent);
	dentry->d_parent = parent;
	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
	spin_unlock(&parent->d_lock);

	return dentry;
}
EXPORT_SYMBOL(d_alloc);

struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry = __d_alloc(sb, name);
	if (dentry)
		dentry->d_flags |= DCACHE_DISCONNECTED;
	return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);

struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
	struct qstr q;

	q.name = name;
	q.len = strlen(name);
	q.hash = full_name_hash(q.name, q.len);
	return d_alloc(parent, &q);
}
EXPORT_SYMBOL(d_alloc_name);

void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
	WARN_ON_ONCE(dentry->d_op);
	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
				DCACHE_OP_COMPARE	|
				DCACHE_OP_REVALIDATE	|
				DCACHE_OP_WEAK_REVALIDATE	|
				DCACHE_OP_DELETE));
	dentry->d_op = op;
	if (!op)
		return;
	if (op->d_hash)
		dentry->d_flags |= DCACHE_OP_HASH;
	if (op->d_compare)
		dentry->d_flags |= DCACHE_OP_COMPARE;
	if (op->d_revalidate)
		dentry->d_flags |= DCACHE_OP_REVALIDATE;
	if (op->d_weak_revalidate)
		dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
	if (op->d_delete)
		dentry->d_flags |= DCACHE_OP_DELETE;
	if (op->d_prune)
		dentry->d_flags |= DCACHE_OP_PRUNE;
}
EXPORT_SYMBOL(d_set_d_op);
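/*
 * Illustrative sketch, compiled out: a filesystem usually installs its
 * dentry_operations once, via sb->s_d_op at mount time, so that
 * __d_alloc() above applies them to every new dentry through
 * d_set_d_op(). All example_ names are hypothetical.
 */
#if 0
static int example_d_delete(const struct dentry *dentry)
{
	return 1;	/* never keep unused dentries cached */
}

static const struct dentry_operations example_dentry_ops = {
	.d_delete	= example_d_delete,
};

static int example_fill_super(struct super_block *sb)
{
	sb->s_d_op = &example_dentry_ops;	/* before any dentry is allocated */
	return 0;
}
#endif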

static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	spin_lock(&dentry->d_lock);
	if (inode) {
		if (unlikely(IS_AUTOMOUNT(inode)))
			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
		hlist_add_head(&dentry->d_alias, &inode->i_dentry);
	}
	dentry->d_inode = inode;
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	fsnotify_d_instantiate(dentry, inode);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */

void d_instantiate(struct dentry *entry, struct inode *inode)
{
	BUG_ON(!hlist_unhashed(&entry->d_alias));
	if (inode)
		spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);
	security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
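/*
 * Illustrative sketch, compiled out: the common create-path pairing of
 * d_alloc()/d_instantiate(), roughly what a filesystem's ->create()
 * would do. example_new_inode() is hypothetical; per the NOTE above,
 * its inode reference is handed over to the dcache.
 */
#if 0
static int example_create(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = example_new_inode(dir->i_sb);

	if (!inode)
		return -ENOSPC;
	d_instantiate(dentry, inode);	/* dentry goes positive */
	return 0;
}
#endif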

/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
					     struct inode *inode)
{
	struct dentry *alias;
	int len = entry->d_name.len;
	const char *name = entry->d_name.name;
	unsigned int hash = entry->d_name.hash;

	if (!inode) {
		__d_instantiate(entry, NULL);
		return NULL;
	}

	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
		/*
		 * Don't need alias->d_lock here, because aliases with
		 * d_parent == entry->d_parent are not subject to name or
		 * parent changes, because the parent inode i_mutex is held.
		 */
		if (alias->d_name.hash != hash)
			continue;
		if (alias->d_parent != entry->d_parent)
			continue;
		if (alias->d_name.len != len)
			continue;
		if (dentry_cmp(alias, name, len))
			continue;
		__dget(alias);
		return alias;
	}

	__d_instantiate(entry, inode);
	return NULL;
}

struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *result;

	BUG_ON(!hlist_unhashed(&entry->d_alias));

	if (inode)
		spin_lock(&inode->i_lock);
	result = __d_instantiate_unique(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);

	if (!result) {
		security_d_instantiate(entry, inode);
		return NULL;
	}

	BUG_ON(!d_unhashed(result));
	iput(inode);
	return result;
}
EXPORT_SYMBOL(d_instantiate_unique);

struct dentry *d_make_root(struct inode *root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = QSTR_INIT("/", 1);

		res = __d_alloc(root_inode->i_sb, &name);
		if (res)
			d_instantiate(res, root_inode);
		else
			iput(root_inode);
	}
	return res;
}
EXPORT_SYMBOL(d_make_root);

static struct dentry *__d_find_any_alias(struct inode *inode)
{
	struct dentry *alias;

	if (hlist_empty(&inode->i_dentry))
		return NULL;
	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
	__dget(alias);
	return alias;
}

/**
 * d_find_any_alias - find any alias for a given inode
 * @inode: inode to find an alias for
 *
 * If any aliases exist for the given inode, take and return a
 * reference for one of them.  If no aliases exist, return %NULL.
 */
struct dentry *d_find_any_alias(struct inode *inode)
{
	struct dentry *de;

	spin_lock(&inode->i_lock);
	de = __d_find_any_alias(inode);
	spin_unlock(&inode->i_lock);
	return de;
}
EXPORT_SYMBOL(d_find_any_alias);

/**
 * d_obtain_alias - find or allocate a dentry for a given inode
 * @inode: inode to allocate the dentry for
 *
 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
 * similar open by handle operations.  The returned dentry may be anonymous,
 * or may have a full name (if the inode was already in the cache).
 *
 * When called on a directory inode, we must ensure that the inode only ever
 * has one dentry.  If a dentry is found, that is returned instead of
 * allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry.  In case of an error the reference on the inode is released.
 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in and the error will be propagated to the return value,
 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
 */
struct dentry *d_obtain_alias(struct inode *inode)
{
	static const struct qstr anonstring = QSTR_INIT("/", 1);
	struct dentry *tmp;
	struct dentry *res;

	if (!inode)
		return ERR_PTR(-ESTALE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	res = d_find_any_alias(inode);
	if (res)
		goto out_iput;

	tmp = __d_alloc(inode->i_sb, &anonstring);
	if (!tmp) {
		res = ERR_PTR(-ENOMEM);
		goto out_iput;
	}

	spin_lock(&inode->i_lock);
	res = __d_find_any_alias(inode);
	if (res) {
		spin_unlock(&inode->i_lock);
		dput(tmp);
		goto out_iput;
	}

	/* attach a disconnected dentry */
	spin_lock(&tmp->d_lock);
	tmp->d_inode = inode;
	tmp->d_flags |= DCACHE_DISCONNECTED;
	hlist_add_head(&tmp->d_alias, &inode->i_dentry);
	hlist_bl_lock(&tmp->d_sb->s_anon);
	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
	hlist_bl_unlock(&tmp->d_sb->s_anon);
	spin_unlock(&tmp->d_lock);
	spin_unlock(&inode->i_lock);
	security_d_instantiate(tmp, inode);

	return tmp;

 out_iput:
	if (res && !IS_ERR(res))
		security_d_instantiate(res, inode);
	iput(inode);
	return res;
}
EXPORT_SYMBOL(d_obtain_alias);
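/*
 * Illustrative sketch, compiled out: the typical d_obtain_alias()
 * caller is an export_operations ->fh_to_dentry() method, as the
 * kerneldoc above describes. example_iget() is a hypothetical
 * stand-in for the filesystem's inode lookup; d_obtain_alias()
 * already copes with a NULL or IS_ERR() inode.
 */
#if 0
static struct dentry *example_fh_to_dentry(struct super_block *sb,
					   struct fid *fid, int fh_len,
					   int fh_type)
{
	struct inode *inode = example_iget(sb, fid->i32.ino);

	return d_obtain_alias(inode);
}
#endif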

/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode:  the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned.  Otherwise NULL
 * is returned.  This matches the expected return value of ->lookup.
 *
 * Cluster filesystems may call this function with a negative, hashed dentry.
 * In that case, we know that the inode will be a regular file, and also this
 * will only occur during atomic_open. So we need to check for the dentry
 * being already hashed only in the final case.
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
	struct dentry *new = NULL;

	if (IS_ERR(inode))
		return ERR_CAST(inode);

	if (inode && S_ISDIR(inode->i_mode)) {
		spin_lock(&inode->i_lock);
		new = __d_find_alias(inode, 1);
		if (new) {
			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
			spin_unlock(&inode->i_lock);
			security_d_instantiate(new, inode);
			d_move(new, dentry);
			iput(inode);
		} else {
			/* already taking inode->i_lock, so d_add() by hand */
			__d_instantiate(dentry, inode);
			spin_unlock(&inode->i_lock);
			security_d_instantiate(dentry, inode);
			d_rehash(dentry);
		}
	} else {
		d_instantiate(dentry, inode);
		if (d_unhashed(dentry))
			d_rehash(dentry);
	}
	return new;
}
EXPORT_SYMBOL(d_splice_alias);
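/*
 * Illustrative sketch, compiled out: an exportable filesystem's
 * ->lookup() method built around d_splice_alias(), as the kerneldoc
 * above recommends. example_inode_by_name() is a hypothetical
 * directory search that returns NULL when the name does not exist and
 * may return an ERR_PTR() inode; d_splice_alias() handles all three
 * cases, and its return value matches what ->lookup must return.
 */
#if 0
static struct dentry *example_lookup(struct inode *dir, struct dentry *dentry,
				     unsigned int flags)
{
	struct inode *inode = example_inode_by_name(dir, &dentry->d_name);

	return d_splice_alias(inode, dentry);
}
#endif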

/**
 * d_add_ci - lookup or allocate new dentry with case-exact name
 * @inode:  the inode case-insensitive lookup has found
 * @dentry: the negative dentry that was passed to the parent's lookup func
 * @name:   the case-exact name to be associated with the returned dentry
 *
 * This is to avoid filling the dcache with case-insensitive names to the
 * same inode, only the actual correct case is stored in the dcache for
 * case-insensitive filesystems.
 *
 * If a case-insensitive lookup matches and the case-exact dentry
 * already exists in the dcache, use it and return it.
 *
 * If no entry exists with the exact case name, allocate new dentry with
 * the exact case, and return the spliced entry.
 */
struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
			struct qstr *name)
{
	struct dentry *found;
	struct dentry *new;

	/*
	 * First check if a dentry matching the name already exists,
	 * if not go ahead and create it now.
	 */
	found = d_hash_and_lookup(dentry->d_parent, name);
	if (unlikely(IS_ERR(found)))
		goto err_out;
	if (!found) {
		new = d_alloc(dentry->d_parent, name);
		if (!new) {
			found = ERR_PTR(-ENOMEM);
			goto err_out;
		}

		found = d_splice_alias(inode, new);
		if (found) {
			dput(new);
			return found;
		}
		return new;
	}

	/*
	 * If a matching dentry exists, and it's not negative use it.
	 *
	 * Decrement the reference count to balance the iget() done
	 * earlier on.
	 */
	if (found->d_inode) {
		if (unlikely(found->d_inode != inode)) {
			/* This can't happen because bad inodes are unhashed. */
			BUG_ON(!is_bad_inode(inode));
			BUG_ON(!is_bad_inode(found->d_inode));
		}
		iput(inode);
		return found;
	}

	/*
	 * Negative dentry: instantiate it unless the inode is a directory and
	 * already has a dentry.
	 */
	new = d_splice_alias(inode, found);
	if (new) {
		dput(found);
		found = new;
	}
	return found;

err_out:
	iput(inode);
	return found;
}
EXPORT_SYMBOL(d_add_ci);
1717 
1718 /*
1719  * Do the slow-case of the dentry name compare.
1720  *
1721  * Unlike the dentry_cmp() function, we need to atomically
1722  * load the name and length information, so that the
1723  * filesystem can rely on them, and can use the 'name' and
1724  * 'len' information without worrying about walking off the
1725  * end of memory etc.
1726  *
1727  * Thus the read_seqcount_retry() and the "duplicate" info
1728  * in arguments (the low-level filesystem should not look
1729  * at the dentry inode or name contents directly, since
1730  * rename can change them while we're in RCU mode).
1731  */
1732 enum slow_d_compare {
1733 	D_COMP_OK,
1734 	D_COMP_NOMATCH,
1735 	D_COMP_SEQRETRY,
1736 };
1737 
1738 static noinline enum slow_d_compare slow_dentry_cmp(
1739 		const struct dentry *parent,
1740 		struct dentry *dentry,
1741 		unsigned int seq,
1742 		const struct qstr *name)
1743 {
1744 	int tlen = dentry->d_name.len;
1745 	const char *tname = dentry->d_name.name;
1746 
1747 	if (read_seqcount_retry(&dentry->d_seq, seq)) {
1748 		cpu_relax();
1749 		return D_COMP_SEQRETRY;
1750 	}
1751 	if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
1752 		return D_COMP_NOMATCH;
1753 	return D_COMP_OK;
1754 }
1755 
1756 /**
1757  * __d_lookup_rcu - search for a dentry (racy, store-free)
1758  * @parent: parent dentry
1759  * @name: qstr of name we wish to find
1760  * @seqp: returns d_seq value at the point where the dentry was found
1761  * Returns: dentry, or NULL
1762  *
1763  * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1764  * resolution (store-free path walking) design described in
1765  * Documentation/filesystems/path-lookup.txt.
1766  *
1767  * This is not to be used outside core vfs.
1768  *
1769  * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1770  * held, and rcu_read_lock held. The returned dentry must not be stored into
1771  * without taking d_lock and checking d_seq sequence count against @seq
1772  * returned here.
1773  *
1774  * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1775  * function.
1776  *
1777  * Alternatively, __d_lookup_rcu may be called again to look up the child of
1778  * the returned dentry, so long as its parent's seqlock is checked after the
1779  * child is looked up. Thus, an interlocking stepping of sequence lock checks
1780  * is formed, giving integrity down the path walk.
1781  *
1782  * NOTE! The caller *has* to check the resulting dentry against the sequence
1783  * number we've returned before using any of the resulting dentry state!
1784  */
1785 struct dentry *__d_lookup_rcu(const struct dentry *parent,
1786 				const struct qstr *name,
1787 				unsigned *seqp)
1788 {
1789 	u64 hashlen = name->hash_len;
1790 	const unsigned char *str = name->name;
1791 	struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
1792 	struct hlist_bl_node *node;
1793 	struct dentry *dentry;
1794 
1795 	/*
1796 	 * Note: There is significant duplication with __d_lookup_rcu which is
1797 	 * required to prevent single threaded performance regressions
1798 	 * especially on architectures where smp_rmb (in seqcounts) are costly.
1799 	 * Keep the two functions in sync.
1800 	 */
1801 
1802 	/*
1803 	 * The hash list is protected using RCU.
1804 	 *
1805 	 * Carefully use d_seq when comparing a candidate dentry, to avoid
1806 	 * races with d_move().
1807 	 *
1808 	 * It is possible that concurrent renames can mess up our list
1809 	 * walk here and result in missing our dentry, resulting in the
1810 	 * false-negative result. d_lookup() protects against concurrent
1811 	 * renames using rename_lock seqlock.
1812 	 *
1813 	 * See Documentation/filesystems/path-lookup.txt for more details.
1814 	 */
1815 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1816 		unsigned seq;
1817 
1818 seqretry:
1819 		/*
1820 		 * The dentry sequence count protects us from concurrent
1821 		 * renames, and thus protects parent and name fields.
1822 		 *
1823 		 * The caller must perform a seqcount check in order
1824 		 * to do anything useful with the returned dentry.
1825 		 *
1826 		 * NOTE! We do a "raw" seqcount_begin here. That means that
1827 		 * we don't wait for the sequence count to stabilize if it
1828 		 * is in the middle of a sequence change. If we do the slow
1829 		 * dentry compare, we will do seqretries until it is stable,
1830 		 * and if we end up with a successful lookup, we actually
1831 		 * want to exit RCU lookup anyway.
1832 		 */
1833 		seq = raw_seqcount_begin(&dentry->d_seq);
1834 		if (dentry->d_parent != parent)
1835 			continue;
1836 		if (d_unhashed(dentry))
1837 			continue;
1838 
1839 		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1840 			if (dentry->d_name.hash != hashlen_hash(hashlen))
1841 				continue;
1842 			*seqp = seq;
1843 			switch (slow_dentry_cmp(parent, dentry, seq, name)) {
1844 			case D_COMP_OK:
1845 				return dentry;
1846 			case D_COMP_NOMATCH:
1847 				continue;
1848 			default:
1849 				goto seqretry;
1850 			}
1851 		}
1852 
1853 		if (dentry->d_name.hash_len != hashlen)
1854 			continue;
1855 		*seqp = seq;
1856 		if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
1857 			return dentry;
1858 	}
1859 	return NULL;
1860 }
1861 
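/*
 * Illustrative sketch (editor's addition, not in the original file): a
 * hypothetical rcu-walk caller looks the child up under rcu_read_lock()
 * and revalidates d_seq before trusting anything it read, e.g.:
 *
 *	unsigned seq;
 *	struct dentry *child;
 *
 *	rcu_read_lock();
 *	child = __d_lookup_rcu(parent, name, &seq);
 *	if (child && read_seqcount_retry(&child->d_seq, seq))
 *		child = NULL;	(raced with a rename - fall back)
 *	rcu_read_unlock();
 */
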
1862 /**
1863  * d_lookup - search for a dentry
1864  * @parent: parent dentry
1865  * @name: qstr of name we wish to find
1866  * Returns: dentry, or NULL
1867  *
1868  * d_lookup searches the children of the parent dentry for the name in
1869  * question. If the dentry is found its reference count is incremented and the
1870  * dentry is returned. The caller must use dput to free the entry when it has
1871  * finished using it. %NULL is returned if the dentry does not exist.
1872  */
1873 struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
1874 {
1875 	struct dentry *dentry;
1876 	unsigned seq;
1877 
1878 	do {
1879 		seq = read_seqbegin(&rename_lock);
1880 		dentry = __d_lookup(parent, name);
1881 		if (dentry)
1882 			break;
1883 	} while (read_seqretry(&rename_lock, seq));
1884 	return dentry;
1885 }
1886 EXPORT_SYMBOL(d_lookup);
1887 
1888 /**
1889  * __d_lookup - search for a dentry (racy)
1890  * @parent: parent dentry
1891  * @name: qstr of name we wish to find
1892  * Returns: dentry, or NULL
1893  *
1894  * __d_lookup is like d_lookup, however it may (rarely) return a
1895  * false-negative result due to unrelated rename activity.
1896  *
1897  * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1898  * however it must be used carefully, eg. with a following d_lookup in
1899  * the case of failure.
1900  *
1901  * __d_lookup callers must be commented.
1902  */
1903 struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
1904 {
1905 	unsigned int len = name->len;
1906 	unsigned int hash = name->hash;
1907 	const unsigned char *str = name->name;
1908 	struct hlist_bl_head *b = d_hash(parent, hash);
1909 	struct hlist_bl_node *node;
1910 	struct dentry *found = NULL;
1911 	struct dentry *dentry;
1912 
1913 	/*
1914 	 * Note: There is significant duplication with __d_lookup_rcu which is
1915 	 * required to prevent single threaded performance regressions
1916 	 * especially on architectures where smp_rmb (in seqcounts) is costly.
1917 	 * Keep the two functions in sync.
1918 	 */
1919 
1920 	/*
1921 	 * The hash list is protected using RCU.
1922 	 *
1923 	 * Take d_lock when comparing a candidate dentry, to avoid races
1924 	 * with d_move().
1925 	 *
1926 	 * It is possible that concurrent renames can mess up our list
1927 	 * walk here and cause us to miss our dentry, giving a
1928 	 * false-negative result. d_lookup() protects against concurrent
1929 	 * renames using the rename_lock seqlock.
1930 	 *
1931 	 * See Documentation/filesystems/path-lookup.txt for more details.
1932 	 */
1933 	rcu_read_lock();
1934 
1935 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1936 
1937 		if (dentry->d_name.hash != hash)
1938 			continue;
1939 
1940 		spin_lock(&dentry->d_lock);
1941 		if (dentry->d_parent != parent)
1942 			goto next;
1943 		if (d_unhashed(dentry))
1944 			goto next;
1945 
1946 		/*
1947 		 * It is safe to compare names since d_move() cannot
1948 		 * change the qstr (protected by d_lock).
1949 		 */
1950 		if (parent->d_flags & DCACHE_OP_COMPARE) {
1951 			int tlen = dentry->d_name.len;
1952 			const char *tname = dentry->d_name.name;
1953 			if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
1954 				goto next;
1955 		} else {
1956 			if (dentry->d_name.len != len)
1957 				goto next;
1958 			if (dentry_cmp(dentry, str, len))
1959 				goto next;
1960 		}
1961 
1962 		dentry->d_lockref.count++;
1963 		found = dentry;
1964 		spin_unlock(&dentry->d_lock);
1965 		break;
1966 next:
1967 		spin_unlock(&dentry->d_lock);
1968 	}
1969 	rcu_read_unlock();
1970 
1971 	return found;
1972 }
1973 
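/*
 * Illustrative sketch (editor's addition, not in the original file): the
 * usual "careful" caller retries under the seqlock-protected d_lookup()
 * whenever the racy fast path comes back empty:
 *
 *	dentry = __d_lookup(parent, name);
 *	if (!dentry)
 *		dentry = d_lookup(parent, name);
 */
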
1974 /**
1975  * d_hash_and_lookup - hash the qstr then search for a dentry
1976  * @dir: Directory to search in
1977  * @name: qstr of name we wish to find
1978  *
1979  * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
1980  */
1981 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1982 {
1983 	/*
1984 	 * Check for a fs-specific hash function. Note that we must
1985 	 * calculate the standard hash first, as the d_op->d_hash()
1986 	 * routine may choose to leave the hash value unchanged.
1987 	 */
1988 	name->hash = full_name_hash(name->name, name->len);
1989 	if (dir->d_flags & DCACHE_OP_HASH) {
1990 		int err = dir->d_op->d_hash(dir, name);
1991 		if (unlikely(err < 0))
1992 			return ERR_PTR(err);
1993 	}
1994 	return d_lookup(dir, name);
1995 }
1996 EXPORT_SYMBOL(d_hash_and_lookup);
1997 
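/*
 * Hypothetical caller sketch (editor's addition, for illustration only):
 * build a qstr from a raw string and let d_hash_and_lookup() apply any
 * filesystem-specific ->d_hash() before searching:
 *
 *	struct qstr this;
 *	struct dentry *de;
 *
 *	this.name = name;
 *	this.len = strlen(name);
 *	de = d_hash_and_lookup(dir, &this);
 *	if (IS_ERR(de))
 *		return PTR_ERR(de);
 */
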
1998 /**
1999  * d_validate - verify dentry provided from insecure source (deprecated)
2000  * @dentry: The dentry alleged to be valid child of @dparent
2001  * @dparent: The parent dentry (known to be valid)
2002  *
2003  * An insecure source has sent us a dentry, here we verify it and dget() it.
2004  * This is used by ncpfs in its readdir implementation.
2005  * Zero is returned if the dentry is invalid.
2006  *
2007  * This function is slow for big directories and deprecated; do not use it.
2008  */
2009 int d_validate(struct dentry *dentry, struct dentry *dparent)
2010 {
2011 	struct dentry *child;
2012 
2013 	spin_lock(&dparent->d_lock);
2014 	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
2015 		if (dentry == child) {
2016 			spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2017 			__dget_dlock(dentry);
2018 			spin_unlock(&dentry->d_lock);
2019 			spin_unlock(&dparent->d_lock);
2020 			return 1;
2021 		}
2022 	}
2023 	spin_unlock(&dparent->d_lock);
2024 
2025 	return 0;
2026 }
2027 EXPORT_SYMBOL(d_validate);
2028 
2029 /*
2030  * When a file is deleted, we have two options:
2031  * - turn this dentry into a negative dentry
2032  * - unhash this dentry and free it.
2033  *
2034  * Usually, we want to just turn this into
2035  * a negative dentry, but if anybody else is
2036  * currently using the dentry or the inode,
2037  * we can't do that and we fall back on removing
2038  * it from the hash queues and waiting for
2039  * it to be deleted later when it has no users.
2040  */
2041 
2042 /**
2043  * d_delete - delete a dentry
2044  * @dentry: The dentry to delete
2045  *
2046  * Turn the dentry into a negative dentry if possible, otherwise
2047  * remove it from the hash queues so it can be deleted later
2048  */
2049 
2050 void d_delete(struct dentry * dentry)
2051 {
2052 	struct inode *inode;
2053 	int isdir = 0;
2054 	/*
2055 	 * Are we the only user?
2056 	 */
2057 again:
2058 	spin_lock(&dentry->d_lock);
2059 	inode = dentry->d_inode;
2060 	isdir = S_ISDIR(inode->i_mode);
2061 	if (dentry->d_lockref.count == 1) {
2062 		if (!spin_trylock(&inode->i_lock)) {
2063 			spin_unlock(&dentry->d_lock);
2064 			cpu_relax();
2065 			goto again;
2066 		}
2067 		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2068 		dentry_unlink_inode(dentry);
2069 		fsnotify_nameremove(dentry, isdir);
2070 		return;
2071 	}
2072 
2073 	if (!d_unhashed(dentry))
2074 		__d_drop(dentry);
2075 
2076 	spin_unlock(&dentry->d_lock);
2077 
2078 	fsnotify_nameremove(dentry, isdir);
2079 }
2080 EXPORT_SYMBOL(d_delete);
2081 
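/*
 * Editorial note (not in the original file): i_lock nests outside d_lock,
 * so with d_lock already held d_delete() cannot block on i_lock without
 * inverting the lock order. The code above therefore uses the standard
 * trylock-and-retry idiom; schematically, with hypothetical locks "outer"
 * and "inner":
 *
 *	spin_lock(&inner);
 *	if (!spin_trylock(&outer)) {
 *		spin_unlock(&inner);
 *		cpu_relax();
 *		goto again;
 *	}
 */
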
2082 static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2083 {
2084 	BUG_ON(!d_unhashed(entry));
2085 	hlist_bl_lock(b);
2086 	entry->d_flags |= DCACHE_RCUACCESS;
2087 	hlist_bl_add_head_rcu(&entry->d_hash, b);
2088 	hlist_bl_unlock(b);
2089 }
2090 
2091 static void _d_rehash(struct dentry * entry)
2092 {
2093 	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2094 }
2095 
2096 /**
2097  * d_rehash	- add an entry back to the hash
2098  * @entry: dentry to add to the hash
2099  *
2100  * Adds a dentry to the hash according to its name.
2101  */
2102 
2103 void d_rehash(struct dentry * entry)
2104 {
2105 	spin_lock(&entry->d_lock);
2106 	_d_rehash(entry);
2107 	spin_unlock(&entry->d_lock);
2108 }
2109 EXPORT_SYMBOL(d_rehash);
2110 
2111 /**
2112  * dentry_update_name_case - update case insensitive dentry with a new name
2113  * @dentry: dentry to be updated
2114  * @name: new name
2115  *
2116  * Update a case insensitive dentry with new case of name.
2117  *
2118  * dentry must have been returned by d_lookup with name @name. Old and new
2119  * name lengths must match (ie. no d_compare which allows mismatched name
2120  * lengths).
2121  *
2122  * Parent inode i_mutex must be held over d_lookup and into this call (to
2123  * keep renames, concurrent inserts, and readdir(2) away).
2124  */
2125 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2126 {
2127 	BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2128 	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2129 
2130 	spin_lock(&dentry->d_lock);
2131 	write_seqcount_begin(&dentry->d_seq);
2132 	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2133 	write_seqcount_end(&dentry->d_seq);
2134 	spin_unlock(&dentry->d_lock);
2135 }
2136 EXPORT_SYMBOL(dentry_update_name_case);
2137 
2138 static void switch_names(struct dentry *dentry, struct dentry *target)
2139 {
2140 	if (dname_external(target)) {
2141 		if (dname_external(dentry)) {
2142 			/*
2143 			 * Both external: swap the pointers
2144 			 */
2145 			swap(target->d_name.name, dentry->d_name.name);
2146 		} else {
2147 			/*
2148 			 * dentry:internal, target:external.  Steal target's
2149 			 * storage and make target internal.
2150 			 */
2151 			memcpy(target->d_iname, dentry->d_name.name,
2152 					dentry->d_name.len + 1);
2153 			dentry->d_name.name = target->d_name.name;
2154 			target->d_name.name = target->d_iname;
2155 		}
2156 	} else {
2157 		if (dname_external(dentry)) {
2158 			/*
2159 			 * dentry:external, target:internal.  Give dentry's
2160 			 * storage to target and make dentry internal
2161 			 */
2162 			memcpy(dentry->d_iname, target->d_name.name,
2163 					target->d_name.len + 1);
2164 			target->d_name.name = dentry->d_name.name;
2165 			dentry->d_name.name = dentry->d_iname;
2166 		} else {
2167 			/*
2168 			 * Both are internal.  Just copy target to dentry
2169 			 */
2170 			memcpy(dentry->d_iname, target->d_name.name,
2171 					target->d_name.len + 1);
2172 			dentry->d_name.len = target->d_name.len;
2173 			return;
2174 		}
2175 	}
2176 	swap(dentry->d_name.len, target->d_name.len);
2177 }
2178 
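/*
 * For reference, a sketch matching the predicate used above (the helper
 * is defined earlier in this file): a name is "external" when it does not
 * live in the dentry's inline d_iname array:
 *
 *	static inline int dname_external(struct dentry *dentry)
 *	{
 *		return dentry->d_name.name != dentry->d_iname;
 *	}
 */
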
2179 static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2180 {
2181 	/*
2182 	 * XXXX: do we really need to take target->d_lock?
2183 	 */
2184 	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2185 		spin_lock(&target->d_parent->d_lock);
2186 	else {
2187 		if (d_ancestor(dentry->d_parent, target->d_parent)) {
2188 			spin_lock(&dentry->d_parent->d_lock);
2189 			spin_lock_nested(&target->d_parent->d_lock,
2190 						DENTRY_D_LOCK_NESTED);
2191 		} else {
2192 			spin_lock(&target->d_parent->d_lock);
2193 			spin_lock_nested(&dentry->d_parent->d_lock,
2194 						DENTRY_D_LOCK_NESTED);
2195 		}
2196 	}
2197 	if (target < dentry) {
2198 		spin_lock_nested(&target->d_lock, 2);
2199 		spin_lock_nested(&dentry->d_lock, 3);
2200 	} else {
2201 		spin_lock_nested(&dentry->d_lock, 2);
2202 		spin_lock_nested(&target->d_lock, 3);
2203 	}
2204 }
2205 
2206 static void dentry_unlock_parents_for_move(struct dentry *dentry,
2207 					struct dentry *target)
2208 {
2209 	if (target->d_parent != dentry->d_parent)
2210 		spin_unlock(&dentry->d_parent->d_lock);
2211 	if (target->d_parent != target)
2212 		spin_unlock(&target->d_parent->d_lock);
2213 }
2214 
2215 /*
2216  * When switching names, the actual string doesn't strictly have to
2217  * be preserved in the target - because we're dropping the target
2218  * anyway. As such, we can just do a simple memcpy() to copy over
2219  * the new name before we switch.
2220  *
2221  * Note that we have to be a lot more careful about getting the hash
2222  * switched - we have to switch the hash value properly even if it
2223  * then no longer matches the actual (corrupted) string of the target.
2224  * The hash value has to match the hash queue that the dentry is on.
2225  */
2226 /*
2227  * __d_move - move a dentry
2228  * @dentry: entry to move
2229  * @target: new dentry
2230  *
2231  * Update the dcache to reflect the move of a file name. Negative
2232  * dcache entries should not be moved in this way. Caller must hold
2233  * rename_lock, the i_mutex of the source and target directories,
2234  * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2235  */
2236 static void __d_move(struct dentry * dentry, struct dentry * target)
2237 {
2238 	if (!dentry->d_inode)
2239 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2240 
2241 	BUG_ON(d_ancestor(dentry, target));
2242 	BUG_ON(d_ancestor(target, dentry));
2243 
2244 	dentry_lock_for_move(dentry, target);
2245 
2246 	write_seqcount_begin(&dentry->d_seq);
2247 	write_seqcount_begin(&target->d_seq);
2248 
2249 	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2250 
2251 	/*
2252 	 * Move the dentry to the target hash queue. Don't bother checking
2253 	 * for the same hash queue because of how unlikely it is.
2254 	 */
2255 	__d_drop(dentry);
2256 	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2257 
2258 	/* Unhash the target: dput() will then get rid of it */
2259 	__d_drop(target);
2260 
2261 	list_del(&dentry->d_u.d_child);
2262 	list_del(&target->d_u.d_child);
2263 
2264 	/* Switch the names.. */
2265 	switch_names(dentry, target);
2266 	swap(dentry->d_name.hash, target->d_name.hash);
2267 
2268 	/* ... and switch the parents */
2269 	if (IS_ROOT(dentry)) {
2270 		dentry->d_parent = target->d_parent;
2271 		target->d_parent = target;
2272 		INIT_LIST_HEAD(&target->d_u.d_child);
2273 	} else {
2274 		swap(dentry->d_parent, target->d_parent);
2275 
2276 		/* And add them back to the (new) parent lists */
2277 		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2278 	}
2279 
2280 	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2281 
2282 	write_seqcount_end(&target->d_seq);
2283 	write_seqcount_end(&dentry->d_seq);
2284 
2285 	dentry_unlock_parents_for_move(dentry, target);
2286 	spin_unlock(&target->d_lock);
2287 	fsnotify_d_move(dentry);
2288 	spin_unlock(&dentry->d_lock);
2289 }
2290 
2291 /*
2292  * d_move - move a dentry
2293  * @dentry: entry to move
2294  * @target: new dentry
2295  *
2296  * Update the dcache to reflect the move of a file name. Negative
2297  * dcache entries should not be moved in this way. See the locking
2298  * requirements for __d_move.
2299  */
2300 void d_move(struct dentry *dentry, struct dentry *target)
2301 {
2302 	write_seqlock(&rename_lock);
2303 	__d_move(dentry, target);
2304 	write_sequnlock(&rename_lock);
2305 }
2306 EXPORT_SYMBOL(d_move);
2307 
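/*
 * Hypothetical caller sketch (editor's addition; see lock_rename() in
 * fs/namei.c for the real locking helper): a rename path typically takes
 * both directories' locks before moving the dentry:
 *
 *	trap = lock_rename(new_dir, old_dir);
 *	...
 *	d_move(old_dentry, new_dentry);
 *	unlock_rename(new_dir, old_dir);
 */
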
2308 /**
2309  * d_ancestor - search for an ancestor
2310  * @p1: ancestor dentry
2311  * @p2: child dentry
2312  *
2313  * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2314  * an ancestor of p2, else NULL.
2315  */
2316 struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2317 {
2318 	struct dentry *p;
2319 
2320 	for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2321 		if (p->d_parent == p1)
2322 			return p;
2323 	}
2324 	return NULL;
2325 }
2326 
2327 /*
2328  * This helper attempts to cope with remotely renamed directories
2329  *
2330  * It assumes that the caller is already holding
2331  * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2332  *
2333  * Note: If ever the locking in lock_rename() changes, then please
2334  * remember to update this too...
2335  */
2336 static struct dentry *__d_unalias(struct inode *inode,
2337 		struct dentry *dentry, struct dentry *alias)
2338 {
2339 	struct mutex *m1 = NULL, *m2 = NULL;
2340 	struct dentry *ret = ERR_PTR(-EBUSY);
2341 
2342 	/* If alias and dentry share a parent, then no extra locks required */
2343 	if (alias->d_parent == dentry->d_parent)
2344 		goto out_unalias;
2345 
2346 	/* See lock_rename() */
2347 	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2348 		goto out_err;
2349 	m1 = &dentry->d_sb->s_vfs_rename_mutex;
2350 	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2351 		goto out_err;
2352 	m2 = &alias->d_parent->d_inode->i_mutex;
2353 out_unalias:
2354 	if (likely(!d_mountpoint(alias))) {
2355 		__d_move(alias, dentry);
2356 		ret = alias;
2357 	}
2358 out_err:
2359 	spin_unlock(&inode->i_lock);
2360 	if (m2)
2361 		mutex_unlock(m2);
2362 	if (m1)
2363 		mutex_unlock(m1);
2364 	return ret;
2365 }
2366 
2367 /*
2368  * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2369  * named dentry in place of the dentry to be replaced.
2370  * returns with anon->d_lock held!
2371  */
2372 static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2373 {
2374 	struct dentry *dparent;
2375 
2376 	dentry_lock_for_move(anon, dentry);
2377 
2378 	write_seqcount_begin(&dentry->d_seq);
2379 	write_seqcount_begin(&anon->d_seq);
2380 
2381 	dparent = dentry->d_parent;
2382 
2383 	switch_names(dentry, anon);
2384 	swap(dentry->d_name.hash, anon->d_name.hash);
2385 
2386 	dentry->d_parent = dentry;
2387 	list_del_init(&dentry->d_u.d_child);
2388 	anon->d_parent = dparent;
2389 	list_move(&anon->d_u.d_child, &dparent->d_subdirs);
2390 
2391 	write_seqcount_end(&dentry->d_seq);
2392 	write_seqcount_end(&anon->d_seq);
2393 
2394 	dentry_unlock_parents_for_move(anon, dentry);
2395 	spin_unlock(&dentry->d_lock);
2396 
2397 	/* anon->d_lock still locked, returns locked */
2398 	anon->d_flags &= ~DCACHE_DISCONNECTED;
2399 }
2400 
2401 /**
2402  * d_materialise_unique - introduce an inode into the tree
2403  * @dentry: candidate dentry
2404  * @inode: inode to bind to the dentry, to which aliases may be attached
2405  *
2406  * Introduces a dentry into the tree, substituting an extant disconnected
2407  * root directory alias in its place if there is one. Caller must hold the
2408  * i_mutex of the parent directory.
2409  */
2410 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2411 {
2412 	struct dentry *actual;
2413 
2414 	BUG_ON(!d_unhashed(dentry));
2415 
2416 	if (!inode) {
2417 		actual = dentry;
2418 		__d_instantiate(dentry, NULL);
2419 		d_rehash(actual);
2420 		goto out_nolock;
2421 	}
2422 
2423 	spin_lock(&inode->i_lock);
2424 
2425 	if (S_ISDIR(inode->i_mode)) {
2426 		struct dentry *alias;
2427 
2428 		/* Does an aliased dentry already exist? */
2429 		alias = __d_find_alias(inode, 0);
2430 		if (alias) {
2431 			actual = alias;
2432 			write_seqlock(&rename_lock);
2433 
2434 			if (d_ancestor(alias, dentry)) {
2435 				/* Check for loops */
2436 				actual = ERR_PTR(-ELOOP);
2437 				spin_unlock(&inode->i_lock);
2438 			} else if (IS_ROOT(alias)) {
2439 				/* Is this an anonymous mountpoint that we
2440 				 * could splice into our tree? */
2441 				__d_materialise_dentry(dentry, alias);
2442 				write_sequnlock(&rename_lock);
2443 				__d_drop(alias);
2444 				goto found;
2445 			} else {
2446 				/* Nope, but we must(!) avoid directory
2447 				 * aliasing. This drops inode->i_lock */
2448 				actual = __d_unalias(inode, dentry, alias);
2449 			}
2450 			write_sequnlock(&rename_lock);
2451 			if (IS_ERR(actual)) {
2452 				if (PTR_ERR(actual) == -ELOOP)
2453 					pr_warn_ratelimited(
2454 						"VFS: Lookup of '%s' in %s %s"
2455 						" would have caused loop\n",
2456 						dentry->d_name.name,
2457 						inode->i_sb->s_type->name,
2458 						inode->i_sb->s_id);
2459 				dput(alias);
2460 			}
2461 			goto out_nolock;
2462 		}
2463 	}
2464 
2465 	/* Add a unique reference */
2466 	actual = __d_instantiate_unique(dentry, inode);
2467 	if (!actual)
2468 		actual = dentry;
2469 	else
2470 		BUG_ON(!d_unhashed(actual));
2471 
2472 	spin_lock(&actual->d_lock);
2473 found:
2474 	_d_rehash(actual);
2475 	spin_unlock(&actual->d_lock);
2476 	spin_unlock(&inode->i_lock);
2477 out_nolock:
2478 	if (actual == dentry) {
2479 		security_d_instantiate(dentry, inode);
2480 		return NULL;
2481 	}
2482 
2483 	iput(inode);
2484 	return actual;
2485 }
2486 EXPORT_SYMBOL_GPL(d_materialise_unique);
2487 
2488 static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2489 {
2490 	*buflen -= namelen;
2491 	if (*buflen < 0)
2492 		return -ENAMETOOLONG;
2493 	*buffer -= namelen;
2494 	memcpy(*buffer, str, namelen);
2495 	return 0;
2496 }
2497 
2498 static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2499 {
2500 	return prepend(buffer, buflen, name->name, name->len);
2501 }
2502 
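/*
 * Illustrative sketch (editor's addition, not in the original file):
 * prepend() fills the buffer right-to-left, so callers start at the end
 * and walk backwards; after the three calls below, p points at the
 * NUL-terminated string "/name":
 *
 *	char buf[64];
 *	char *p = buf + sizeof(buf);
 *	int len = sizeof(buf);
 *
 *	prepend(&p, &len, "\0", 1);
 *	prepend(&p, &len, "name", 4);
 *	prepend(&p, &len, "/", 1);
 */
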
2503 /**
2504  * prepend_path - Prepend path string to a buffer
2505  * @path: the dentry/vfsmount to report
2506  * @root: root vfsmnt/dentry
2507  * @buffer: pointer to the end of the buffer
2508  * @buflen: pointer to buffer length
2509  *
2510  * Caller holds the rename_lock. Returns 0 on success, a negative error if the buffer is too small, or a positive value if @path is not reachable from @root.
2511  */
2512 static int prepend_path(const struct path *path,
2513 			const struct path *root,
2514 			char **buffer, int *buflen)
2515 {
2516 	struct dentry *dentry = path->dentry;
2517 	struct vfsmount *vfsmnt = path->mnt;
2518 	struct mount *mnt = real_mount(vfsmnt);
2519 	bool slash = false;
2520 	int error = 0;
2521 
2522 	while (dentry != root->dentry || vfsmnt != root->mnt) {
2523 		struct dentry * parent;
2524 
2525 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2526 			/* Global root? */
2527 			if (!mnt_has_parent(mnt))
2528 				goto global_root;
2529 			dentry = mnt->mnt_mountpoint;
2530 			mnt = mnt->mnt_parent;
2531 			vfsmnt = &mnt->mnt;
2532 			continue;
2533 		}
2534 		parent = dentry->d_parent;
2535 		prefetch(parent);
2536 		spin_lock(&dentry->d_lock);
2537 		error = prepend_name(buffer, buflen, &dentry->d_name);
2538 		spin_unlock(&dentry->d_lock);
2539 		if (!error)
2540 			error = prepend(buffer, buflen, "/", 1);
2541 		if (error)
2542 			break;
2543 
2544 		slash = true;
2545 		dentry = parent;
2546 	}
2547 
2548 	if (!error && !slash)
2549 		error = prepend(buffer, buflen, "/", 1);
2550 
2551 	return error;
2552 
2553 global_root:
2554 	/*
2555 	 * Filesystems needing to implement special "root names"
2556 	 * should do so with ->d_dname()
2557 	 */
2558 	if (IS_ROOT(dentry) &&
2559 	    (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2560 		WARN(1, "Root dentry has weird name <%.*s>\n",
2561 		     (int) dentry->d_name.len, dentry->d_name.name);
2562 	}
2563 	if (!slash)
2564 		error = prepend(buffer, buflen, "/", 1);
2565 	if (!error)
2566 		error = is_mounted(vfsmnt) ? 1 : 2;
2567 	return error;
2568 }
2569 
2570 /**
2571  * __d_path - return the path of a dentry
2572  * @path: the dentry/vfsmount to report
2573  * @root: root vfsmnt/dentry
2574  * @buf: buffer to return value in
2575  * @buflen: buffer length
2576  *
2577  * Convert a dentry into an ASCII path name.
2578  *
2579  * Returns a pointer into the buffer or an error code if the
2580  * path was too long.
2581  *
2582  * "buflen" should be positive.
2583  *
2584  * If the path is not reachable from the supplied root, return %NULL.
2585  */
2586 char *__d_path(const struct path *path,
2587 	       const struct path *root,
2588 	       char *buf, int buflen)
2589 {
2590 	char *res = buf + buflen;
2591 	int error;
2592 
2593 	prepend(&res, &buflen, "\0", 1);
2594 	br_read_lock(&vfsmount_lock);
2595 	write_seqlock(&rename_lock);
2596 	error = prepend_path(path, root, &res, &buflen);
2597 	write_sequnlock(&rename_lock);
2598 	br_read_unlock(&vfsmount_lock);
2599 
2600 	if (error < 0)
2601 		return ERR_PTR(error);
2602 	if (error > 0)
2603 		return NULL;
2604 	return res;
2605 }
2606 
2607 char *d_absolute_path(const struct path *path,
2608 	       char *buf, int buflen)
2609 {
2610 	struct path root = {};
2611 	char *res = buf + buflen;
2612 	int error;
2613 
2614 	prepend(&res, &buflen, "\0", 1);
2615 	br_read_lock(&vfsmount_lock);
2616 	write_seqlock(&rename_lock);
2617 	error = prepend_path(path, &root, &res, &buflen);
2618 	write_sequnlock(&rename_lock);
2619 	br_read_unlock(&vfsmount_lock);
2620 
2621 	if (error > 1)
2622 		error = -EINVAL;
2623 	if (error < 0)
2624 		return ERR_PTR(error);
2625 	return res;
2626 }
2627 
2628 /*
2629  * same as __d_path but appends "(deleted)" for unlinked files.
2630  */
2631 static int path_with_deleted(const struct path *path,
2632 			     const struct path *root,
2633 			     char **buf, int *buflen)
2634 {
2635 	prepend(buf, buflen, "\0", 1);
2636 	if (d_unlinked(path->dentry)) {
2637 		int error = prepend(buf, buflen, " (deleted)", 10);
2638 		if (error)
2639 			return error;
2640 	}
2641 
2642 	return prepend_path(path, root, buf, buflen);
2643 }
2644 
2645 static int prepend_unreachable(char **buffer, int *buflen)
2646 {
2647 	return prepend(buffer, buflen, "(unreachable)", 13);
2648 }
2649 
2650 /**
2651  * d_path - return the path of a dentry
2652  * @path: path to report
2653  * @buf: buffer to return value in
2654  * @buflen: buffer length
2655  *
2656  * Convert a dentry into an ASCII path name. If the entry has been deleted
2657  * the string " (deleted)" is appended. Note that this is ambiguous.
2658  *
2659  * Returns a pointer into the buffer or an error code if the path was
2660  * too long. Note: Callers should use the returned pointer, not the passed
2661  * in buffer, to use the name! The implementation often starts at an offset
2662  * into the buffer, and may leave 0 bytes at the start.
2663  *
2664  * "buflen" should be positive.
2665  */
2666 char *d_path(const struct path *path, char *buf, int buflen)
2667 {
2668 	char *res = buf + buflen;
2669 	struct path root;
2670 	int error;
2671 
2672 	/*
2673 	 * We have various synthetic filesystems that never get mounted.  On
2674 	 * these filesystems dentries are never used for lookup purposes, and
2675 	 * thus don't need to be hashed.  They also don't need a name until a
2676 	 * user wants to identify the object in /proc/pid/fd/.  The little hack
2677 	 * below allows us to generate a name for these objects on demand:
2678 	 */
2679 	if (path->dentry->d_op && path->dentry->d_op->d_dname)
2680 		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2681 
2682 	get_fs_root(current->fs, &root);
2683 	br_read_lock(&vfsmount_lock);
2684 	write_seqlock(&rename_lock);
2685 	error = path_with_deleted(path, &root, &res, &buflen);
2686 	write_sequnlock(&rename_lock);
2687 	br_read_unlock(&vfsmount_lock);
2688 	if (error < 0)
2689 		res = ERR_PTR(error);
2690 	path_put(&root);
2691 	return res;
2692 }
2693 EXPORT_SYMBOL(d_path);
2694 
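/*
 * Hypothetical caller sketch (editor's addition), using the returned
 * pointer rather than the start of the buffer, as required above:
 *
 *	char *page = (char *)__get_free_page(GFP_KERNEL);
 *	char *p;
 *
 *	if (!page)
 *		return -ENOMEM;
 *	p = d_path(&file->f_path, page, PAGE_SIZE);
 *	if (!IS_ERR(p))
 *		pr_info("path: %s\n", p);
 *	free_page((unsigned long)page);
 */
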
2695 /*
2696  * Helper function for dentry_operations.d_dname() members
2697  */
2698 char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2699 			const char *fmt, ...)
2700 {
2701 	va_list args;
2702 	char temp[64];
2703 	int sz;
2704 
2705 	va_start(args, fmt);
2706 	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2707 	va_end(args);
2708 
2709 	if (sz > sizeof(temp) || sz > buflen)
2710 		return ERR_PTR(-ENAMETOOLONG);
2711 
2712 	buffer += buflen - sz;
2713 	return memcpy(buffer, temp, sz);
2714 }
2715 
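/*
 * Sketch of a typical ->d_dname() user (modelled on pipefs; shown here
 * for illustration only):
 *
 *	static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
 *	{
 *		return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
 *				dentry->d_inode->i_ino);
 *	}
 */
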
2716 char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
2717 {
2718 	char *end = buffer + buflen;
2719 	/* these dentries are never renamed, so d_lock is not needed */
2720 	if (prepend(&end, &buflen, " (deleted)", 11) ||
2721 	    prepend_name(&end, &buflen, &dentry->d_name) ||
2722 	    prepend(&end, &buflen, "/", 1))
2723 		end = ERR_PTR(-ENAMETOOLONG);
2724 	return end;
2725 }
2726 
2727 /*
2728  * Write full pathname from the root of the filesystem into the buffer.
2729  */
2730 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2731 {
2732 	char *end = buf + buflen;
2733 	char *retval;
2734 
2735 	prepend(&end, &buflen, "\0", 1);
2736 	if (buflen < 1)
2737 		goto Elong;
2738 	/* Get '/' right */
2739 	retval = end-1;
2740 	*retval = '/';
2741 
2742 	while (!IS_ROOT(dentry)) {
2743 		struct dentry *parent = dentry->d_parent;
2744 		int error;
2745 
2746 		prefetch(parent);
2747 		spin_lock(&dentry->d_lock);
2748 		error = prepend_name(&end, &buflen, &dentry->d_name);
2749 		spin_unlock(&dentry->d_lock);
2750 		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2751 			goto Elong;
2752 
2753 		retval = end;
2754 		dentry = parent;
2755 	}
2756 	return retval;
2757 Elong:
2758 	return ERR_PTR(-ENAMETOOLONG);
2759 }
2760 
2761 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2762 {
2763 	char *retval;
2764 
2765 	write_seqlock(&rename_lock);
2766 	retval = __dentry_path(dentry, buf, buflen);
2767 	write_sequnlock(&rename_lock);
2768 
2769 	return retval;
2770 }
2771 EXPORT_SYMBOL(dentry_path_raw);
2772 
2773 char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2774 {
2775 	char *p = NULL;
2776 	char *retval;
2777 
2778 	write_seqlock(&rename_lock);
2779 	if (d_unlinked(dentry)) {
2780 		p = buf + buflen;
2781 		if (prepend(&p, &buflen, "//deleted", 10) != 0)
2782 			goto Elong;
2783 		buflen++;
2784 	}
2785 	retval = __dentry_path(dentry, buf, buflen);
2786 	write_sequnlock(&rename_lock);
2787 	if (!IS_ERR(retval) && p)
2788 		*p = '/';	/* restore '/' overridden with '\0' */
2789 	return retval;
2790 Elong:
2791 	return ERR_PTR(-ENAMETOOLONG);
2792 }
2793 
2794 /*
2795  * NOTE! The user-level library version returns a
2796  * character pointer. The kernel system call just
2797  * returns the length of the buffer filled (which
2798  * includes the ending '\0' character), or a negative
2799  * error value. So libc would do something like
2800  *
2801  *	char *getcwd(char * buf, size_t size)
2802  *	{
2803  *		int retval;
2804  *
2805  *		retval = sys_getcwd(buf, size);
2806  *		if (retval >= 0)
2807  *			return buf;
2808  *		errno = -retval;
2809  *		return NULL;
2810  *	}
2811  */
2812 SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2813 {
2814 	int error;
2815 	struct path pwd, root;
2816 	char *page = (char *) __get_free_page(GFP_USER);
2817 
2818 	if (!page)
2819 		return -ENOMEM;
2820 
2821 	get_fs_root_and_pwd(current->fs, &root, &pwd);
2822 
2823 	error = -ENOENT;
2824 	br_read_lock(&vfsmount_lock);
2825 	write_seqlock(&rename_lock);
2826 	if (!d_unlinked(pwd.dentry)) {
2827 		unsigned long len;
2828 		char *cwd = page + PAGE_SIZE;
2829 		int buflen = PAGE_SIZE;
2830 
2831 		prepend(&cwd, &buflen, "\0", 1);
2832 		error = prepend_path(&pwd, &root, &cwd, &buflen);
2833 		write_sequnlock(&rename_lock);
2834 		br_read_unlock(&vfsmount_lock);
2835 
2836 		if (error < 0)
2837 			goto out;
2838 
2839 		/* Unreachable from current root */
2840 		if (error > 0) {
2841 			error = prepend_unreachable(&cwd, &buflen);
2842 			if (error)
2843 				goto out;
2844 		}
2845 
2846 		error = -ERANGE;
2847 		len = PAGE_SIZE + page - cwd;
2848 		if (len <= size) {
2849 			error = len;
2850 			if (copy_to_user(buf, cwd, len))
2851 				error = -EFAULT;
2852 		}
2853 	} else {
2854 		write_sequnlock(&rename_lock);
2855 		br_read_unlock(&vfsmount_lock);
2856 	}
2857 
2858 out:
2859 	path_put(&pwd);
2860 	path_put(&root);
2861 	free_page((unsigned long) page);
2862 	return error;
2863 }
2864 
2865 /*
2866  * Test whether new_dentry is a subdirectory of old_dentry.
2867  *
2868  * Trivially implemented using the dcache structure
2869  */
2870 
2871 /**
2872  * is_subdir - is new dentry a subdirectory of old_dentry
2873  * @new_dentry: new dentry
2874  * @old_dentry: old dentry
2875  *
2876  * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth).
2877  * Returns 0 otherwise.
2878  * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2879  */
2880 
2881 int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2882 {
2883 	int result;
2884 	unsigned seq;
2885 
2886 	if (new_dentry == old_dentry)
2887 		return 1;
2888 
2889 	do {
2890 		/* for restarting inner loop in case of seq retry */
2891 		seq = read_seqbegin(&rename_lock);
2892 		/*
2893 		 * Need rcu_read_lock() to protect against the d_parent
2894 		 * being changed under us by d_move()
2895 		 */
2896 		rcu_read_lock();
2897 		if (d_ancestor(old_dentry, new_dentry))
2898 			result = 1;
2899 		else
2900 			result = 0;
2901 		rcu_read_unlock();
2902 	} while (read_seqretry(&rename_lock, seq));
2903 
2904 	return result;
2905 }
2906 
2907 void d_genocide(struct dentry *root)
2908 {
2909 	struct dentry *this_parent;
2910 	struct list_head *next;
2911 	unsigned seq;
2912 	int locked = 0;
2913 
2914 	seq = read_seqbegin(&rename_lock);
2915 again:
2916 	this_parent = root;
2917 	spin_lock(&this_parent->d_lock);
2918 repeat:
2919 	next = this_parent->d_subdirs.next;
2920 resume:
2921 	while (next != &this_parent->d_subdirs) {
2922 		struct list_head *tmp = next;
2923 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2924 		next = tmp->next;
2925 
2926 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2927 		if (d_unhashed(dentry) || !dentry->d_inode) {
2928 			spin_unlock(&dentry->d_lock);
2929 			continue;
2930 		}
2931 		if (!list_empty(&dentry->d_subdirs)) {
2932 			spin_unlock(&this_parent->d_lock);
2933 			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2934 			this_parent = dentry;
2935 			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2936 			goto repeat;
2937 		}
2938 		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2939 			dentry->d_flags |= DCACHE_GENOCIDE;
2940 			dentry->d_lockref.count--;
2941 		}
2942 		spin_unlock(&dentry->d_lock);
2943 	}
2944 	if (this_parent != root) {
2945 		struct dentry *child = this_parent;
2946 		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2947 			this_parent->d_flags |= DCACHE_GENOCIDE;
2948 			this_parent->d_lockref.count--;
2949 		}
2950 		this_parent = try_to_ascend(this_parent, locked, seq);
2951 		if (!this_parent)
2952 			goto rename_retry;
2953 		next = child->d_u.d_child.next;
2954 		goto resume;
2955 	}
2956 	spin_unlock(&this_parent->d_lock);
2957 	if (!locked && read_seqretry(&rename_lock, seq))
2958 		goto rename_retry;
2959 	if (locked)
2960 		write_sequnlock(&rename_lock);
2961 	return;
2962 
2963 rename_retry:
2964 	if (locked)
2965 		goto again;
2966 	locked = 1;
2967 	write_seqlock(&rename_lock);
2968 	goto again;
2969 }
2970 
2971 void d_tmpfile(struct dentry *dentry, struct inode *inode)
2972 {
2973 	inode_dec_link_count(inode);
2974 	BUG_ON(dentry->d_name.name != dentry->d_iname ||
2975 		!hlist_unhashed(&dentry->d_alias) ||
2976 		!d_unlinked(dentry));
2977 	spin_lock(&dentry->d_parent->d_lock);
2978 	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2979 	dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
2980 				(unsigned long long)inode->i_ino);
2981 	spin_unlock(&dentry->d_lock);
2982 	spin_unlock(&dentry->d_parent->d_lock);
2983 	d_instantiate(dentry, inode);
2984 }
2985 EXPORT_SYMBOL(d_tmpfile);
2986 
2987 static __initdata unsigned long dhash_entries;
2988 static int __init set_dhash_entries(char *str)
2989 {
2990 	if (!str)
2991 		return 0;
2992 	dhash_entries = simple_strtoul(str, &str, 0);
2993 	return 1;
2994 }
2995 __setup("dhash_entries=", set_dhash_entries);
2996 
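/*
 * Editorial note (not in the original file): the table size can thus be
 * pinned from the kernel command line; booting with e.g.
 * "dhash_entries=65536" requests a 65536-entry dentry hash table instead
 * of the memory-scaled default.
 */
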
2997 static void __init dcache_init_early(void)
2998 {
2999 	unsigned int loop;
3000 
3001 	/* If hashes are distributed across NUMA nodes, defer
3002 	 * hash allocation until vmalloc space is available.
3003 	 */
3004 	if (hashdist)
3005 		return;
3006 
3007 	dentry_hashtable =
3008 		alloc_large_system_hash("Dentry cache",
3009 					sizeof(struct hlist_bl_head),
3010 					dhash_entries,
3011 					13,
3012 					HASH_EARLY,
3013 					&d_hash_shift,
3014 					&d_hash_mask,
3015 					0,
3016 					0);
3017 
3018 	for (loop = 0; loop < (1U << d_hash_shift); loop++)
3019 		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3020 }
3021 
3022 static void __init dcache_init(void)
3023 {
3024 	unsigned int loop;
3025 
3026 	/*
3027 	 * A constructor could be added for stable state like the lists,
3028 	 * but it is probably not worth it because of the cache nature
3029 	 * of the dcache.
3030 	 */
3031 	dentry_cache = KMEM_CACHE(dentry,
3032 		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3033 
3034 	/* Hash may have been set up in dcache_init_early */
3035 	if (!hashdist)
3036 		return;
3037 
3038 	dentry_hashtable =
3039 		alloc_large_system_hash("Dentry cache",
3040 					sizeof(struct hlist_bl_head),
3041 					dhash_entries,
3042 					13,
3043 					0,
3044 					&d_hash_shift,
3045 					&d_hash_mask,
3046 					0,
3047 					0);
3048 
3049 	for (loop = 0; loop < (1U << d_hash_shift); loop++)
3050 		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3051 }
3052 
3053 /* SLAB cache for __getname() consumers */
3054 struct kmem_cache *names_cachep __read_mostly;
3055 EXPORT_SYMBOL(names_cachep);
3056 
3057 EXPORT_SYMBOL(d_genocide);
3058 
3059 void __init vfs_caches_init_early(void)
3060 {
3061 	dcache_init_early();
3062 	inode_init_early();
3063 }
3064 
3065 void __init vfs_caches_init(unsigned long mempages)
3066 {
3067 	unsigned long reserve;
3068 
3069 	/* Base hash sizes on available memory, with a reserve equal to
3070 	 * 150% of current kernel size */
3071 
3072 	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
3073 	mempages -= reserve;
3074 
3075 	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3076 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3077 
3078 	dcache_init();
3079 	inode_init();
3080 	files_init(mempages);
3081 	mnt_init();
3082 	bdev_cache_init();
3083 	chrdev_init();
3084 }
3085