xref: /openbmc/linux/fs/dcache.c (revision d6b6592ac6d11eab91e6758d224eac35f4122aca)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * fs/dcache.c
4   *
5   * Complete reimplementation
6   * (C) 1997 Thomas Schoebel-Theuer,
7   * with heavy changes by Linus Torvalds
8   */
9  
10  /*
11   * Notes on the allocation strategy:
12   *
13   * The dcache is a master of the icache - whenever a dcache entry
14   * exists, the inode will always exist. "iput()" is done either when
15   * the dcache entry is deleted or garbage collected.
16   */
17  
18  #include <linux/ratelimit.h>
19  #include <linux/string.h>
20  #include <linux/mm.h>
21  #include <linux/fs.h>
22  #include <linux/fscrypt.h>
23  #include <linux/fsnotify.h>
24  #include <linux/slab.h>
25  #include <linux/init.h>
26  #include <linux/hash.h>
27  #include <linux/cache.h>
28  #include <linux/export.h>
29  #include <linux/security.h>
30  #include <linux/seqlock.h>
31  #include <linux/memblock.h>
32  #include <linux/bit_spinlock.h>
33  #include <linux/rculist_bl.h>
34  #include <linux/list_lru.h>
35  #include "internal.h"
36  #include "mount.h"
37  
38  /*
39   * Usage:
40   * dcache->d_inode->i_lock protects:
41   *   - i_dentry, d_u.d_alias, d_inode of aliases
42   * dcache_hash_bucket lock protects:
43   *   - the dcache hash table
44   * s_roots bl list spinlock protects:
45   *   - the s_roots list (see __d_drop)
46   * dentry->d_sb->s_dentry_lru_lock protects:
47   *   - the dcache lru lists and counters
48   * d_lock protects:
49   *   - d_flags
50   *   - d_name
51   *   - d_lru
52   *   - d_count
53   *   - d_unhashed()
54   *   - d_parent and d_subdirs
55   *   - childrens' d_child and d_parent
56   *   - d_u.d_alias, d_inode
57   *
58   * Ordering:
59   * dentry->d_inode->i_lock
60   *   dentry->d_lock
61   *     dentry->d_sb->s_dentry_lru_lock
62   *     dcache_hash_bucket lock
63   *     s_roots lock
64   *
65   * If there is an ancestor relationship:
66   * dentry->d_parent->...->d_parent->d_lock
67   *   ...
68   *     dentry->d_parent->d_lock
69   *       dentry->d_lock
70   *
71   * If no ancestor relationship:
72   * arbitrary, since it's serialized on rename_lock
73   */
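
/*
 * Illustrative sketch (editorial addition, not part of dcache.c): how the
 * ancestor lock ordering described above looks in code.  The helper name is
 * made up; spin_lock_nested() with DENTRY_D_LOCK_NESTED is the pattern this
 * file itself uses when a parent's d_lock is already held.
 */
static inline void example_lock_parent_then_child(struct dentry *parent,
						  struct dentry *dentry)
{
	spin_lock(&parent->d_lock);			/* ancestor first */
	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* then child */

	/* ... both dentries are now stable ... */

	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);
}
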
74  int sysctl_vfs_cache_pressure __read_mostly = 100;
75  EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
76  
77  __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
78  
79  EXPORT_SYMBOL(rename_lock);
80  
81  static struct kmem_cache *dentry_cache __read_mostly;
82  
83  const struct qstr empty_name = QSTR_INIT("", 0);
84  EXPORT_SYMBOL(empty_name);
85  const struct qstr slash_name = QSTR_INIT("/", 1);
86  EXPORT_SYMBOL(slash_name);
87  const struct qstr dotdot_name = QSTR_INIT("..", 2);
88  EXPORT_SYMBOL(dotdot_name);
89  
90  /*
91   * This is the single most critical data structure when it comes
92   * to the dcache: the hashtable for lookups. Somebody should try
93   * to make this good - I've just made it work.
94   *
95   * This hash-function tries to avoid losing too many bits of hash
96   * information, yet avoid using a prime hash-size or similar.
97   */
98  
99  static unsigned int d_hash_shift __read_mostly;
100  
101  static struct hlist_bl_head *dentry_hashtable __read_mostly;
102  
103  static inline struct hlist_bl_head *d_hash(unsigned int hash)
104  {
105  	return dentry_hashtable + (hash >> d_hash_shift);
106  }
107  
108  #define IN_LOOKUP_SHIFT 10
109  static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT];
110  
111  static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent,
112  					unsigned int hash)
113  {
114  	hash += (unsigned long) parent / L1_CACHE_BYTES;
115  	return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT);
116  }
117  
118  struct dentry_stat_t {
119  	long nr_dentry;
120  	long nr_unused;
121  	long age_limit;		/* age in seconds */
122  	long want_pages;	/* pages requested by system */
123  	long nr_negative;	/* # of unused negative dentries */
124  	long dummy;		/* Reserved for future use */
125  };
126  
127  static DEFINE_PER_CPU(long, nr_dentry);
128  static DEFINE_PER_CPU(long, nr_dentry_unused);
129  static DEFINE_PER_CPU(long, nr_dentry_negative);
130  
131  #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
132  /* Statistics gathering. */
133  static struct dentry_stat_t dentry_stat = {
134  	.age_limit = 45,
135  };
136  
137  /*
138   * Here we resort to our own counters instead of using generic per-cpu counters
139   * for consistency with what the vfs inode code does. We are expected to harvest
140   * better code and performance by having our own specialized counters.
141   *
142   * Please note that the loop is done over all possible CPUs, not over all online
143   * CPUs. The reason for this is that we don't want to play games with CPUs going
144   * on and off. If one of them goes off, we will just keep their counters.
145   *
146   * glommer: See cffbc8a for details, and if you ever intend to change this,
147   * please update all vfs counters to match.
148   */
149  static long get_nr_dentry(void)
150  {
151  	int i;
152  	long sum = 0;
153  	for_each_possible_cpu(i)
154  		sum += per_cpu(nr_dentry, i);
155  	return sum < 0 ? 0 : sum;
156  }
157  
158  static long get_nr_dentry_unused(void)
159  {
160  	int i;
161  	long sum = 0;
162  	for_each_possible_cpu(i)
163  		sum += per_cpu(nr_dentry_unused, i);
164  	return sum < 0 ? 0 : sum;
165  }
166  
167  static long get_nr_dentry_negative(void)
168  {
169  	int i;
170  	long sum = 0;
171  
172  	for_each_possible_cpu(i)
173  		sum += per_cpu(nr_dentry_negative, i);
174  	return sum < 0 ? 0 : sum;
175  }
176  
177  static int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
178  			  size_t *lenp, loff_t *ppos)
179  {
180  	dentry_stat.nr_dentry = get_nr_dentry();
181  	dentry_stat.nr_unused = get_nr_dentry_unused();
182  	dentry_stat.nr_negative = get_nr_dentry_negative();
183  	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
184  }
185  
186  static struct ctl_table fs_dcache_sysctls[] = {
187  	{
188  		.procname	= "dentry-state",
189  		.data		= &dentry_stat,
190  		.maxlen		= 6*sizeof(long),
191  		.mode		= 0444,
192  		.proc_handler	= proc_nr_dentry,
193  	},
194  	{ }
195  };
196  
197  static int __init init_fs_dcache_sysctls(void)
198  {
199  	register_sysctl_init("fs", fs_dcache_sysctls);
200  	return 0;
201  }
202  fs_initcall(init_fs_dcache_sysctls);
203  #endif
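
/*
 * Illustrative sketch (editorial addition, not part of dcache.c): reading the
 * six longs exported above from userspace.  Field order follows struct
 * dentry_stat_t: nr_dentry, nr_unused, age_limit, want_pages, nr_negative,
 * dummy.
 */
#if 0	/* userspace example */
#include <stdio.h>

int main(void)
{
	long v[6];
	FILE *f = fopen("/proc/sys/fs/dentry-state", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%ld %ld %ld %ld %ld %ld",
		   &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]) == 6)
		printf("dentries=%ld unused=%ld negative=%ld\n",
		       v[0], v[1], v[4]);
	fclose(f);
	return 0;
}
#endif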
204  
205  /*
206   * Compare 2 name strings, return 0 if they match, otherwise non-zero.
207   * The strings are both count bytes long, and count is non-zero.
208   */
209  #ifdef CONFIG_DCACHE_WORD_ACCESS
210  
211  #include <asm/word-at-a-time.h>
212  /*
213   * NOTE! 'cs' and 'scount' come from a dentry, so it has an
214   * aligned allocation for this particular component. We don't
215   * strictly need the load_unaligned_zeropad() safety, but it
216   * doesn't hurt either.
217   *
218   * In contrast, 'ct' and 'tcount' can be from a pathname, and do
219   * need the careful unaligned handling.
220   */
221  static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
222  {
223  	unsigned long a,b,mask;
224  
225  	for (;;) {
226  		a = read_word_at_a_time(cs);
227  		b = load_unaligned_zeropad(ct);
228  		if (tcount < sizeof(unsigned long))
229  			break;
230  		if (unlikely(a != b))
231  			return 1;
232  		cs += sizeof(unsigned long);
233  		ct += sizeof(unsigned long);
234  		tcount -= sizeof(unsigned long);
235  		if (!tcount)
236  			return 0;
237  	}
238  	mask = bytemask_from_count(tcount);
239  	return unlikely(!!((a ^ b) & mask));
240  }
241  
242  #else
243  
244  static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
245  {
246  	do {
247  		if (*cs != *ct)
248  			return 1;
249  		cs++;
250  		ct++;
251  		tcount--;
252  	} while (tcount);
253  	return 0;
254  }
255  
256  #endif
257  
258  static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
259  {
260  	/*
261  	 * Be careful about RCU walk racing with rename:
262  	 * use 'READ_ONCE' to fetch the name pointer.
263  	 *
264  	 * NOTE! Even if a rename will mean that the length
265  	 * was not loaded atomically, we don't care. The
266  	 * RCU walk will check the sequence count eventually,
267  	 * and catch it. And we won't overrun the buffer,
268  	 * because we're reading the name pointer atomically,
269  	 * and a dentry name is guaranteed to be properly
270  	 * terminated with a NUL byte.
271  	 *
272  	 * End result: even if 'len' is wrong, we'll exit
273  	 * early because the data cannot match (there can
274  	 * be no NUL in the ct/tcount data)
275  	 */
276  	const unsigned char *cs = READ_ONCE(dentry->d_name.name);
277  
278  	return dentry_string_cmp(cs, ct, tcount);
279  }
280  
281  struct external_name {
282  	union {
283  		atomic_t count;
284  		struct rcu_head head;
285  	} u;
286  	unsigned char name[];
287  };
288  
289  static inline struct external_name *external_name(struct dentry *dentry)
290  {
291  	return container_of(dentry->d_name.name, struct external_name, name[0]);
292  }
293  
294  static void __d_free(struct rcu_head *head)
295  {
296  	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
297  
298  	kmem_cache_free(dentry_cache, dentry);
299  }
300  
301  static void __d_free_external(struct rcu_head *head)
302  {
303  	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
304  	kfree(external_name(dentry));
305  	kmem_cache_free(dentry_cache, dentry);
306  }
307  
308  static inline int dname_external(const struct dentry *dentry)
309  {
310  	return dentry->d_name.name != dentry->d_iname;
311  }
312  
313  void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
314  {
315  	spin_lock(&dentry->d_lock);
316  	name->name = dentry->d_name;
317  	if (unlikely(dname_external(dentry))) {
318  		atomic_inc(&external_name(dentry)->u.count);
319  	} else {
320  		memcpy(name->inline_name, dentry->d_iname,
321  		       dentry->d_name.len + 1);
322  		name->name.name = name->inline_name;
323  	}
324  	spin_unlock(&dentry->d_lock);
325  }
326  EXPORT_SYMBOL(take_dentry_name_snapshot);
327  
328  void release_dentry_name_snapshot(struct name_snapshot *name)
329  {
330  	if (unlikely(name->name.name != name->inline_name)) {
331  		struct external_name *p;
332  		p = container_of(name->name.name, struct external_name, name[0]);
333  		if (unlikely(atomic_dec_and_test(&p->u.count)))
334  			kfree_rcu(p, u.head);
335  	}
336  }
337  EXPORT_SYMBOL(release_dentry_name_snapshot);
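
/*
 * Illustrative sketch (editorial addition, not part of dcache.c): the usual
 * pattern for the snapshot API above - take a stable copy of the name, use
 * it without holding d_lock, then release it.  The function name is made up.
 */
static void example_log_dentry_name(struct dentry *dentry)
{
	struct name_snapshot snap;

	take_dentry_name_snapshot(&snap, dentry);
	/* snap.name is a qstr copy that cannot change under a rename */
	pr_info("dentry name: %s\n", snap.name.name);
	release_dentry_name_snapshot(&snap);
}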
338  
339  static inline void __d_set_inode_and_type(struct dentry *dentry,
340  					  struct inode *inode,
341  					  unsigned type_flags)
342  {
343  	unsigned flags;
344  
345  	dentry->d_inode = inode;
346  	flags = READ_ONCE(dentry->d_flags);
347  	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
348  	flags |= type_flags;
349  	smp_store_release(&dentry->d_flags, flags);
350  }
351  
352  static inline void __d_clear_type_and_inode(struct dentry *dentry)
353  {
354  	unsigned flags = READ_ONCE(dentry->d_flags);
355  
356  	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
357  	WRITE_ONCE(dentry->d_flags, flags);
358  	dentry->d_inode = NULL;
359  	/*
360  	 * The negative counter only tracks dentries on the LRU. Don't inc if
361  	 * d_lru is on another list.
362  	 */
363  	if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
364  		this_cpu_inc(nr_dentry_negative);
365  }
366  
367  static void dentry_free(struct dentry *dentry)
368  {
369  	WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
370  	if (unlikely(dname_external(dentry))) {
371  		struct external_name *p = external_name(dentry);
372  		if (likely(atomic_dec_and_test(&p->u.count))) {
373  			call_rcu(&dentry->d_u.d_rcu, __d_free_external);
374  			return;
375  		}
376  	}
377  	/* if dentry was never visible to RCU, immediate free is OK */
378  	if (dentry->d_flags & DCACHE_NORCU)
379  		__d_free(&dentry->d_u.d_rcu);
380  	else
381  		call_rcu(&dentry->d_u.d_rcu, __d_free);
382  }
383  
384  /*
385   * Release the dentry's inode, using the filesystem
386   * d_iput() operation if defined.
387   */
388  static void dentry_unlink_inode(struct dentry * dentry)
389  	__releases(dentry->d_lock)
390  	__releases(dentry->d_inode->i_lock)
391  {
392  	struct inode *inode = dentry->d_inode;
393  
394  	raw_write_seqcount_begin(&dentry->d_seq);
395  	__d_clear_type_and_inode(dentry);
396  	hlist_del_init(&dentry->d_u.d_alias);
397  	raw_write_seqcount_end(&dentry->d_seq);
398  	spin_unlock(&dentry->d_lock);
399  	spin_unlock(&inode->i_lock);
400  	if (!inode->i_nlink)
401  		fsnotify_inoderemove(inode);
402  	if (dentry->d_op && dentry->d_op->d_iput)
403  		dentry->d_op->d_iput(dentry, inode);
404  	else
405  		iput(inode);
406  }
407  
408  /*
409   * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry
410   * is in use - which includes both the "real" per-superblock
411   * LRU list _and_ the DCACHE_SHRINK_LIST use.
412   *
413   * The DCACHE_SHRINK_LIST bit is set whenever the dentry is
414   * on the shrink list (ie not on the superblock LRU list).
415   *
416   * The per-cpu "nr_dentry_unused" counters are updated with
417   * the DCACHE_LRU_LIST bit.
418   *
419   * The per-cpu "nr_dentry_negative" counters are only updated
420   * when deleted from or added to the per-superblock LRU list, not
421   * from/to the shrink list. That is to avoid an unneeded dec/inc
422   * pair when moving from LRU to shrink list in select_collect().
423   *
424   * These helper functions make sure we always follow the
425   * rules. d_lock must be held by the caller.
426   */
427  #define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x))
428  static void d_lru_add(struct dentry *dentry)
429  {
430  	D_FLAG_VERIFY(dentry, 0);
431  	dentry->d_flags |= DCACHE_LRU_LIST;
432  	this_cpu_inc(nr_dentry_unused);
433  	if (d_is_negative(dentry))
434  		this_cpu_inc(nr_dentry_negative);
435  	WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
436  }
437  
438  static void d_lru_del(struct dentry *dentry)
439  {
440  	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
441  	dentry->d_flags &= ~DCACHE_LRU_LIST;
442  	this_cpu_dec(nr_dentry_unused);
443  	if (d_is_negative(dentry))
444  		this_cpu_dec(nr_dentry_negative);
445  	WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
446  }
447  
448  static void d_shrink_del(struct dentry *dentry)
449  {
450  	D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
451  	list_del_init(&dentry->d_lru);
452  	dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
453  	this_cpu_dec(nr_dentry_unused);
454  }
455  
456  static void d_shrink_add(struct dentry *dentry, struct list_head *list)
457  {
458  	D_FLAG_VERIFY(dentry, 0);
459  	list_add(&dentry->d_lru, list);
460  	dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
461  	this_cpu_inc(nr_dentry_unused);
462  }
463  
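/*
 * Illustrative sketch (editorial addition, not part of dcache.c): the three
 * legal d_lru states the helpers above maintain - not on any list (neither
 * flag), on the superblock LRU (DCACHE_LRU_LIST only), or on a private
 * shrink list (both flags), expressed as a check.
 */
static inline bool example_on_shrink_list(const struct dentry *dentry)
{
	return (dentry->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) ==
	       (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST);
}
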
464  /*
465   * These can only be called under the global LRU lock, ie during the
466   * callback for freeing the LRU list. "isolate" removes it from the
467   * LRU lists entirely, while shrink_move moves it to the indicated
468   * private list.
469   */
470  static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
471  {
472  	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
473  	dentry->d_flags &= ~DCACHE_LRU_LIST;
474  	this_cpu_dec(nr_dentry_unused);
475  	if (d_is_negative(dentry))
476  		this_cpu_dec(nr_dentry_negative);
477  	list_lru_isolate(lru, &dentry->d_lru);
478  }
479  
480  static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
481  			      struct list_head *list)
482  {
483  	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
484  	dentry->d_flags |= DCACHE_SHRINK_LIST;
485  	if (d_is_negative(dentry))
486  		this_cpu_dec(nr_dentry_negative);
487  	list_lru_isolate_move(lru, &dentry->d_lru, list);
488  }
489  
490  static void ___d_drop(struct dentry *dentry)
491  {
492  	struct hlist_bl_head *b;
493  	/*
494  	 * Hashed dentries are normally on the dentry hashtable,
495  	 * with the exception of those newly allocated by
496  	 * d_obtain_root, which are always IS_ROOT:
497  	 */
498  	if (unlikely(IS_ROOT(dentry)))
499  		b = &dentry->d_sb->s_roots;
500  	else
501  		b = d_hash(dentry->d_name.hash);
502  
503  	hlist_bl_lock(b);
504  	__hlist_bl_del(&dentry->d_hash);
505  	hlist_bl_unlock(b);
506  }
507  
508  void __d_drop(struct dentry *dentry)
509  {
510  	if (!d_unhashed(dentry)) {
511  		___d_drop(dentry);
512  		dentry->d_hash.pprev = NULL;
513  		write_seqcount_invalidate(&dentry->d_seq);
514  	}
515  }
516  EXPORT_SYMBOL(__d_drop);
517  
518  /**
519   * d_drop - drop a dentry
520   * @dentry: dentry to drop
521   *
522   * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
523   * be found through a VFS lookup any more. Note that this is different from
524   * deleting the dentry - d_delete will try to mark the dentry negative if
525   * possible, giving a successful _negative_ lookup, while d_drop will
526   * just make the cache lookup fail.
527   *
528   * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
529   * reason (NFS timeouts or autofs deletes).
530   *
531   * __d_drop requires dentry->d_lock
532   *
533   * ___d_drop doesn't mark dentry as "unhashed"
534   * (dentry->d_hash.pprev will be LIST_POISON2, not NULL).
535   */
536  void d_drop(struct dentry *dentry)
537  {
538  	spin_lock(&dentry->d_lock);
539  	__d_drop(dentry);
540  	spin_unlock(&dentry->d_lock);
541  }
542  EXPORT_SYMBOL(d_drop);
543  
544  static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
545  {
546  	struct dentry *next;
547  	/*
548  	 * Inform d_walk() and shrink_dentry_list() that we are no longer
549  	 * attached to the dentry tree
550  	 */
551  	dentry->d_flags |= DCACHE_DENTRY_KILLED;
552  	if (unlikely(list_empty(&dentry->d_child)))
553  		return;
554  	__list_del_entry(&dentry->d_child);
555  	/*
556  	 * Cursors can move around the list of children.  While we'd been
557  	 * a normal list member, it didn't matter - ->d_child.next would've
558  	 * been updated.  However, from now on it won't be and for the
559  	 * things like d_walk() it might end up with a nasty surprise.
560  	 * Normally d_walk() doesn't care about cursors moving around -
561  	 * ->d_lock on parent prevents that and since a cursor has no children
562  	 * of its own, we get through it without ever unlocking the parent.
563  	 * There is one exception, though - if we ascend from a child that
564  	 * gets killed as soon as we unlock it, the next sibling is found
565  	 * using the value left in its ->d_child.next.  And if _that_
566  	 * pointed to a cursor, and cursor got moved (e.g. by lseek())
567  	 * before d_walk() regains parent->d_lock, we'll end up skipping
568  	 * everything the cursor had been moved past.
569  	 *
570  	 * Solution: make sure that the pointer left behind in ->d_child.next
571  	 * points to something that won't be moving around.  I.e. skip the
572  	 * cursors.
573  	 */
574  	while (dentry->d_child.next != &parent->d_subdirs) {
575  		next = list_entry(dentry->d_child.next, struct dentry, d_child);
576  		if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
577  			break;
578  		dentry->d_child.next = next->d_child.next;
579  	}
580  }
581  
582  static void __dentry_kill(struct dentry *dentry)
583  {
584  	struct dentry *parent = NULL;
585  	bool can_free = true;
586  	if (!IS_ROOT(dentry))
587  		parent = dentry->d_parent;
588  
589  	/*
590  	 * The dentry is now unrecoverably dead to the world.
591  	 */
592  	lockref_mark_dead(&dentry->d_lockref);
593  
594  	/*
595  	 * inform the fs via d_prune that this dentry is about to be
596  	 * unhashed and destroyed.
597  	 */
598  	if (dentry->d_flags & DCACHE_OP_PRUNE)
599  		dentry->d_op->d_prune(dentry);
600  
601  	if (dentry->d_flags & DCACHE_LRU_LIST) {
602  		if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
603  			d_lru_del(dentry);
604  	}
605  	/* if it was on the hash then remove it */
606  	__d_drop(dentry);
607  	dentry_unlist(dentry, parent);
608  	if (parent)
609  		spin_unlock(&parent->d_lock);
610  	if (dentry->d_inode)
611  		dentry_unlink_inode(dentry);
612  	else
613  		spin_unlock(&dentry->d_lock);
614  	this_cpu_dec(nr_dentry);
615  	if (dentry->d_op && dentry->d_op->d_release)
616  		dentry->d_op->d_release(dentry);
617  
618  	spin_lock(&dentry->d_lock);
619  	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
620  		dentry->d_flags |= DCACHE_MAY_FREE;
621  		can_free = false;
622  	}
623  	spin_unlock(&dentry->d_lock);
624  	if (likely(can_free))
625  		dentry_free(dentry);
626  	cond_resched();
627  }
628  
629  static struct dentry *__lock_parent(struct dentry *dentry)
630  {
631  	struct dentry *parent;
632  	rcu_read_lock();
633  	spin_unlock(&dentry->d_lock);
634  again:
635  	parent = READ_ONCE(dentry->d_parent);
636  	spin_lock(&parent->d_lock);
637  	/*
638  	 * We can't blindly lock dentry until we are sure
639  	 * that we won't violate the locking order.
640  	 * Any changes of dentry->d_parent must have
641  	 * been done with parent->d_lock held, so
642  	 * spin_lock() above is enough of a barrier
643  	 * for checking if it's still our child.
644  	 */
645  	if (unlikely(parent != dentry->d_parent)) {
646  		spin_unlock(&parent->d_lock);
647  		goto again;
648  	}
649  	rcu_read_unlock();
650  	if (parent != dentry)
651  		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
652  	else
653  		parent = NULL;
654  	return parent;
655  }
656  
657  static inline struct dentry *lock_parent(struct dentry *dentry)
658  {
659  	struct dentry *parent = dentry->d_parent;
660  	if (IS_ROOT(dentry))
661  		return NULL;
662  	if (likely(spin_trylock(&parent->d_lock)))
663  		return parent;
664  	return __lock_parent(dentry);
665  }
666  
667  static inline bool retain_dentry(struct dentry *dentry)
668  {
669  	WARN_ON(d_in_lookup(dentry));
670  
671  	/* Unreachable? Get rid of it */
672  	if (unlikely(d_unhashed(dentry)))
673  		return false;
674  
675  	if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
676  		return false;
677  
678  	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
679  		if (dentry->d_op->d_delete(dentry))
680  			return false;
681  	}
682  
683  	if (unlikely(dentry->d_flags & DCACHE_DONTCACHE))
684  		return false;
685  
686  	/* retain; LRU fodder */
687  	dentry->d_lockref.count--;
688  	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
689  		d_lru_add(dentry);
690  	else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
691  		dentry->d_flags |= DCACHE_REFERENCED;
692  	return true;
693  }
694  
695  void d_mark_dontcache(struct inode *inode)
696  {
697  	struct dentry *de;
698  
699  	spin_lock(&inode->i_lock);
700  	hlist_for_each_entry(de, &inode->i_dentry, d_u.d_alias) {
701  		spin_lock(&de->d_lock);
702  		de->d_flags |= DCACHE_DONTCACHE;
703  		spin_unlock(&de->d_lock);
704  	}
705  	inode->i_state |= I_DONTCACHE;
706  	spin_unlock(&inode->i_lock);
707  }
708  EXPORT_SYMBOL(d_mark_dontcache);
709  
710  /*
711   * Finish off a dentry we've decided to kill.
712   * dentry->d_lock must be held, returns with it unlocked.
713   * Returns dentry requiring refcount drop, or NULL if we're done.
714   */
715  static struct dentry *dentry_kill(struct dentry *dentry)
716  	__releases(dentry->d_lock)
717  {
718  	struct inode *inode = dentry->d_inode;
719  	struct dentry *parent = NULL;
720  
721  	if (inode && unlikely(!spin_trylock(&inode->i_lock)))
722  		goto slow_positive;
723  
724  	if (!IS_ROOT(dentry)) {
725  		parent = dentry->d_parent;
726  		if (unlikely(!spin_trylock(&parent->d_lock))) {
727  			parent = __lock_parent(dentry);
728  			if (likely(inode || !dentry->d_inode))
729  				goto got_locks;
730  			/* negative that became positive */
731  			if (parent)
732  				spin_unlock(&parent->d_lock);
733  			inode = dentry->d_inode;
734  			goto slow_positive;
735  		}
736  	}
737  	__dentry_kill(dentry);
738  	return parent;
739  
740  slow_positive:
741  	spin_unlock(&dentry->d_lock);
742  	spin_lock(&inode->i_lock);
743  	spin_lock(&dentry->d_lock);
744  	parent = lock_parent(dentry);
745  got_locks:
746  	if (unlikely(dentry->d_lockref.count != 1)) {
747  		dentry->d_lockref.count--;
748  	} else if (likely(!retain_dentry(dentry))) {
749  		__dentry_kill(dentry);
750  		return parent;
751  	}
752  	/* we are keeping it, after all */
753  	if (inode)
754  		spin_unlock(&inode->i_lock);
755  	if (parent)
756  		spin_unlock(&parent->d_lock);
757  	spin_unlock(&dentry->d_lock);
758  	return NULL;
759  }
760  
761  /*
762   * Try to do a lockless dput(), and return whether that was successful.
763   *
764   * If unsuccessful, we return false, having already taken the dentry lock.
765   *
766   * The caller needs to hold the RCU read lock, so that the dentry is
767   * guaranteed to stay around even if the refcount goes down to zero!
768   */
769  static inline bool fast_dput(struct dentry *dentry)
770  {
771  	int ret;
772  	unsigned int d_flags;
773  
774  	/*
775  	 * If we have a d_op->d_delete() operation, we should not
776  	 * let the dentry count go to zero, so use "put_or_lock".
777  	 */
778  	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
779  		return lockref_put_or_lock(&dentry->d_lockref);
780  
781  	/*
782  	 * .. otherwise, we can try to just decrement the
783  	 * lockref optimistically.
784  	 */
785  	ret = lockref_put_return(&dentry->d_lockref);
786  
787  	/*
788  	 * If the lockref_put_return() failed due to the lock being held
789  	 * by somebody else, the fast path has failed. We will need to
790  	 * get the lock, and then check the count again.
791  	 */
792  	if (unlikely(ret < 0)) {
793  		spin_lock(&dentry->d_lock);
794  		if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) {
795  			spin_unlock(&dentry->d_lock);
796  			return true;
797  		}
798  		dentry->d_lockref.count--;
799  		goto locked;
800  	}
801  
802  	/*
803  	 * If we weren't the last ref, we're done.
804  	 */
805  	if (ret)
806  		return true;
807  
808  	/*
809  	 * Careful, careful. The reference count went down
810  	 * to zero, but we don't hold the dentry lock, so
811  	 * somebody else could get it again, and do another
812  	 * dput(), and we need to not race with that.
813  	 *
814  	 * However, there is a very special and common case
815  	 * where we don't care, because there is nothing to
816  	 * do: the dentry is still hashed, it does not have
817  	 * a 'delete' op, and it's referenced and already on
818  	 * the LRU list.
819  	 *
820  	 * NOTE! Since we aren't locked, these values are
821  	 * not "stable". However, it is sufficient that at
822  	 * some point after we dropped the reference the
823  	 * dentry was hashed and the flags had the proper
824  	 * value. Other dentry users may have re-gotten
825  	 * a reference to the dentry and change that, but
826  	 * our work is done - we can leave the dentry
827  	 * around with a zero refcount.
828  	 *
829  	 * Nevertheless, there are two cases that we should kill
830  	 * the dentry anyway.
831  	 * 1. free disconnected dentries as soon as their refcount
832  	 *    reached zero.
833  	 * 2. free dentries if they should not be cached.
834  	 */
835  	smp_rmb();
836  	d_flags = READ_ONCE(dentry->d_flags);
837  	d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST |
838  			DCACHE_DISCONNECTED | DCACHE_DONTCACHE;
839  
840  	/* Nothing to do? Dropping the reference was all we needed? */
841  	if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
842  		return true;
843  
844  	/*
845  	 * Not the fast normal case? Get the lock. We've already decremented
846  	 * the refcount, but we'll need to re-check the situation after
847  	 * getting the lock.
848  	 */
849  	spin_lock(&dentry->d_lock);
850  
851  	/*
852  	 * Did somebody else grab a reference to it in the meantime, and
853  	 * we're no longer the last user after all? Alternatively, somebody
854  	 * else could have killed it and marked it dead. Either way, we
855  	 * don't need to do anything else.
856  	 */
857  locked:
858  	if (dentry->d_lockref.count) {
859  		spin_unlock(&dentry->d_lock);
860  		return true;
861  	}
862  
863  	/*
864  	 * Re-get the reference we optimistically dropped. We hold the
865  	 * lock, and we just tested that it was zero, so we can just
866  	 * set it to 1.
867  	 */
868  	dentry->d_lockref.count = 1;
869  	return false;
870  }
871  
872  
873  /*
874   * This is dput
875   *
876   * This is complicated by the fact that we do not want to put
877   * dentries that are no longer on any hash chain on the unused
878   * list: we'd much rather just get rid of them immediately.
879   *
880   * However, that implies that we have to traverse the dentry
881   * tree upwards to the parents which might _also_ now be
882   * scheduled for deletion (it may have been only waiting for
883   * its last child to go away).
884   *
885   * This tail recursion is done by hand as we don't want to depend
886   * on the compiler to always get this right (gcc generally doesn't).
887   * Real recursion would eat up our stack space.
888   */
889  
890  /*
891   * dput - release a dentry
892   * @dentry: dentry to release
893   *
894   * Release a dentry. This will drop the usage count and if appropriate
895   * call the dentry unlink method as well as removing it from the queues and
896   * releasing its resources. If the parent dentries were scheduled for release
897   * they too may now get deleted.
898   */
899  void dput(struct dentry *dentry)
900  {
901  	while (dentry) {
902  		might_sleep();
903  
904  		rcu_read_lock();
905  		if (likely(fast_dput(dentry))) {
906  			rcu_read_unlock();
907  			return;
908  		}
909  
910  		/* Slow case: now with the dentry lock held */
911  		rcu_read_unlock();
912  
913  		if (likely(retain_dentry(dentry))) {
914  			spin_unlock(&dentry->d_lock);
915  			return;
916  		}
917  
918  		dentry = dentry_kill(dentry);
919  	}
920  }
921  EXPORT_SYMBOL(dput);
922  
923  static void __dput_to_list(struct dentry *dentry, struct list_head *list)
924  __must_hold(&dentry->d_lock)
925  {
926  	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
927  		/* let the owner of the list it's on deal with it */
928  		--dentry->d_lockref.count;
929  	} else {
930  		if (dentry->d_flags & DCACHE_LRU_LIST)
931  			d_lru_del(dentry);
932  		if (!--dentry->d_lockref.count)
933  			d_shrink_add(dentry, list);
934  	}
935  }
936  
937  void dput_to_list(struct dentry *dentry, struct list_head *list)
938  {
939  	rcu_read_lock();
940  	if (likely(fast_dput(dentry))) {
941  		rcu_read_unlock();
942  		return;
943  	}
944  	rcu_read_unlock();
945  	if (!retain_dentry(dentry))
946  		__dput_to_list(dentry, list);
947  	spin_unlock(&dentry->d_lock);
948  }
949  
950  /* This must be called with d_lock held */
951  static inline void __dget_dlock(struct dentry *dentry)
952  {
953  	dentry->d_lockref.count++;
954  }
955  
956  static inline void __dget(struct dentry *dentry)
957  {
958  	lockref_get(&dentry->d_lockref);
959  }
960  
961  struct dentry *dget_parent(struct dentry *dentry)
962  {
963  	int gotref;
964  	struct dentry *ret;
965  	unsigned seq;
966  
967  	/*
968  	 * Do optimistic parent lookup without any
969  	 * locking.
970  	 */
971  	rcu_read_lock();
972  	seq = raw_seqcount_begin(&dentry->d_seq);
973  	ret = READ_ONCE(dentry->d_parent);
974  	gotref = lockref_get_not_zero(&ret->d_lockref);
975  	rcu_read_unlock();
976  	if (likely(gotref)) {
977  		if (!read_seqcount_retry(&dentry->d_seq, seq))
978  			return ret;
979  		dput(ret);
980  	}
981  
982  repeat:
983  	/*
984  	 * Don't need rcu_dereference because we re-check it was correct under
985  	 * the lock.
986  	 */
987  	rcu_read_lock();
988  	ret = dentry->d_parent;
989  	spin_lock(&ret->d_lock);
990  	if (unlikely(ret != dentry->d_parent)) {
991  		spin_unlock(&ret->d_lock);
992  		rcu_read_unlock();
993  		goto repeat;
994  	}
995  	rcu_read_unlock();
996  	BUG_ON(!ret->d_lockref.count);
997  	ret->d_lockref.count++;
998  	spin_unlock(&ret->d_lock);
999  	return ret;
1000  }
1001  EXPORT_SYMBOL(dget_parent);
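
/*
 * Illustrative sketch (editorial addition, not part of dcache.c): typical
 * use of dget_parent() - pin the parent so it stays valid while it is
 * inspected, then drop the reference with dput().  The function name is
 * made up.
 */
static void example_report_parent(struct dentry *dentry)
{
	struct dentry *parent = dget_parent(dentry);

	pr_info("parent of %pd is %pd\n", dentry, parent);
	dput(parent);
}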
1002  
1003  static struct dentry * __d_find_any_alias(struct inode *inode)
1004  {
1005  	struct dentry *alias;
1006  
1007  	if (hlist_empty(&inode->i_dentry))
1008  		return NULL;
1009  	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
1010  	__dget(alias);
1011  	return alias;
1012  }
1013  
1014  /**
1015   * d_find_any_alias - find any alias for a given inode
1016   * @inode: inode to find an alias for
1017   *
1018   * If any aliases exist for the given inode, take and return a
1019   * reference for one of them.  If no aliases exist, return %NULL.
1020   */
1021  struct dentry *d_find_any_alias(struct inode *inode)
1022  {
1023  	struct dentry *de;
1024  
1025  	spin_lock(&inode->i_lock);
1026  	de = __d_find_any_alias(inode);
1027  	spin_unlock(&inode->i_lock);
1028  	return de;
1029  }
1030  EXPORT_SYMBOL(d_find_any_alias);
1031  
1032  static struct dentry *__d_find_alias(struct inode *inode)
1033  {
1034  	struct dentry *alias;
1035  
1036  	if (S_ISDIR(inode->i_mode))
1037  		return __d_find_any_alias(inode);
1038  
1039  	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
1040  		spin_lock(&alias->d_lock);
1041   		if (!d_unhashed(alias)) {
1042  			__dget_dlock(alias);
1043  			spin_unlock(&alias->d_lock);
1044  			return alias;
1045  		}
1046  		spin_unlock(&alias->d_lock);
1047  	}
1048  	return NULL;
1049  }
1050  
1051  /**
1052   * d_find_alias - grab a hashed alias of inode
1053   * @inode: inode in question
1054   *
1055   * If inode has a hashed alias, or is a directory and has any alias,
1056   * acquire the reference to alias and return it. Otherwise return NULL.
1057   * Notice that if inode is a directory there can be only one alias and
1058   * it can be unhashed only if it has no children, or if it is the root
1059   * of a filesystem, or if the directory was renamed and d_revalidate
1060   * was the first vfs operation to notice.
1061   *
1062   * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
1063   * any other hashed alias over that one.
1064   */
1065  struct dentry *d_find_alias(struct inode *inode)
1066  {
1067  	struct dentry *de = NULL;
1068  
1069  	if (!hlist_empty(&inode->i_dentry)) {
1070  		spin_lock(&inode->i_lock);
1071  		de = __d_find_alias(inode);
1072  		spin_unlock(&inode->i_lock);
1073  	}
1074  	return de;
1075  }
1076  EXPORT_SYMBOL(d_find_alias);
1077  
1078  /*
1079   *  Caller MUST be holding rcu_read_lock() and be guaranteed
1080   *  that inode won't get freed until rcu_read_unlock().
1081   */
1082  struct dentry *d_find_alias_rcu(struct inode *inode)
1083  {
1084  	struct hlist_head *l = &inode->i_dentry;
1085  	struct dentry *de = NULL;
1086  
1087  	spin_lock(&inode->i_lock);
1088  	// ->i_dentry and ->i_rcu are colocated, but the latter won't be
1089  	// used without having I_FREEING set, which means no aliases left
1090  	if (likely(!(inode->i_state & I_FREEING) && !hlist_empty(l))) {
1091  		if (S_ISDIR(inode->i_mode)) {
1092  			de = hlist_entry(l->first, struct dentry, d_u.d_alias);
1093  		} else {
1094  			hlist_for_each_entry(de, l, d_u.d_alias)
1095  				if (!d_unhashed(de))
1096  					break;
1097  		}
1098  	}
1099  	spin_unlock(&inode->i_lock);
1100  	return de;
1101  }
1102  
1103  /*
1104   *	Try to kill dentries associated with this inode.
1105   * WARNING: you must own a reference to inode.
1106   */
1107  void d_prune_aliases(struct inode *inode)
1108  {
1109  	struct dentry *dentry;
1110  restart:
1111  	spin_lock(&inode->i_lock);
1112  	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
1113  		spin_lock(&dentry->d_lock);
1114  		if (!dentry->d_lockref.count) {
1115  			struct dentry *parent = lock_parent(dentry);
1116  			if (likely(!dentry->d_lockref.count)) {
1117  				__dentry_kill(dentry);
1118  				dput(parent);
1119  				goto restart;
1120  			}
1121  			if (parent)
1122  				spin_unlock(&parent->d_lock);
1123  		}
1124  		spin_unlock(&dentry->d_lock);
1125  	}
1126  	spin_unlock(&inode->i_lock);
1127  }
1128  EXPORT_SYMBOL(d_prune_aliases);
1129  
1130  /*
1131   * Lock a dentry from shrink list.
1132   * Called under rcu_read_lock() and dentry->d_lock; the former
1133   * guarantees that nothing we access will be freed under us.
1134   * Note that dentry is *not* protected from concurrent dentry_kill(),
1135   * d_delete(), etc.
1136   *
1137   * Return false if dentry has been disrupted or grabbed, leaving
1138   * the caller to kick it off-list.  Otherwise, return true and have
1139   * that dentry's inode and parent both locked.
1140   */
1141  static bool shrink_lock_dentry(struct dentry *dentry)
1142  {
1143  	struct inode *inode;
1144  	struct dentry *parent;
1145  
1146  	if (dentry->d_lockref.count)
1147  		return false;
1148  
1149  	inode = dentry->d_inode;
1150  	if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
1151  		spin_unlock(&dentry->d_lock);
1152  		spin_lock(&inode->i_lock);
1153  		spin_lock(&dentry->d_lock);
1154  		if (unlikely(dentry->d_lockref.count))
1155  			goto out;
1156  		/* changed inode means that somebody had grabbed it */
1157  		if (unlikely(inode != dentry->d_inode))
1158  			goto out;
1159  	}
1160  
1161  	parent = dentry->d_parent;
1162  	if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
1163  		return true;
1164  
1165  	spin_unlock(&dentry->d_lock);
1166  	spin_lock(&parent->d_lock);
1167  	if (unlikely(parent != dentry->d_parent)) {
1168  		spin_unlock(&parent->d_lock);
1169  		spin_lock(&dentry->d_lock);
1170  		goto out;
1171  	}
1172  	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1173  	if (likely(!dentry->d_lockref.count))
1174  		return true;
1175  	spin_unlock(&parent->d_lock);
1176  out:
1177  	if (inode)
1178  		spin_unlock(&inode->i_lock);
1179  	return false;
1180  }
1181  
1182  void shrink_dentry_list(struct list_head *list)
1183  {
1184  	while (!list_empty(list)) {
1185  		struct dentry *dentry, *parent;
1186  
1187  		dentry = list_entry(list->prev, struct dentry, d_lru);
1188  		spin_lock(&dentry->d_lock);
1189  		rcu_read_lock();
1190  		if (!shrink_lock_dentry(dentry)) {
1191  			bool can_free = false;
1192  			rcu_read_unlock();
1193  			d_shrink_del(dentry);
1194  			if (dentry->d_lockref.count < 0)
1195  				can_free = dentry->d_flags & DCACHE_MAY_FREE;
1196  			spin_unlock(&dentry->d_lock);
1197  			if (can_free)
1198  				dentry_free(dentry);
1199  			continue;
1200  		}
1201  		rcu_read_unlock();
1202  		d_shrink_del(dentry);
1203  		parent = dentry->d_parent;
1204  		if (parent != dentry)
1205  			__dput_to_list(parent, list);
1206  		__dentry_kill(dentry);
1207  	}
1208  }
1209  
1210  static enum lru_status dentry_lru_isolate(struct list_head *item,
1211  		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
1212  {
1213  	struct list_head *freeable = arg;
1214  	struct dentry	*dentry = container_of(item, struct dentry, d_lru);
1215  
1216  
1217  	/*
1218  	 * we are inverting the lru lock/dentry->d_lock here,
1219  	 * so use a trylock. If we fail to get the lock, just skip
1220  	 * it
1221  	 */
1222  	if (!spin_trylock(&dentry->d_lock))
1223  		return LRU_SKIP;
1224  
1225  	/*
1226  	 * Referenced dentries are still in use. If they have active
1227  	 * counts, just remove them from the LRU. Otherwise give them
1228  	 * another pass through the LRU.
1229  	 */
1230  	if (dentry->d_lockref.count) {
1231  		d_lru_isolate(lru, dentry);
1232  		spin_unlock(&dentry->d_lock);
1233  		return LRU_REMOVED;
1234  	}
1235  
1236  	if (dentry->d_flags & DCACHE_REFERENCED) {
1237  		dentry->d_flags &= ~DCACHE_REFERENCED;
1238  		spin_unlock(&dentry->d_lock);
1239  
1240  		/*
1241  		 * The list move itself will be made by the common LRU code. At
1242  		 * this point, we've dropped the dentry->d_lock but keep the
1243  		 * lru lock. This is safe to do, since every list movement is
1244  		 * protected by the lru lock even if both locks are held.
1245  		 *
1246  		 * This is guaranteed by the fact that all LRU management
1247  		 * functions are intermediated by the LRU API calls like
1248  		 * list_lru_add and list_lru_del. List movement in this file
1249  		 * only ever occurs through these functions or through callbacks
1250  		 * like this one, that are called from the LRU API.
1251  		 *
1252  		 * The only exceptions to this are functions like
1253  		 * shrink_dentry_list, and code that first checks for the
1254  		 * DCACHE_SHRINK_LIST flag.  Those are guaranteed to be
1255  		 * operating only with stack provided lists after they are
1256  		 * properly isolated from the main list.  It is thus, always a
1257  		 * local access.
1258  		 */
1259  		return LRU_ROTATE;
1260  	}
1261  
1262  	d_lru_shrink_move(lru, dentry, freeable);
1263  	spin_unlock(&dentry->d_lock);
1264  
1265  	return LRU_REMOVED;
1266  }
1267  
1268  /**
1269   * prune_dcache_sb - shrink the dcache
1270   * @sb: superblock
1271   * @sc: shrink control, passed to list_lru_shrink_walk()
1272   *
1273   * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This
1274   * is done when we need more memory and called from the superblock shrinker
1275   * function.
1276   *
1277   * This function may fail to free any resources if all the dentries are in
1278   * use.
1279   */
1280  long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc)
1281  {
1282  	LIST_HEAD(dispose);
1283  	long freed;
1284  
1285  	freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc,
1286  				     dentry_lru_isolate, &dispose);
1287  	shrink_dentry_list(&dispose);
1288  	return freed;
1289  }
1290  
1291  static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
1292  		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
1293  {
1294  	struct list_head *freeable = arg;
1295  	struct dentry	*dentry = container_of(item, struct dentry, d_lru);
1296  
1297  	/*
1298  	 * we are inverting the lru lock/dentry->d_lock here,
1299  	 * so use a trylock. If we fail to get the lock, just skip
1300  	 * it
1301  	 */
1302  	if (!spin_trylock(&dentry->d_lock))
1303  		return LRU_SKIP;
1304  
1305  	d_lru_shrink_move(lru, dentry, freeable);
1306  	spin_unlock(&dentry->d_lock);
1307  
1308  	return LRU_REMOVED;
1309  }
1310  
1311  
1312  /**
1313   * shrink_dcache_sb - shrink dcache for a superblock
1314   * @sb: superblock
1315   *
1316   * Shrink the dcache for the specified super block. This is used to free
1317   * the dcache before unmounting a file system.
1318   */
1319  void shrink_dcache_sb(struct super_block *sb)
1320  {
1321  	do {
1322  		LIST_HEAD(dispose);
1323  
1324  		list_lru_walk(&sb->s_dentry_lru,
1325  			dentry_lru_isolate_shrink, &dispose, 1024);
1326  		shrink_dentry_list(&dispose);
1327  	} while (list_lru_count(&sb->s_dentry_lru) > 0);
1328  }
1329  EXPORT_SYMBOL(shrink_dcache_sb);
1330  
1331  /**
1332   * enum d_walk_ret - action to take during tree walk
1333   * @D_WALK_CONTINUE:	continue walk
1334   * @D_WALK_QUIT:	quit walk
1335   * @D_WALK_NORETRY:	quit when retry is needed
1336   * @D_WALK_SKIP:	skip this dentry and its children
1337   */
1338  enum d_walk_ret {
1339  	D_WALK_CONTINUE,
1340  	D_WALK_QUIT,
1341  	D_WALK_NORETRY,
1342  	D_WALK_SKIP,
1343  };
1344  
1345  /**
1346   * d_walk - walk the dentry tree
1347   * @parent:	start of walk
1348   * @data:	data passed to @enter() and @finish()
1349   * @enter:	callback when first entering the dentry
1350   *
1351   * The @enter() callbacks are called with d_lock held.
1352   */
1353  static void d_walk(struct dentry *parent, void *data,
1354  		   enum d_walk_ret (*enter)(void *, struct dentry *))
1355  {
1356  	struct dentry *this_parent;
1357  	struct list_head *next;
1358  	unsigned seq = 0;
1359  	enum d_walk_ret ret;
1360  	bool retry = true;
1361  
1362  again:
1363  	read_seqbegin_or_lock(&rename_lock, &seq);
1364  	this_parent = parent;
1365  	spin_lock(&this_parent->d_lock);
1366  
1367  	ret = enter(data, this_parent);
1368  	switch (ret) {
1369  	case D_WALK_CONTINUE:
1370  		break;
1371  	case D_WALK_QUIT:
1372  	case D_WALK_SKIP:
1373  		goto out_unlock;
1374  	case D_WALK_NORETRY:
1375  		retry = false;
1376  		break;
1377  	}
1378  repeat:
1379  	next = this_parent->d_subdirs.next;
1380  resume:
1381  	while (next != &this_parent->d_subdirs) {
1382  		struct list_head *tmp = next;
1383  		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
1384  		next = tmp->next;
1385  
1386  		if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
1387  			continue;
1388  
1389  		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1390  
1391  		ret = enter(data, dentry);
1392  		switch (ret) {
1393  		case D_WALK_CONTINUE:
1394  			break;
1395  		case D_WALK_QUIT:
1396  			spin_unlock(&dentry->d_lock);
1397  			goto out_unlock;
1398  		case D_WALK_NORETRY:
1399  			retry = false;
1400  			break;
1401  		case D_WALK_SKIP:
1402  			spin_unlock(&dentry->d_lock);
1403  			continue;
1404  		}
1405  
1406  		if (!list_empty(&dentry->d_subdirs)) {
1407  			spin_unlock(&this_parent->d_lock);
1408  			spin_release(&dentry->d_lock.dep_map, _RET_IP_);
1409  			this_parent = dentry;
1410  			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1411  			goto repeat;
1412  		}
1413  		spin_unlock(&dentry->d_lock);
1414  	}
1415  	/*
1416  	 * All done at this level ... ascend and resume the search.
1417  	 */
1418  	rcu_read_lock();
1419  ascend:
1420  	if (this_parent != parent) {
1421  		struct dentry *child = this_parent;
1422  		this_parent = child->d_parent;
1423  
1424  		spin_unlock(&child->d_lock);
1425  		spin_lock(&this_parent->d_lock);
1426  
1427  		/* might go back up the wrong parent if we have had a rename. */
1428  		if (need_seqretry(&rename_lock, seq))
1429  			goto rename_retry;
1430  		/* go into the first sibling still alive */
1431  		do {
1432  			next = child->d_child.next;
1433  			if (next == &this_parent->d_subdirs)
1434  				goto ascend;
1435  			child = list_entry(next, struct dentry, d_child);
1436  		} while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
1437  		rcu_read_unlock();
1438  		goto resume;
1439  	}
1440  	if (need_seqretry(&rename_lock, seq))
1441  		goto rename_retry;
1442  	rcu_read_unlock();
1443  
1444  out_unlock:
1445  	spin_unlock(&this_parent->d_lock);
1446  	done_seqretry(&rename_lock, seq);
1447  	return;
1448  
1449  rename_retry:
1450  	spin_unlock(&this_parent->d_lock);
1451  	rcu_read_unlock();
1452  	BUG_ON(seq & 1);
1453  	if (!retry)
1454  		return;
1455  	seq = 1;
1456  	goto again;
1457  }
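
/*
 * Illustrative sketch (editorial addition, not part of dcache.c): a minimal
 * d_walk() callback.  As documented above, enter() runs with the dentry's
 * d_lock held; returning D_WALK_CONTINUE visits the whole subtree.
 */
static enum d_walk_ret example_count_one(void *_data, struct dentry *dentry)
{
	long *count = _data;

	(*count)++;
	return D_WALK_CONTINUE;
}

/* usage: long n = 0; d_walk(parent, &n, example_count_one); */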
1458  
1459  struct check_mount {
1460  	struct vfsmount *mnt;
1461  	unsigned int mounted;
1462  };
1463  
1464  static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry)
1465  {
1466  	struct check_mount *info = data;
1467  	struct path path = { .mnt = info->mnt, .dentry = dentry };
1468  
1469  	if (likely(!d_mountpoint(dentry)))
1470  		return D_WALK_CONTINUE;
1471  	if (__path_is_mountpoint(&path)) {
1472  		info->mounted = 1;
1473  		return D_WALK_QUIT;
1474  	}
1475  	return D_WALK_CONTINUE;
1476  }
1477  
1478  /**
1479   * path_has_submounts - check for mounts over a dentry in the
1480   *                      current namespace.
1481   * @parent: path to check.
1482   *
1483   * Return true if the parent or its subdirectories contain
1484   * a mount point in the current namespace.
1485   */
1486  int path_has_submounts(const struct path *parent)
1487  {
1488  	struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
1489  
1490  	read_seqlock_excl(&mount_lock);
1491  	d_walk(parent->dentry, &data, path_check_mount);
1492  	read_sequnlock_excl(&mount_lock);
1493  
1494  	return data.mounted;
1495  }
1496  EXPORT_SYMBOL(path_has_submounts);
1497  
1498  /*
1499   * Called by mount code to set a mountpoint and check if the mountpoint is
1500   * reachable (e.g. NFS can unhash a directory dentry and then the complete
1501   * subtree can become unreachable).
1502   *
1503   * Only one of d_invalidate() and d_set_mounted() must succeed.  For
1504   * this reason take rename_lock and d_lock on dentry and ancestors.
1505   */
1506  int d_set_mounted(struct dentry *dentry)
1507  {
1508  	struct dentry *p;
1509  	int ret = -ENOENT;
1510  	write_seqlock(&rename_lock);
1511  	for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) {
1512  		/* Need exclusion wrt. d_invalidate() */
1513  		spin_lock(&p->d_lock);
1514  		if (unlikely(d_unhashed(p))) {
1515  			spin_unlock(&p->d_lock);
1516  			goto out;
1517  		}
1518  		spin_unlock(&p->d_lock);
1519  	}
1520  	spin_lock(&dentry->d_lock);
1521  	if (!d_unlinked(dentry)) {
1522  		ret = -EBUSY;
1523  		if (!d_mountpoint(dentry)) {
1524  			dentry->d_flags |= DCACHE_MOUNTED;
1525  			ret = 0;
1526  		}
1527  	}
1528   	spin_unlock(&dentry->d_lock);
1529  out:
1530  	write_sequnlock(&rename_lock);
1531  	return ret;
1532  }
1533  
1534  /*
1535   * Search the dentry child list of the specified parent,
1536   * and move any unused dentries to the end of the unused
1537   * list for prune_dcache(). We descend to the next level
1538   * whenever the d_subdirs list is non-empty and continue
1539   * searching.
1540   *
1541   * It returns zero iff there are no unused children,
1542   * otherwise  it returns the number of children moved to
1543   * the end of the unused list. This may not be the total
1544   * number of unused children, because select_parent can
1545   * drop the lock and return early due to latency
1546   * constraints.
1547   */
1548  
1549  struct select_data {
1550  	struct dentry *start;
1551  	union {
1552  		long found;
1553  		struct dentry *victim;
1554  	};
1555  	struct list_head dispose;
1556  };
1557  
1558  static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
1559  {
1560  	struct select_data *data = _data;
1561  	enum d_walk_ret ret = D_WALK_CONTINUE;
1562  
1563  	if (data->start == dentry)
1564  		goto out;
1565  
1566  	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1567  		data->found++;
1568  	} else {
1569  		if (dentry->d_flags & DCACHE_LRU_LIST)
1570  			d_lru_del(dentry);
1571  		if (!dentry->d_lockref.count) {
1572  			d_shrink_add(dentry, &data->dispose);
1573  			data->found++;
1574  		}
1575  	}
1576  	/*
1577  	 * We can return to the caller if we have found some (this
1578  	 * ensures forward progress). We'll be coming back to find
1579  	 * the rest.
1580  	 */
1581  	if (!list_empty(&data->dispose))
1582  		ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1583  out:
1584  	return ret;
1585  }
1586  
1587  static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
1588  {
1589  	struct select_data *data = _data;
1590  	enum d_walk_ret ret = D_WALK_CONTINUE;
1591  
1592  	if (data->start == dentry)
1593  		goto out;
1594  
1595  	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1596  		if (!dentry->d_lockref.count) {
1597  			rcu_read_lock();
1598  			data->victim = dentry;
1599  			return D_WALK_QUIT;
1600  		}
1601  	} else {
1602  		if (dentry->d_flags & DCACHE_LRU_LIST)
1603  			d_lru_del(dentry);
1604  		if (!dentry->d_lockref.count)
1605  			d_shrink_add(dentry, &data->dispose);
1606  	}
1607  	/*
1608  	 * We can return to the caller if we have found some (this
1609  	 * ensures forward progress). We'll be coming back to find
1610  	 * the rest.
1611  	 */
1612  	if (!list_empty(&data->dispose))
1613  		ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1614  out:
1615  	return ret;
1616  }
1617  
1618  /**
1619   * shrink_dcache_parent - prune dcache
1620   * @parent: parent of entries to prune
1621   *
1622   * Prune the dcache to remove unused children of the parent dentry.
1623   */
1624  void shrink_dcache_parent(struct dentry *parent)
1625  {
1626  	for (;;) {
1627  		struct select_data data = {.start = parent};
1628  
1629  		INIT_LIST_HEAD(&data.dispose);
1630  		d_walk(parent, &data, select_collect);
1631  
1632  		if (!list_empty(&data.dispose)) {
1633  			shrink_dentry_list(&data.dispose);
1634  			continue;
1635  		}
1636  
1637  		cond_resched();
1638  		if (!data.found)
1639  			break;
1640  		data.victim = NULL;
1641  		d_walk(parent, &data, select_collect2);
1642  		if (data.victim) {
1643  			struct dentry *parent;
1644  			spin_lock(&data.victim->d_lock);
1645  			if (!shrink_lock_dentry(data.victim)) {
1646  				spin_unlock(&data.victim->d_lock);
1647  				rcu_read_unlock();
1648  			} else {
1649  				rcu_read_unlock();
1650  				parent = data.victim->d_parent;
1651  				if (parent != data.victim)
1652  					__dput_to_list(parent, &data.dispose);
1653  				__dentry_kill(data.victim);
1654  			}
1655  		}
1656  		if (!list_empty(&data.dispose))
1657  			shrink_dentry_list(&data.dispose);
1658  	}
1659  }
1660  EXPORT_SYMBOL(shrink_dcache_parent);
1661  
1662  static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
1663  {
1664  	/* it has busy descendants; complain about those instead */
1665  	if (!list_empty(&dentry->d_subdirs))
1666  		return D_WALK_CONTINUE;
1667  
1668  	/* root with refcount 1 is fine */
1669  	if (dentry == _data && dentry->d_lockref.count == 1)
1670  		return D_WALK_CONTINUE;
1671  
1672  	WARN(1, "BUG: Dentry %p{i=%lx,n=%pd} "
1673  			" still in use (%d) [unmount of %s %s]\n",
1674  		       dentry,
1675  		       dentry->d_inode ?
1676  		       dentry->d_inode->i_ino : 0UL,
1677  		       dentry,
1678  		       dentry->d_lockref.count,
1679  		       dentry->d_sb->s_type->name,
1680  		       dentry->d_sb->s_id);
1681  	return D_WALK_CONTINUE;
1682  }
1683  
1684  static void do_one_tree(struct dentry *dentry)
1685  {
1686  	shrink_dcache_parent(dentry);
1687  	d_walk(dentry, dentry, umount_check);
1688  	d_drop(dentry);
1689  	dput(dentry);
1690  }
1691  
1692  /*
1693   * destroy the dentries attached to a superblock on unmounting
1694   */
1695  void shrink_dcache_for_umount(struct super_block *sb)
1696  {
1697  	struct dentry *dentry;
1698  
1699  	WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
1700  
1701  	dentry = sb->s_root;
1702  	sb->s_root = NULL;
1703  	do_one_tree(dentry);
1704  
1705  	while (!hlist_bl_empty(&sb->s_roots)) {
1706  		dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash));
1707  		do_one_tree(dentry);
1708  	}
1709  }
1710  
1711  static enum d_walk_ret find_submount(void *_data, struct dentry *dentry)
1712  {
1713  	struct dentry **victim = _data;
1714  	if (d_mountpoint(dentry)) {
1715  		__dget_dlock(dentry);
1716  		*victim = dentry;
1717  		return D_WALK_QUIT;
1718  	}
1719  	return D_WALK_CONTINUE;
1720  }
1721  
1722  /**
1723   * d_invalidate - detach submounts, prune dcache, and drop
1724   * @dentry: dentry to invalidate (aka detach, prune and drop)
1725   */
1726  void d_invalidate(struct dentry *dentry)
1727  {
1728  	bool had_submounts = false;
1729  	spin_lock(&dentry->d_lock);
1730  	if (d_unhashed(dentry)) {
1731  		spin_unlock(&dentry->d_lock);
1732  		return;
1733  	}
1734  	__d_drop(dentry);
1735  	spin_unlock(&dentry->d_lock);
1736  
1737  	/* Negative dentries can be dropped without further checks */
1738  	if (!dentry->d_inode)
1739  		return;
1740  
1741  	shrink_dcache_parent(dentry);
1742  	for (;;) {
1743  		struct dentry *victim = NULL;
1744  		d_walk(dentry, &victim, find_submount);
1745  		if (!victim) {
1746  			if (had_submounts)
1747  				shrink_dcache_parent(dentry);
1748  			return;
1749  		}
1750  		had_submounts = true;
1751  		detach_mounts(victim);
1752  		dput(victim);
1753  	}
1754  }
1755  EXPORT_SYMBOL(d_invalidate);
1756  
1757  /**
1758   * __d_alloc	-	allocate a dcache entry
1759   * @sb: filesystem it will belong to
1760   * @name: qstr of the name
1761   *
1762   * Allocates a dentry. It returns %NULL if there is insufficient memory
1763   * available. On success the dentry is returned. The name passed in is
1764   * copied and the copy passed in may be reused after this call.
1765   */
1766  
1767  static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1768  {
1769  	struct dentry *dentry;
1770  	char *dname;
1771  	int err;
1772  
1773  	dentry = kmem_cache_alloc_lru(dentry_cache, &sb->s_dentry_lru,
1774  				      GFP_KERNEL);
1775  	if (!dentry)
1776  		return NULL;
1777  
1778  	/*
1779  	 * We guarantee that the inline name is always NUL-terminated.
1780  	 * This way the memcpy() done by the name switching in rename
1781  	 * will still always have a NUL at the end, even if we might
1782  	 * be overwriting an internal NUL character
1783  	 */
1784  	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
1785  	if (unlikely(!name)) {
1786  		name = &slash_name;
1787  		dname = dentry->d_iname;
1788  	} else if (name->len > DNAME_INLINE_LEN-1) {
1789  		size_t size = offsetof(struct external_name, name[1]);
1790  		struct external_name *p = kmalloc(size + name->len,
1791  						  GFP_KERNEL_ACCOUNT |
1792  						  __GFP_RECLAIMABLE);
1793  		if (!p) {
1794  			kmem_cache_free(dentry_cache, dentry);
1795  			return NULL;
1796  		}
1797  		atomic_set(&p->u.count, 1);
1798  		dname = p->name;
1799  	} else  {
1800  		dname = dentry->d_iname;
1801  	}
1802  
1803  	dentry->d_name.len = name->len;
1804  	dentry->d_name.hash = name->hash;
1805  	memcpy(dname, name->name, name->len);
1806  	dname[name->len] = 0;
1807  
1808  	/* Make sure we always see the terminating NUL character */
1809  	smp_store_release(&dentry->d_name.name, dname); /* ^^^ */
1810  
1811  	dentry->d_lockref.count = 1;
1812  	dentry->d_flags = 0;
1813  	spin_lock_init(&dentry->d_lock);
1814  	seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock);
1815  	dentry->d_inode = NULL;
1816  	dentry->d_parent = dentry;
1817  	dentry->d_sb = sb;
1818  	dentry->d_op = NULL;
1819  	dentry->d_fsdata = NULL;
1820  	INIT_HLIST_BL_NODE(&dentry->d_hash);
1821  	INIT_LIST_HEAD(&dentry->d_lru);
1822  	INIT_LIST_HEAD(&dentry->d_subdirs);
1823  	INIT_HLIST_NODE(&dentry->d_u.d_alias);
1824  	INIT_LIST_HEAD(&dentry->d_child);
1825  	d_set_d_op(dentry, dentry->d_sb->s_d_op);
1826  
1827  	if (dentry->d_op && dentry->d_op->d_init) {
1828  		err = dentry->d_op->d_init(dentry);
1829  		if (err) {
1830  			if (dname_external(dentry))
1831  				kfree(external_name(dentry));
1832  			kmem_cache_free(dentry_cache, dentry);
1833  			return NULL;
1834  		}
1835  	}
1836  
1837  	this_cpu_inc(nr_dentry);
1838  
1839  	return dentry;
1840  }
1841  
1842  /**
1843   * d_alloc	-	allocate a dcache entry
1844   * @parent: parent of entry to allocate
1845   * @name: qstr of the name
1846   *
1847   * Allocates a dentry. It returns %NULL if there is insufficient memory
1848   * available. On success the dentry is returned. The name passed in is
1849   * copied and the copy passed in may be reused after this call.
1850   */
1851  struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1852  {
1853  	struct dentry *dentry = __d_alloc(parent->d_sb, name);
1854  	if (!dentry)
1855  		return NULL;
1856  	spin_lock(&parent->d_lock);
1857  	/*
1858  	 * don't need child lock because it is not subject
1859  	 * to concurrency here
1860  	 */
1861  	__dget_dlock(parent);
1862  	dentry->d_parent = parent;
1863  	list_add(&dentry->d_child, &parent->d_subdirs);
1864  	spin_unlock(&parent->d_lock);
1865  
1866  	return dentry;
1867  }
1868  EXPORT_SYMBOL(d_alloc);
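
/*
 * Example (illustrative sketch): a d_alloc()ed dentry is normally made
 * positive right away, either directly as below or via d_add()/
 * d_splice_alias().  example_new_child() is hypothetical; the caller is
 * assumed to hold the parent directory lock and an inode reference that
 * the dcache will own.
 *
 *	static struct dentry *example_new_child(struct dentry *dir,
 *						const struct qstr *name,
 *						struct inode *inode)
 *	{
 *		struct dentry *child = d_alloc(dir, name);
 *
 *		if (!child)
 *			return NULL;
 *		d_instantiate(child, inode);
 *		return child;
 *	}
 */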
1869  
1870  struct dentry *d_alloc_anon(struct super_block *sb)
1871  {
1872  	return __d_alloc(sb, NULL);
1873  }
1874  EXPORT_SYMBOL(d_alloc_anon);
1875  
1876  struct dentry *d_alloc_cursor(struct dentry * parent)
1877  {
1878  	struct dentry *dentry = d_alloc_anon(parent->d_sb);
1879  	if (dentry) {
1880  		dentry->d_flags |= DCACHE_DENTRY_CURSOR;
1881  		dentry->d_parent = dget(parent);
1882  	}
1883  	return dentry;
1884  }
1885  
1886  /**
1887   * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
1888   * @sb: the superblock
1889   * @name: qstr of the name
1890   *
1891   * For a filesystem that just pins its dentries in memory and never
1892   * performs lookups at all, return an unhashed IS_ROOT dentry.
1893   * This is used for pipes, sockets, etc. - the stuff that should
1894   * never be anyone's children or parents.  Unlike all other
1895   * dentries, these will not have RCU delay between dropping the
1896   * last reference and freeing them.
1897   *
1898   * The only user is alloc_file_pseudo() and that's what should
1899   * be considered a public interface.  Don't use directly.
1900   */
1901  struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1902  {
1903  	struct dentry *dentry = __d_alloc(sb, name);
1904  	if (likely(dentry))
1905  		dentry->d_flags |= DCACHE_NORCU;
1906  	return dentry;
1907  }
1908  
1909  struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1910  {
1911  	struct qstr q;
1912  
1913  	q.name = name;
1914  	q.hash_len = hashlen_string(parent, name);
1915  	return d_alloc(parent, &q);
1916  }
1917  EXPORT_SYMBOL(d_alloc_name);
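
/*
 * Example (illustrative sketch): pseudo-filesystems that populate a
 * directory they control often combine d_alloc_name() with d_add(),
 * with the parent directory locked.  example_make_inode() and the
 * "status" name are hypothetical.
 *
 *	dentry = d_alloc_name(parent, "status");
 *	if (dentry) {
 *		inode = example_make_inode(parent->d_sb);
 *		if (inode)
 *			d_add(dentry, inode);
 *		else
 *			dput(dentry);
 *	}
 */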
1918  
1919  void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1920  {
1921  	WARN_ON_ONCE(dentry->d_op);
1922  	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
1923  				DCACHE_OP_COMPARE	|
1924  				DCACHE_OP_REVALIDATE	|
1925  				DCACHE_OP_WEAK_REVALIDATE	|
1926  				DCACHE_OP_DELETE	|
1927  				DCACHE_OP_REAL));
1928  	dentry->d_op = op;
1929  	if (!op)
1930  		return;
1931  	if (op->d_hash)
1932  		dentry->d_flags |= DCACHE_OP_HASH;
1933  	if (op->d_compare)
1934  		dentry->d_flags |= DCACHE_OP_COMPARE;
1935  	if (op->d_revalidate)
1936  		dentry->d_flags |= DCACHE_OP_REVALIDATE;
1937  	if (op->d_weak_revalidate)
1938  		dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1939  	if (op->d_delete)
1940  		dentry->d_flags |= DCACHE_OP_DELETE;
1941  	if (op->d_prune)
1942  		dentry->d_flags |= DCACHE_OP_PRUNE;
1943  	if (op->d_real)
1944  		dentry->d_flags |= DCACHE_OP_REAL;
1945  
1946  }
1947  EXPORT_SYMBOL(d_set_d_op);
1948  
1949  
1950  /**
1951   * d_set_fallthru - Mark a dentry as falling through to a lower layer
1952   * @dentry: The dentry to mark
1953   *
1954   * Mark a dentry as falling through to the lower layer (as set with
1955   * d_pin_lower()).  This flag may be recorded on the medium.
1956   */
1957  void d_set_fallthru(struct dentry *dentry)
1958  {
1959  	spin_lock(&dentry->d_lock);
1960  	dentry->d_flags |= DCACHE_FALLTHRU;
1961  	spin_unlock(&dentry->d_lock);
1962  }
1963  EXPORT_SYMBOL(d_set_fallthru);
1964  
1965  static unsigned d_flags_for_inode(struct inode *inode)
1966  {
1967  	unsigned add_flags = DCACHE_REGULAR_TYPE;
1968  
1969  	if (!inode)
1970  		return DCACHE_MISS_TYPE;
1971  
1972  	if (S_ISDIR(inode->i_mode)) {
1973  		add_flags = DCACHE_DIRECTORY_TYPE;
1974  		if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
1975  			if (unlikely(!inode->i_op->lookup))
1976  				add_flags = DCACHE_AUTODIR_TYPE;
1977  			else
1978  				inode->i_opflags |= IOP_LOOKUP;
1979  		}
1980  		goto type_determined;
1981  	}
1982  
1983  	if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1984  		if (unlikely(inode->i_op->get_link)) {
1985  			add_flags = DCACHE_SYMLINK_TYPE;
1986  			goto type_determined;
1987  		}
1988  		inode->i_opflags |= IOP_NOFOLLOW;
1989  	}
1990  
1991  	if (unlikely(!S_ISREG(inode->i_mode)))
1992  		add_flags = DCACHE_SPECIAL_TYPE;
1993  
1994  type_determined:
1995  	if (unlikely(IS_AUTOMOUNT(inode)))
1996  		add_flags |= DCACHE_NEED_AUTOMOUNT;
1997  	return add_flags;
1998  }
1999  
2000  static void __d_instantiate(struct dentry *dentry, struct inode *inode)
2001  {
2002  	unsigned add_flags = d_flags_for_inode(inode);
2003  	WARN_ON(d_in_lookup(dentry));
2004  
2005  	spin_lock(&dentry->d_lock);
2006  	/*
2007  	 * The negative counter only tracks dentries on the LRU. Don't dec if
2008  	 * d_lru is on another list.
2009  	 */
2010  	if ((dentry->d_flags &
2011  	     (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
2012  		this_cpu_dec(nr_dentry_negative);
2013  	hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
2014  	raw_write_seqcount_begin(&dentry->d_seq);
2015  	__d_set_inode_and_type(dentry, inode, add_flags);
2016  	raw_write_seqcount_end(&dentry->d_seq);
2017  	fsnotify_update_flags(dentry);
2018  	spin_unlock(&dentry->d_lock);
2019  }
2020  
2021  /**
2022   * d_instantiate - fill in inode information for a dentry
2023   * @entry: dentry to complete
2024   * @inode: inode to attach to this dentry
2025   *
2026   * Fill in inode information in the entry.
2027   *
2028   * This turns negative dentries into productive full members
2029   * of society.
2030   *
2031   * NOTE! This assumes that the inode count has been incremented
2032   * (or otherwise set) by the caller to indicate that it is now
2033   * in use by the dcache.
2034   */
2035  
2036  void d_instantiate(struct dentry *entry, struct inode * inode)
2037  {
2038  	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
2039  	if (inode) {
2040  		security_d_instantiate(entry, inode);
2041  		spin_lock(&inode->i_lock);
2042  		__d_instantiate(entry, inode);
2043  		spin_unlock(&inode->i_lock);
2044  	}
2045  }
2046  EXPORT_SYMBOL(d_instantiate);
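
/*
 * Example (illustrative sketch): a typical ->create() tail.  The
 * hypothetical example_new_inode() is assumed to return an inode whose
 * reference count already accounts for the dcache's use, per the NOTE
 * above.
 *
 *	inode = example_new_inode(dir->i_sb, mode);
 *	if (IS_ERR(inode))
 *		return PTR_ERR(inode);
 *	d_instantiate(dentry, inode);
 *	return 0;
 */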
2047  
2048  /*
2049   * This should be equivalent to d_instantiate() + unlock_new_inode(),
2050   * with lockdep-related part of unlock_new_inode() done before
2051   * anything else.  Use that instead of open-coding d_instantiate()/
2052   * unlock_new_inode() combinations.
2053   */
2054  void d_instantiate_new(struct dentry *entry, struct inode *inode)
2055  {
2056  	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
2057  	BUG_ON(!inode);
2058  	lockdep_annotate_inode_mutex_key(inode);
2059  	security_d_instantiate(entry, inode);
2060  	spin_lock(&inode->i_lock);
2061  	__d_instantiate(entry, inode);
2062  	WARN_ON(!(inode->i_state & I_NEW));
2063  	inode->i_state &= ~I_NEW & ~I_CREATING;
2064  	smp_mb();
2065  	wake_up_bit(&inode->i_state, __I_NEW);
2066  	spin_unlock(&inode->i_lock);
2067  }
2068  EXPORT_SYMBOL(d_instantiate_new);
2069  
2070  struct dentry *d_make_root(struct inode *root_inode)
2071  {
2072  	struct dentry *res = NULL;
2073  
2074  	if (root_inode) {
2075  		res = d_alloc_anon(root_inode->i_sb);
2076  		if (res)
2077  			d_instantiate(res, root_inode);
2078  		else
2079  			iput(root_inode);
2080  	}
2081  	return res;
2082  }
2083  EXPORT_SYMBOL(d_make_root);
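
/*
 * Example (illustrative sketch): the usual fill_super pattern.  Note that
 * d_make_root() consumes the inode reference even on failure, so the error
 * path needs no iput().  example_get_root_inode() is hypothetical and is
 * assumed to return NULL on failure.
 *
 *	inode = example_get_root_inode(sb);
 *	sb->s_root = d_make_root(inode);
 *	if (!sb->s_root)
 *		return -ENOMEM;
 *	return 0;
 */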
2084  
2085  static struct dentry *__d_instantiate_anon(struct dentry *dentry,
2086  					   struct inode *inode,
2087  					   bool disconnected)
2088  {
2089  	struct dentry *res;
2090  	unsigned add_flags;
2091  
2092  	security_d_instantiate(dentry, inode);
2093  	spin_lock(&inode->i_lock);
2094  	res = __d_find_any_alias(inode);
2095  	if (res) {
2096  		spin_unlock(&inode->i_lock);
2097  		dput(dentry);
2098  		goto out_iput;
2099  	}
2100  
2101  	/* attach a disconnected dentry */
2102  	add_flags = d_flags_for_inode(inode);
2103  
2104  	if (disconnected)
2105  		add_flags |= DCACHE_DISCONNECTED;
2106  
2107  	spin_lock(&dentry->d_lock);
2108  	__d_set_inode_and_type(dentry, inode, add_flags);
2109  	hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
2110  	if (!disconnected) {
2111  		hlist_bl_lock(&dentry->d_sb->s_roots);
2112  		hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_roots);
2113  		hlist_bl_unlock(&dentry->d_sb->s_roots);
2114  	}
2115  	spin_unlock(&dentry->d_lock);
2116  	spin_unlock(&inode->i_lock);
2117  
2118  	return dentry;
2119  
2120   out_iput:
2121  	iput(inode);
2122  	return res;
2123  }
2124  
2125  struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode)
2126  {
2127  	return __d_instantiate_anon(dentry, inode, true);
2128  }
2129  EXPORT_SYMBOL(d_instantiate_anon);
2130  
2131  static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
2132  {
2133  	struct dentry *tmp;
2134  	struct dentry *res;
2135  
2136  	if (!inode)
2137  		return ERR_PTR(-ESTALE);
2138  	if (IS_ERR(inode))
2139  		return ERR_CAST(inode);
2140  
2141  	res = d_find_any_alias(inode);
2142  	if (res)
2143  		goto out_iput;
2144  
2145  	tmp = d_alloc_anon(inode->i_sb);
2146  	if (!tmp) {
2147  		res = ERR_PTR(-ENOMEM);
2148  		goto out_iput;
2149  	}
2150  
2151  	return __d_instantiate_anon(tmp, inode, disconnected);
2152  
2153  out_iput:
2154  	iput(inode);
2155  	return res;
2156  }
2157  
2158  /**
2159   * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode
2160   * @inode: inode to allocate the dentry for
2161   *
2162   * Obtain a dentry for an inode resulting from NFS filehandle conversion or
2163   * similar open by handle operations.  The returned dentry may be anonymous,
2164   * or may have a full name (if the inode was already in the cache).
2165   *
2166   * When called on a directory inode, we must ensure that the inode only ever
2167   * has one dentry.  If a dentry is found, that is returned instead of
2168   * allocating a new one.
2169   *
2170   * On successful return, the reference to the inode has been transferred
2171   * to the dentry.  In case of an error the reference on the inode is released.
2172   * To make it easier to use in export operations a %NULL or IS_ERR inode may
2173   * be passed in and the error will be propagated to the return value,
2174   * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
2175   */
2176  struct dentry *d_obtain_alias(struct inode *inode)
2177  {
2178  	return __d_obtain_alias(inode, true);
2179  }
2180  EXPORT_SYMBOL(d_obtain_alias);
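
/*
 * Example (illustrative sketch): the usual export_operations ->fh_to_dentry
 * tail, with a hypothetical example_iget() that may return the inode, an
 * ERR_PTR, or NULL:
 *
 *	struct inode *inode = example_iget(sb, ino, generation);
 *
 *	return d_obtain_alias(inode);	(NULL/IS_ERR are handled as documented)
 */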
2181  
2182  /**
2183   * d_obtain_root - find or allocate a dentry for a given inode
2184   * @inode: inode to allocate the dentry for
2185   *
2186   * Obtain an IS_ROOT dentry for the root of a filesystem.
2187   *
2188   * We must ensure that directory inodes only ever have one dentry.  If a
2189   * dentry is found, that is returned instead of allocating a new one.
2190   *
2191   * On successful return, the reference to the inode has been transferred
2192   * to the dentry.  In case of an error the reference on the inode is
2193   * released.  A %NULL or IS_ERR inode may be passed in and the error
2194   * will be propagated to the return value, with a %NULL @inode
2195   * replaced by ERR_PTR(-ESTALE).
2196   */
2197  struct dentry *d_obtain_root(struct inode *inode)
2198  {
2199  	return __d_obtain_alias(inode, false);
2200  }
2201  EXPORT_SYMBOL(d_obtain_root);
2202  
2203  /**
2204   * d_add_ci - lookup or allocate new dentry with case-exact name
2205   * @inode:  the inode case-insensitive lookup has found
2206   * @dentry: the negative dentry that was passed to the parent's lookup func
2207   * @name:   the case-exact name to be associated with the returned dentry
2208   *
2209   * This is to avoid filling the dcache with case-insensitive names for the
2210   * same inode: only the actual, correctly-cased name is stored in the
2211   * dcache for case-insensitive filesystems.
2212   *
2213   * For a case-insensitive lookup match and if the case-exact dentry
2214   * already exists in the dcache, use it and return it.
2215   *
2216   * If no entry exists with the exact case name, allocate new dentry with
2217   * the exact case, and return the spliced entry.
2218   */
2219  struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
2220  			struct qstr *name)
2221  {
2222  	struct dentry *found, *res;
2223  
2224  	/*
2225  	 * First check if a dentry matching the name already exists,
2226  	 * if not go ahead and create it now.
2227  	 */
2228  	found = d_hash_and_lookup(dentry->d_parent, name);
2229  	if (found) {
2230  		iput(inode);
2231  		return found;
2232  	}
2233  	if (d_in_lookup(dentry)) {
2234  		found = d_alloc_parallel(dentry->d_parent, name,
2235  					dentry->d_wait);
2236  		if (IS_ERR(found) || !d_in_lookup(found)) {
2237  			iput(inode);
2238  			return found;
2239  		}
2240  	} else {
2241  		found = d_alloc(dentry->d_parent, name);
2242  		if (!found) {
2243  			iput(inode);
2244  			return ERR_PTR(-ENOMEM);
2245  		}
2246  	}
2247  	res = d_splice_alias(inode, found);
2248  	if (res) {
2249  		d_lookup_done(found);
2250  		dput(found);
2251  		return res;
2252  	}
2253  	return found;
2254  }
2255  EXPORT_SYMBOL(d_add_ci);
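
/*
 * Example (illustrative sketch): a case-insensitive ->lookup() that found
 * the on-disk spelling of the requested name can finish like this.  The
 * example_ci_find() helper and the disk_name buffer are hypothetical.
 *
 *	struct inode *inode = example_ci_find(dir, dentry, disk_name);
 *	struct qstr ci_name = QSTR_INIT(disk_name, strlen(disk_name));
 *
 *	return d_add_ci(dentry, inode, &ci_name);
 */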
2256  
2257  /**
2258   * d_same_name - compare dentry name with case-exact name
2259   * @dentry: the dentry whose name is to be compared
2260   * @parent: parent dentry, whose ->d_compare() may be used for the comparison
2261   * @name:   the case-exact name to compare against
2262   *
2263   * Return: true if the names are the same, false otherwise
2264   */
2265  bool d_same_name(const struct dentry *dentry, const struct dentry *parent,
2266  		 const struct qstr *name)
2267  {
2268  	if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) {
2269  		if (dentry->d_name.len != name->len)
2270  			return false;
2271  		return dentry_cmp(dentry, name->name, name->len) == 0;
2272  	}
2273  	return parent->d_op->d_compare(dentry,
2274  				       dentry->d_name.len, dentry->d_name.name,
2275  				       name) == 0;
2276  }
2277  EXPORT_SYMBOL_GPL(d_same_name);
2278  
2279  /*
2280   * This is __d_lookup_rcu() when the parent dentry has
2281   * DCACHE_OP_COMPARE, which makes things much nastier.
2282   */
2283  static noinline struct dentry *__d_lookup_rcu_op_compare(
2284  	const struct dentry *parent,
2285  	const struct qstr *name,
2286  	unsigned *seqp)
2287  {
2288  	u64 hashlen = name->hash_len;
2289  	struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
2290  	struct hlist_bl_node *node;
2291  	struct dentry *dentry;
2292  
2293  	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2294  		int tlen;
2295  		const char *tname;
2296  		unsigned seq;
2297  
2298  seqretry:
2299  		seq = raw_seqcount_begin(&dentry->d_seq);
2300  		if (dentry->d_parent != parent)
2301  			continue;
2302  		if (d_unhashed(dentry))
2303  			continue;
2304  		if (dentry->d_name.hash != hashlen_hash(hashlen))
2305  			continue;
2306  		tlen = dentry->d_name.len;
2307  		tname = dentry->d_name.name;
2308  		/* we want a consistent (name,len) pair */
2309  		if (read_seqcount_retry(&dentry->d_seq, seq)) {
2310  			cpu_relax();
2311  			goto seqretry;
2312  		}
2313  		if (parent->d_op->d_compare(dentry, tlen, tname, name) != 0)
2314  			continue;
2315  		*seqp = seq;
2316  		return dentry;
2317  	}
2318  	return NULL;
2319  }
2320  
2321  /**
2322   * __d_lookup_rcu - search for a dentry (racy, store-free)
2323   * @parent: parent dentry
2324   * @name: qstr of name we wish to find
2325   * @seqp: returns d_seq value at the point where the dentry was found
2326   * Returns: dentry, or NULL
2327   *
2328   * __d_lookup_rcu is the dcache lookup function for rcu-walk name
2329   * resolution (store-free path walking) design described in
2330   * Documentation/filesystems/path-lookup.txt.
2331   *
2332   * This is not to be used outside core vfs.
2333   *
2334   * __d_lookup_rcu must only be used in rcu-walk mode, i.e. with vfsmount lock
2335   * held, and rcu_read_lock held. The returned dentry must not be stored
2336   * anywhere without taking d_lock and checking the d_seq sequence count
2337   * against @seq returned here.
2338   *
2339   * A refcount may be taken on the found dentry with the d_rcu_to_refcount
2340   * function.
2341   *
2342   * Alternatively, __d_lookup_rcu may be called again to look up the child of
2343   * the returned dentry, so long as its parent's seqlock is checked after the
2344   * child is looked up. Thus, an interlocking stepping of sequence lock checks
2345   * is formed, giving integrity down the path walk.
2346   *
2347   * NOTE! The caller *has* to check the resulting dentry against the sequence
2348   * number we've returned before using any of the resulting dentry state!
2349   */
2350  struct dentry *__d_lookup_rcu(const struct dentry *parent,
2351  				const struct qstr *name,
2352  				unsigned *seqp)
2353  {
2354  	u64 hashlen = name->hash_len;
2355  	const unsigned char *str = name->name;
2356  	struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
2357  	struct hlist_bl_node *node;
2358  	struct dentry *dentry;
2359  
2360  	/*
2361  	 * Note: There is significant duplication with __d_lookup which is
2362  	 * required to prevent single-threaded performance regressions
2363  	 * especially on architectures where smp_rmb (in seqcounts) is costly.
2364  	 * Keep the two functions in sync.
2365  	 */
2366  
2367  	if (unlikely(parent->d_flags & DCACHE_OP_COMPARE))
2368  		return __d_lookup_rcu_op_compare(parent, name, seqp);
2369  
2370  	/*
2371  	 * The hash list is protected using RCU.
2372  	 *
2373  	 * Carefully use d_seq when comparing a candidate dentry, to avoid
2374  	 * races with d_move().
2375  	 *
2376  	 * It is possible that concurrent renames can mess up our list
2377  	 * walk here and result in missing our dentry, resulting in the
2378  	 * false-negative result. d_lookup() protects against concurrent
2379  	 * renames using rename_lock seqlock.
2380  	 *
2381  	 * See Documentation/filesystems/path-lookup.txt for more details.
2382  	 */
2383  	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2384  		unsigned seq;
2385  
2386  		/*
2387  		 * The dentry sequence count protects us from concurrent
2388  		 * renames, and thus protects parent and name fields.
2389  		 *
2390  		 * The caller must perform a seqcount check in order
2391  		 * to do anything useful with the returned dentry.
2392  		 *
2393  		 * NOTE! We do a "raw" seqcount_begin here. That means that
2394  		 * we don't wait for the sequence count to stabilize if it
2395  		 * is in the middle of a sequence change. If we do the slow
2396  		 * dentry compare, we will do seqretries until it is stable,
2397  		 * and if we end up with a successful lookup, we actually
2398  		 * want to exit RCU lookup anyway.
2399  		 *
2400  		 * Note that raw_seqcount_begin still *does* smp_rmb(), so
2401  		 * we are still guaranteed NUL-termination of ->d_name.name.
2402  		 */
2403  		seq = raw_seqcount_begin(&dentry->d_seq);
2404  		if (dentry->d_parent != parent)
2405  			continue;
2406  		if (d_unhashed(dentry))
2407  			continue;
2408  		if (dentry->d_name.hash_len != hashlen)
2409  			continue;
2410  		if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0)
2411  			continue;
2412  		*seqp = seq;
2413  		return dentry;
2414  	}
2415  	return NULL;
2416  }
2417  
2418  /**
2419   * d_lookup - search for a dentry
2420   * @parent: parent dentry
2421   * @name: qstr of name we wish to find
2422   * Returns: dentry, or NULL
2423   *
2424   * d_lookup searches the children of the parent dentry for the name in
2425   * question. If the dentry is found its reference count is incremented and the
2426   * dentry is returned. The caller must use dput to free the entry when it has
2427   * finished using it. %NULL is returned if the dentry does not exist.
2428   */
2429  struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
2430  {
2431  	struct dentry *dentry;
2432  	unsigned seq;
2433  
2434  	do {
2435  		seq = read_seqbegin(&rename_lock);
2436  		dentry = __d_lookup(parent, name);
2437  		if (dentry)
2438  			break;
2439  	} while (read_seqretry(&rename_lock, seq));
2440  	return dentry;
2441  }
2442  EXPORT_SYMBOL(d_lookup);
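
/*
 * Example (illustrative sketch): a caller that only wants to peek at a
 * cached child must drop the reference it gets back:
 *
 *	struct dentry *child = d_lookup(parent, &name);
 *
 *	if (child) {
 *		... examine child ...
 *		dput(child);
 *	}
 */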
2443  
2444  /**
2445   * __d_lookup - search for a dentry (racy)
2446   * @parent: parent dentry
2447   * @name: qstr of name we wish to find
2448   * Returns: dentry, or NULL
2449   *
2450   * __d_lookup is like d_lookup, however it may (rarely) return a
2451   * false-negative result due to unrelated rename activity.
2452   *
2453   * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
2454   * however it must be used carefully, eg. with a following d_lookup in
2455   * the case of failure.
2456   *
2457   * __d_lookup callers must be commented.
2458   */
2459  struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
2460  {
2461  	unsigned int hash = name->hash;
2462  	struct hlist_bl_head *b = d_hash(hash);
2463  	struct hlist_bl_node *node;
2464  	struct dentry *found = NULL;
2465  	struct dentry *dentry;
2466  
2467  	/*
2468  	 * Note: There is significant duplication with __d_lookup_rcu which is
2469  	 * required to prevent single threaded performance regressions
2470  	 * especially on architectures where smp_rmb (in seqcounts) is costly.
2471  	 * Keep the two functions in sync.
2472  	 */
2473  
2474  	/*
2475  	 * The hash list is protected using RCU.
2476  	 *
2477  	 * Take d_lock when comparing a candidate dentry, to avoid races
2478  	 * with d_move().
2479  	 *
2480  	 * It is possible that concurrent renames can mess up our list
2481  	 * walk here and result in missing our dentry, resulting in the
2482  	 * false-negative result. d_lookup() protects against concurrent
2483  	 * renames using rename_lock seqlock.
2484  	 *
2485  	 * See Documentation/filesystems/path-lookup.txt for more details.
2486  	 */
2487  	rcu_read_lock();
2488  
2489  	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2490  
2491  		if (dentry->d_name.hash != hash)
2492  			continue;
2493  
2494  		spin_lock(&dentry->d_lock);
2495  		if (dentry->d_parent != parent)
2496  			goto next;
2497  		if (d_unhashed(dentry))
2498  			goto next;
2499  
2500  		if (!d_same_name(dentry, parent, name))
2501  			goto next;
2502  
2503  		dentry->d_lockref.count++;
2504  		found = dentry;
2505  		spin_unlock(&dentry->d_lock);
2506  		break;
2507  next:
2508  		spin_unlock(&dentry->d_lock);
2509   	}
2510   	rcu_read_unlock();
2511  
2512   	return found;
2513  }
2514  
2515  /**
2516   * d_hash_and_lookup - hash the qstr then search for a dentry
2517   * @dir: Directory to search in
2518   * @name: qstr of name we wish to find
2519   *
2520   * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
2521   */
2522  struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2523  {
2524  	/*
2525  	 * Check for a fs-specific hash function. Note that we must
2526  	 * calculate the standard hash first, as the d_op->d_hash()
2527  	 * routine may choose to leave the hash value unchanged.
2528  	 */
2529  	name->hash = full_name_hash(dir, name->name, name->len);
2530  	if (dir->d_flags & DCACHE_OP_HASH) {
2531  		int err = dir->d_op->d_hash(dir, name);
2532  		if (unlikely(err < 0))
2533  			return ERR_PTR(err);
2534  	}
2535  	return d_lookup(dir, name);
2536  }
2537  EXPORT_SYMBOL(d_hash_and_lookup);
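
/*
 * Example (illustrative sketch): looking up a cached child by a plain C
 * string, letting d_hash_and_lookup() apply any ->d_hash() the filesystem
 * may have.  "namestr" is a hypothetical NUL-terminated name.
 *
 *	struct qstr q = QSTR_INIT(namestr, strlen(namestr));
 *	struct dentry *found = d_hash_and_lookup(dir, &q);
 *
 *	if (IS_ERR_OR_NULL(found))
 *		return NULL;
 *	... use found, then dput(found) ...
 */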
2538  
2539  /*
2540   * When a file is deleted, we have two options:
2541   * - turn this dentry into a negative dentry
2542   * - unhash this dentry and free it.
2543   *
2544   * Usually, we want to just turn this into
2545   * a negative dentry, but if anybody else is
2546   * currently using the dentry or the inode
2547   * we can't do that and we fall back on removing
2548   * it from the hash queues and waiting for
2549   * it to be deleted later when it has no users
2550   */
2551  
2552  /**
2553   * d_delete - delete a dentry
2554   * @dentry: The dentry to delete
2555   *
2556   * Turn the dentry into a negative dentry if possible, otherwise
2557   * remove it from the hash queues so it can be deleted later
2558   */
2559  
2560  void d_delete(struct dentry * dentry)
2561  {
2562  	struct inode *inode = dentry->d_inode;
2563  
2564  	spin_lock(&inode->i_lock);
2565  	spin_lock(&dentry->d_lock);
2566  	/*
2567  	 * Are we the only user?
2568  	 */
2569  	if (dentry->d_lockref.count == 1) {
2570  		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2571  		dentry_unlink_inode(dentry);
2572  	} else {
2573  		__d_drop(dentry);
2574  		spin_unlock(&dentry->d_lock);
2575  		spin_unlock(&inode->i_lock);
2576  	}
2577  }
2578  EXPORT_SYMBOL(d_delete);
2579  
2580  static void __d_rehash(struct dentry *entry)
2581  {
2582  	struct hlist_bl_head *b = d_hash(entry->d_name.hash);
2583  
2584  	hlist_bl_lock(b);
2585  	hlist_bl_add_head_rcu(&entry->d_hash, b);
2586  	hlist_bl_unlock(b);
2587  }
2588  
2589  /**
2590   * d_rehash	- add an entry back to the hash
2591   * @entry: dentry to add to the hash
2592   *
2593   * Adds a dentry to the hash according to its name.
2594   */
2595  
2596  void d_rehash(struct dentry * entry)
2597  {
2598  	spin_lock(&entry->d_lock);
2599  	__d_rehash(entry);
2600  	spin_unlock(&entry->d_lock);
2601  }
2602  EXPORT_SYMBOL(d_rehash);
2603  
2604  static inline unsigned start_dir_add(struct inode *dir)
2605  {
2606  	preempt_disable_nested();
2607  	for (;;) {
2608  		unsigned n = dir->i_dir_seq;
2609  		if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
2610  			return n;
2611  		cpu_relax();
2612  	}
2613  }
2614  
2615  static inline void end_dir_add(struct inode *dir, unsigned int n,
2616  			       wait_queue_head_t *d_wait)
2617  {
2618  	smp_store_release(&dir->i_dir_seq, n + 2);
2619  	preempt_enable_nested();
2620  	wake_up_all(d_wait);
2621  }
2622  
2623  static void d_wait_lookup(struct dentry *dentry)
2624  {
2625  	if (d_in_lookup(dentry)) {
2626  		DECLARE_WAITQUEUE(wait, current);
2627  		add_wait_queue(dentry->d_wait, &wait);
2628  		do {
2629  			set_current_state(TASK_UNINTERRUPTIBLE);
2630  			spin_unlock(&dentry->d_lock);
2631  			schedule();
2632  			spin_lock(&dentry->d_lock);
2633  		} while (d_in_lookup(dentry));
2634  	}
2635  }
2636  
2637  struct dentry *d_alloc_parallel(struct dentry *parent,
2638  				const struct qstr *name,
2639  				wait_queue_head_t *wq)
2640  {
2641  	unsigned int hash = name->hash;
2642  	struct hlist_bl_head *b = in_lookup_hash(parent, hash);
2643  	struct hlist_bl_node *node;
2644  	struct dentry *new = d_alloc(parent, name);
2645  	struct dentry *dentry;
2646  	unsigned seq, r_seq, d_seq;
2647  
2648  	if (unlikely(!new))
2649  		return ERR_PTR(-ENOMEM);
2650  
2651  retry:
2652  	rcu_read_lock();
2653  	seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
2654  	r_seq = read_seqbegin(&rename_lock);
2655  	dentry = __d_lookup_rcu(parent, name, &d_seq);
2656  	if (unlikely(dentry)) {
2657  		if (!lockref_get_not_dead(&dentry->d_lockref)) {
2658  			rcu_read_unlock();
2659  			goto retry;
2660  		}
2661  		if (read_seqcount_retry(&dentry->d_seq, d_seq)) {
2662  			rcu_read_unlock();
2663  			dput(dentry);
2664  			goto retry;
2665  		}
2666  		rcu_read_unlock();
2667  		dput(new);
2668  		return dentry;
2669  	}
2670  	if (unlikely(read_seqretry(&rename_lock, r_seq))) {
2671  		rcu_read_unlock();
2672  		goto retry;
2673  	}
2674  
2675  	if (unlikely(seq & 1)) {
2676  		rcu_read_unlock();
2677  		goto retry;
2678  	}
2679  
2680  	hlist_bl_lock(b);
2681  	if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
2682  		hlist_bl_unlock(b);
2683  		rcu_read_unlock();
2684  		goto retry;
2685  	}
2686  	/*
2687  	 * No changes for the parent since the beginning of d_lookup().
2688  	 * Since all removals from the chain happen with hlist_bl_lock(),
2689  	 * any potential in-lookup matches are going to stay here until
2690  	 * we unlock the chain.  All fields are stable in everything
2691  	 * we encounter.
2692  	 */
2693  	hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) {
2694  		if (dentry->d_name.hash != hash)
2695  			continue;
2696  		if (dentry->d_parent != parent)
2697  			continue;
2698  		if (!d_same_name(dentry, parent, name))
2699  			continue;
2700  		hlist_bl_unlock(b);
2701  		/* now we can try to grab a reference */
2702  		if (!lockref_get_not_dead(&dentry->d_lockref)) {
2703  			rcu_read_unlock();
2704  			goto retry;
2705  		}
2706  
2707  		rcu_read_unlock();
2708  		/*
2709  		 * somebody is likely to be still doing lookup for it;
2710  		 * wait for them to finish
2711  		 */
2712  		spin_lock(&dentry->d_lock);
2713  		d_wait_lookup(dentry);
2714  		/*
2715  		 * it's not in-lookup anymore; in principle we should repeat
2716  		 * everything from dcache lookup, but it's likely to be what
2717  		 * d_lookup() would've found anyway.  If it is, just return it;
2718  		 * otherwise we really have to repeat the whole thing.
2719  		 */
2720  		if (unlikely(dentry->d_name.hash != hash))
2721  			goto mismatch;
2722  		if (unlikely(dentry->d_parent != parent))
2723  			goto mismatch;
2724  		if (unlikely(d_unhashed(dentry)))
2725  			goto mismatch;
2726  		if (unlikely(!d_same_name(dentry, parent, name)))
2727  			goto mismatch;
2728  		/* OK, it *is* a hashed match; return it */
2729  		spin_unlock(&dentry->d_lock);
2730  		dput(new);
2731  		return dentry;
2732  	}
2733  	rcu_read_unlock();
2734  	/* we can't take ->d_lock here; it's OK, though. */
2735  	new->d_flags |= DCACHE_PAR_LOOKUP;
2736  	new->d_wait = wq;
2737  	hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b);
2738  	hlist_bl_unlock(b);
2739  	return new;
2740  mismatch:
2741  	spin_unlock(&dentry->d_lock);
2742  	dput(dentry);
2743  	goto retry;
2744  }
2745  EXPORT_SYMBOL(d_alloc_parallel);
2746  
2747  /*
2748   * - Unhash the dentry
2749   * - Retrieve and clear the waitqueue head in dentry
2750   * - Return the waitqueue head
2751   */
2752  static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
2753  {
2754  	wait_queue_head_t *d_wait;
2755  	struct hlist_bl_head *b;
2756  
2757  	lockdep_assert_held(&dentry->d_lock);
2758  
2759  	b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash);
2760  	hlist_bl_lock(b);
2761  	dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
2762  	__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
2763  	d_wait = dentry->d_wait;
2764  	dentry->d_wait = NULL;
2765  	hlist_bl_unlock(b);
2766  	INIT_HLIST_NODE(&dentry->d_u.d_alias);
2767  	INIT_LIST_HEAD(&dentry->d_lru);
2768  	return d_wait;
2769  }
2770  
2771  void __d_lookup_unhash_wake(struct dentry *dentry)
2772  {
2773  	spin_lock(&dentry->d_lock);
2774  	wake_up_all(__d_lookup_unhash(dentry));
2775  	spin_unlock(&dentry->d_lock);
2776  }
2777  EXPORT_SYMBOL(__d_lookup_unhash_wake);
2778  
2779  /* inode->i_lock held if inode is non-NULL */
2780  
2781  static inline void __d_add(struct dentry *dentry, struct inode *inode)
2782  {
2783  	wait_queue_head_t *d_wait;
2784  	struct inode *dir = NULL;
2785  	unsigned n;
2786  	spin_lock(&dentry->d_lock);
2787  	if (unlikely(d_in_lookup(dentry))) {
2788  		dir = dentry->d_parent->d_inode;
2789  		n = start_dir_add(dir);
2790  		d_wait = __d_lookup_unhash(dentry);
2791  	}
2792  	if (inode) {
2793  		unsigned add_flags = d_flags_for_inode(inode);
2794  		hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
2795  		raw_write_seqcount_begin(&dentry->d_seq);
2796  		__d_set_inode_and_type(dentry, inode, add_flags);
2797  		raw_write_seqcount_end(&dentry->d_seq);
2798  		fsnotify_update_flags(dentry);
2799  	}
2800  	__d_rehash(dentry);
2801  	if (dir)
2802  		end_dir_add(dir, n, d_wait);
2803  	spin_unlock(&dentry->d_lock);
2804  	if (inode)
2805  		spin_unlock(&inode->i_lock);
2806  }
2807  
2808  /**
2809   * d_add - add dentry to hash queues
2810   * @entry: dentry to add
2811   * @inode: The inode to attach to this dentry
2812   *
2813   * This adds the entry to the hash queues and attaches @inode to the entry.
2814   * The entry was actually filled in earlier during d_alloc().
2815   */
2816  
2817  void d_add(struct dentry *entry, struct inode *inode)
2818  {
2819  	if (inode) {
2820  		security_d_instantiate(entry, inode);
2821  		spin_lock(&inode->i_lock);
2822  	}
2823  	__d_add(entry, inode);
2824  }
2825  EXPORT_SYMBOL(d_add);
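
/*
 * Example (illustrative sketch): a simple ->lookup() that resolves the name
 * itself (rather than going through d_splice_alias()) ends with d_add();
 * passing a NULL inode leaves the dentry negative.  example_lookup_inode()
 * is hypothetical.
 *
 *	inode = example_lookup_inode(dir, &dentry->d_name);
 *	if (IS_ERR(inode))
 *		return ERR_CAST(inode);
 *	d_add(dentry, inode);
 *	return NULL;
 */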
2826  
2827  /**
2828   * d_exact_alias - find and hash an exact unhashed alias
2829   * @entry: dentry to add
2830   * @inode: The inode to go with this dentry
2831   *
2832   * If an unhashed dentry with the same name/parent and desired
2833   * inode already exists, hash and return it.  Otherwise, return
2834   * NULL.
2835   *
2836   * Parent directory should be locked.
2837   */
2838  struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
2839  {
2840  	struct dentry *alias;
2841  	unsigned int hash = entry->d_name.hash;
2842  
2843  	spin_lock(&inode->i_lock);
2844  	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
2845  		/*
2846  		 * Don't need alias->d_lock here, because aliases with
2847  		 * d_parent == entry->d_parent are not subject to name or
2848  		 * parent changes, because the parent inode i_mutex is held.
2849  		 */
2850  		if (alias->d_name.hash != hash)
2851  			continue;
2852  		if (alias->d_parent != entry->d_parent)
2853  			continue;
2854  		if (!d_same_name(alias, entry->d_parent, &entry->d_name))
2855  			continue;
2856  		spin_lock(&alias->d_lock);
2857  		if (!d_unhashed(alias)) {
2858  			spin_unlock(&alias->d_lock);
2859  			alias = NULL;
2860  		} else {
2861  			__dget_dlock(alias);
2862  			__d_rehash(alias);
2863  			spin_unlock(&alias->d_lock);
2864  		}
2865  		spin_unlock(&inode->i_lock);
2866  		return alias;
2867  	}
2868  	spin_unlock(&inode->i_lock);
2869  	return NULL;
2870  }
2871  EXPORT_SYMBOL(d_exact_alias);
2872  
2873  static void swap_names(struct dentry *dentry, struct dentry *target)
2874  {
2875  	if (unlikely(dname_external(target))) {
2876  		if (unlikely(dname_external(dentry))) {
2877  			/*
2878  			 * Both external: swap the pointers
2879  			 */
2880  			swap(target->d_name.name, dentry->d_name.name);
2881  		} else {
2882  			/*
2883  			 * dentry:internal, target:external.  Steal target's
2884  			 * storage and make target internal.
2885  			 */
2886  			memcpy(target->d_iname, dentry->d_name.name,
2887  					dentry->d_name.len + 1);
2888  			dentry->d_name.name = target->d_name.name;
2889  			target->d_name.name = target->d_iname;
2890  		}
2891  	} else {
2892  		if (unlikely(dname_external(dentry))) {
2893  			/*
2894  			 * dentry:external, target:internal.  Give dentry's
2895  			 * storage to target and make dentry internal
2896  			 */
2897  			memcpy(dentry->d_iname, target->d_name.name,
2898  					target->d_name.len + 1);
2899  			target->d_name.name = dentry->d_name.name;
2900  			dentry->d_name.name = dentry->d_iname;
2901  		} else {
2902  			/*
2903  			 * Both are internal.
2904  			 */
2905  			unsigned int i;
2906  			BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
2907  			for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
2908  				swap(((long *) &dentry->d_iname)[i],
2909  				     ((long *) &target->d_iname)[i]);
2910  			}
2911  		}
2912  	}
2913  	swap(dentry->d_name.hash_len, target->d_name.hash_len);
2914  }
2915  
2916  static void copy_name(struct dentry *dentry, struct dentry *target)
2917  {
2918  	struct external_name *old_name = NULL;
2919  	if (unlikely(dname_external(dentry)))
2920  		old_name = external_name(dentry);
2921  	if (unlikely(dname_external(target))) {
2922  		atomic_inc(&external_name(target)->u.count);
2923  		dentry->d_name = target->d_name;
2924  	} else {
2925  		memcpy(dentry->d_iname, target->d_name.name,
2926  				target->d_name.len + 1);
2927  		dentry->d_name.name = dentry->d_iname;
2928  		dentry->d_name.hash_len = target->d_name.hash_len;
2929  	}
2930  	if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
2931  		kfree_rcu(old_name, u.head);
2932  }
2933  
2934  /*
2935   * __d_move - move a dentry
2936   * @dentry: entry to move
2937   * @target: new dentry
2938   * @exchange: exchange the two dentries
2939   *
2940   * Update the dcache to reflect the move of a file name. Negative
2941   * dcache entries should not be moved in this way. Caller must hold
2942   * rename_lock, the i_mutex of the source and target directories,
2943   * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2944   */
2945  static void __d_move(struct dentry *dentry, struct dentry *target,
2946  		     bool exchange)
2947  {
2948  	struct dentry *old_parent, *p;
2949  	wait_queue_head_t *d_wait;
2950  	struct inode *dir = NULL;
2951  	unsigned n;
2952  
2953  	WARN_ON(!dentry->d_inode);
2954  	if (WARN_ON(dentry == target))
2955  		return;
2956  
2957  	BUG_ON(d_ancestor(target, dentry));
2958  	old_parent = dentry->d_parent;
2959  	p = d_ancestor(old_parent, target);
2960  	if (IS_ROOT(dentry)) {
2961  		BUG_ON(p);
2962  		spin_lock(&target->d_parent->d_lock);
2963  	} else if (!p) {
2964  		/* target is not a descendent of dentry->d_parent */
2965  		spin_lock(&target->d_parent->d_lock);
2966  		spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED);
2967  	} else {
2968  		BUG_ON(p == dentry);
2969  		spin_lock(&old_parent->d_lock);
2970  		if (p != target)
2971  			spin_lock_nested(&target->d_parent->d_lock,
2972  					DENTRY_D_LOCK_NESTED);
2973  	}
2974  	spin_lock_nested(&dentry->d_lock, 2);
2975  	spin_lock_nested(&target->d_lock, 3);
2976  
2977  	if (unlikely(d_in_lookup(target))) {
2978  		dir = target->d_parent->d_inode;
2979  		n = start_dir_add(dir);
2980  		d_wait = __d_lookup_unhash(target);
2981  	}
2982  
2983  	write_seqcount_begin(&dentry->d_seq);
2984  	write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
2985  
2986  	/* unhash both */
2987  	if (!d_unhashed(dentry))
2988  		___d_drop(dentry);
2989  	if (!d_unhashed(target))
2990  		___d_drop(target);
2991  
2992  	/* ... and switch them in the tree */
2993  	dentry->d_parent = target->d_parent;
2994  	if (!exchange) {
2995  		copy_name(dentry, target);
2996  		target->d_hash.pprev = NULL;
2997  		dentry->d_parent->d_lockref.count++;
2998  		if (dentry != old_parent) /* wasn't IS_ROOT */
2999  			WARN_ON(!--old_parent->d_lockref.count);
3000  	} else {
3001  		target->d_parent = old_parent;
3002  		swap_names(dentry, target);
3003  		list_move(&target->d_child, &target->d_parent->d_subdirs);
3004  		__d_rehash(target);
3005  		fsnotify_update_flags(target);
3006  	}
3007  	list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
3008  	__d_rehash(dentry);
3009  	fsnotify_update_flags(dentry);
3010  	fscrypt_handle_d_move(dentry);
3011  
3012  	write_seqcount_end(&target->d_seq);
3013  	write_seqcount_end(&dentry->d_seq);
3014  
3015  	if (dir)
3016  		end_dir_add(dir, n, d_wait);
3017  
3018  	if (dentry->d_parent != old_parent)
3019  		spin_unlock(&dentry->d_parent->d_lock);
3020  	if (dentry != old_parent)
3021  		spin_unlock(&old_parent->d_lock);
3022  	spin_unlock(&target->d_lock);
3023  	spin_unlock(&dentry->d_lock);
3024  }
3025  
3026  /*
3027   * d_move - move a dentry
3028   * @dentry: entry to move
3029   * @target: new dentry
3030   *
3031   * Update the dcache to reflect the move of a file name. Negative
3032   * dcache entries should not be moved in this way. See the locking
3033   * requirements for __d_move.
3034   */
3035  void d_move(struct dentry *dentry, struct dentry *target)
3036  {
3037  	write_seqlock(&rename_lock);
3038  	__d_move(dentry, target, false);
3039  	write_sequnlock(&rename_lock);
3040  }
3041  EXPORT_SYMBOL(d_move);
3042  
3043  /*
3044   * d_exchange - exchange two dentries
3045   * @dentry1: first dentry
3046   * @dentry2: second dentry
3047   */
3048  void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
3049  {
3050  	write_seqlock(&rename_lock);
3051  
3052  	WARN_ON(!dentry1->d_inode);
3053  	WARN_ON(!dentry2->d_inode);
3054  	WARN_ON(IS_ROOT(dentry1));
3055  	WARN_ON(IS_ROOT(dentry2));
3056  
3057  	__d_move(dentry1, dentry2, true);
3058  
3059  	write_sequnlock(&rename_lock);
3060  }
3061  
3062  /**
3063   * d_ancestor - search for an ancestor
3064   * @p1: ancestor dentry
3065   * @p2: child dentry
3066   *
3067   * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
3068   * an ancestor of p2, else NULL.
3069   */
3070  struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
3071  {
3072  	struct dentry *p;
3073  
3074  	for (p = p2; !IS_ROOT(p); p = p->d_parent) {
3075  		if (p->d_parent == p1)
3076  			return p;
3077  	}
3078  	return NULL;
3079  }
3080  
3081  /*
3082   * This helper attempts to cope with remotely renamed directories
3083   *
3084   * It assumes that the caller is already holding
3085   * dentry->d_parent->d_inode->i_mutex, and rename_lock
3086   *
3087   * Note: If ever the locking in lock_rename() changes, then please
3088   * remember to update this too...
3089   */
3090  static int __d_unalias(struct inode *inode,
3091  		struct dentry *dentry, struct dentry *alias)
3092  {
3093  	struct mutex *m1 = NULL;
3094  	struct rw_semaphore *m2 = NULL;
3095  	int ret = -ESTALE;
3096  
3097  	/* If alias and dentry share a parent, then no extra locks required */
3098  	if (alias->d_parent == dentry->d_parent)
3099  		goto out_unalias;
3100  
3101  	/* See lock_rename() */
3102  	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
3103  		goto out_err;
3104  	m1 = &dentry->d_sb->s_vfs_rename_mutex;
3105  	if (!inode_trylock_shared(alias->d_parent->d_inode))
3106  		goto out_err;
3107  	m2 = &alias->d_parent->d_inode->i_rwsem;
3108  out_unalias:
3109  	__d_move(alias, dentry, false);
3110  	ret = 0;
3111  out_err:
3112  	if (m2)
3113  		up_read(m2);
3114  	if (m1)
3115  		mutex_unlock(m1);
3116  	return ret;
3117  }
3118  
3119  /**
3120   * d_splice_alias - splice a disconnected dentry into the tree if one exists
3121   * @inode:  the inode which may have a disconnected dentry
3122   * @dentry: a negative dentry which we want to point to the inode.
3123   *
3124   * If inode is a directory and has an IS_ROOT alias, then d_move that in
3125   * place of the given dentry and return it, else simply d_add the inode
3126   * to the dentry and return NULL.
3127   *
3128   * If a non-IS_ROOT directory is found, the filesystem is corrupt, and
3129   * we should error out: directories can't have multiple aliases.
3130   *
3131   * This is needed in the lookup routine of any filesystem that is exportable
3132   * (via knfsd) so that we can build dcache paths to directories effectively.
3133   *
3134   * If a dentry was found and moved, then it is returned.  Otherwise NULL
3135   * is returned.  This matches the expected return value of ->lookup.
3136   *
3137   * Cluster filesystems may call this function with a negative, hashed dentry.
3138   * In that case, we know that the inode will be a regular file, and also this
3139   * will only occur during atomic_open. So we need to check for the dentry
3140   * being already hashed only in the final case.
3141   */
3142  struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
3143  {
3144  	if (IS_ERR(inode))
3145  		return ERR_CAST(inode);
3146  
3147  	BUG_ON(!d_unhashed(dentry));
3148  
3149  	if (!inode)
3150  		goto out;
3151  
3152  	security_d_instantiate(dentry, inode);
3153  	spin_lock(&inode->i_lock);
3154  	if (S_ISDIR(inode->i_mode)) {
3155  		struct dentry *new = __d_find_any_alias(inode);
3156  		if (unlikely(new)) {
3157  			/* The reference to new ensures it remains an alias */
3158  			spin_unlock(&inode->i_lock);
3159  			write_seqlock(&rename_lock);
3160  			if (unlikely(d_ancestor(new, dentry))) {
3161  				write_sequnlock(&rename_lock);
3162  				dput(new);
3163  				new = ERR_PTR(-ELOOP);
3164  				pr_warn_ratelimited(
3165  					"VFS: Lookup of '%s' in %s %s"
3166  					" would have caused loop\n",
3167  					dentry->d_name.name,
3168  					inode->i_sb->s_type->name,
3169  					inode->i_sb->s_id);
3170  			} else if (!IS_ROOT(new)) {
3171  				struct dentry *old_parent = dget(new->d_parent);
3172  				int err = __d_unalias(inode, dentry, new);
3173  				write_sequnlock(&rename_lock);
3174  				if (err) {
3175  					dput(new);
3176  					new = ERR_PTR(err);
3177  				}
3178  				dput(old_parent);
3179  			} else {
3180  				__d_move(new, dentry, false);
3181  				write_sequnlock(&rename_lock);
3182  			}
3183  			iput(inode);
3184  			return new;
3185  		}
3186  	}
3187  out:
3188  	__d_add(dentry, inode);
3189  	return NULL;
3190  }
3191  EXPORT_SYMBOL(d_splice_alias);
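
/*
 * Example (illustrative sketch): the preferred ->lookup() tail for an
 * exportable filesystem, with a hypothetical example_lookup_inode() that
 * may return an inode, NULL, or an ERR_PTR:
 *
 *	struct inode *inode = example_lookup_inode(dir, &dentry->d_name);
 *
 *	return d_splice_alias(inode, dentry);
 */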
3192  
3193  /*
3194   * Test whether new_dentry is a subdirectory of old_dentry.
3195   *
3196   * Trivially implemented using the dcache structure
3197   */
3198  
3199  /**
3200   * is_subdir - is new dentry a subdirectory of old_dentry
3201   * @new_dentry: new dentry
3202   * @old_dentry: old dentry
3203   *
3204   * Returns true if new_dentry is a subdirectory of the parent (at any depth).
3205   * Returns false otherwise.
3206   * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
3207   * Caller must ensure that "new_dentry" is pinned before calling is_subdir().
3208  
3209  bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
3210  {
3211  	bool subdir;
3212  	unsigned seq;
3213  
3214  	if (new_dentry == old_dentry)
3215  		return true;
3216  
3217  	/* Access d_parent under rcu as d_move() may change it. */
3218  	rcu_read_lock();
3219  	seq = read_seqbegin(&rename_lock);
3220  	subdir = d_ancestor(old_dentry, new_dentry);
3221  	 /* Try lockless once... */
3222  	if (read_seqretry(&rename_lock, seq)) {
3223  		/* ...else acquire lock for progress even on deep chains. */
3224  		read_seqlock_excl(&rename_lock);
3225  		subdir = d_ancestor(old_dentry, new_dentry);
3226  		read_sequnlock_excl(&rename_lock);
3227  	}
3228  	rcu_read_unlock();
3229  	return subdir;
3230  }
3231  EXPORT_SYMBOL(is_subdir);
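
/*
 * Example (illustrative sketch): refusing an operation when the target
 * dentry is the given root or anything below it:
 *
 *	if (is_subdir(victim, root))
 *		return -EINVAL;
 */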
3232  
3233  static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
3234  {
3235  	struct dentry *root = data;
3236  	if (dentry != root) {
3237  		if (d_unhashed(dentry) || !dentry->d_inode)
3238  			return D_WALK_SKIP;
3239  
3240  		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
3241  			dentry->d_flags |= DCACHE_GENOCIDE;
3242  			dentry->d_lockref.count--;
3243  		}
3244  	}
3245  	return D_WALK_CONTINUE;
3246  }
3247  
3248  void d_genocide(struct dentry *parent)
3249  {
3250  	d_walk(parent, parent, d_genocide_kill);
3251  }
3252  
3253  void d_tmpfile(struct file *file, struct inode *inode)
3254  {
3255  	struct dentry *dentry = file->f_path.dentry;
3256  
3257  	inode_dec_link_count(inode);
3258  	BUG_ON(dentry->d_name.name != dentry->d_iname ||
3259  		!hlist_unhashed(&dentry->d_u.d_alias) ||
3260  		!d_unlinked(dentry));
3261  	spin_lock(&dentry->d_parent->d_lock);
3262  	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
3263  	dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
3264  				(unsigned long long)inode->i_ino);
3265  	spin_unlock(&dentry->d_lock);
3266  	spin_unlock(&dentry->d_parent->d_lock);
3267  	d_instantiate(dentry, inode);
3268  }
3269  EXPORT_SYMBOL(d_tmpfile);
3270  
3271  static __initdata unsigned long dhash_entries;
3272  static int __init set_dhash_entries(char *str)
3273  {
3274  	if (!str)
3275  		return 0;
3276  	dhash_entries = simple_strtoul(str, &str, 0);
3277  	return 1;
3278  }
3279  __setup("dhash_entries=", set_dhash_entries);
3280  
3281  static void __init dcache_init_early(void)
3282  {
3283  	/* If hashes are distributed across NUMA nodes, defer
3284  	 * hash allocation until vmalloc space is available.
3285  	 */
3286  	if (hashdist)
3287  		return;
3288  
3289  	dentry_hashtable =
3290  		alloc_large_system_hash("Dentry cache",
3291  					sizeof(struct hlist_bl_head),
3292  					dhash_entries,
3293  					13,
3294  					HASH_EARLY | HASH_ZERO,
3295  					&d_hash_shift,
3296  					NULL,
3297  					0,
3298  					0);
3299  	d_hash_shift = 32 - d_hash_shift;
3300  }
3301  
3302  static void __init dcache_init(void)
3303  {
3304  	/*
3305  	 * A constructor could be added for stable state like the lists,
3306  	 * but it is probably not worth it because of the cache nature
3307  	 * of the dcache.
3308  	 */
3309  	dentry_cache = KMEM_CACHE_USERCOPY(dentry,
3310  		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
3311  		d_iname);
3312  
3313  	/* Hash may have been set up in dcache_init_early */
3314  	if (!hashdist)
3315  		return;
3316  
3317  	dentry_hashtable =
3318  		alloc_large_system_hash("Dentry cache",
3319  					sizeof(struct hlist_bl_head),
3320  					dhash_entries,
3321  					13,
3322  					HASH_ZERO,
3323  					&d_hash_shift,
3324  					NULL,
3325  					0,
3326  					0);
3327  	d_hash_shift = 32 - d_hash_shift;
3328  }
3329  
3330  /* SLAB cache for __getname() consumers */
3331  struct kmem_cache *names_cachep __read_mostly;
3332  EXPORT_SYMBOL(names_cachep);
3333  
3334  void __init vfs_caches_init_early(void)
3335  {
3336  	int i;
3337  
3338  	for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++)
3339  		INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]);
3340  
3341  	dcache_init_early();
3342  	inode_init_early();
3343  }
3344  
3345  void __init vfs_caches_init(void)
3346  {
3347  	names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0,
3348  			SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL);
3349  
3350  	dcache_init();
3351  	inode_init();
3352  	files_init();
3353  	files_maxfiles_init();
3354  	mnt_init();
3355  	bdev_cache_init();
3356  	chrdev_init();
3357  }
3358