xref: /openbmc/linux/fs/kernfs/dir.c (revision 9f6df573)
1b8441ed2STejun Heo /*
2b8441ed2STejun Heo  * fs/kernfs/dir.c - kernfs directory implementation
3b8441ed2STejun Heo  *
4b8441ed2STejun Heo  * Copyright (c) 2001-3 Patrick Mochel
5b8441ed2STejun Heo  * Copyright (c) 2007 SUSE Linux Products GmbH
6b8441ed2STejun Heo  * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7b8441ed2STejun Heo  *
8b8441ed2STejun Heo  * This file is released under the GPLv2.
9b8441ed2STejun Heo  */
10fd7b9f7bSTejun Heo 
11abd54f02STejun Heo #include <linux/sched.h>
12fd7b9f7bSTejun Heo #include <linux/fs.h>
13fd7b9f7bSTejun Heo #include <linux/namei.h>
14fd7b9f7bSTejun Heo #include <linux/idr.h>
15fd7b9f7bSTejun Heo #include <linux/slab.h>
16fd7b9f7bSTejun Heo #include <linux/security.h>
17fd7b9f7bSTejun Heo #include <linux/hash.h>
18fd7b9f7bSTejun Heo 
19fd7b9f7bSTejun Heo #include "kernfs-internal.h"
20fd7b9f7bSTejun Heo 
21a797bfc3STejun Heo DEFINE_MUTEX(kernfs_mutex);
223eef34adSTejun Heo static DEFINE_SPINLOCK(kernfs_rename_lock);	/* kn->parent and ->name */
233eef34adSTejun Heo static char kernfs_pr_cont_buf[PATH_MAX];	/* protected by rename_lock */
24fd7b9f7bSTejun Heo 
25adc5e8b5STejun Heo #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
26fd7b9f7bSTejun Heo 
2781c173cbSTejun Heo static bool kernfs_active(struct kernfs_node *kn)
2881c173cbSTejun Heo {
2981c173cbSTejun Heo 	lockdep_assert_held(&kernfs_mutex);
3081c173cbSTejun Heo 	return atomic_read(&kn->active) >= 0;
3181c173cbSTejun Heo }
3281c173cbSTejun Heo 
/*
 * Tell whether lockdep annotations apply to @kn.  Always false unless
 * CONFIG_DEBUG_LOCK_ALLOC is set; otherwise decided by the node's
 * KERNFS_LOCKDEP flag.
 */
static bool kernfs_lockdep(struct kernfs_node *kn)
{
#ifndef CONFIG_DEBUG_LOCK_ALLOC
	return false;
#else
	return (kn->flags & KERNFS_LOCKDEP) != 0;
#endif
}
41182fd64bSTejun Heo 
423eef34adSTejun Heo static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
433eef34adSTejun Heo {
443eef34adSTejun Heo 	return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
453eef34adSTejun Heo }
463eef34adSTejun Heo 
479f6df573SAditya Kali /* kernfs_node_depth - compute depth from @from to @to */
489f6df573SAditya Kali static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
493eef34adSTejun Heo {
509f6df573SAditya Kali 	size_t depth = 0;
513eef34adSTejun Heo 
529f6df573SAditya Kali 	while (to->parent && to != from) {
539f6df573SAditya Kali 		depth++;
549f6df573SAditya Kali 		to = to->parent;
553eef34adSTejun Heo 	}
569f6df573SAditya Kali 	return depth;
579f6df573SAditya Kali }
583eef34adSTejun Heo 
599f6df573SAditya Kali static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
609f6df573SAditya Kali 						  struct kernfs_node *b)
619f6df573SAditya Kali {
629f6df573SAditya Kali 	size_t da, db;
639f6df573SAditya Kali 	struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);
649f6df573SAditya Kali 
659f6df573SAditya Kali 	if (ra != rb)
669f6df573SAditya Kali 		return NULL;
679f6df573SAditya Kali 
689f6df573SAditya Kali 	da = kernfs_depth(ra->kn, a);
699f6df573SAditya Kali 	db = kernfs_depth(rb->kn, b);
709f6df573SAditya Kali 
719f6df573SAditya Kali 	while (da > db) {
729f6df573SAditya Kali 		a = a->parent;
739f6df573SAditya Kali 		da--;
749f6df573SAditya Kali 	}
759f6df573SAditya Kali 	while (db > da) {
769f6df573SAditya Kali 		b = b->parent;
779f6df573SAditya Kali 		db--;
789f6df573SAditya Kali 	}
799f6df573SAditya Kali 
809f6df573SAditya Kali 	/* worst case b and a will be the same at root */
819f6df573SAditya Kali 	while (b != a) {
829f6df573SAditya Kali 		b = b->parent;
839f6df573SAditya Kali 		a = a->parent;
849f6df573SAditya Kali 	}
859f6df573SAditya Kali 
869f6df573SAditya Kali 	return a;
879f6df573SAditya Kali }
889f6df573SAditya Kali 
899f6df573SAditya Kali /**
909f6df573SAditya Kali  * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to,
919f6df573SAditya Kali  * where kn_from is treated as root of the path.
929f6df573SAditya Kali  * @kn_from: kernfs node which should be treated as root for the path
939f6df573SAditya Kali  * @kn_to: kernfs node to which path is needed
949f6df573SAditya Kali  * @buf: buffer to copy the path into
959f6df573SAditya Kali  * @buflen: size of @buf
969f6df573SAditya Kali  *
979f6df573SAditya Kali  * We need to handle couple of scenarios here:
989f6df573SAditya Kali  * [1] when @kn_from is an ancestor of @kn_to at some level
999f6df573SAditya Kali  * kn_from: /n1/n2/n3
1009f6df573SAditya Kali  * kn_to:   /n1/n2/n3/n4/n5
1019f6df573SAditya Kali  * result:  /n4/n5
1029f6df573SAditya Kali  *
1039f6df573SAditya Kali  * [2] when @kn_from is on a different hierarchy and we need to find common
1049f6df573SAditya Kali  * ancestor between @kn_from and @kn_to.
1059f6df573SAditya Kali  * kn_from: /n1/n2/n3/n4
1069f6df573SAditya Kali  * kn_to:   /n1/n2/n5
1079f6df573SAditya Kali  * result:  /../../n5
1089f6df573SAditya Kali  * OR
1099f6df573SAditya Kali  * kn_from: /n1/n2/n3/n4/n5   [depth=5]
1109f6df573SAditya Kali  * kn_to:   /n1/n2/n3         [depth=3]
1119f6df573SAditya Kali  * result:  /../..
1129f6df573SAditya Kali  *
1139f6df573SAditya Kali  * return value: length of the string.  If greater than buflen,
1149f6df573SAditya Kali  * then contents of buf are undefined.  On error, -1 is returned.
1159f6df573SAditya Kali  */
1169f6df573SAditya Kali static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
1179f6df573SAditya Kali 					struct kernfs_node *kn_from,
1189f6df573SAditya Kali 					char *buf, size_t buflen)
1199f6df573SAditya Kali {
1209f6df573SAditya Kali 	struct kernfs_node *kn, *common;
1219f6df573SAditya Kali 	const char parent_str[] = "/..";
1229f6df573SAditya Kali 	size_t depth_from, depth_to, len = 0, nlen = 0;
1239f6df573SAditya Kali 	char *p;
1249f6df573SAditya Kali 	int i;
1259f6df573SAditya Kali 
1269f6df573SAditya Kali 	if (!kn_from)
1279f6df573SAditya Kali 		kn_from = kernfs_root(kn_to)->kn;
1289f6df573SAditya Kali 
1299f6df573SAditya Kali 	if (kn_from == kn_to)
1309f6df573SAditya Kali 		return strlcpy(buf, "/", buflen);
1319f6df573SAditya Kali 
1329f6df573SAditya Kali 	common = kernfs_common_ancestor(kn_from, kn_to);
1339f6df573SAditya Kali 	if (WARN_ON(!common))
1349f6df573SAditya Kali 		return -1;
1359f6df573SAditya Kali 
1369f6df573SAditya Kali 	depth_to = kernfs_depth(common, kn_to);
1379f6df573SAditya Kali 	depth_from = kernfs_depth(common, kn_from);
1389f6df573SAditya Kali 
1399f6df573SAditya Kali 	if (buf)
1409f6df573SAditya Kali 		buf[0] = '\0';
1419f6df573SAditya Kali 
1429f6df573SAditya Kali 	for (i = 0; i < depth_from; i++)
1439f6df573SAditya Kali 		len += strlcpy(buf + len, parent_str,
1449f6df573SAditya Kali 			       len < buflen ? buflen - len : 0);
1459f6df573SAditya Kali 
1469f6df573SAditya Kali 	/* Calculate how many bytes we need for the rest */
1479f6df573SAditya Kali 	for (kn = kn_to; kn != common; kn = kn->parent)
1489f6df573SAditya Kali 		nlen += strlen(kn->name) + 1;
1499f6df573SAditya Kali 
1509f6df573SAditya Kali 	if (len + nlen >= buflen)
1519f6df573SAditya Kali 		return len + nlen;
1529f6df573SAditya Kali 
1539f6df573SAditya Kali 	p = buf + len + nlen;
1549f6df573SAditya Kali 	*p = '\0';
1559f6df573SAditya Kali 	for (kn = kn_to; kn != common; kn = kn->parent) {
1569f6df573SAditya Kali 		nlen = strlen(kn->name);
1579f6df573SAditya Kali 		p -= nlen;
1589f6df573SAditya Kali 		memcpy(p, kn->name, nlen);
1599f6df573SAditya Kali 		*(--p) = '/';
1609f6df573SAditya Kali 	}
1619f6df573SAditya Kali 
1629f6df573SAditya Kali 	return len + nlen;
1633eef34adSTejun Heo }
1643eef34adSTejun Heo 
1653eef34adSTejun Heo /**
1663eef34adSTejun Heo  * kernfs_name - obtain the name of a given node
1673eef34adSTejun Heo  * @kn: kernfs_node of interest
1683eef34adSTejun Heo  * @buf: buffer to copy @kn's name into
1693eef34adSTejun Heo  * @buflen: size of @buf
1703eef34adSTejun Heo  *
1713eef34adSTejun Heo  * Copies the name of @kn into @buf of @buflen bytes.  The behavior is
1723eef34adSTejun Heo  * similar to strlcpy().  It returns the length of @kn's name and if @buf
1733eef34adSTejun Heo  * isn't long enough, it's filled upto @buflen-1 and nul terminated.
1743eef34adSTejun Heo  *
1753eef34adSTejun Heo  * This function can be called from any context.
1763eef34adSTejun Heo  */
1773eef34adSTejun Heo int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
1783eef34adSTejun Heo {
1793eef34adSTejun Heo 	unsigned long flags;
1803eef34adSTejun Heo 	int ret;
1813eef34adSTejun Heo 
1823eef34adSTejun Heo 	spin_lock_irqsave(&kernfs_rename_lock, flags);
1833eef34adSTejun Heo 	ret = kernfs_name_locked(kn, buf, buflen);
1843eef34adSTejun Heo 	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
1853eef34adSTejun Heo 	return ret;
1863eef34adSTejun Heo }
1873eef34adSTejun Heo 
1883eef34adSTejun Heo /**
1899acee9c5STejun Heo  * kernfs_path_len - determine the length of the full path of a given node
1909acee9c5STejun Heo  * @kn: kernfs_node of interest
1919acee9c5STejun Heo  *
1929acee9c5STejun Heo  * The returned length doesn't include the space for the terminating '\0'.
1939acee9c5STejun Heo  */
1949acee9c5STejun Heo size_t kernfs_path_len(struct kernfs_node *kn)
1959acee9c5STejun Heo {
1969acee9c5STejun Heo 	size_t len = 0;
1979acee9c5STejun Heo 	unsigned long flags;
1989acee9c5STejun Heo 
1999acee9c5STejun Heo 	spin_lock_irqsave(&kernfs_rename_lock, flags);
2009acee9c5STejun Heo 
2019acee9c5STejun Heo 	do {
2029acee9c5STejun Heo 		len += strlen(kn->name) + 1;
2039acee9c5STejun Heo 		kn = kn->parent;
2049acee9c5STejun Heo 	} while (kn && kn->parent);
2059acee9c5STejun Heo 
2069acee9c5STejun Heo 	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
2079acee9c5STejun Heo 
2089acee9c5STejun Heo 	return len;
2099acee9c5STejun Heo }
2109acee9c5STejun Heo 
2119acee9c5STejun Heo /**
2129f6df573SAditya Kali  * kernfs_path_from_node - build path of node @to relative to @from.
2139f6df573SAditya Kali  * @from: parent kernfs_node relative to which we need to build the path
2149f6df573SAditya Kali  * @to: kernfs_node of interest
2159f6df573SAditya Kali  * @buf: buffer to copy @to's path into
2169f6df573SAditya Kali  * @buflen: size of @buf
2179f6df573SAditya Kali  *
2189f6df573SAditya Kali  * Builds @to's path relative to @from in @buf. @from and @to must
2199f6df573SAditya Kali  * be on the same kernfs-root. If @from is not parent of @to, then a relative
2209f6df573SAditya Kali  * path (which includes '..'s) as needed to reach from @from to @to is
2219f6df573SAditya Kali  * returned.
2229f6df573SAditya Kali  *
2239f6df573SAditya Kali  * If @buf isn't long enough, the return value will be greater than @buflen
2249f6df573SAditya Kali  * and @buf contents are undefined.
2259f6df573SAditya Kali  */
2269f6df573SAditya Kali int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
2279f6df573SAditya Kali 			  char *buf, size_t buflen)
2289f6df573SAditya Kali {
2299f6df573SAditya Kali 	unsigned long flags;
2309f6df573SAditya Kali 	int ret;
2319f6df573SAditya Kali 
2329f6df573SAditya Kali 	spin_lock_irqsave(&kernfs_rename_lock, flags);
2339f6df573SAditya Kali 	ret = kernfs_path_from_node_locked(to, from, buf, buflen);
2349f6df573SAditya Kali 	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
2359f6df573SAditya Kali 	return ret;
2369f6df573SAditya Kali }
2379f6df573SAditya Kali EXPORT_SYMBOL_GPL(kernfs_path_from_node);
2389f6df573SAditya Kali 
/**
 * kernfs_path - build full path of a given node
 * @kn: kernfs_node of interest
 * @buf: buffer to copy @kn's path into
 * @buflen: size of @buf
 *
 * Builds the path of @kn via kernfs_path_from_node() with a %NULL @from
 * node.  Returns @buf when that call reports a non-negative length smaller
 * than @buflen, %NULL otherwise.
 */
char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
{
	int pathlen = kernfs_path_from_node(kn, NULL, buf, buflen);

	if (pathlen >= 0 && pathlen < buflen)
		return buf;

	return NULL;
}
EXPORT_SYMBOL_GPL(kernfs_path);
2603eef34adSTejun Heo 
2613eef34adSTejun Heo /**
2623eef34adSTejun Heo  * pr_cont_kernfs_name - pr_cont name of a kernfs_node
2633eef34adSTejun Heo  * @kn: kernfs_node of interest
2643eef34adSTejun Heo  *
2653eef34adSTejun Heo  * This function can be called from any context.
2663eef34adSTejun Heo  */
2673eef34adSTejun Heo void pr_cont_kernfs_name(struct kernfs_node *kn)
2683eef34adSTejun Heo {
2693eef34adSTejun Heo 	unsigned long flags;
2703eef34adSTejun Heo 
2713eef34adSTejun Heo 	spin_lock_irqsave(&kernfs_rename_lock, flags);
2723eef34adSTejun Heo 
2733eef34adSTejun Heo 	kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
2743eef34adSTejun Heo 	pr_cont("%s", kernfs_pr_cont_buf);
2753eef34adSTejun Heo 
2763eef34adSTejun Heo 	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
2773eef34adSTejun Heo }
2783eef34adSTejun Heo 
2793eef34adSTejun Heo /**
2803eef34adSTejun Heo  * pr_cont_kernfs_path - pr_cont path of a kernfs_node
2813eef34adSTejun Heo  * @kn: kernfs_node of interest
2823eef34adSTejun Heo  *
2833eef34adSTejun Heo  * This function can be called from any context.
2843eef34adSTejun Heo  */
2853eef34adSTejun Heo void pr_cont_kernfs_path(struct kernfs_node *kn)
2863eef34adSTejun Heo {
2873eef34adSTejun Heo 	unsigned long flags;
2889f6df573SAditya Kali 	int sz;
2893eef34adSTejun Heo 
2903eef34adSTejun Heo 	spin_lock_irqsave(&kernfs_rename_lock, flags);
2913eef34adSTejun Heo 
2929f6df573SAditya Kali 	sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
2933eef34adSTejun Heo 					  sizeof(kernfs_pr_cont_buf));
2949f6df573SAditya Kali 	if (sz < 0) {
2959f6df573SAditya Kali 		pr_cont("(error)");
2969f6df573SAditya Kali 		goto out;
2979f6df573SAditya Kali 	}
2983eef34adSTejun Heo 
2999f6df573SAditya Kali 	if (sz >= sizeof(kernfs_pr_cont_buf)) {
3009f6df573SAditya Kali 		pr_cont("(name too long)");
3019f6df573SAditya Kali 		goto out;
3029f6df573SAditya Kali 	}
3039f6df573SAditya Kali 
3049f6df573SAditya Kali 	pr_cont("%s", kernfs_pr_cont_buf);
3059f6df573SAditya Kali 
3069f6df573SAditya Kali out:
3073eef34adSTejun Heo 	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
3083eef34adSTejun Heo }
3093eef34adSTejun Heo 
3103eef34adSTejun Heo /**
3113eef34adSTejun Heo  * kernfs_get_parent - determine the parent node and pin it
3123eef34adSTejun Heo  * @kn: kernfs_node of interest
3133eef34adSTejun Heo  *
3143eef34adSTejun Heo  * Determines @kn's parent, pins and returns it.  This function can be
3153eef34adSTejun Heo  * called from any context.
3163eef34adSTejun Heo  */
3173eef34adSTejun Heo struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
3183eef34adSTejun Heo {
3193eef34adSTejun Heo 	struct kernfs_node *parent;
3203eef34adSTejun Heo 	unsigned long flags;
3213eef34adSTejun Heo 
3223eef34adSTejun Heo 	spin_lock_irqsave(&kernfs_rename_lock, flags);
3233eef34adSTejun Heo 	parent = kn->parent;
3243eef34adSTejun Heo 	kernfs_get(parent);
3253eef34adSTejun Heo 	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
3263eef34adSTejun Heo 
3273eef34adSTejun Heo 	return parent;
3283eef34adSTejun Heo }
3293eef34adSTejun Heo 
/**
 *	kernfs_name_hash
 *	@name: Null terminated string to hash
 *	@ns:   Namespace tag to hash
 *
 *	Returns 31 bit hash of ns + name (so it fits in an off_t ).  The
 *	result is always within [2, INT_MAX - 1].
 */
static unsigned int kernfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash();
	unsigned int len = strlen(name);
	unsigned int i;

	for (i = 0; i < len; i++)
		hash = partial_name_hash(name[i], hash);

	/* mix in the namespace tag and keep the low 31 bits */
	hash = end_name_hash(hash) ^ hash_ptr((void *)ns, 31);
	hash &= 0x7fffffffU;

	/* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;

	return hash;
}
352fd7b9f7bSTejun Heo 
353c637b8acSTejun Heo static int kernfs_name_compare(unsigned int hash, const char *name,
354324a56e1STejun Heo 			       const void *ns, const struct kernfs_node *kn)
355fd7b9f7bSTejun Heo {
35672392ed0SRasmus Villemoes 	if (hash < kn->hash)
35772392ed0SRasmus Villemoes 		return -1;
35872392ed0SRasmus Villemoes 	if (hash > kn->hash)
35972392ed0SRasmus Villemoes 		return 1;
36072392ed0SRasmus Villemoes 	if (ns < kn->ns)
36172392ed0SRasmus Villemoes 		return -1;
36272392ed0SRasmus Villemoes 	if (ns > kn->ns)
36372392ed0SRasmus Villemoes 		return 1;
364adc5e8b5STejun Heo 	return strcmp(name, kn->name);
365fd7b9f7bSTejun Heo }
366fd7b9f7bSTejun Heo 
367c637b8acSTejun Heo static int kernfs_sd_compare(const struct kernfs_node *left,
368324a56e1STejun Heo 			     const struct kernfs_node *right)
369fd7b9f7bSTejun Heo {
370c637b8acSTejun Heo 	return kernfs_name_compare(left->hash, left->name, left->ns, right);
371fd7b9f7bSTejun Heo }
372fd7b9f7bSTejun Heo 
373fd7b9f7bSTejun Heo /**
374c637b8acSTejun Heo  *	kernfs_link_sibling - link kernfs_node into sibling rbtree
375324a56e1STejun Heo  *	@kn: kernfs_node of interest
376fd7b9f7bSTejun Heo  *
377324a56e1STejun Heo  *	Link @kn into its sibling rbtree which starts from
378adc5e8b5STejun Heo  *	@kn->parent->dir.children.
379fd7b9f7bSTejun Heo  *
380fd7b9f7bSTejun Heo  *	Locking:
381a797bfc3STejun Heo  *	mutex_lock(kernfs_mutex)
382fd7b9f7bSTejun Heo  *
383fd7b9f7bSTejun Heo  *	RETURNS:
384fd7b9f7bSTejun Heo  *	0 on susccess -EEXIST on failure.
385fd7b9f7bSTejun Heo  */
386c637b8acSTejun Heo static int kernfs_link_sibling(struct kernfs_node *kn)
387fd7b9f7bSTejun Heo {
388adc5e8b5STejun Heo 	struct rb_node **node = &kn->parent->dir.children.rb_node;
389fd7b9f7bSTejun Heo 	struct rb_node *parent = NULL;
390fd7b9f7bSTejun Heo 
391fd7b9f7bSTejun Heo 	while (*node) {
392324a56e1STejun Heo 		struct kernfs_node *pos;
393fd7b9f7bSTejun Heo 		int result;
394fd7b9f7bSTejun Heo 
395324a56e1STejun Heo 		pos = rb_to_kn(*node);
396fd7b9f7bSTejun Heo 		parent = *node;
397c637b8acSTejun Heo 		result = kernfs_sd_compare(kn, pos);
398fd7b9f7bSTejun Heo 		if (result < 0)
399adc5e8b5STejun Heo 			node = &pos->rb.rb_left;
400fd7b9f7bSTejun Heo 		else if (result > 0)
401adc5e8b5STejun Heo 			node = &pos->rb.rb_right;
402fd7b9f7bSTejun Heo 		else
403fd7b9f7bSTejun Heo 			return -EEXIST;
404fd7b9f7bSTejun Heo 	}
405c1befb88SJianyu Zhan 
406fd7b9f7bSTejun Heo 	/* add new node and rebalance the tree */
407adc5e8b5STejun Heo 	rb_link_node(&kn->rb, parent, node);
408adc5e8b5STejun Heo 	rb_insert_color(&kn->rb, &kn->parent->dir.children);
409c1befb88SJianyu Zhan 
410c1befb88SJianyu Zhan 	/* successfully added, account subdir number */
411c1befb88SJianyu Zhan 	if (kernfs_type(kn) == KERNFS_DIR)
412c1befb88SJianyu Zhan 		kn->parent->dir.subdirs++;
413c1befb88SJianyu Zhan 
414fd7b9f7bSTejun Heo 	return 0;
415fd7b9f7bSTejun Heo }
416fd7b9f7bSTejun Heo 
417fd7b9f7bSTejun Heo /**
418c637b8acSTejun Heo  *	kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
419324a56e1STejun Heo  *	@kn: kernfs_node of interest
420fd7b9f7bSTejun Heo  *
42135beab06STejun Heo  *	Try to unlink @kn from its sibling rbtree which starts from
42235beab06STejun Heo  *	kn->parent->dir.children.  Returns %true if @kn was actually
42335beab06STejun Heo  *	removed, %false if @kn wasn't on the rbtree.
424fd7b9f7bSTejun Heo  *
425fd7b9f7bSTejun Heo  *	Locking:
426a797bfc3STejun Heo  *	mutex_lock(kernfs_mutex)
427fd7b9f7bSTejun Heo  */
42835beab06STejun Heo static bool kernfs_unlink_sibling(struct kernfs_node *kn)
429fd7b9f7bSTejun Heo {
43035beab06STejun Heo 	if (RB_EMPTY_NODE(&kn->rb))
43135beab06STejun Heo 		return false;
43235beab06STejun Heo 
433df23fc39STejun Heo 	if (kernfs_type(kn) == KERNFS_DIR)
434adc5e8b5STejun Heo 		kn->parent->dir.subdirs--;
435fd7b9f7bSTejun Heo 
436adc5e8b5STejun Heo 	rb_erase(&kn->rb, &kn->parent->dir.children);
43735beab06STejun Heo 	RB_CLEAR_NODE(&kn->rb);
43835beab06STejun Heo 	return true;
439fd7b9f7bSTejun Heo }
440fd7b9f7bSTejun Heo 
441fd7b9f7bSTejun Heo /**
442c637b8acSTejun Heo  *	kernfs_get_active - get an active reference to kernfs_node
443324a56e1STejun Heo  *	@kn: kernfs_node to get an active reference to
444fd7b9f7bSTejun Heo  *
445324a56e1STejun Heo  *	Get an active reference of @kn.  This function is noop if @kn
446fd7b9f7bSTejun Heo  *	is NULL.
447fd7b9f7bSTejun Heo  *
448fd7b9f7bSTejun Heo  *	RETURNS:
449324a56e1STejun Heo  *	Pointer to @kn on success, NULL on failure.
450fd7b9f7bSTejun Heo  */
451c637b8acSTejun Heo struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
452fd7b9f7bSTejun Heo {
453324a56e1STejun Heo 	if (unlikely(!kn))
454fd7b9f7bSTejun Heo 		return NULL;
455fd7b9f7bSTejun Heo 
456f4b3e631SGreg Kroah-Hartman 	if (!atomic_inc_unless_negative(&kn->active))
457f4b3e631SGreg Kroah-Hartman 		return NULL;
458f4b3e631SGreg Kroah-Hartman 
459182fd64bSTejun Heo 	if (kernfs_lockdep(kn))
460324a56e1STejun Heo 		rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
461324a56e1STejun Heo 	return kn;
462fd7b9f7bSTejun Heo }
463fd7b9f7bSTejun Heo 
464fd7b9f7bSTejun Heo /**
465c637b8acSTejun Heo  *	kernfs_put_active - put an active reference to kernfs_node
466324a56e1STejun Heo  *	@kn: kernfs_node to put an active reference to
467fd7b9f7bSTejun Heo  *
468324a56e1STejun Heo  *	Put an active reference to @kn.  This function is noop if @kn
469fd7b9f7bSTejun Heo  *	is NULL.
470fd7b9f7bSTejun Heo  */
471c637b8acSTejun Heo void kernfs_put_active(struct kernfs_node *kn)
472fd7b9f7bSTejun Heo {
473abd54f02STejun Heo 	struct kernfs_root *root = kernfs_root(kn);
474fd7b9f7bSTejun Heo 	int v;
475fd7b9f7bSTejun Heo 
476324a56e1STejun Heo 	if (unlikely(!kn))
477fd7b9f7bSTejun Heo 		return;
478fd7b9f7bSTejun Heo 
479182fd64bSTejun Heo 	if (kernfs_lockdep(kn))
480324a56e1STejun Heo 		rwsem_release(&kn->dep_map, 1, _RET_IP_);
481adc5e8b5STejun Heo 	v = atomic_dec_return(&kn->active);
482df23fc39STejun Heo 	if (likely(v != KN_DEACTIVATED_BIAS))
483fd7b9f7bSTejun Heo 		return;
484fd7b9f7bSTejun Heo 
485abd54f02STejun Heo 	wake_up_all(&root->deactivate_waitq);
486fd7b9f7bSTejun Heo }
487fd7b9f7bSTejun Heo 
488fd7b9f7bSTejun Heo /**
48981c173cbSTejun Heo  * kernfs_drain - drain kernfs_node
49081c173cbSTejun Heo  * @kn: kernfs_node to drain
491fd7b9f7bSTejun Heo  *
49281c173cbSTejun Heo  * Drain existing usages and nuke all existing mmaps of @kn.  Mutiple
49381c173cbSTejun Heo  * removers may invoke this function concurrently on @kn and all will
49481c173cbSTejun Heo  * return after draining is complete.
495fd7b9f7bSTejun Heo  */
49681c173cbSTejun Heo static void kernfs_drain(struct kernfs_node *kn)
49735beab06STejun Heo 	__releases(&kernfs_mutex) __acquires(&kernfs_mutex)
498fd7b9f7bSTejun Heo {
499abd54f02STejun Heo 	struct kernfs_root *root = kernfs_root(kn);
500fd7b9f7bSTejun Heo 
50135beab06STejun Heo 	lockdep_assert_held(&kernfs_mutex);
50281c173cbSTejun Heo 	WARN_ON_ONCE(kernfs_active(kn));
503abd54f02STejun Heo 
50435beab06STejun Heo 	mutex_unlock(&kernfs_mutex);
505abd54f02STejun Heo 
506182fd64bSTejun Heo 	if (kernfs_lockdep(kn)) {
50735beab06STejun Heo 		rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
50835beab06STejun Heo 		if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
50935beab06STejun Heo 			lock_contended(&kn->dep_map, _RET_IP_);
51035beab06STejun Heo 	}
51135beab06STejun Heo 
51235beab06STejun Heo 	/* but everyone should wait for draining */
513abd54f02STejun Heo 	wait_event(root->deactivate_waitq,
514abd54f02STejun Heo 		   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
515fd7b9f7bSTejun Heo 
516182fd64bSTejun Heo 	if (kernfs_lockdep(kn)) {
517324a56e1STejun Heo 		lock_acquired(&kn->dep_map, _RET_IP_);
518324a56e1STejun Heo 		rwsem_release(&kn->dep_map, 1, _RET_IP_);
519fd7b9f7bSTejun Heo 	}
52035beab06STejun Heo 
521ccf02aafSTejun Heo 	kernfs_unmap_bin_file(kn);
522ccf02aafSTejun Heo 
52335beab06STejun Heo 	mutex_lock(&kernfs_mutex);
524a6607930STejun Heo }
525fd7b9f7bSTejun Heo 
526fd7b9f7bSTejun Heo /**
527324a56e1STejun Heo  * kernfs_get - get a reference count on a kernfs_node
528324a56e1STejun Heo  * @kn: the target kernfs_node
529fd7b9f7bSTejun Heo  */
530324a56e1STejun Heo void kernfs_get(struct kernfs_node *kn)
531fd7b9f7bSTejun Heo {
532324a56e1STejun Heo 	if (kn) {
533adc5e8b5STejun Heo 		WARN_ON(!atomic_read(&kn->count));
534adc5e8b5STejun Heo 		atomic_inc(&kn->count);
535fd7b9f7bSTejun Heo 	}
536fd7b9f7bSTejun Heo }
537fd7b9f7bSTejun Heo EXPORT_SYMBOL_GPL(kernfs_get);
538fd7b9f7bSTejun Heo 
539fd7b9f7bSTejun Heo /**
540324a56e1STejun Heo  * kernfs_put - put a reference count on a kernfs_node
541324a56e1STejun Heo  * @kn: the target kernfs_node
542fd7b9f7bSTejun Heo  *
543324a56e1STejun Heo  * Put a reference count of @kn and destroy it if it reached zero.
544fd7b9f7bSTejun Heo  */
545324a56e1STejun Heo void kernfs_put(struct kernfs_node *kn)
546fd7b9f7bSTejun Heo {
547324a56e1STejun Heo 	struct kernfs_node *parent;
548ba7443bcSTejun Heo 	struct kernfs_root *root;
549fd7b9f7bSTejun Heo 
550adc5e8b5STejun Heo 	if (!kn || !atomic_dec_and_test(&kn->count))
551fd7b9f7bSTejun Heo 		return;
552324a56e1STejun Heo 	root = kernfs_root(kn);
553fd7b9f7bSTejun Heo  repeat:
55481c173cbSTejun Heo 	/*
55581c173cbSTejun Heo 	 * Moving/renaming is always done while holding reference.
556adc5e8b5STejun Heo 	 * kn->parent won't change beneath us.
557fd7b9f7bSTejun Heo 	 */
558adc5e8b5STejun Heo 	parent = kn->parent;
559fd7b9f7bSTejun Heo 
56081c173cbSTejun Heo 	WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
56181c173cbSTejun Heo 		  "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
56281c173cbSTejun Heo 		  parent ? parent->name : "", kn->name, atomic_read(&kn->active));
563fd7b9f7bSTejun Heo 
564df23fc39STejun Heo 	if (kernfs_type(kn) == KERNFS_LINK)
565adc5e8b5STejun Heo 		kernfs_put(kn->symlink.target_kn);
566dfeb0750STejun Heo 
56775287a67SAndrzej Hajda 	kfree_const(kn->name);
568dfeb0750STejun Heo 
569adc5e8b5STejun Heo 	if (kn->iattr) {
570adc5e8b5STejun Heo 		if (kn->iattr->ia_secdata)
571adc5e8b5STejun Heo 			security_release_secctx(kn->iattr->ia_secdata,
572adc5e8b5STejun Heo 						kn->iattr->ia_secdata_len);
573adc5e8b5STejun Heo 		simple_xattrs_free(&kn->iattr->xattrs);
5742322392bSTejun Heo 	}
575adc5e8b5STejun Heo 	kfree(kn->iattr);
576adc5e8b5STejun Heo 	ida_simple_remove(&root->ino_ida, kn->ino);
577a797bfc3STejun Heo 	kmem_cache_free(kernfs_node_cache, kn);
578fd7b9f7bSTejun Heo 
579324a56e1STejun Heo 	kn = parent;
580324a56e1STejun Heo 	if (kn) {
581adc5e8b5STejun Heo 		if (atomic_dec_and_test(&kn->count))
582fd7b9f7bSTejun Heo 			goto repeat;
583ba7443bcSTejun Heo 	} else {
584324a56e1STejun Heo 		/* just released the root kn, free @root too */
585bc755553STejun Heo 		ida_destroy(&root->ino_ida);
586ba7443bcSTejun Heo 		kfree(root);
587ba7443bcSTejun Heo 	}
588fd7b9f7bSTejun Heo }
589fd7b9f7bSTejun Heo EXPORT_SYMBOL_GPL(kernfs_put);
590fd7b9f7bSTejun Heo 
591c637b8acSTejun Heo static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
592fd7b9f7bSTejun Heo {
593324a56e1STejun Heo 	struct kernfs_node *kn;
594fd7b9f7bSTejun Heo 
595fd7b9f7bSTejun Heo 	if (flags & LOOKUP_RCU)
596fd7b9f7bSTejun Heo 		return -ECHILD;
597fd7b9f7bSTejun Heo 
59819bbb926STejun Heo 	/* Always perform fresh lookup for negatives */
5992b0143b5SDavid Howells 	if (d_really_is_negative(dentry))
60019bbb926STejun Heo 		goto out_bad_unlocked;
60119bbb926STejun Heo 
602324a56e1STejun Heo 	kn = dentry->d_fsdata;
603a797bfc3STejun Heo 	mutex_lock(&kernfs_mutex);
604fd7b9f7bSTejun Heo 
60581c173cbSTejun Heo 	/* The kernfs node has been deactivated */
60681c173cbSTejun Heo 	if (!kernfs_active(kn))
607fd7b9f7bSTejun Heo 		goto out_bad;
608fd7b9f7bSTejun Heo 
609c637b8acSTejun Heo 	/* The kernfs node has been moved? */
610adc5e8b5STejun Heo 	if (dentry->d_parent->d_fsdata != kn->parent)
611fd7b9f7bSTejun Heo 		goto out_bad;
612fd7b9f7bSTejun Heo 
613c637b8acSTejun Heo 	/* The kernfs node has been renamed */
614adc5e8b5STejun Heo 	if (strcmp(dentry->d_name.name, kn->name) != 0)
615fd7b9f7bSTejun Heo 		goto out_bad;
616fd7b9f7bSTejun Heo 
617c637b8acSTejun Heo 	/* The kernfs node has been moved to a different namespace */
618adc5e8b5STejun Heo 	if (kn->parent && kernfs_ns_enabled(kn->parent) &&
619c525aaddSTejun Heo 	    kernfs_info(dentry->d_sb)->ns != kn->ns)
620fd7b9f7bSTejun Heo 		goto out_bad;
621fd7b9f7bSTejun Heo 
622a797bfc3STejun Heo 	mutex_unlock(&kernfs_mutex);
623fd7b9f7bSTejun Heo 	return 1;
624fd7b9f7bSTejun Heo out_bad:
625a797bfc3STejun Heo 	mutex_unlock(&kernfs_mutex);
62619bbb926STejun Heo out_bad_unlocked:
627fd7b9f7bSTejun Heo 	return 0;
628fd7b9f7bSTejun Heo }
629fd7b9f7bSTejun Heo 
630c637b8acSTejun Heo static void kernfs_dop_release(struct dentry *dentry)
631fd7b9f7bSTejun Heo {
632fd7b9f7bSTejun Heo 	kernfs_put(dentry->d_fsdata);
633fd7b9f7bSTejun Heo }
634fd7b9f7bSTejun Heo 
635a797bfc3STejun Heo const struct dentry_operations kernfs_dops = {
636c637b8acSTejun Heo 	.d_revalidate	= kernfs_dop_revalidate,
637c637b8acSTejun Heo 	.d_release	= kernfs_dop_release,
638fd7b9f7bSTejun Heo };
639fd7b9f7bSTejun Heo 
6400c23b225STejun Heo /**
6410c23b225STejun Heo  * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
6420c23b225STejun Heo  * @dentry: the dentry in question
6430c23b225STejun Heo  *
6440c23b225STejun Heo  * Return the kernfs_node associated with @dentry.  If @dentry is not a
6450c23b225STejun Heo  * kernfs one, %NULL is returned.
6460c23b225STejun Heo  *
6470c23b225STejun Heo  * While the returned kernfs_node will stay accessible as long as @dentry
6480c23b225STejun Heo  * is accessible, the returned node can be in any state and the caller is
6490c23b225STejun Heo  * fully responsible for determining what's accessible.
6500c23b225STejun Heo  */
6510c23b225STejun Heo struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
6520c23b225STejun Heo {
653f41c5934SLi Zefan 	if (dentry->d_sb->s_op == &kernfs_sops)
6540c23b225STejun Heo 		return dentry->d_fsdata;
6550c23b225STejun Heo 	return NULL;
6560c23b225STejun Heo }
6570c23b225STejun Heo 
658db4aad20STejun Heo static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
659db4aad20STejun Heo 					     const char *name, umode_t mode,
660db4aad20STejun Heo 					     unsigned flags)
661fd7b9f7bSTejun Heo {
662324a56e1STejun Heo 	struct kernfs_node *kn;
663bc755553STejun Heo 	int ret;
664fd7b9f7bSTejun Heo 
665dfeb0750STejun Heo 	name = kstrdup_const(name, GFP_KERNEL);
666fd7b9f7bSTejun Heo 	if (!name)
667fd7b9f7bSTejun Heo 		return NULL;
668fd7b9f7bSTejun Heo 
669a797bfc3STejun Heo 	kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
670324a56e1STejun Heo 	if (!kn)
671fd7b9f7bSTejun Heo 		goto err_out1;
672fd7b9f7bSTejun Heo 
673b2a209ffSVladimir Davydov 	ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
674bc755553STejun Heo 	if (ret < 0)
675fd7b9f7bSTejun Heo 		goto err_out2;
676adc5e8b5STejun Heo 	kn->ino = ret;
677fd7b9f7bSTejun Heo 
678adc5e8b5STejun Heo 	atomic_set(&kn->count, 1);
67981c173cbSTejun Heo 	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
68035beab06STejun Heo 	RB_CLEAR_NODE(&kn->rb);
681fd7b9f7bSTejun Heo 
682adc5e8b5STejun Heo 	kn->name = name;
683adc5e8b5STejun Heo 	kn->mode = mode;
68481c173cbSTejun Heo 	kn->flags = flags;
685fd7b9f7bSTejun Heo 
686324a56e1STejun Heo 	return kn;
687fd7b9f7bSTejun Heo 
688fd7b9f7bSTejun Heo  err_out2:
689a797bfc3STejun Heo 	kmem_cache_free(kernfs_node_cache, kn);
690fd7b9f7bSTejun Heo  err_out1:
691dfeb0750STejun Heo 	kfree_const(name);
692fd7b9f7bSTejun Heo 	return NULL;
693fd7b9f7bSTejun Heo }
694fd7b9f7bSTejun Heo 
695db4aad20STejun Heo struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
696db4aad20STejun Heo 				    const char *name, umode_t mode,
697db4aad20STejun Heo 				    unsigned flags)
698db4aad20STejun Heo {
699db4aad20STejun Heo 	struct kernfs_node *kn;
700db4aad20STejun Heo 
701db4aad20STejun Heo 	kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
702db4aad20STejun Heo 	if (kn) {
703db4aad20STejun Heo 		kernfs_get(parent);
704db4aad20STejun Heo 		kn->parent = parent;
705db4aad20STejun Heo 	}
706db4aad20STejun Heo 	return kn;
707db4aad20STejun Heo }
708db4aad20STejun Heo 
709fd7b9f7bSTejun Heo /**
710c637b8acSTejun Heo  *	kernfs_add_one - add kernfs_node to parent without warning
711324a56e1STejun Heo  *	@kn: kernfs_node to be added
712fd7b9f7bSTejun Heo  *
713db4aad20STejun Heo  *	The caller must already have initialized @kn->parent.  This
714db4aad20STejun Heo  *	function increments nlink of the parent's inode if @kn is a
715db4aad20STejun Heo  *	directory and link into the children list of the parent.
716fd7b9f7bSTejun Heo  *
717fd7b9f7bSTejun Heo  *	RETURNS:
718fd7b9f7bSTejun Heo  *	0 on success, -EEXIST if entry with the given name already
719fd7b9f7bSTejun Heo  *	exists.
720fd7b9f7bSTejun Heo  */
721988cd7afSTejun Heo int kernfs_add_one(struct kernfs_node *kn)
722fd7b9f7bSTejun Heo {
723db4aad20STejun Heo 	struct kernfs_node *parent = kn->parent;
724c525aaddSTejun Heo 	struct kernfs_iattrs *ps_iattr;
725988cd7afSTejun Heo 	bool has_ns;
726fd7b9f7bSTejun Heo 	int ret;
727fd7b9f7bSTejun Heo 
728988cd7afSTejun Heo 	mutex_lock(&kernfs_mutex);
729988cd7afSTejun Heo 
730988cd7afSTejun Heo 	ret = -EINVAL;
731988cd7afSTejun Heo 	has_ns = kernfs_ns_enabled(parent);
732988cd7afSTejun Heo 	if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
733988cd7afSTejun Heo 		 has_ns ? "required" : "invalid", parent->name, kn->name))
734988cd7afSTejun Heo 		goto out_unlock;
735fd7b9f7bSTejun Heo 
736df23fc39STejun Heo 	if (kernfs_type(parent) != KERNFS_DIR)
737988cd7afSTejun Heo 		goto out_unlock;
738fd7b9f7bSTejun Heo 
739988cd7afSTejun Heo 	ret = -ENOENT;
740ea015218SEric W. Biederman 	if (parent->flags & KERNFS_EMPTY_DIR)
741ea015218SEric W. Biederman 		goto out_unlock;
742ea015218SEric W. Biederman 
743d35258efSTejun Heo 	if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
744988cd7afSTejun Heo 		goto out_unlock;
745798c75a0SGreg Kroah-Hartman 
746c637b8acSTejun Heo 	kn->hash = kernfs_name_hash(kn->name, kn->ns);
747fd7b9f7bSTejun Heo 
748c637b8acSTejun Heo 	ret = kernfs_link_sibling(kn);
749fd7b9f7bSTejun Heo 	if (ret)
750988cd7afSTejun Heo 		goto out_unlock;
751fd7b9f7bSTejun Heo 
752fd7b9f7bSTejun Heo 	/* Update timestamps on the parent */
753adc5e8b5STejun Heo 	ps_iattr = parent->iattr;
754fd7b9f7bSTejun Heo 	if (ps_iattr) {
755fd7b9f7bSTejun Heo 		struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
756fd7b9f7bSTejun Heo 		ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
757fd7b9f7bSTejun Heo 	}
758fd7b9f7bSTejun Heo 
759d35258efSTejun Heo 	mutex_unlock(&kernfs_mutex);
760d35258efSTejun Heo 
761d35258efSTejun Heo 	/*
762d35258efSTejun Heo 	 * Activate the new node unless CREATE_DEACTIVATED is requested.
763d35258efSTejun Heo 	 * If not activated here, the kernfs user is responsible for
764d35258efSTejun Heo 	 * activating the node with kernfs_activate().  A node which hasn't
765d35258efSTejun Heo 	 * been activated is not visible to userland and its removal won't
766d35258efSTejun Heo 	 * trigger deactivation.
767d35258efSTejun Heo 	 */
768d35258efSTejun Heo 	if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
769d35258efSTejun Heo 		kernfs_activate(kn);
770d35258efSTejun Heo 	return 0;
771d35258efSTejun Heo 
772988cd7afSTejun Heo out_unlock:
773a797bfc3STejun Heo 	mutex_unlock(&kernfs_mutex);
774988cd7afSTejun Heo 	return ret;
775fd7b9f7bSTejun Heo }
776fd7b9f7bSTejun Heo 
777fd7b9f7bSTejun Heo /**
778324a56e1STejun Heo  * kernfs_find_ns - find kernfs_node with the given name
779324a56e1STejun Heo  * @parent: kernfs_node to search under
780fd7b9f7bSTejun Heo  * @name: name to look for
781fd7b9f7bSTejun Heo  * @ns: the namespace tag to use
782fd7b9f7bSTejun Heo  *
783324a56e1STejun Heo  * Look for kernfs_node with name @name under @parent.  Returns pointer to
784324a56e1STejun Heo  * the found kernfs_node on success, %NULL on failure.
785fd7b9f7bSTejun Heo  */
786324a56e1STejun Heo static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
787fd7b9f7bSTejun Heo 					  const unsigned char *name,
788fd7b9f7bSTejun Heo 					  const void *ns)
789fd7b9f7bSTejun Heo {
790adc5e8b5STejun Heo 	struct rb_node *node = parent->dir.children.rb_node;
791ac9bba03STejun Heo 	bool has_ns = kernfs_ns_enabled(parent);
792fd7b9f7bSTejun Heo 	unsigned int hash;
793fd7b9f7bSTejun Heo 
794a797bfc3STejun Heo 	lockdep_assert_held(&kernfs_mutex);
795fd7b9f7bSTejun Heo 
796fd7b9f7bSTejun Heo 	if (has_ns != (bool)ns) {
797c637b8acSTejun Heo 		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
798adc5e8b5STejun Heo 		     has_ns ? "required" : "invalid", parent->name, name);
799fd7b9f7bSTejun Heo 		return NULL;
800fd7b9f7bSTejun Heo 	}
801fd7b9f7bSTejun Heo 
802c637b8acSTejun Heo 	hash = kernfs_name_hash(name, ns);
803fd7b9f7bSTejun Heo 	while (node) {
804324a56e1STejun Heo 		struct kernfs_node *kn;
805fd7b9f7bSTejun Heo 		int result;
806fd7b9f7bSTejun Heo 
807324a56e1STejun Heo 		kn = rb_to_kn(node);
808c637b8acSTejun Heo 		result = kernfs_name_compare(hash, name, ns, kn);
809fd7b9f7bSTejun Heo 		if (result < 0)
810fd7b9f7bSTejun Heo 			node = node->rb_left;
811fd7b9f7bSTejun Heo 		else if (result > 0)
812fd7b9f7bSTejun Heo 			node = node->rb_right;
813fd7b9f7bSTejun Heo 		else
814324a56e1STejun Heo 			return kn;
815fd7b9f7bSTejun Heo 	}
816fd7b9f7bSTejun Heo 	return NULL;
817fd7b9f7bSTejun Heo }
818fd7b9f7bSTejun Heo 
819bd96f76aSTejun Heo static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
820bd96f76aSTejun Heo 					  const unsigned char *path,
821bd96f76aSTejun Heo 					  const void *ns)
822bd96f76aSTejun Heo {
823bd96f76aSTejun Heo 	static char path_buf[PATH_MAX];	/* protected by kernfs_mutex */
824bd96f76aSTejun Heo 	size_t len = strlcpy(path_buf, path, PATH_MAX);
825bd96f76aSTejun Heo 	char *p = path_buf;
826bd96f76aSTejun Heo 	char *name;
827bd96f76aSTejun Heo 
828bd96f76aSTejun Heo 	lockdep_assert_held(&kernfs_mutex);
829bd96f76aSTejun Heo 
830bd96f76aSTejun Heo 	if (len >= PATH_MAX)
831bd96f76aSTejun Heo 		return NULL;
832bd96f76aSTejun Heo 
833bd96f76aSTejun Heo 	while ((name = strsep(&p, "/")) && parent) {
834bd96f76aSTejun Heo 		if (*name == '\0')
835bd96f76aSTejun Heo 			continue;
836bd96f76aSTejun Heo 		parent = kernfs_find_ns(parent, name, ns);
837bd96f76aSTejun Heo 	}
838bd96f76aSTejun Heo 
839bd96f76aSTejun Heo 	return parent;
840bd96f76aSTejun Heo }
841bd96f76aSTejun Heo 
842fd7b9f7bSTejun Heo /**
843324a56e1STejun Heo  * kernfs_find_and_get_ns - find and get kernfs_node with the given name
844324a56e1STejun Heo  * @parent: kernfs_node to search under
845fd7b9f7bSTejun Heo  * @name: name to look for
846fd7b9f7bSTejun Heo  * @ns: the namespace tag to use
847fd7b9f7bSTejun Heo  *
848324a56e1STejun Heo  * Look for kernfs_node with name @name under @parent and get a reference
849fd7b9f7bSTejun Heo  * if found.  This function may sleep and returns pointer to the found
850324a56e1STejun Heo  * kernfs_node on success, %NULL on failure.
851fd7b9f7bSTejun Heo  */
852324a56e1STejun Heo struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
853fd7b9f7bSTejun Heo 					   const char *name, const void *ns)
854fd7b9f7bSTejun Heo {
855324a56e1STejun Heo 	struct kernfs_node *kn;
856fd7b9f7bSTejun Heo 
857a797bfc3STejun Heo 	mutex_lock(&kernfs_mutex);
858324a56e1STejun Heo 	kn = kernfs_find_ns(parent, name, ns);
859324a56e1STejun Heo 	kernfs_get(kn);
860a797bfc3STejun Heo 	mutex_unlock(&kernfs_mutex);
861fd7b9f7bSTejun Heo 
862324a56e1STejun Heo 	return kn;
863fd7b9f7bSTejun Heo }
864fd7b9f7bSTejun Heo EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
865fd7b9f7bSTejun Heo 
866fd7b9f7bSTejun Heo /**
867bd96f76aSTejun Heo  * kernfs_walk_and_get_ns - find and get kernfs_node with the given path
868bd96f76aSTejun Heo  * @parent: kernfs_node to search under
869bd96f76aSTejun Heo  * @path: path to look for
870bd96f76aSTejun Heo  * @ns: the namespace tag to use
871bd96f76aSTejun Heo  *
872bd96f76aSTejun Heo  * Look for kernfs_node with path @path under @parent and get a reference
873bd96f76aSTejun Heo  * if found.  This function may sleep and returns pointer to the found
874bd96f76aSTejun Heo  * kernfs_node on success, %NULL on failure.
875bd96f76aSTejun Heo  */
876bd96f76aSTejun Heo struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
877bd96f76aSTejun Heo 					   const char *path, const void *ns)
878bd96f76aSTejun Heo {
879bd96f76aSTejun Heo 	struct kernfs_node *kn;
880bd96f76aSTejun Heo 
881bd96f76aSTejun Heo 	mutex_lock(&kernfs_mutex);
882bd96f76aSTejun Heo 	kn = kernfs_walk_ns(parent, path, ns);
883bd96f76aSTejun Heo 	kernfs_get(kn);
884bd96f76aSTejun Heo 	mutex_unlock(&kernfs_mutex);
885bd96f76aSTejun Heo 
886bd96f76aSTejun Heo 	return kn;
887bd96f76aSTejun Heo }
888bd96f76aSTejun Heo 
889bd96f76aSTejun Heo /**
890ba7443bcSTejun Heo  * kernfs_create_root - create a new kernfs hierarchy
89190c07c89STejun Heo  * @scops: optional syscall operations for the hierarchy
892d35258efSTejun Heo  * @flags: KERNFS_ROOT_* flags
893ba7443bcSTejun Heo  * @priv: opaque data associated with the new directory
894ba7443bcSTejun Heo  *
895ba7443bcSTejun Heo  * Returns the root of the new hierarchy on success, ERR_PTR() value on
896ba7443bcSTejun Heo  * failure.
897ba7443bcSTejun Heo  */
89890c07c89STejun Heo struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
899d35258efSTejun Heo 				       unsigned int flags, void *priv)
900ba7443bcSTejun Heo {
901ba7443bcSTejun Heo 	struct kernfs_root *root;
902324a56e1STejun Heo 	struct kernfs_node *kn;
903ba7443bcSTejun Heo 
904ba7443bcSTejun Heo 	root = kzalloc(sizeof(*root), GFP_KERNEL);
905ba7443bcSTejun Heo 	if (!root)
906ba7443bcSTejun Heo 		return ERR_PTR(-ENOMEM);
907ba7443bcSTejun Heo 
908bc755553STejun Heo 	ida_init(&root->ino_ida);
9097d568a83STejun Heo 	INIT_LIST_HEAD(&root->supers);
910bc755553STejun Heo 
911db4aad20STejun Heo 	kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
912db4aad20STejun Heo 			       KERNFS_DIR);
913324a56e1STejun Heo 	if (!kn) {
914bc755553STejun Heo 		ida_destroy(&root->ino_ida);
915ba7443bcSTejun Heo 		kfree(root);
916ba7443bcSTejun Heo 		return ERR_PTR(-ENOMEM);
917ba7443bcSTejun Heo 	}
918ba7443bcSTejun Heo 
919324a56e1STejun Heo 	kn->priv = priv;
920adc5e8b5STejun Heo 	kn->dir.root = root;
921ba7443bcSTejun Heo 
92290c07c89STejun Heo 	root->syscall_ops = scops;
923d35258efSTejun Heo 	root->flags = flags;
924324a56e1STejun Heo 	root->kn = kn;
925abd54f02STejun Heo 	init_waitqueue_head(&root->deactivate_waitq);
926ba7443bcSTejun Heo 
927d35258efSTejun Heo 	if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
928d35258efSTejun Heo 		kernfs_activate(kn);
929d35258efSTejun Heo 
930ba7443bcSTejun Heo 	return root;
931ba7443bcSTejun Heo }
932ba7443bcSTejun Heo 
933ba7443bcSTejun Heo /**
934ba7443bcSTejun Heo  * kernfs_destroy_root - destroy a kernfs hierarchy
935ba7443bcSTejun Heo  * @root: root of the hierarchy to destroy
936ba7443bcSTejun Heo  *
937ba7443bcSTejun Heo  * Destroy the hierarchy anchored at @root by removing all existing
938ba7443bcSTejun Heo  * directories and destroying @root.
939ba7443bcSTejun Heo  */
940ba7443bcSTejun Heo void kernfs_destroy_root(struct kernfs_root *root)
941ba7443bcSTejun Heo {
942324a56e1STejun Heo 	kernfs_remove(root->kn);	/* will also free @root */
943ba7443bcSTejun Heo }
944ba7443bcSTejun Heo 
945ba7443bcSTejun Heo /**
946fd7b9f7bSTejun Heo  * kernfs_create_dir_ns - create a directory
947fd7b9f7bSTejun Heo  * @parent: parent in which to create a new directory
948fd7b9f7bSTejun Heo  * @name: name of the new directory
949bb8b9d09STejun Heo  * @mode: mode of the new directory
950fd7b9f7bSTejun Heo  * @priv: opaque data associated with the new directory
951fd7b9f7bSTejun Heo  * @ns: optional namespace tag of the directory
952fd7b9f7bSTejun Heo  *
953fd7b9f7bSTejun Heo  * Returns the created node on success, ERR_PTR() value on failure.
954fd7b9f7bSTejun Heo  */
955324a56e1STejun Heo struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
956bb8b9d09STejun Heo 					 const char *name, umode_t mode,
957bb8b9d09STejun Heo 					 void *priv, const void *ns)
958fd7b9f7bSTejun Heo {
959324a56e1STejun Heo 	struct kernfs_node *kn;
960fd7b9f7bSTejun Heo 	int rc;
961fd7b9f7bSTejun Heo 
962fd7b9f7bSTejun Heo 	/* allocate */
963db4aad20STejun Heo 	kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
964324a56e1STejun Heo 	if (!kn)
965fd7b9f7bSTejun Heo 		return ERR_PTR(-ENOMEM);
966fd7b9f7bSTejun Heo 
967adc5e8b5STejun Heo 	kn->dir.root = parent->dir.root;
968adc5e8b5STejun Heo 	kn->ns = ns;
969324a56e1STejun Heo 	kn->priv = priv;
970fd7b9f7bSTejun Heo 
971fd7b9f7bSTejun Heo 	/* link in */
972988cd7afSTejun Heo 	rc = kernfs_add_one(kn);
973fd7b9f7bSTejun Heo 	if (!rc)
974324a56e1STejun Heo 		return kn;
975fd7b9f7bSTejun Heo 
976324a56e1STejun Heo 	kernfs_put(kn);
977fd7b9f7bSTejun Heo 	return ERR_PTR(rc);
978fd7b9f7bSTejun Heo }
979fd7b9f7bSTejun Heo 
980ea015218SEric W. Biederman /**
981ea015218SEric W. Biederman  * kernfs_create_empty_dir - create an always empty directory
982ea015218SEric W. Biederman  * @parent: parent in which to create a new directory
983ea015218SEric W. Biederman  * @name: name of the new directory
984ea015218SEric W. Biederman  *
985ea015218SEric W. Biederman  * Returns the created node on success, ERR_PTR() value on failure.
986ea015218SEric W. Biederman  */
987ea015218SEric W. Biederman struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
988ea015218SEric W. Biederman 					    const char *name)
989ea015218SEric W. Biederman {
990ea015218SEric W. Biederman 	struct kernfs_node *kn;
991ea015218SEric W. Biederman 	int rc;
992ea015218SEric W. Biederman 
993ea015218SEric W. Biederman 	/* allocate */
994ea015218SEric W. Biederman 	kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
995ea015218SEric W. Biederman 	if (!kn)
996ea015218SEric W. Biederman 		return ERR_PTR(-ENOMEM);
997ea015218SEric W. Biederman 
998ea015218SEric W. Biederman 	kn->flags |= KERNFS_EMPTY_DIR;
999ea015218SEric W. Biederman 	kn->dir.root = parent->dir.root;
1000ea015218SEric W. Biederman 	kn->ns = NULL;
1001ea015218SEric W. Biederman 	kn->priv = NULL;
1002ea015218SEric W. Biederman 
1003ea015218SEric W. Biederman 	/* link in */
1004ea015218SEric W. Biederman 	rc = kernfs_add_one(kn);
1005ea015218SEric W. Biederman 	if (!rc)
1006ea015218SEric W. Biederman 		return kn;
1007ea015218SEric W. Biederman 
1008ea015218SEric W. Biederman 	kernfs_put(kn);
1009ea015218SEric W. Biederman 	return ERR_PTR(rc);
1010ea015218SEric W. Biederman }
1011ea015218SEric W. Biederman 
1012c637b8acSTejun Heo static struct dentry *kernfs_iop_lookup(struct inode *dir,
1013c637b8acSTejun Heo 					struct dentry *dentry,
1014fd7b9f7bSTejun Heo 					unsigned int flags)
1015fd7b9f7bSTejun Heo {
101619bbb926STejun Heo 	struct dentry *ret;
1017324a56e1STejun Heo 	struct kernfs_node *parent = dentry->d_parent->d_fsdata;
1018324a56e1STejun Heo 	struct kernfs_node *kn;
1019fd7b9f7bSTejun Heo 	struct inode *inode;
1020fd7b9f7bSTejun Heo 	const void *ns = NULL;
1021fd7b9f7bSTejun Heo 
1022a797bfc3STejun Heo 	mutex_lock(&kernfs_mutex);
1023fd7b9f7bSTejun Heo 
1024324a56e1STejun Heo 	if (kernfs_ns_enabled(parent))
1025c525aaddSTejun Heo 		ns = kernfs_info(dir->i_sb)->ns;
1026fd7b9f7bSTejun Heo 
1027324a56e1STejun Heo 	kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
1028fd7b9f7bSTejun Heo 
1029fd7b9f7bSTejun Heo 	/* no such entry */
1030b9c9dad0STejun Heo 	if (!kn || !kernfs_active(kn)) {
103119bbb926STejun Heo 		ret = NULL;
1032fd7b9f7bSTejun Heo 		goto out_unlock;
1033fd7b9f7bSTejun Heo 	}
1034324a56e1STejun Heo 	kernfs_get(kn);
1035324a56e1STejun Heo 	dentry->d_fsdata = kn;
1036fd7b9f7bSTejun Heo 
1037fd7b9f7bSTejun Heo 	/* attach dentry and inode */
1038c637b8acSTejun Heo 	inode = kernfs_get_inode(dir->i_sb, kn);
1039fd7b9f7bSTejun Heo 	if (!inode) {
1040fd7b9f7bSTejun Heo 		ret = ERR_PTR(-ENOMEM);
1041fd7b9f7bSTejun Heo 		goto out_unlock;
1042fd7b9f7bSTejun Heo 	}
1043fd7b9f7bSTejun Heo 
1044fd7b9f7bSTejun Heo 	/* instantiate and hash dentry */
104541d28bcaSAl Viro 	ret = d_splice_alias(inode, dentry);
1046fd7b9f7bSTejun Heo  out_unlock:
1047a797bfc3STejun Heo 	mutex_unlock(&kernfs_mutex);
1048fd7b9f7bSTejun Heo 	return ret;
1049fd7b9f7bSTejun Heo }
1050fd7b9f7bSTejun Heo 
105180b9bbefSTejun Heo static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
105280b9bbefSTejun Heo 			    umode_t mode)
105380b9bbefSTejun Heo {
105480b9bbefSTejun Heo 	struct kernfs_node *parent = dir->i_private;
105590c07c89STejun Heo 	struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
105607c7530dSTejun Heo 	int ret;
105780b9bbefSTejun Heo 
105890c07c89STejun Heo 	if (!scops || !scops->mkdir)
105980b9bbefSTejun Heo 		return -EPERM;
106080b9bbefSTejun Heo 
106107c7530dSTejun Heo 	if (!kernfs_get_active(parent))
106207c7530dSTejun Heo 		return -ENODEV;
106307c7530dSTejun Heo 
106490c07c89STejun Heo 	ret = scops->mkdir(parent, dentry->d_name.name, mode);
106507c7530dSTejun Heo 
106607c7530dSTejun Heo 	kernfs_put_active(parent);
106707c7530dSTejun Heo 	return ret;
106880b9bbefSTejun Heo }
106980b9bbefSTejun Heo 
107080b9bbefSTejun Heo static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
107180b9bbefSTejun Heo {
107280b9bbefSTejun Heo 	struct kernfs_node *kn  = dentry->d_fsdata;
107390c07c89STejun Heo 	struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
107407c7530dSTejun Heo 	int ret;
107580b9bbefSTejun Heo 
107690c07c89STejun Heo 	if (!scops || !scops->rmdir)
107780b9bbefSTejun Heo 		return -EPERM;
107880b9bbefSTejun Heo 
107907c7530dSTejun Heo 	if (!kernfs_get_active(kn))
108007c7530dSTejun Heo 		return -ENODEV;
108107c7530dSTejun Heo 
108290c07c89STejun Heo 	ret = scops->rmdir(kn);
108307c7530dSTejun Heo 
108407c7530dSTejun Heo 	kernfs_put_active(kn);
108507c7530dSTejun Heo 	return ret;
108680b9bbefSTejun Heo }
108780b9bbefSTejun Heo 
108880b9bbefSTejun Heo static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
108980b9bbefSTejun Heo 			     struct inode *new_dir, struct dentry *new_dentry)
109080b9bbefSTejun Heo {
109180b9bbefSTejun Heo 	struct kernfs_node *kn  = old_dentry->d_fsdata;
109280b9bbefSTejun Heo 	struct kernfs_node *new_parent = new_dir->i_private;
109390c07c89STejun Heo 	struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
109407c7530dSTejun Heo 	int ret;
109580b9bbefSTejun Heo 
109690c07c89STejun Heo 	if (!scops || !scops->rename)
109780b9bbefSTejun Heo 		return -EPERM;
109880b9bbefSTejun Heo 
109907c7530dSTejun Heo 	if (!kernfs_get_active(kn))
110007c7530dSTejun Heo 		return -ENODEV;
110107c7530dSTejun Heo 
110207c7530dSTejun Heo 	if (!kernfs_get_active(new_parent)) {
110307c7530dSTejun Heo 		kernfs_put_active(kn);
110407c7530dSTejun Heo 		return -ENODEV;
110507c7530dSTejun Heo 	}
110607c7530dSTejun Heo 
110790c07c89STejun Heo 	ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
110807c7530dSTejun Heo 
110907c7530dSTejun Heo 	kernfs_put_active(new_parent);
111007c7530dSTejun Heo 	kernfs_put_active(kn);
111107c7530dSTejun Heo 	return ret;
111280b9bbefSTejun Heo }
111380b9bbefSTejun Heo 
1114a797bfc3STejun Heo const struct inode_operations kernfs_dir_iops = {
1115c637b8acSTejun Heo 	.lookup		= kernfs_iop_lookup,
1116c637b8acSTejun Heo 	.permission	= kernfs_iop_permission,
1117c637b8acSTejun Heo 	.setattr	= kernfs_iop_setattr,
1118c637b8acSTejun Heo 	.getattr	= kernfs_iop_getattr,
1119c637b8acSTejun Heo 	.setxattr	= kernfs_iop_setxattr,
1120c637b8acSTejun Heo 	.removexattr	= kernfs_iop_removexattr,
1121c637b8acSTejun Heo 	.getxattr	= kernfs_iop_getxattr,
1122c637b8acSTejun Heo 	.listxattr	= kernfs_iop_listxattr,
112380b9bbefSTejun Heo 
112480b9bbefSTejun Heo 	.mkdir		= kernfs_iop_mkdir,
112580b9bbefSTejun Heo 	.rmdir		= kernfs_iop_rmdir,
112680b9bbefSTejun Heo 	.rename		= kernfs_iop_rename,
1127fd7b9f7bSTejun Heo };
1128fd7b9f7bSTejun Heo 
1129c637b8acSTejun Heo static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
1130fd7b9f7bSTejun Heo {
1131324a56e1STejun Heo 	struct kernfs_node *last;
1132fd7b9f7bSTejun Heo 
1133fd7b9f7bSTejun Heo 	while (true) {
1134fd7b9f7bSTejun Heo 		struct rb_node *rbn;
1135fd7b9f7bSTejun Heo 
1136fd7b9f7bSTejun Heo 		last = pos;
1137fd7b9f7bSTejun Heo 
1138df23fc39STejun Heo 		if (kernfs_type(pos) != KERNFS_DIR)
1139fd7b9f7bSTejun Heo 			break;
1140fd7b9f7bSTejun Heo 
1141adc5e8b5STejun Heo 		rbn = rb_first(&pos->dir.children);
1142fd7b9f7bSTejun Heo 		if (!rbn)
1143fd7b9f7bSTejun Heo 			break;
1144fd7b9f7bSTejun Heo 
1145324a56e1STejun Heo 		pos = rb_to_kn(rbn);
1146fd7b9f7bSTejun Heo 	}
1147fd7b9f7bSTejun Heo 
1148fd7b9f7bSTejun Heo 	return last;
1149fd7b9f7bSTejun Heo }
1150fd7b9f7bSTejun Heo 
1151fd7b9f7bSTejun Heo /**
1152c637b8acSTejun Heo  * kernfs_next_descendant_post - find the next descendant for post-order walk
1153fd7b9f7bSTejun Heo  * @pos: the current position (%NULL to initiate traversal)
1154324a56e1STejun Heo  * @root: kernfs_node whose descendants to walk
1155fd7b9f7bSTejun Heo  *
1156fd7b9f7bSTejun Heo  * Find the next descendant to visit for post-order traversal of @root's
1157fd7b9f7bSTejun Heo  * descendants.  @root is included in the iteration and the last node to be
1158fd7b9f7bSTejun Heo  * visited.
1159fd7b9f7bSTejun Heo  */
1160c637b8acSTejun Heo static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
1161324a56e1STejun Heo 						       struct kernfs_node *root)
1162fd7b9f7bSTejun Heo {
1163fd7b9f7bSTejun Heo 	struct rb_node *rbn;
1164fd7b9f7bSTejun Heo 
1165a797bfc3STejun Heo 	lockdep_assert_held(&kernfs_mutex);
1166fd7b9f7bSTejun Heo 
1167fd7b9f7bSTejun Heo 	/* if first iteration, visit leftmost descendant which may be root */
1168fd7b9f7bSTejun Heo 	if (!pos)
1169c637b8acSTejun Heo 		return kernfs_leftmost_descendant(root);
1170fd7b9f7bSTejun Heo 
1171fd7b9f7bSTejun Heo 	/* if we visited @root, we're done */
1172fd7b9f7bSTejun Heo 	if (pos == root)
1173fd7b9f7bSTejun Heo 		return NULL;
1174fd7b9f7bSTejun Heo 
1175fd7b9f7bSTejun Heo 	/* if there's an unvisited sibling, visit its leftmost descendant */
1176adc5e8b5STejun Heo 	rbn = rb_next(&pos->rb);
1177fd7b9f7bSTejun Heo 	if (rbn)
1178c637b8acSTejun Heo 		return kernfs_leftmost_descendant(rb_to_kn(rbn));
1179fd7b9f7bSTejun Heo 
1180fd7b9f7bSTejun Heo 	/* no sibling left, visit parent */
1181adc5e8b5STejun Heo 	return pos->parent;
1182fd7b9f7bSTejun Heo }
1183fd7b9f7bSTejun Heo 
1184d35258efSTejun Heo /**
1185d35258efSTejun Heo  * kernfs_activate - activate a node which started deactivated
1186d35258efSTejun Heo  * @kn: kernfs_node whose subtree is to be activated
1187d35258efSTejun Heo  *
1188d35258efSTejun Heo  * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
1189d35258efSTejun Heo  * needs to be explicitly activated.  A node which hasn't been activated
1190d35258efSTejun Heo  * isn't visible to userland and deactivation is skipped during its
1191d35258efSTejun Heo  * removal.  This is useful to construct atomic init sequences where
1192d35258efSTejun Heo  * creation of multiple nodes should either succeed or fail atomically.
1193d35258efSTejun Heo  *
1194d35258efSTejun Heo  * The caller is responsible for ensuring that this function is not called
1195d35258efSTejun Heo  * after kernfs_remove*() is invoked on @kn.
1196d35258efSTejun Heo  */
1197d35258efSTejun Heo void kernfs_activate(struct kernfs_node *kn)
1198d35258efSTejun Heo {
1199d35258efSTejun Heo 	struct kernfs_node *pos;
1200d35258efSTejun Heo 
1201d35258efSTejun Heo 	mutex_lock(&kernfs_mutex);
1202d35258efSTejun Heo 
1203d35258efSTejun Heo 	pos = NULL;
1204d35258efSTejun Heo 	while ((pos = kernfs_next_descendant_post(pos, kn))) {
1205d35258efSTejun Heo 		if (!pos || (pos->flags & KERNFS_ACTIVATED))
1206d35258efSTejun Heo 			continue;
1207d35258efSTejun Heo 
1208d35258efSTejun Heo 		WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
1209d35258efSTejun Heo 		WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
1210d35258efSTejun Heo 
1211d35258efSTejun Heo 		atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
1212d35258efSTejun Heo 		pos->flags |= KERNFS_ACTIVATED;
1213d35258efSTejun Heo 	}
1214d35258efSTejun Heo 
1215d35258efSTejun Heo 	mutex_unlock(&kernfs_mutex);
1216d35258efSTejun Heo }
1217d35258efSTejun Heo 
1218988cd7afSTejun Heo static void __kernfs_remove(struct kernfs_node *kn)
1219fd7b9f7bSTejun Heo {
122035beab06STejun Heo 	struct kernfs_node *pos;
122135beab06STejun Heo 
122235beab06STejun Heo 	lockdep_assert_held(&kernfs_mutex);
1223fd7b9f7bSTejun Heo 
12246b0afc2aSTejun Heo 	/*
12256b0afc2aSTejun Heo 	 * Short-circuit if non-root @kn has already finished removal.
12266b0afc2aSTejun Heo 	 * This is for kernfs_remove_self() which plays with active ref
12276b0afc2aSTejun Heo 	 * after removal.
12286b0afc2aSTejun Heo 	 */
12296b0afc2aSTejun Heo 	if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
1230ce9b499cSGreg Kroah-Hartman 		return;
1231ce9b499cSGreg Kroah-Hartman 
1232c637b8acSTejun Heo 	pr_debug("kernfs %s: removing\n", kn->name);
1233fd7b9f7bSTejun Heo 
123481c173cbSTejun Heo 	/* prevent any new usage under @kn by deactivating all nodes */
123535beab06STejun Heo 	pos = NULL;
123635beab06STejun Heo 	while ((pos = kernfs_next_descendant_post(pos, kn)))
123781c173cbSTejun Heo 		if (kernfs_active(pos))
123881c173cbSTejun Heo 			atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
123935beab06STejun Heo 
124035beab06STejun Heo 	/* deactivate and unlink the subtree node-by-node */
1241fd7b9f7bSTejun Heo 	do {
124235beab06STejun Heo 		pos = kernfs_leftmost_descendant(kn);
124335beab06STejun Heo 
124435beab06STejun Heo 		/*
124581c173cbSTejun Heo 		 * kernfs_drain() drops kernfs_mutex temporarily and @pos's
124681c173cbSTejun Heo 		 * base ref could have been put by someone else by the time
124781c173cbSTejun Heo 		 * the function returns.  Make sure it doesn't go away
124881c173cbSTejun Heo 		 * underneath us.
124935beab06STejun Heo 		 */
125035beab06STejun Heo 		kernfs_get(pos);
125135beab06STejun Heo 
1252d35258efSTejun Heo 		/*
1253d35258efSTejun Heo 		 * Drain iff @kn was activated.  This avoids draining and
1254d35258efSTejun Heo 		 * its lockdep annotations for nodes which have never been
1255d35258efSTejun Heo 		 * activated and allows embedding kernfs_remove() in create
1256d35258efSTejun Heo 		 * error paths without worrying about draining.
1257d35258efSTejun Heo 		 */
1258d35258efSTejun Heo 		if (kn->flags & KERNFS_ACTIVATED)
125981c173cbSTejun Heo 			kernfs_drain(pos);
1260d35258efSTejun Heo 		else
1261d35258efSTejun Heo 			WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
126235beab06STejun Heo 
126335beab06STejun Heo 		/*
126435beab06STejun Heo 		 * kernfs_unlink_sibling() succeeds once per node.  Use it
126535beab06STejun Heo 		 * to decide who's responsible for cleanups.
126635beab06STejun Heo 		 */
126735beab06STejun Heo 		if (!pos->parent || kernfs_unlink_sibling(pos)) {
126835beab06STejun Heo 			struct kernfs_iattrs *ps_iattr =
126935beab06STejun Heo 				pos->parent ? pos->parent->iattr : NULL;
127035beab06STejun Heo 
127135beab06STejun Heo 			/* update timestamps on the parent */
127235beab06STejun Heo 			if (ps_iattr) {
127335beab06STejun Heo 				ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
127435beab06STejun Heo 				ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
127535beab06STejun Heo 			}
127635beab06STejun Heo 
1277988cd7afSTejun Heo 			kernfs_put(pos);
127835beab06STejun Heo 		}
127935beab06STejun Heo 
128035beab06STejun Heo 		kernfs_put(pos);
128135beab06STejun Heo 	} while (pos != kn);
1282fd7b9f7bSTejun Heo }
1283fd7b9f7bSTejun Heo 
1284fd7b9f7bSTejun Heo /**
1285324a56e1STejun Heo  * kernfs_remove - remove a kernfs_node recursively
1286324a56e1STejun Heo  * @kn: the kernfs_node to remove
1287fd7b9f7bSTejun Heo  *
1288324a56e1STejun Heo  * Remove @kn along with all its subdirectories and files.
1289fd7b9f7bSTejun Heo  */
1290324a56e1STejun Heo void kernfs_remove(struct kernfs_node *kn)
1291fd7b9f7bSTejun Heo {
1292988cd7afSTejun Heo 	mutex_lock(&kernfs_mutex);
1293988cd7afSTejun Heo 	__kernfs_remove(kn);
1294988cd7afSTejun Heo 	mutex_unlock(&kernfs_mutex);
1295fd7b9f7bSTejun Heo }
1296fd7b9f7bSTejun Heo 
/**
 * kernfs_break_active_protection - break out of active protection
 * @kn: the self kernfs_node
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops.  Every call must be
 * paired with a later call to kernfs_unbreak_active_protection().
 *
 * The active reference of @kn held by the caller is released.  From then
 * on @kn may be removed at any point and the caller is solely responsible
 * for ensuring that the objects it dereferences are accessible.
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
	/*
	 * Take ourselves out of the active ref dependency chain.  If we're
	 * called without an active ref, lockdep will complain.
	 */
	kernfs_put_active(kn);
}
13196b0afc2aSTejun Heo 
13206b0afc2aSTejun Heo /**
13216b0afc2aSTejun Heo  * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
13226b0afc2aSTejun Heo  * @kn: the self kernfs_node
13236b0afc2aSTejun Heo  *
13246b0afc2aSTejun Heo  * If kernfs_break_active_protection() was called, this function must be
13256b0afc2aSTejun Heo  * invoked before finishing the kernfs operation.  Note that while this
13266b0afc2aSTejun Heo  * function restores the active reference, it doesn't and can't actually
13276b0afc2aSTejun Heo  * restore the active protection - @kn may already or be in the process of
13286b0afc2aSTejun Heo  * being removed.  Once kernfs_break_active_protection() is invoked, that
13296b0afc2aSTejun Heo  * protection is irreversibly gone for the kernfs operation instance.
13306b0afc2aSTejun Heo  *
13316b0afc2aSTejun Heo  * While this function may be called at any point after
13326b0afc2aSTejun Heo  * kernfs_break_active_protection() is invoked, its most useful location
13336b0afc2aSTejun Heo  * would be right before the enclosing kernfs operation returns.
13346b0afc2aSTejun Heo  */
13356b0afc2aSTejun Heo void kernfs_unbreak_active_protection(struct kernfs_node *kn)
13366b0afc2aSTejun Heo {
13376b0afc2aSTejun Heo 	/*
13386b0afc2aSTejun Heo 	 * @kn->active could be in any state; however, the increment we do
13396b0afc2aSTejun Heo 	 * here will be undone as soon as the enclosing kernfs operation
13406b0afc2aSTejun Heo 	 * finishes and this temporary bump can't break anything.  If @kn
13416b0afc2aSTejun Heo 	 * is alive, nothing changes.  If @kn is being deactivated, the
13426b0afc2aSTejun Heo 	 * soon-to-follow put will either finish deactivation or restore
13436b0afc2aSTejun Heo 	 * deactivated state.  If @kn is already removed, the temporary
13446b0afc2aSTejun Heo 	 * bump is guaranteed to be gone before @kn is released.
13456b0afc2aSTejun Heo 	 */
13466b0afc2aSTejun Heo 	atomic_inc(&kn->active);
13476b0afc2aSTejun Heo 	if (kernfs_lockdep(kn))
13486b0afc2aSTejun Heo 		rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
13496b0afc2aSTejun Heo }
13506b0afc2aSTejun Heo 
13516b0afc2aSTejun Heo /**
13526b0afc2aSTejun Heo  * kernfs_remove_self - remove a kernfs_node from its own method
13536b0afc2aSTejun Heo  * @kn: the self kernfs_node to remove
13546b0afc2aSTejun Heo  *
13556b0afc2aSTejun Heo  * The caller must be running off of a kernfs operation which is invoked
13566b0afc2aSTejun Heo  * with an active reference - e.g. one of kernfs_ops.  This can be used to
13576b0afc2aSTejun Heo  * implement a file operation which deletes itself.
13586b0afc2aSTejun Heo  *
13596b0afc2aSTejun Heo  * For example, the "delete" file for a sysfs device directory can be
13606b0afc2aSTejun Heo  * implemented by invoking kernfs_remove_self() on the "delete" file
13616b0afc2aSTejun Heo  * itself.  This function breaks the circular dependency of trying to
13626b0afc2aSTejun Heo  * deactivate self while holding an active ref itself.  It isn't necessary
13636b0afc2aSTejun Heo  * to modify the usual removal path to use kernfs_remove_self().  The
13646b0afc2aSTejun Heo  * "delete" implementation can simply invoke kernfs_remove_self() on self
13656b0afc2aSTejun Heo  * before proceeding with the usual removal path.  kernfs will ignore later
13666b0afc2aSTejun Heo  * kernfs_remove() on self.
13676b0afc2aSTejun Heo  *
13686b0afc2aSTejun Heo  * kernfs_remove_self() can be called multiple times concurrently on the
13696b0afc2aSTejun Heo  * same kernfs_node.  Only the first one actually performs removal and
13706b0afc2aSTejun Heo  * returns %true.  All others will wait until the kernfs operation which
13716b0afc2aSTejun Heo  * won self-removal finishes and return %false.  Note that the losers wait
13726b0afc2aSTejun Heo  * for the completion of not only the winning kernfs_remove_self() but also
13736b0afc2aSTejun Heo  * the whole kernfs_ops which won the arbitration.  This can be used to
13746b0afc2aSTejun Heo  * guarantee, for example, all concurrent writes to a "delete" file to
13756b0afc2aSTejun Heo  * finish only after the whole operation is complete.
13766b0afc2aSTejun Heo  */
13776b0afc2aSTejun Heo bool kernfs_remove_self(struct kernfs_node *kn)
13786b0afc2aSTejun Heo {
13796b0afc2aSTejun Heo 	bool ret;
13806b0afc2aSTejun Heo 
13816b0afc2aSTejun Heo 	mutex_lock(&kernfs_mutex);
13826b0afc2aSTejun Heo 	kernfs_break_active_protection(kn);
13836b0afc2aSTejun Heo 
13846b0afc2aSTejun Heo 	/*
13856b0afc2aSTejun Heo 	 * SUICIDAL is used to arbitrate among competing invocations.  Only
13866b0afc2aSTejun Heo 	 * the first one will actually perform removal.  When the removal
13876b0afc2aSTejun Heo 	 * is complete, SUICIDED is set and the active ref is restored
13886b0afc2aSTejun Heo 	 * while holding kernfs_mutex.  The ones which lost arbitration
13896b0afc2aSTejun Heo 	 * waits for SUICDED && drained which can happen only after the
13906b0afc2aSTejun Heo 	 * enclosing kernfs operation which executed the winning instance
13916b0afc2aSTejun Heo 	 * of kernfs_remove_self() finished.
13926b0afc2aSTejun Heo 	 */
13936b0afc2aSTejun Heo 	if (!(kn->flags & KERNFS_SUICIDAL)) {
13946b0afc2aSTejun Heo 		kn->flags |= KERNFS_SUICIDAL;
13956b0afc2aSTejun Heo 		__kernfs_remove(kn);
13966b0afc2aSTejun Heo 		kn->flags |= KERNFS_SUICIDED;
13976b0afc2aSTejun Heo 		ret = true;
13986b0afc2aSTejun Heo 	} else {
13996b0afc2aSTejun Heo 		wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
14006b0afc2aSTejun Heo 		DEFINE_WAIT(wait);
14016b0afc2aSTejun Heo 
14026b0afc2aSTejun Heo 		while (true) {
14036b0afc2aSTejun Heo 			prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
14046b0afc2aSTejun Heo 
14056b0afc2aSTejun Heo 			if ((kn->flags & KERNFS_SUICIDED) &&
14066b0afc2aSTejun Heo 			    atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
14076b0afc2aSTejun Heo 				break;
14086b0afc2aSTejun Heo 
14096b0afc2aSTejun Heo 			mutex_unlock(&kernfs_mutex);
14106b0afc2aSTejun Heo 			schedule();
14116b0afc2aSTejun Heo 			mutex_lock(&kernfs_mutex);
14126b0afc2aSTejun Heo 		}
14136b0afc2aSTejun Heo 		finish_wait(waitq, &wait);
14146b0afc2aSTejun Heo 		WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
14156b0afc2aSTejun Heo 		ret = false;
14166b0afc2aSTejun Heo 	}
14176b0afc2aSTejun Heo 
14186b0afc2aSTejun Heo 	/*
14196b0afc2aSTejun Heo 	 * This must be done while holding kernfs_mutex; otherwise, waiting
14206b0afc2aSTejun Heo 	 * for SUICIDED && deactivated could finish prematurely.
14216b0afc2aSTejun Heo 	 */
14226b0afc2aSTejun Heo 	kernfs_unbreak_active_protection(kn);
14236b0afc2aSTejun Heo 
14246b0afc2aSTejun Heo 	mutex_unlock(&kernfs_mutex);
14256b0afc2aSTejun Heo 	return ret;
14266b0afc2aSTejun Heo }
14276b0afc2aSTejun Heo 
14286b0afc2aSTejun Heo /**
1429324a56e1STejun Heo  * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
1430324a56e1STejun Heo  * @parent: parent of the target
1431324a56e1STejun Heo  * @name: name of the kernfs_node to remove
1432324a56e1STejun Heo  * @ns: namespace tag of the kernfs_node to remove
1433fd7b9f7bSTejun Heo  *
1434324a56e1STejun Heo  * Look for the kernfs_node with @name and @ns under @parent and remove it.
1435324a56e1STejun Heo  * Returns 0 on success, -ENOENT if such entry doesn't exist.
1436fd7b9f7bSTejun Heo  */
1437324a56e1STejun Heo int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1438fd7b9f7bSTejun Heo 			     const void *ns)
1439fd7b9f7bSTejun Heo {
1440324a56e1STejun Heo 	struct kernfs_node *kn;
1441fd7b9f7bSTejun Heo 
1442324a56e1STejun Heo 	if (!parent) {
1443c637b8acSTejun Heo 		WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1444fd7b9f7bSTejun Heo 			name);
1445fd7b9f7bSTejun Heo 		return -ENOENT;
1446fd7b9f7bSTejun Heo 	}
1447fd7b9f7bSTejun Heo 
1448988cd7afSTejun Heo 	mutex_lock(&kernfs_mutex);
1449fd7b9f7bSTejun Heo 
1450324a56e1STejun Heo 	kn = kernfs_find_ns(parent, name, ns);
1451324a56e1STejun Heo 	if (kn)
1452988cd7afSTejun Heo 		__kernfs_remove(kn);
1453fd7b9f7bSTejun Heo 
1454988cd7afSTejun Heo 	mutex_unlock(&kernfs_mutex);
1455fd7b9f7bSTejun Heo 
1456324a56e1STejun Heo 	if (kn)
1457fd7b9f7bSTejun Heo 		return 0;
1458fd7b9f7bSTejun Heo 	else
1459fd7b9f7bSTejun Heo 		return -ENOENT;
1460fd7b9f7bSTejun Heo }
1461fd7b9f7bSTejun Heo 
1462fd7b9f7bSTejun Heo /**
1463fd7b9f7bSTejun Heo  * kernfs_rename_ns - move and rename a kernfs_node
1464324a56e1STejun Heo  * @kn: target node
1465fd7b9f7bSTejun Heo  * @new_parent: new parent to put @sd under
1466fd7b9f7bSTejun Heo  * @new_name: new name
1467fd7b9f7bSTejun Heo  * @new_ns: new namespace tag
1468fd7b9f7bSTejun Heo  */
1469324a56e1STejun Heo int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
1470fd7b9f7bSTejun Heo 		     const char *new_name, const void *new_ns)
1471fd7b9f7bSTejun Heo {
14723eef34adSTejun Heo 	struct kernfs_node *old_parent;
14733eef34adSTejun Heo 	const char *old_name = NULL;
1474fd7b9f7bSTejun Heo 	int error;
1475fd7b9f7bSTejun Heo 
14763eef34adSTejun Heo 	/* can't move or rename root */
14773eef34adSTejun Heo 	if (!kn->parent)
14783eef34adSTejun Heo 		return -EINVAL;
14793eef34adSTejun Heo 
1480ae34372eSTejun Heo 	mutex_lock(&kernfs_mutex);
1481d0ae3d43STejun Heo 
1482798c75a0SGreg Kroah-Hartman 	error = -ENOENT;
1483ea015218SEric W. Biederman 	if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
1484ea015218SEric W. Biederman 	    (new_parent->flags & KERNFS_EMPTY_DIR))
1485798c75a0SGreg Kroah-Hartman 		goto out;
1486798c75a0SGreg Kroah-Hartman 
1487fd7b9f7bSTejun Heo 	error = 0;
1488adc5e8b5STejun Heo 	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
1489adc5e8b5STejun Heo 	    (strcmp(kn->name, new_name) == 0))
1490798c75a0SGreg Kroah-Hartman 		goto out;	/* nothing to rename */
1491fd7b9f7bSTejun Heo 
1492fd7b9f7bSTejun Heo 	error = -EEXIST;
1493fd7b9f7bSTejun Heo 	if (kernfs_find_ns(new_parent, new_name, new_ns))
1494798c75a0SGreg Kroah-Hartman 		goto out;
1495fd7b9f7bSTejun Heo 
1496324a56e1STejun Heo 	/* rename kernfs_node */
1497adc5e8b5STejun Heo 	if (strcmp(kn->name, new_name) != 0) {
1498fd7b9f7bSTejun Heo 		error = -ENOMEM;
149975287a67SAndrzej Hajda 		new_name = kstrdup_const(new_name, GFP_KERNEL);
1500fd7b9f7bSTejun Heo 		if (!new_name)
1501798c75a0SGreg Kroah-Hartman 			goto out;
15023eef34adSTejun Heo 	} else {
15033eef34adSTejun Heo 		new_name = NULL;
1504fd7b9f7bSTejun Heo 	}
1505fd7b9f7bSTejun Heo 
1506fd7b9f7bSTejun Heo 	/*
1507fd7b9f7bSTejun Heo 	 * Move to the appropriate place in the appropriate directories rbtree.
1508fd7b9f7bSTejun Heo 	 */
1509c637b8acSTejun Heo 	kernfs_unlink_sibling(kn);
1510fd7b9f7bSTejun Heo 	kernfs_get(new_parent);
15113eef34adSTejun Heo 
15123eef34adSTejun Heo 	/* rename_lock protects ->parent and ->name accessors */
15133eef34adSTejun Heo 	spin_lock_irq(&kernfs_rename_lock);
15143eef34adSTejun Heo 
15153eef34adSTejun Heo 	old_parent = kn->parent;
1516adc5e8b5STejun Heo 	kn->parent = new_parent;
15173eef34adSTejun Heo 
15183eef34adSTejun Heo 	kn->ns = new_ns;
15193eef34adSTejun Heo 	if (new_name) {
15203eef34adSTejun Heo 		old_name = kn->name;
15213eef34adSTejun Heo 		kn->name = new_name;
15223eef34adSTejun Heo 	}
15233eef34adSTejun Heo 
15243eef34adSTejun Heo 	spin_unlock_irq(&kernfs_rename_lock);
15253eef34adSTejun Heo 
15269561a896STejun Heo 	kn->hash = kernfs_name_hash(kn->name, kn->ns);
1527c637b8acSTejun Heo 	kernfs_link_sibling(kn);
1528fd7b9f7bSTejun Heo 
15293eef34adSTejun Heo 	kernfs_put(old_parent);
153075287a67SAndrzej Hajda 	kfree_const(old_name);
15313eef34adSTejun Heo 
1532fd7b9f7bSTejun Heo 	error = 0;
1533ae34372eSTejun Heo  out:
1534798c75a0SGreg Kroah-Hartman 	mutex_unlock(&kernfs_mutex);
1535fd7b9f7bSTejun Heo 	return error;
1536fd7b9f7bSTejun Heo }
1537fd7b9f7bSTejun Heo 
1538fd7b9f7bSTejun Heo /* Relationship between s_mode and the DT_xxx types */
1539324a56e1STejun Heo static inline unsigned char dt_type(struct kernfs_node *kn)
1540fd7b9f7bSTejun Heo {
1541adc5e8b5STejun Heo 	return (kn->mode >> 12) & 15;
1542fd7b9f7bSTejun Heo }
1543fd7b9f7bSTejun Heo 
1544c637b8acSTejun Heo static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1545fd7b9f7bSTejun Heo {
1546fd7b9f7bSTejun Heo 	kernfs_put(filp->private_data);
1547fd7b9f7bSTejun Heo 	return 0;
1548fd7b9f7bSTejun Heo }
1549fd7b9f7bSTejun Heo 
1550c637b8acSTejun Heo static struct kernfs_node *kernfs_dir_pos(const void *ns,
1551324a56e1STejun Heo 	struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1552fd7b9f7bSTejun Heo {
1553fd7b9f7bSTejun Heo 	if (pos) {
155481c173cbSTejun Heo 		int valid = kernfs_active(pos) &&
1555798c75a0SGreg Kroah-Hartman 			pos->parent == parent && hash == pos->hash;
1556fd7b9f7bSTejun Heo 		kernfs_put(pos);
1557fd7b9f7bSTejun Heo 		if (!valid)
1558fd7b9f7bSTejun Heo 			pos = NULL;
1559fd7b9f7bSTejun Heo 	}
1560fd7b9f7bSTejun Heo 	if (!pos && (hash > 1) && (hash < INT_MAX)) {
1561adc5e8b5STejun Heo 		struct rb_node *node = parent->dir.children.rb_node;
1562fd7b9f7bSTejun Heo 		while (node) {
1563324a56e1STejun Heo 			pos = rb_to_kn(node);
1564fd7b9f7bSTejun Heo 
1565adc5e8b5STejun Heo 			if (hash < pos->hash)
1566fd7b9f7bSTejun Heo 				node = node->rb_left;
1567adc5e8b5STejun Heo 			else if (hash > pos->hash)
1568fd7b9f7bSTejun Heo 				node = node->rb_right;
1569fd7b9f7bSTejun Heo 			else
1570fd7b9f7bSTejun Heo 				break;
1571fd7b9f7bSTejun Heo 		}
1572fd7b9f7bSTejun Heo 	}
1573b9c9dad0STejun Heo 	/* Skip over entries which are dying/dead or in the wrong namespace */
1574b9c9dad0STejun Heo 	while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
1575adc5e8b5STejun Heo 		struct rb_node *node = rb_next(&pos->rb);
1576fd7b9f7bSTejun Heo 		if (!node)
1577fd7b9f7bSTejun Heo 			pos = NULL;
1578fd7b9f7bSTejun Heo 		else
1579324a56e1STejun Heo 			pos = rb_to_kn(node);
1580fd7b9f7bSTejun Heo 	}
1581fd7b9f7bSTejun Heo 	return pos;
1582fd7b9f7bSTejun Heo }
1583fd7b9f7bSTejun Heo 
1584c637b8acSTejun Heo static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1585324a56e1STejun Heo 	struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1586fd7b9f7bSTejun Heo {
1587c637b8acSTejun Heo 	pos = kernfs_dir_pos(ns, parent, ino, pos);
1588b9c9dad0STejun Heo 	if (pos) {
1589fd7b9f7bSTejun Heo 		do {
1590adc5e8b5STejun Heo 			struct rb_node *node = rb_next(&pos->rb);
1591fd7b9f7bSTejun Heo 			if (!node)
1592fd7b9f7bSTejun Heo 				pos = NULL;
1593fd7b9f7bSTejun Heo 			else
1594324a56e1STejun Heo 				pos = rb_to_kn(node);
1595b9c9dad0STejun Heo 		} while (pos && (!kernfs_active(pos) || pos->ns != ns));
1596b9c9dad0STejun Heo 	}
1597fd7b9f7bSTejun Heo 	return pos;
1598fd7b9f7bSTejun Heo }
1599fd7b9f7bSTejun Heo 
1600c637b8acSTejun Heo static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1601fd7b9f7bSTejun Heo {
1602fd7b9f7bSTejun Heo 	struct dentry *dentry = file->f_path.dentry;
1603324a56e1STejun Heo 	struct kernfs_node *parent = dentry->d_fsdata;
1604324a56e1STejun Heo 	struct kernfs_node *pos = file->private_data;
1605fd7b9f7bSTejun Heo 	const void *ns = NULL;
1606fd7b9f7bSTejun Heo 
1607fd7b9f7bSTejun Heo 	if (!dir_emit_dots(file, ctx))
1608fd7b9f7bSTejun Heo 		return 0;
1609a797bfc3STejun Heo 	mutex_lock(&kernfs_mutex);
1610fd7b9f7bSTejun Heo 
1611324a56e1STejun Heo 	if (kernfs_ns_enabled(parent))
1612c525aaddSTejun Heo 		ns = kernfs_info(dentry->d_sb)->ns;
1613fd7b9f7bSTejun Heo 
1614c637b8acSTejun Heo 	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1615fd7b9f7bSTejun Heo 	     pos;
1616c637b8acSTejun Heo 	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1617adc5e8b5STejun Heo 		const char *name = pos->name;
1618fd7b9f7bSTejun Heo 		unsigned int type = dt_type(pos);
1619fd7b9f7bSTejun Heo 		int len = strlen(name);
1620adc5e8b5STejun Heo 		ino_t ino = pos->ino;
1621fd7b9f7bSTejun Heo 
1622adc5e8b5STejun Heo 		ctx->pos = pos->hash;
1623fd7b9f7bSTejun Heo 		file->private_data = pos;
1624fd7b9f7bSTejun Heo 		kernfs_get(pos);
1625fd7b9f7bSTejun Heo 
1626a797bfc3STejun Heo 		mutex_unlock(&kernfs_mutex);
1627fd7b9f7bSTejun Heo 		if (!dir_emit(ctx, name, len, ino, type))
1628fd7b9f7bSTejun Heo 			return 0;
1629a797bfc3STejun Heo 		mutex_lock(&kernfs_mutex);
1630fd7b9f7bSTejun Heo 	}
1631a797bfc3STejun Heo 	mutex_unlock(&kernfs_mutex);
1632fd7b9f7bSTejun Heo 	file->private_data = NULL;
1633fd7b9f7bSTejun Heo 	ctx->pos = INT_MAX;
1634fd7b9f7bSTejun Heo 	return 0;
1635fd7b9f7bSTejun Heo }
1636fd7b9f7bSTejun Heo 
1637c637b8acSTejun Heo static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
1638c637b8acSTejun Heo 				    int whence)
1639fd7b9f7bSTejun Heo {
1640fd7b9f7bSTejun Heo 	struct inode *inode = file_inode(file);
1641fd7b9f7bSTejun Heo 	loff_t ret;
1642fd7b9f7bSTejun Heo 
1643fd7b9f7bSTejun Heo 	mutex_lock(&inode->i_mutex);
1644fd7b9f7bSTejun Heo 	ret = generic_file_llseek(file, offset, whence);
1645fd7b9f7bSTejun Heo 	mutex_unlock(&inode->i_mutex);
1646fd7b9f7bSTejun Heo 
1647fd7b9f7bSTejun Heo 	return ret;
1648fd7b9f7bSTejun Heo }
1649fd7b9f7bSTejun Heo 
1650a797bfc3STejun Heo const struct file_operations kernfs_dir_fops = {
1651fd7b9f7bSTejun Heo 	.read		= generic_read_dir,
1652c637b8acSTejun Heo 	.iterate	= kernfs_fop_readdir,
1653c637b8acSTejun Heo 	.release	= kernfs_dir_fop_release,
1654c637b8acSTejun Heo 	.llseek		= kernfs_dir_fop_llseek,
1655fd7b9f7bSTejun Heo };
1656