xref: /openbmc/linux/fs/overlayfs/super.c (revision cd4d09ec)
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/namei.h>
12 #include <linux/pagemap.h>
13 #include <linux/xattr.h>
14 #include <linux/security.h>
15 #include <linux/mount.h>
16 #include <linux/slab.h>
17 #include <linux/parser.h>
18 #include <linux/module.h>
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/statfs.h>
22 #include <linux/seq_file.h>
23 #include "overlayfs.h"
24 
25 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
26 MODULE_DESCRIPTION("Overlay filesystem");
27 MODULE_LICENSE("GPL");
28 
29 struct ovl_config {
30 	char *lowerdir;
31 	char *upperdir;
32 	char *workdir;
33 	bool default_permissions;
34 };
35 
36 /* private information held for overlayfs's superblock */
37 struct ovl_fs {
38 	struct vfsmount *upper_mnt;
39 	unsigned numlower;
40 	struct vfsmount **lower_mnt;
41 	struct dentry *workdir;
42 	long lower_namelen;
43 	/* pathnames of lower and upper dirs, for show_options */
44 	struct ovl_config config;
45 };
46 
47 struct ovl_dir_cache;
48 
49 /* private information held for every overlayfs dentry */
50 struct ovl_entry {
51 	struct dentry *__upperdentry;
52 	struct ovl_dir_cache *cache;
53 	union {
54 		struct {
55 			u64 version;
56 			bool opaque;
57 		};
58 		struct rcu_head rcu;
59 	};
60 	unsigned numlower;
61 	struct path lowerstack[];
62 };
63 
64 #define OVL_MAX_STACK 500
65 
66 static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe)
67 {
68 	return oe->numlower ? oe->lowerstack[0].dentry : NULL;
69 }
70 
71 enum ovl_path_type ovl_path_type(struct dentry *dentry)
72 {
73 	struct ovl_entry *oe = dentry->d_fsdata;
74 	enum ovl_path_type type = 0;
75 
76 	if (oe->__upperdentry) {
77 		type = __OVL_PATH_UPPER;
78 
79 		if (oe->numlower) {
80 			if (S_ISDIR(dentry->d_inode->i_mode))
81 				type |= __OVL_PATH_MERGE;
82 		} else if (!oe->opaque) {
83 			type |= __OVL_PATH_PURE;
84 		}
85 	} else {
86 		if (oe->numlower > 1)
87 			type |= __OVL_PATH_MERGE;
88 	}
89 	return type;
90 }
91 
92 static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
93 {
94 	return lockless_dereference(oe->__upperdentry);
95 }
96 
97 void ovl_path_upper(struct dentry *dentry, struct path *path)
98 {
99 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
100 	struct ovl_entry *oe = dentry->d_fsdata;
101 
102 	path->mnt = ofs->upper_mnt;
103 	path->dentry = ovl_upperdentry_dereference(oe);
104 }
105 
106 enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
107 {
108 	enum ovl_path_type type = ovl_path_type(dentry);
109 
110 	if (!OVL_TYPE_UPPER(type))
111 		ovl_path_lower(dentry, path);
112 	else
113 		ovl_path_upper(dentry, path);
114 
115 	return type;
116 }
117 
118 struct dentry *ovl_dentry_upper(struct dentry *dentry)
119 {
120 	struct ovl_entry *oe = dentry->d_fsdata;
121 
122 	return ovl_upperdentry_dereference(oe);
123 }
124 
125 struct dentry *ovl_dentry_lower(struct dentry *dentry)
126 {
127 	struct ovl_entry *oe = dentry->d_fsdata;
128 
129 	return __ovl_dentry_lower(oe);
130 }
131 
132 struct dentry *ovl_dentry_real(struct dentry *dentry)
133 {
134 	struct ovl_entry *oe = dentry->d_fsdata;
135 	struct dentry *realdentry;
136 
137 	realdentry = ovl_upperdentry_dereference(oe);
138 	if (!realdentry)
139 		realdentry = __ovl_dentry_lower(oe);
140 
141 	return realdentry;
142 }
143 
144 struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
145 {
146 	struct dentry *realdentry;
147 
148 	realdentry = ovl_upperdentry_dereference(oe);
149 	if (realdentry) {
150 		*is_upper = true;
151 	} else {
152 		realdentry = __ovl_dentry_lower(oe);
153 		*is_upper = false;
154 	}
155 	return realdentry;
156 }
157 
158 struct vfsmount *ovl_entry_mnt_real(struct ovl_entry *oe, struct inode *inode,
159 				    bool is_upper)
160 {
161 	if (is_upper) {
162 		struct ovl_fs *ofs = inode->i_sb->s_fs_info;
163 
164 		return ofs->upper_mnt;
165 	} else {
166 		return oe->numlower ? oe->lowerstack[0].mnt : NULL;
167 	}
168 }
169 
170 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
171 {
172 	struct ovl_entry *oe = dentry->d_fsdata;
173 
174 	return oe->cache;
175 }
176 
177 bool ovl_is_default_permissions(struct inode *inode)
178 {
179 	struct ovl_fs *ofs = inode->i_sb->s_fs_info;
180 
181 	return ofs->config.default_permissions;
182 }
183 
184 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
185 {
186 	struct ovl_entry *oe = dentry->d_fsdata;
187 
188 	oe->cache = cache;
189 }
190 
191 void ovl_path_lower(struct dentry *dentry, struct path *path)
192 {
193 	struct ovl_entry *oe = dentry->d_fsdata;
194 
195 	*path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL };
196 }
197 
198 int ovl_want_write(struct dentry *dentry)
199 {
200 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
201 	return mnt_want_write(ofs->upper_mnt);
202 }
203 
204 void ovl_drop_write(struct dentry *dentry)
205 {
206 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
207 	mnt_drop_write(ofs->upper_mnt);
208 }
209 
210 struct dentry *ovl_workdir(struct dentry *dentry)
211 {
212 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
213 	return ofs->workdir;
214 }
215 
216 bool ovl_dentry_is_opaque(struct dentry *dentry)
217 {
218 	struct ovl_entry *oe = dentry->d_fsdata;
219 	return oe->opaque;
220 }
221 
222 void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
223 {
224 	struct ovl_entry *oe = dentry->d_fsdata;
225 	oe->opaque = opaque;
226 }
227 
228 void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
229 {
230 	struct ovl_entry *oe = dentry->d_fsdata;
231 
232 	WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
233 	WARN_ON(oe->__upperdentry);
234 	BUG_ON(!upperdentry->d_inode);
235 	/*
236 	 * Make sure upperdentry is consistent before making it visible to
237 	 * ovl_upperdentry_dereference().
238 	 */
239 	smp_wmb();
240 	oe->__upperdentry = upperdentry;
241 }
242 
243 void ovl_dentry_version_inc(struct dentry *dentry)
244 {
245 	struct ovl_entry *oe = dentry->d_fsdata;
246 
247 	WARN_ON(!inode_is_locked(dentry->d_inode));
248 	oe->version++;
249 }
250 
251 u64 ovl_dentry_version_get(struct dentry *dentry)
252 {
253 	struct ovl_entry *oe = dentry->d_fsdata;
254 
255 	WARN_ON(!inode_is_locked(dentry->d_inode));
256 	return oe->version;
257 }
258 
259 bool ovl_is_whiteout(struct dentry *dentry)
260 {
261 	struct inode *inode = dentry->d_inode;
262 
263 	return inode && IS_WHITEOUT(inode);
264 }
265 
266 static bool ovl_is_opaquedir(struct dentry *dentry)
267 {
268 	int res;
269 	char val;
270 	struct inode *inode = dentry->d_inode;
271 
272 	if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr)
273 		return false;
274 
275 	res = inode->i_op->getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
276 	if (res == 1 && val == 'y')
277 		return true;
278 
279 	return false;
280 }
281 
282 static void ovl_dentry_release(struct dentry *dentry)
283 {
284 	struct ovl_entry *oe = dentry->d_fsdata;
285 
286 	if (oe) {
287 		unsigned int i;
288 
289 		dput(oe->__upperdentry);
290 		for (i = 0; i < oe->numlower; i++)
291 			dput(oe->lowerstack[i].dentry);
292 		kfree_rcu(oe, rcu);
293 	}
294 }
295 
296 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
297 {
298 	struct ovl_entry *oe = dentry->d_fsdata;
299 	unsigned int i;
300 	int ret = 1;
301 
302 	for (i = 0; i < oe->numlower; i++) {
303 		struct dentry *d = oe->lowerstack[i].dentry;
304 
305 		if (d->d_flags & DCACHE_OP_REVALIDATE) {
306 			ret = d->d_op->d_revalidate(d, flags);
307 			if (ret < 0)
308 				return ret;
309 			if (!ret) {
310 				if (!(flags & LOOKUP_RCU))
311 					d_invalidate(d);
312 				return -ESTALE;
313 			}
314 		}
315 	}
316 	return 1;
317 }
318 
319 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
320 {
321 	struct ovl_entry *oe = dentry->d_fsdata;
322 	unsigned int i;
323 	int ret = 1;
324 
325 	for (i = 0; i < oe->numlower; i++) {
326 		struct dentry *d = oe->lowerstack[i].dentry;
327 
328 		if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
329 			ret = d->d_op->d_weak_revalidate(d, flags);
330 			if (ret <= 0)
331 				break;
332 		}
333 	}
334 	return ret;
335 }
336 
337 static const struct dentry_operations ovl_dentry_operations = {
338 	.d_release = ovl_dentry_release,
339 	.d_select_inode = ovl_d_select_inode,
340 };
341 
342 static const struct dentry_operations ovl_reval_dentry_operations = {
343 	.d_release = ovl_dentry_release,
344 	.d_revalidate = ovl_dentry_revalidate,
345 	.d_weak_revalidate = ovl_dentry_weak_revalidate,
346 };
347 
348 static struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
349 {
350 	size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
351 	struct ovl_entry *oe = kzalloc(size, GFP_KERNEL);
352 
353 	if (oe)
354 		oe->numlower = numlower;
355 
356 	return oe;
357 }
358 
359 static bool ovl_dentry_remote(struct dentry *dentry)
360 {
361 	return dentry->d_flags &
362 		(DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
363 }
364 
365 static bool ovl_dentry_weird(struct dentry *dentry)
366 {
367 	return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
368 				  DCACHE_MANAGE_TRANSIT |
369 				  DCACHE_OP_HASH |
370 				  DCACHE_OP_COMPARE);
371 }
372 
373 static inline struct dentry *ovl_lookup_real(struct dentry *dir,
374 					     struct qstr *name)
375 {
376 	struct dentry *dentry;
377 
378 	inode_lock(dir->d_inode);
379 	dentry = lookup_one_len(name->name, dir, name->len);
380 	inode_unlock(dir->d_inode);
381 
382 	if (IS_ERR(dentry)) {
383 		if (PTR_ERR(dentry) == -ENOENT)
384 			dentry = NULL;
385 	} else if (!dentry->d_inode) {
386 		dput(dentry);
387 		dentry = NULL;
388 	} else if (ovl_dentry_weird(dentry)) {
389 		dput(dentry);
390 		/* Don't support traversing automounts and other weirdness */
391 		dentry = ERR_PTR(-EREMOTE);
392 	}
393 	return dentry;
394 }
395 
396 /*
397  * Returns next layer in stack starting from top.
398  * Returns -1 if this is the last layer.
399  */
400 int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
401 {
402 	struct ovl_entry *oe = dentry->d_fsdata;
403 
404 	BUG_ON(idx < 0);
405 	if (idx == 0) {
406 		ovl_path_upper(dentry, path);
407 		if (path->dentry)
408 			return oe->numlower ? 1 : -1;
409 		idx++;
410 	}
411 	BUG_ON(idx > oe->numlower);
412 	*path = oe->lowerstack[idx - 1];
413 
414 	return (idx < oe->numlower) ? idx + 1 : -1;
415 }
416 
417 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
418 			  unsigned int flags)
419 {
420 	struct ovl_entry *oe;
421 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
422 	struct path *stack = NULL;
423 	struct dentry *upperdir, *upperdentry = NULL;
424 	unsigned int ctr = 0;
425 	struct inode *inode = NULL;
426 	bool upperopaque = false;
427 	struct dentry *this, *prev = NULL;
428 	unsigned int i;
429 	int err;
430 
431 	upperdir = ovl_upperdentry_dereference(poe);
432 	if (upperdir) {
433 		this = ovl_lookup_real(upperdir, &dentry->d_name);
434 		err = PTR_ERR(this);
435 		if (IS_ERR(this))
436 			goto out;
437 
438 		if (this) {
439 			if (unlikely(ovl_dentry_remote(this))) {
440 				dput(this);
441 				err = -EREMOTE;
442 				goto out;
443 			}
444 			if (ovl_is_whiteout(this)) {
445 				dput(this);
446 				this = NULL;
447 				upperopaque = true;
448 			} else if (poe->numlower && ovl_is_opaquedir(this)) {
449 				upperopaque = true;
450 			}
451 		}
452 		upperdentry = prev = this;
453 	}
454 
455 	if (!upperopaque && poe->numlower) {
456 		err = -ENOMEM;
457 		stack = kcalloc(poe->numlower, sizeof(struct path), GFP_KERNEL);
458 		if (!stack)
459 			goto out_put_upper;
460 	}
461 
462 	for (i = 0; !upperopaque && i < poe->numlower; i++) {
463 		bool opaque = false;
464 		struct path lowerpath = poe->lowerstack[i];
465 
466 		this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name);
467 		err = PTR_ERR(this);
468 		if (IS_ERR(this)) {
469 			/*
470 			 * If it's positive, then treat ENAMETOOLONG as ENOENT.
471 			 */
472 			if (err == -ENAMETOOLONG && (upperdentry || ctr))
473 				continue;
474 			goto out_put;
475 		}
476 		if (!this)
477 			continue;
478 		if (ovl_is_whiteout(this)) {
479 			dput(this);
480 			break;
481 		}
482 		/*
483 		 * Only makes sense to check opaque dir if this is not the
484 		 * lowermost layer.
485 		 */
486 		if (i < poe->numlower - 1 && ovl_is_opaquedir(this))
487 			opaque = true;
488 
489 		if (prev && (!S_ISDIR(prev->d_inode->i_mode) ||
490 			     !S_ISDIR(this->d_inode->i_mode))) {
491 			/*
492 			 * FIXME: check for upper-opaqueness maybe better done
493 			 * in remove code.
494 			 */
495 			if (prev == upperdentry)
496 				upperopaque = true;
497 			dput(this);
498 			break;
499 		}
500 		/*
501 		 * If this is a non-directory then stop here.
502 		 */
503 		if (!S_ISDIR(this->d_inode->i_mode))
504 			opaque = true;
505 
506 		stack[ctr].dentry = this;
507 		stack[ctr].mnt = lowerpath.mnt;
508 		ctr++;
509 		prev = this;
510 		if (opaque)
511 			break;
512 	}
513 
514 	oe = ovl_alloc_entry(ctr);
515 	err = -ENOMEM;
516 	if (!oe)
517 		goto out_put;
518 
519 	if (upperdentry || ctr) {
520 		struct dentry *realdentry;
521 
522 		realdentry = upperdentry ? upperdentry : stack[0].dentry;
523 
524 		err = -ENOMEM;
525 		inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
526 				      oe);
527 		if (!inode)
528 			goto out_free_oe;
529 		ovl_copyattr(realdentry->d_inode, inode);
530 	}
531 
532 	oe->opaque = upperopaque;
533 	oe->__upperdentry = upperdentry;
534 	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
535 	kfree(stack);
536 	dentry->d_fsdata = oe;
537 	d_add(dentry, inode);
538 
539 	return NULL;
540 
541 out_free_oe:
542 	kfree(oe);
543 out_put:
544 	for (i = 0; i < ctr; i++)
545 		dput(stack[i].dentry);
546 	kfree(stack);
547 out_put_upper:
548 	dput(upperdentry);
549 out:
550 	return ERR_PTR(err);
551 }
552 
/* Open @path with @flags using the credentials of the current task. */
struct file *ovl_path_open(struct path *path, int flags)
{
	const struct cred *cred = current_cred();

	return dentry_open(path, flags, cred);
}
557 
558 static void ovl_put_super(struct super_block *sb)
559 {
560 	struct ovl_fs *ufs = sb->s_fs_info;
561 	unsigned i;
562 
563 	dput(ufs->workdir);
564 	mntput(ufs->upper_mnt);
565 	for (i = 0; i < ufs->numlower; i++)
566 		mntput(ufs->lower_mnt[i]);
567 	kfree(ufs->lower_mnt);
568 
569 	kfree(ufs->config.lowerdir);
570 	kfree(ufs->config.upperdir);
571 	kfree(ufs->config.workdir);
572 	kfree(ufs);
573 }
574 
575 /**
576  * ovl_statfs
577  * @sb: The overlayfs super block
578  * @buf: The struct kstatfs to fill in with stats
579  *
580  * Get the filesystem statistics.  As writes always target the upper layer
581  * filesystem pass the statfs to the upper filesystem (if it exists)
582  */
583 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
584 {
585 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
586 	struct dentry *root_dentry = dentry->d_sb->s_root;
587 	struct path path;
588 	int err;
589 
590 	ovl_path_real(root_dentry, &path);
591 
592 	err = vfs_statfs(&path, buf);
593 	if (!err) {
594 		buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen);
595 		buf->f_type = OVERLAYFS_SUPER_MAGIC;
596 	}
597 
598 	return err;
599 }
600 
601 /**
602  * ovl_show_options
603  *
604  * Prints the mount options for a given superblock.
605  * Returns zero; does not fail.
606  */
607 static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
608 {
609 	struct super_block *sb = dentry->d_sb;
610 	struct ovl_fs *ufs = sb->s_fs_info;
611 
612 	seq_show_option(m, "lowerdir", ufs->config.lowerdir);
613 	if (ufs->config.upperdir) {
614 		seq_show_option(m, "upperdir", ufs->config.upperdir);
615 		seq_show_option(m, "workdir", ufs->config.workdir);
616 	}
617 	if (ufs->config.default_permissions)
618 		seq_puts(m, ",default_permissions");
619 	return 0;
620 }
621 
622 static int ovl_remount(struct super_block *sb, int *flags, char *data)
623 {
624 	struct ovl_fs *ufs = sb->s_fs_info;
625 
626 	if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
627 		return -EROFS;
628 
629 	return 0;
630 }
631 
632 static const struct super_operations ovl_super_operations = {
633 	.put_super	= ovl_put_super,
634 	.statfs		= ovl_statfs,
635 	.show_options	= ovl_show_options,
636 	.remount_fs	= ovl_remount,
637 };
638 
/* Token ids for the mount options listed in ovl_tokens[]. */
enum {
	OPT_LOWERDIR = 0,		/* lowerdir=<dirs> */
	OPT_UPPERDIR,			/* upperdir=<dir> */
	OPT_WORKDIR,			/* workdir=<dir> */
	OPT_DEFAULT_PERMISSIONS,	/* default_permissions */
	OPT_ERR,			/* table terminator / no match */
};
646 
647 static const match_table_t ovl_tokens = {
648 	{OPT_LOWERDIR,			"lowerdir=%s"},
649 	{OPT_UPPERDIR,			"upperdir=%s"},
650 	{OPT_WORKDIR,			"workdir=%s"},
651 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
652 	{OPT_ERR,			NULL}
653 };
654 
/*
 * Split the next option off the comma separated string *@s.
 *
 * A comma preceded by a backslash does not terminate the option; the
 * backslash itself is left in place.  The terminating comma is overwritten
 * with NUL and *@s is advanced past it, or set to NULL when the end of the
 * string was reached.  Returns the start of the option, or NULL when *@s is
 * already NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur;

	if (!start)
		return NULL;

	cur = start;
	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		/* Step over an escaped character; stop on a trailing '\'. */
		if (*cur == '\\' && *++cur == '\0')
			break;
		cur++;
	}

	*s = NULL;
	return start;
}
677 
678 static int ovl_parse_opt(char *opt, struct ovl_config *config)
679 {
680 	char *p;
681 
682 	while ((p = ovl_next_opt(&opt)) != NULL) {
683 		int token;
684 		substring_t args[MAX_OPT_ARGS];
685 
686 		if (!*p)
687 			continue;
688 
689 		token = match_token(p, ovl_tokens, args);
690 		switch (token) {
691 		case OPT_UPPERDIR:
692 			kfree(config->upperdir);
693 			config->upperdir = match_strdup(&args[0]);
694 			if (!config->upperdir)
695 				return -ENOMEM;
696 			break;
697 
698 		case OPT_LOWERDIR:
699 			kfree(config->lowerdir);
700 			config->lowerdir = match_strdup(&args[0]);
701 			if (!config->lowerdir)
702 				return -ENOMEM;
703 			break;
704 
705 		case OPT_WORKDIR:
706 			kfree(config->workdir);
707 			config->workdir = match_strdup(&args[0]);
708 			if (!config->workdir)
709 				return -ENOMEM;
710 			break;
711 
712 		case OPT_DEFAULT_PERMISSIONS:
713 			config->default_permissions = true;
714 			break;
715 
716 		default:
717 			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
718 			return -EINVAL;
719 		}
720 	}
721 
722 	/* Workdir is useless in non-upper mount */
723 	if (!config->upperdir && config->workdir) {
724 		pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
725 			config->workdir);
726 		kfree(config->workdir);
727 		config->workdir = NULL;
728 	}
729 
730 	return 0;
731 }
732 
733 #define OVL_WORKDIR_NAME "work"
734 
735 static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
736 					 struct dentry *dentry)
737 {
738 	struct inode *dir = dentry->d_inode;
739 	struct dentry *work;
740 	int err;
741 	bool retried = false;
742 
743 	err = mnt_want_write(mnt);
744 	if (err)
745 		return ERR_PTR(err);
746 
747 	inode_lock_nested(dir, I_MUTEX_PARENT);
748 retry:
749 	work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
750 			      strlen(OVL_WORKDIR_NAME));
751 
752 	if (!IS_ERR(work)) {
753 		struct kstat stat = {
754 			.mode = S_IFDIR | 0,
755 		};
756 
757 		if (work->d_inode) {
758 			err = -EEXIST;
759 			if (retried)
760 				goto out_dput;
761 
762 			retried = true;
763 			ovl_cleanup(dir, work);
764 			dput(work);
765 			goto retry;
766 		}
767 
768 		err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
769 		if (err)
770 			goto out_dput;
771 	}
772 out_unlock:
773 	inode_unlock(dir);
774 	mnt_drop_write(mnt);
775 
776 	return work;
777 
778 out_dput:
779 	dput(work);
780 	work = ERR_PTR(err);
781 	goto out_unlock;
782 }
783 
/*
 * Remove backslash escapes from @s in place: every backslash is dropped and
 * the character following it is kept literally.  A trailing backslash is
 * simply removed.
 */
static void ovl_unescape(char *s)
{
	char *out = s;

	do {
		if (*s == '\\')
			s++;
		*out++ = *s;
	} while (*s++);
}
796 
797 static int ovl_mount_dir_noesc(const char *name, struct path *path)
798 {
799 	int err = -EINVAL;
800 
801 	if (!*name) {
802 		pr_err("overlayfs: empty lowerdir\n");
803 		goto out;
804 	}
805 	err = kern_path(name, LOOKUP_FOLLOW, path);
806 	if (err) {
807 		pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
808 		goto out;
809 	}
810 	err = -EINVAL;
811 	if (ovl_dentry_weird(path->dentry)) {
812 		pr_err("overlayfs: filesystem on '%s' not supported\n", name);
813 		goto out_put;
814 	}
815 	if (!S_ISDIR(path->dentry->d_inode->i_mode)) {
816 		pr_err("overlayfs: '%s' not a directory\n", name);
817 		goto out_put;
818 	}
819 	return 0;
820 
821 out_put:
822 	path_put(path);
823 out:
824 	return err;
825 }
826 
827 static int ovl_mount_dir(const char *name, struct path *path)
828 {
829 	int err = -ENOMEM;
830 	char *tmp = kstrdup(name, GFP_KERNEL);
831 
832 	if (tmp) {
833 		ovl_unescape(tmp);
834 		err = ovl_mount_dir_noesc(tmp, path);
835 
836 		if (!err)
837 			if (ovl_dentry_remote(path->dentry)) {
838 				pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
839 				       tmp);
840 				path_put(path);
841 				err = -EINVAL;
842 			}
843 		kfree(tmp);
844 	}
845 	return err;
846 }
847 
848 static int ovl_lower_dir(const char *name, struct path *path, long *namelen,
849 			 int *stack_depth, bool *remote)
850 {
851 	int err;
852 	struct kstatfs statfs;
853 
854 	err = ovl_mount_dir_noesc(name, path);
855 	if (err)
856 		goto out;
857 
858 	err = vfs_statfs(path, &statfs);
859 	if (err) {
860 		pr_err("overlayfs: statfs failed on '%s'\n", name);
861 		goto out_put;
862 	}
863 	*namelen = max(*namelen, statfs.f_namelen);
864 	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
865 
866 	if (ovl_dentry_remote(path->dentry))
867 		*remote = true;
868 
869 	return 0;
870 
871 out_put:
872 	path_put(path);
873 out:
874 	return err;
875 }
876 
877 /* Workdir should not be subdir of upperdir and vice versa */
878 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
879 {
880 	bool ok = false;
881 
882 	if (workdir != upperdir) {
883 		ok = (lock_rename(workdir, upperdir) == NULL);
884 		unlock_rename(workdir, upperdir);
885 	}
886 	return ok;
887 }
888 
/*
 * Split a colon separated lowerdir list in place.
 *
 * Every unescaped ':' becomes a NUL terminator and backslash escapes are
 * removed, so that on return @str holds the individual directory names back
 * to back, each NUL terminated.  Returns the number of names.
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int count = 1;
	char *in = str;
	char *out = str;

	for (;;) {
		char c = *in++;

		if (c == ':') {
			*out++ = '\0';
			count++;
			continue;
		}
		/* An escaped character is copied literally. */
		if (c == '\\')
			c = *in++;

		*out++ = c;
		if (c == '\0')
			break;
	}

	return count;
}
908 
909 static int ovl_fill_super(struct super_block *sb, void *data, int silent)
910 {
911 	struct path upperpath = { NULL, NULL };
912 	struct path workpath = { NULL, NULL };
913 	struct dentry *root_dentry;
914 	struct ovl_entry *oe;
915 	struct ovl_fs *ufs;
916 	struct path *stack = NULL;
917 	char *lowertmp;
918 	char *lower;
919 	unsigned int numlower;
920 	unsigned int stacklen = 0;
921 	unsigned int i;
922 	bool remote = false;
923 	int err;
924 
925 	err = -ENOMEM;
926 	ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
927 	if (!ufs)
928 		goto out;
929 
930 	err = ovl_parse_opt((char *) data, &ufs->config);
931 	if (err)
932 		goto out_free_config;
933 
934 	err = -EINVAL;
935 	if (!ufs->config.lowerdir) {
936 		pr_err("overlayfs: missing 'lowerdir'\n");
937 		goto out_free_config;
938 	}
939 
940 	sb->s_stack_depth = 0;
941 	sb->s_maxbytes = MAX_LFS_FILESIZE;
942 	if (ufs->config.upperdir) {
943 		if (!ufs->config.workdir) {
944 			pr_err("overlayfs: missing 'workdir'\n");
945 			goto out_free_config;
946 		}
947 
948 		err = ovl_mount_dir(ufs->config.upperdir, &upperpath);
949 		if (err)
950 			goto out_free_config;
951 
952 		/* Upper fs should not be r/o */
953 		if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) {
954 			pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
955 			err = -EINVAL;
956 			goto out_put_upperpath;
957 		}
958 
959 		err = ovl_mount_dir(ufs->config.workdir, &workpath);
960 		if (err)
961 			goto out_put_upperpath;
962 
963 		err = -EINVAL;
964 		if (upperpath.mnt != workpath.mnt) {
965 			pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
966 			goto out_put_workpath;
967 		}
968 		if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) {
969 			pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
970 			goto out_put_workpath;
971 		}
972 		sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth;
973 	}
974 	err = -ENOMEM;
975 	lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL);
976 	if (!lowertmp)
977 		goto out_put_workpath;
978 
979 	err = -EINVAL;
980 	stacklen = ovl_split_lowerdirs(lowertmp);
981 	if (stacklen > OVL_MAX_STACK) {
982 		pr_err("overlayfs: too many lower directries, limit is %d\n",
983 		       OVL_MAX_STACK);
984 		goto out_free_lowertmp;
985 	} else if (!ufs->config.upperdir && stacklen == 1) {
986 		pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
987 		goto out_free_lowertmp;
988 	}
989 
990 	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
991 	if (!stack)
992 		goto out_free_lowertmp;
993 
994 	lower = lowertmp;
995 	for (numlower = 0; numlower < stacklen; numlower++) {
996 		err = ovl_lower_dir(lower, &stack[numlower],
997 				    &ufs->lower_namelen, &sb->s_stack_depth,
998 				    &remote);
999 		if (err)
1000 			goto out_put_lowerpath;
1001 
1002 		lower = strchr(lower, '\0') + 1;
1003 	}
1004 
1005 	err = -EINVAL;
1006 	sb->s_stack_depth++;
1007 	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1008 		pr_err("overlayfs: maximum fs stacking depth exceeded\n");
1009 		goto out_put_lowerpath;
1010 	}
1011 
1012 	if (ufs->config.upperdir) {
1013 		ufs->upper_mnt = clone_private_mount(&upperpath);
1014 		err = PTR_ERR(ufs->upper_mnt);
1015 		if (IS_ERR(ufs->upper_mnt)) {
1016 			pr_err("overlayfs: failed to clone upperpath\n");
1017 			goto out_put_lowerpath;
1018 		}
1019 
1020 		ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
1021 		err = PTR_ERR(ufs->workdir);
1022 		if (IS_ERR(ufs->workdir)) {
1023 			pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
1024 				ufs->config.workdir, OVL_WORKDIR_NAME, -err);
1025 			sb->s_flags |= MS_RDONLY;
1026 			ufs->workdir = NULL;
1027 		}
1028 	}
1029 
1030 	err = -ENOMEM;
1031 	ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL);
1032 	if (ufs->lower_mnt == NULL)
1033 		goto out_put_workdir;
1034 	for (i = 0; i < numlower; i++) {
1035 		struct vfsmount *mnt = clone_private_mount(&stack[i]);
1036 
1037 		err = PTR_ERR(mnt);
1038 		if (IS_ERR(mnt)) {
1039 			pr_err("overlayfs: failed to clone lowerpath\n");
1040 			goto out_put_lower_mnt;
1041 		}
1042 		/*
1043 		 * Make lower_mnt R/O.  That way fchmod/fchown on lower file
1044 		 * will fail instead of modifying lower fs.
1045 		 */
1046 		mnt->mnt_flags |= MNT_READONLY;
1047 
1048 		ufs->lower_mnt[ufs->numlower] = mnt;
1049 		ufs->numlower++;
1050 	}
1051 
1052 	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
1053 	if (!ufs->upper_mnt)
1054 		sb->s_flags |= MS_RDONLY;
1055 
1056 	if (remote)
1057 		sb->s_d_op = &ovl_reval_dentry_operations;
1058 	else
1059 		sb->s_d_op = &ovl_dentry_operations;
1060 
1061 	err = -ENOMEM;
1062 	oe = ovl_alloc_entry(numlower);
1063 	if (!oe)
1064 		goto out_put_lower_mnt;
1065 
1066 	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, oe));
1067 	if (!root_dentry)
1068 		goto out_free_oe;
1069 
1070 	mntput(upperpath.mnt);
1071 	for (i = 0; i < numlower; i++)
1072 		mntput(stack[i].mnt);
1073 	path_put(&workpath);
1074 	kfree(lowertmp);
1075 
1076 	oe->__upperdentry = upperpath.dentry;
1077 	for (i = 0; i < numlower; i++) {
1078 		oe->lowerstack[i].dentry = stack[i].dentry;
1079 		oe->lowerstack[i].mnt = ufs->lower_mnt[i];
1080 	}
1081 	kfree(stack);
1082 
1083 	root_dentry->d_fsdata = oe;
1084 
1085 	ovl_copyattr(ovl_dentry_real(root_dentry)->d_inode,
1086 		     root_dentry->d_inode);
1087 
1088 	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
1089 	sb->s_op = &ovl_super_operations;
1090 	sb->s_root = root_dentry;
1091 	sb->s_fs_info = ufs;
1092 
1093 	return 0;
1094 
1095 out_free_oe:
1096 	kfree(oe);
1097 out_put_lower_mnt:
1098 	for (i = 0; i < ufs->numlower; i++)
1099 		mntput(ufs->lower_mnt[i]);
1100 	kfree(ufs->lower_mnt);
1101 out_put_workdir:
1102 	dput(ufs->workdir);
1103 	mntput(ufs->upper_mnt);
1104 out_put_lowerpath:
1105 	for (i = 0; i < numlower; i++)
1106 		path_put(&stack[i]);
1107 	kfree(stack);
1108 out_free_lowertmp:
1109 	kfree(lowertmp);
1110 out_put_workpath:
1111 	path_put(&workpath);
1112 out_put_upperpath:
1113 	path_put(&upperpath);
1114 out_free_config:
1115 	kfree(ufs->config.lowerdir);
1116 	kfree(ufs->config.upperdir);
1117 	kfree(ufs->config.workdir);
1118 	kfree(ufs);
1119 out:
1120 	return err;
1121 }
1122 
1123 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
1124 				const char *dev_name, void *raw_data)
1125 {
1126 	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
1127 }
1128 
1129 static struct file_system_type ovl_fs_type = {
1130 	.owner		= THIS_MODULE,
1131 	.name		= "overlay",
1132 	.mount		= ovl_mount,
1133 	.kill_sb	= kill_anon_super,
1134 };
1135 MODULE_ALIAS_FS("overlay");
1136 
1137 static int __init ovl_init(void)
1138 {
1139 	return register_filesystem(&ovl_fs_type);
1140 }
1141 
1142 static void __exit ovl_exit(void)
1143 {
1144 	unregister_filesystem(&ovl_fs_type);
1145 }
1146 
1147 module_init(ovl_init);
1148 module_exit(ovl_exit);
1149