xref: /openbmc/linux/fs/overlayfs/dir.c (revision 6396bb221514d2876fd6dc0aa2a1f240d99b37bb)
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/namei.h>
12 #include <linux/xattr.h>
13 #include <linux/security.h>
14 #include <linux/cred.h>
15 #include <linux/module.h>
16 #include <linux/posix_acl.h>
17 #include <linux/posix_acl_xattr.h>
18 #include <linux/atomic.h>
19 #include <linux/ratelimit.h>
20 #include "overlayfs.h"
21 
22 static unsigned short ovl_redirect_max = 256;
23 module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
24 MODULE_PARM_DESC(ovl_redirect_max,
25 		 "Maximum length of absolute redirect xattr value");
26 
/*
 * Remove @wdentry from the directory inode @wdir: rmdir when it is a
 * directory, unlink otherwise.  A failure is logged and its error code is
 * returned; 0 is returned on success.
 */
int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
	int err;

	/* Hold an extra reference on the dentry across the removal */
	dget(wdentry);
	err = d_is_dir(wdentry) ? ovl_do_rmdir(wdir, wdentry)
				: ovl_do_unlink(wdir, wdentry);
	dput(wdentry);

	if (err)
		pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
		       wdentry, err);

	return err;
}
45 
46 static struct dentry *ovl_lookup_temp(struct dentry *workdir)
47 {
48 	struct dentry *temp;
49 	char name[20];
50 	static atomic_t temp_id = ATOMIC_INIT(0);
51 
52 	/* counter is allowed to wrap, since temp dentries are ephemeral */
53 	snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
54 
55 	temp = lookup_one_len(name, workdir, strlen(name));
56 	if (!IS_ERR(temp) && temp->d_inode) {
57 		pr_err("overlayfs: workdir/%s already exists\n", name);
58 		dput(temp);
59 		temp = ERR_PTR(-EIO);
60 	}
61 
62 	return temp;
63 }
64 
65 /* caller holds i_mutex on workdir */
66 static struct dentry *ovl_whiteout(struct dentry *workdir)
67 {
68 	int err;
69 	struct dentry *whiteout;
70 	struct inode *wdir = workdir->d_inode;
71 
72 	whiteout = ovl_lookup_temp(workdir);
73 	if (IS_ERR(whiteout))
74 		return whiteout;
75 
76 	err = ovl_do_whiteout(wdir, whiteout);
77 	if (err) {
78 		dput(whiteout);
79 		whiteout = ERR_PTR(err);
80 	}
81 
82 	return whiteout;
83 }
84 
85 /* Caller must hold i_mutex on both workdir and dir */
86 int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
87 			     struct dentry *dentry)
88 {
89 	struct inode *wdir = workdir->d_inode;
90 	struct dentry *whiteout;
91 	int err;
92 	int flags = 0;
93 
94 	whiteout = ovl_whiteout(workdir);
95 	err = PTR_ERR(whiteout);
96 	if (IS_ERR(whiteout))
97 		return err;
98 
99 	if (d_is_dir(dentry))
100 		flags = RENAME_EXCHANGE;
101 
102 	err = ovl_do_rename(wdir, whiteout, dir, dentry, flags);
103 	if (err)
104 		goto kill_whiteout;
105 	if (flags)
106 		ovl_cleanup(wdir, dentry);
107 
108 out:
109 	dput(whiteout);
110 	return err;
111 
112 kill_whiteout:
113 	ovl_cleanup(wdir, whiteout);
114 	goto out;
115 }
116 
117 static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry,
118 			  umode_t mode)
119 {
120 	int err;
121 	struct dentry *d, *dentry = *newdentry;
122 
123 	err = ovl_do_mkdir(dir, dentry, mode);
124 	if (err)
125 		return err;
126 
127 	if (likely(!d_unhashed(dentry)))
128 		return 0;
129 
130 	/*
131 	 * vfs_mkdir() may succeed and leave the dentry passed
132 	 * to it unhashed and negative. If that happens, try to
133 	 * lookup a new hashed and positive dentry.
134 	 */
135 	d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
136 			   dentry->d_name.len);
137 	if (IS_ERR(d)) {
138 		pr_warn("overlayfs: failed lookup after mkdir (%pd2, err=%i).\n",
139 			dentry, err);
140 		return PTR_ERR(d);
141 	}
142 	dput(dentry);
143 	*newdentry = d;
144 
145 	return 0;
146 }
147 
148 struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
149 			       struct ovl_cattr *attr)
150 {
151 	int err;
152 
153 	if (IS_ERR(newdentry))
154 		return newdentry;
155 
156 	err = -ESTALE;
157 	if (newdentry->d_inode)
158 		goto out;
159 
160 	if (attr->hardlink) {
161 		err = ovl_do_link(attr->hardlink, dir, newdentry);
162 	} else {
163 		switch (attr->mode & S_IFMT) {
164 		case S_IFREG:
165 			err = ovl_do_create(dir, newdentry, attr->mode);
166 			break;
167 
168 		case S_IFDIR:
169 			/* mkdir is special... */
170 			err =  ovl_mkdir_real(dir, &newdentry, attr->mode);
171 			break;
172 
173 		case S_IFCHR:
174 		case S_IFBLK:
175 		case S_IFIFO:
176 		case S_IFSOCK:
177 			err = ovl_do_mknod(dir, newdentry, attr->mode,
178 					   attr->rdev);
179 			break;
180 
181 		case S_IFLNK:
182 			err = ovl_do_symlink(dir, newdentry, attr->link);
183 			break;
184 
185 		default:
186 			err = -EPERM;
187 		}
188 	}
189 	if (!err && WARN_ON(!newdentry->d_inode)) {
190 		/*
191 		 * Not quite sure if non-instantiated dentry is legal or not.
192 		 * VFS doesn't seem to care so check and warn here.
193 		 */
194 		err = -EIO;
195 	}
196 out:
197 	if (err) {
198 		dput(newdentry);
199 		return ERR_PTR(err);
200 	}
201 	return newdentry;
202 }
203 
/*
 * Create the object described by @attr under a temporary name in @workdir.
 * Returns the new dentry or an ERR_PTR(); a failed temp-name lookup is
 * passed straight through by ovl_create_real().
 */
struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr)
{
	struct dentry *temp = ovl_lookup_temp(workdir);

	return ovl_create_real(d_inode(workdir), temp, attr);
}
209 
210 static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
211 			       int xerr)
212 {
213 	int err;
214 
215 	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
216 	if (!err)
217 		ovl_dentry_set_opaque(dentry);
218 
219 	return err;
220 }
221 
222 static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
223 {
224 	/*
225 	 * Fail with -EIO when trying to create opaque dir and upper doesn't
226 	 * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
227 	 * return a specific error for noxattr case.
228 	 */
229 	return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
230 }
231 
232 /*
233  * Common operations required to be done after creation of file on upper.
234  * If @hardlink is false, then @inode is a pre-allocated inode, we may or
235  * may not use to instantiate the new dentry.
236  */
237 static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
238 			   struct dentry *newdentry, bool hardlink)
239 {
240 	struct ovl_inode_params oip = {
241 		.upperdentry = newdentry,
242 		.newinode = inode,
243 	};
244 
245 	ovl_dentry_version_inc(dentry->d_parent, false);
246 	ovl_dentry_set_upper_alias(dentry);
247 	if (!hardlink) {
248 		/*
249 		 * ovl_obtain_alias() can be called after ovl_create_real()
250 		 * and before we get here, so we may get an inode from cache
251 		 * with the same real upperdentry that is not the inode we
252 		 * pre-allocated.  In this case we will use the cached inode
253 		 * to instantiate the new dentry.
254 		 *
255 		 * XXX: if we ever use ovl_obtain_alias() to decode directory
256 		 * file handles, need to use ovl_get_inode_locked() and
257 		 * d_instantiate_new() here to prevent from creating two
258 		 * hashed directory inode aliases.
259 		 */
260 		inode = ovl_get_inode(dentry->d_sb, &oip);
261 		if (WARN_ON(IS_ERR(inode)))
262 			return PTR_ERR(inode);
263 	} else {
264 		WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
265 		dput(newdentry);
266 		inc_nlink(inode);
267 	}
268 
269 	d_instantiate(dentry, inode);
270 	if (inode != oip.newinode) {
271 		pr_warn_ratelimited("overlayfs: newly created inode found in cache (%pd2)\n",
272 				    dentry);
273 	}
274 
275 	/* Force lookup of new upper hardlink to find its lower */
276 	if (hardlink)
277 		d_drop(dentry);
278 
279 	return 0;
280 }
281 
282 static bool ovl_type_merge(struct dentry *dentry)
283 {
284 	return OVL_TYPE_MERGE(ovl_path_type(dentry));
285 }
286 
287 static bool ovl_type_origin(struct dentry *dentry)
288 {
289 	return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
290 }
291 
292 static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
293 			    struct ovl_cattr *attr)
294 {
295 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
296 	struct inode *udir = upperdir->d_inode;
297 	struct dentry *newdentry;
298 	int err;
299 
300 	if (!attr->hardlink && !IS_POSIXACL(udir))
301 		attr->mode &= ~current_umask();
302 
303 	inode_lock_nested(udir, I_MUTEX_PARENT);
304 	newdentry = ovl_create_real(udir,
305 				    lookup_one_len(dentry->d_name.name,
306 						   upperdir,
307 						   dentry->d_name.len),
308 				    attr);
309 	err = PTR_ERR(newdentry);
310 	if (IS_ERR(newdentry))
311 		goto out_unlock;
312 
313 	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
314 		/* Setting opaque here is just an optimization, allow to fail */
315 		ovl_set_opaque(dentry, newdentry);
316 	}
317 
318 	err = ovl_instantiate(dentry, inode, newdentry, !!attr->hardlink);
319 	if (err)
320 		goto out_cleanup;
321 out_unlock:
322 	inode_unlock(udir);
323 	return err;
324 
325 out_cleanup:
326 	ovl_cleanup(udir, newdentry);
327 	dput(newdentry);
328 	goto out_unlock;
329 }
330 
331 static struct dentry *ovl_clear_empty(struct dentry *dentry,
332 				      struct list_head *list)
333 {
334 	struct dentry *workdir = ovl_workdir(dentry);
335 	struct inode *wdir = workdir->d_inode;
336 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
337 	struct inode *udir = upperdir->d_inode;
338 	struct path upperpath;
339 	struct dentry *upper;
340 	struct dentry *opaquedir;
341 	struct kstat stat;
342 	int err;
343 
344 	if (WARN_ON(!workdir))
345 		return ERR_PTR(-EROFS);
346 
347 	err = ovl_lock_rename_workdir(workdir, upperdir);
348 	if (err)
349 		goto out;
350 
351 	ovl_path_upper(dentry, &upperpath);
352 	err = vfs_getattr(&upperpath, &stat,
353 			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
354 	if (err)
355 		goto out_unlock;
356 
357 	err = -ESTALE;
358 	if (!S_ISDIR(stat.mode))
359 		goto out_unlock;
360 	upper = upperpath.dentry;
361 	if (upper->d_parent->d_inode != udir)
362 		goto out_unlock;
363 
364 	opaquedir = ovl_create_temp(workdir, OVL_CATTR(stat.mode));
365 	err = PTR_ERR(opaquedir);
366 	if (IS_ERR(opaquedir))
367 		goto out_unlock;
368 
369 	err = ovl_copy_xattr(upper, opaquedir);
370 	if (err)
371 		goto out_cleanup;
372 
373 	err = ovl_set_opaque(dentry, opaquedir);
374 	if (err)
375 		goto out_cleanup;
376 
377 	inode_lock(opaquedir->d_inode);
378 	err = ovl_set_attr(opaquedir, &stat);
379 	inode_unlock(opaquedir->d_inode);
380 	if (err)
381 		goto out_cleanup;
382 
383 	err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
384 	if (err)
385 		goto out_cleanup;
386 
387 	ovl_cleanup_whiteouts(upper, list);
388 	ovl_cleanup(wdir, upper);
389 	unlock_rename(workdir, upperdir);
390 
391 	/* dentry's upper doesn't match now, get rid of it */
392 	d_drop(dentry);
393 
394 	return opaquedir;
395 
396 out_cleanup:
397 	ovl_cleanup(wdir, opaquedir);
398 	dput(opaquedir);
399 out_unlock:
400 	unlock_rename(workdir, upperdir);
401 out:
402 	return ERR_PTR(err);
403 }
404 
405 static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
406 			     const struct posix_acl *acl)
407 {
408 	void *buffer;
409 	size_t size;
410 	int err;
411 
412 	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
413 		return 0;
414 
415 	size = posix_acl_to_xattr(NULL, acl, NULL, 0);
416 	buffer = kmalloc(size, GFP_KERNEL);
417 	if (!buffer)
418 		return -ENOMEM;
419 
420 	size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
421 	err = size;
422 	if (err < 0)
423 		goto out_free;
424 
425 	err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
426 out_free:
427 	kfree(buffer);
428 	return err;
429 }
430 
431 static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
432 				    struct ovl_cattr *cattr)
433 {
434 	struct dentry *workdir = ovl_workdir(dentry);
435 	struct inode *wdir = workdir->d_inode;
436 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
437 	struct inode *udir = upperdir->d_inode;
438 	struct dentry *upper;
439 	struct dentry *newdentry;
440 	int err;
441 	struct posix_acl *acl, *default_acl;
442 	bool hardlink = !!cattr->hardlink;
443 
444 	if (WARN_ON(!workdir))
445 		return -EROFS;
446 
447 	if (!hardlink) {
448 		err = posix_acl_create(dentry->d_parent->d_inode,
449 				       &cattr->mode, &default_acl, &acl);
450 		if (err)
451 			return err;
452 	}
453 
454 	err = ovl_lock_rename_workdir(workdir, upperdir);
455 	if (err)
456 		goto out;
457 
458 	upper = lookup_one_len(dentry->d_name.name, upperdir,
459 			       dentry->d_name.len);
460 	err = PTR_ERR(upper);
461 	if (IS_ERR(upper))
462 		goto out_unlock;
463 
464 	newdentry = ovl_create_temp(workdir, cattr);
465 	err = PTR_ERR(newdentry);
466 	if (IS_ERR(newdentry))
467 		goto out_dput;
468 
469 	/*
470 	 * mode could have been mutilated due to umask (e.g. sgid directory)
471 	 */
472 	if (!hardlink &&
473 	    !S_ISLNK(cattr->mode) &&
474 	    newdentry->d_inode->i_mode != cattr->mode) {
475 		struct iattr attr = {
476 			.ia_valid = ATTR_MODE,
477 			.ia_mode = cattr->mode,
478 		};
479 		inode_lock(newdentry->d_inode);
480 		err = notify_change(newdentry, &attr, NULL);
481 		inode_unlock(newdentry->d_inode);
482 		if (err)
483 			goto out_cleanup;
484 	}
485 	if (!hardlink) {
486 		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
487 					acl);
488 		if (err)
489 			goto out_cleanup;
490 
491 		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
492 					default_acl);
493 		if (err)
494 			goto out_cleanup;
495 	}
496 
497 	if (!hardlink && S_ISDIR(cattr->mode)) {
498 		err = ovl_set_opaque(dentry, newdentry);
499 		if (err)
500 			goto out_cleanup;
501 
502 		err = ovl_do_rename(wdir, newdentry, udir, upper,
503 				    RENAME_EXCHANGE);
504 		if (err)
505 			goto out_cleanup;
506 
507 		ovl_cleanup(wdir, upper);
508 	} else {
509 		err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
510 		if (err)
511 			goto out_cleanup;
512 	}
513 	err = ovl_instantiate(dentry, inode, newdentry, hardlink);
514 	if (err)
515 		goto out_cleanup;
516 out_dput:
517 	dput(upper);
518 out_unlock:
519 	unlock_rename(workdir, upperdir);
520 out:
521 	if (!hardlink) {
522 		posix_acl_release(acl);
523 		posix_acl_release(default_acl);
524 	}
525 	return err;
526 
527 out_cleanup:
528 	ovl_cleanup(wdir, newdentry);
529 	dput(newdentry);
530 	goto out_dput;
531 }
532 
533 static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
534 			      struct ovl_cattr *attr, bool origin)
535 {
536 	int err;
537 	const struct cred *old_cred;
538 	struct cred *override_cred;
539 	struct dentry *parent = dentry->d_parent;
540 
541 	err = ovl_copy_up(parent);
542 	if (err)
543 		return err;
544 
545 	old_cred = ovl_override_creds(dentry->d_sb);
546 
547 	/*
548 	 * When linking a file with copy up origin into a new parent, mark the
549 	 * new parent dir "impure".
550 	 */
551 	if (origin) {
552 		err = ovl_set_impure(parent, ovl_dentry_upper(parent));
553 		if (err)
554 			goto out_revert_creds;
555 	}
556 
557 	err = -ENOMEM;
558 	override_cred = prepare_creds();
559 	if (override_cred) {
560 		override_cred->fsuid = inode->i_uid;
561 		override_cred->fsgid = inode->i_gid;
562 		if (!attr->hardlink) {
563 			err = security_dentry_create_files_as(dentry,
564 					attr->mode, &dentry->d_name, old_cred,
565 					override_cred);
566 			if (err) {
567 				put_cred(override_cred);
568 				goto out_revert_creds;
569 			}
570 		}
571 		put_cred(override_creds(override_cred));
572 		put_cred(override_cred);
573 
574 		if (!ovl_dentry_is_whiteout(dentry))
575 			err = ovl_create_upper(dentry, inode, attr);
576 		else
577 			err = ovl_create_over_whiteout(dentry, inode, attr);
578 	}
579 out_revert_creds:
580 	revert_creds(old_cred);
581 	return err;
582 }
583 
584 static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
585 			     const char *link)
586 {
587 	int err;
588 	struct inode *inode;
589 	struct ovl_cattr attr = {
590 		.rdev = rdev,
591 		.link = link,
592 	};
593 
594 	err = ovl_want_write(dentry);
595 	if (err)
596 		goto out;
597 
598 	/* Preallocate inode to be used by ovl_get_inode() */
599 	err = -ENOMEM;
600 	inode = ovl_new_inode(dentry->d_sb, mode, rdev);
601 	if (!inode)
602 		goto out_drop_write;
603 
604 	inode_init_owner(inode, dentry->d_parent->d_inode, mode);
605 	attr.mode = inode->i_mode;
606 
607 	err = ovl_create_or_link(dentry, inode, &attr, false);
608 	/* Did we end up using the preallocated inode? */
609 	if (inode != d_inode(dentry))
610 		iput(inode);
611 
612 out_drop_write:
613 	ovl_drop_write(dentry);
614 out:
615 	return err;
616 }
617 
618 static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
619 		      bool excl)
620 {
621 	return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
622 }
623 
624 static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
625 {
626 	return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
627 }
628 
629 static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
630 		     dev_t rdev)
631 {
632 	/* Don't allow creation of "whiteout" on overlay */
633 	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
634 		return -EPERM;
635 
636 	return ovl_create_object(dentry, mode, rdev, NULL);
637 }
638 
639 static int ovl_symlink(struct inode *dir, struct dentry *dentry,
640 		       const char *link)
641 {
642 	return ovl_create_object(dentry, S_IFLNK, 0, link);
643 }
644 
645 static int ovl_link(struct dentry *old, struct inode *newdir,
646 		    struct dentry *new)
647 {
648 	int err;
649 	bool locked = false;
650 	struct inode *inode;
651 
652 	err = ovl_want_write(old);
653 	if (err)
654 		goto out;
655 
656 	err = ovl_copy_up(old);
657 	if (err)
658 		goto out_drop_write;
659 
660 	err = ovl_nlink_start(old, &locked);
661 	if (err)
662 		goto out_drop_write;
663 
664 	inode = d_inode(old);
665 	ihold(inode);
666 
667 	err = ovl_create_or_link(new, inode,
668 			&(struct ovl_cattr) {.hardlink = ovl_dentry_upper(old)},
669 			ovl_type_origin(old));
670 	if (err)
671 		iput(inode);
672 
673 	ovl_nlink_end(old, locked);
674 out_drop_write:
675 	ovl_drop_write(old);
676 out:
677 	return err;
678 }
679 
680 static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
681 {
682 	return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
683 }
684 
685 static int ovl_remove_and_whiteout(struct dentry *dentry,
686 				   struct list_head *list)
687 {
688 	struct dentry *workdir = ovl_workdir(dentry);
689 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
690 	struct dentry *upper;
691 	struct dentry *opaquedir = NULL;
692 	int err;
693 
694 	if (WARN_ON(!workdir))
695 		return -EROFS;
696 
697 	if (!list_empty(list)) {
698 		opaquedir = ovl_clear_empty(dentry, list);
699 		err = PTR_ERR(opaquedir);
700 		if (IS_ERR(opaquedir))
701 			goto out;
702 	}
703 
704 	err = ovl_lock_rename_workdir(workdir, upperdir);
705 	if (err)
706 		goto out_dput;
707 
708 	upper = lookup_one_len(dentry->d_name.name, upperdir,
709 			       dentry->d_name.len);
710 	err = PTR_ERR(upper);
711 	if (IS_ERR(upper))
712 		goto out_unlock;
713 
714 	err = -ESTALE;
715 	if ((opaquedir && upper != opaquedir) ||
716 	    (!opaquedir && ovl_dentry_upper(dentry) &&
717 	     !ovl_matches_upper(dentry, upper))) {
718 		goto out_dput_upper;
719 	}
720 
721 	err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
722 	if (err)
723 		goto out_d_drop;
724 
725 	ovl_dentry_version_inc(dentry->d_parent, true);
726 out_d_drop:
727 	d_drop(dentry);
728 out_dput_upper:
729 	dput(upper);
730 out_unlock:
731 	unlock_rename(workdir, upperdir);
732 out_dput:
733 	dput(opaquedir);
734 out:
735 	return err;
736 }
737 
738 static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
739 			    struct list_head *list)
740 {
741 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
742 	struct inode *dir = upperdir->d_inode;
743 	struct dentry *upper;
744 	struct dentry *opaquedir = NULL;
745 	int err;
746 
747 	if (!list_empty(list)) {
748 		opaquedir = ovl_clear_empty(dentry, list);
749 		err = PTR_ERR(opaquedir);
750 		if (IS_ERR(opaquedir))
751 			goto out;
752 	}
753 
754 	inode_lock_nested(dir, I_MUTEX_PARENT);
755 	upper = lookup_one_len(dentry->d_name.name, upperdir,
756 			       dentry->d_name.len);
757 	err = PTR_ERR(upper);
758 	if (IS_ERR(upper))
759 		goto out_unlock;
760 
761 	err = -ESTALE;
762 	if ((opaquedir && upper != opaquedir) ||
763 	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
764 		goto out_dput_upper;
765 
766 	if (is_dir)
767 		err = vfs_rmdir(dir, upper);
768 	else
769 		err = vfs_unlink(dir, upper, NULL);
770 	ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry));
771 
772 	/*
773 	 * Keeping this dentry hashed would mean having to release
774 	 * upperpath/lowerpath, which could only be done if we are the
775 	 * sole user of this dentry.  Too tricky...  Just unhash for
776 	 * now.
777 	 */
778 	if (!err)
779 		d_drop(dentry);
780 out_dput_upper:
781 	dput(upper);
782 out_unlock:
783 	inode_unlock(dir);
784 	dput(opaquedir);
785 out:
786 	return err;
787 }
788 
789 static bool ovl_pure_upper(struct dentry *dentry)
790 {
791 	return !ovl_dentry_lower(dentry) &&
792 	       !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
793 }
794 
795 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
796 {
797 	int err;
798 	bool locked = false;
799 	const struct cred *old_cred;
800 	bool lower_positive = ovl_lower_positive(dentry);
801 	LIST_HEAD(list);
802 
803 	/* No need to clean pure upper removed by vfs_rmdir() */
804 	if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) {
805 		err = ovl_check_empty_dir(dentry, &list);
806 		if (err)
807 			goto out;
808 	}
809 
810 	err = ovl_want_write(dentry);
811 	if (err)
812 		goto out;
813 
814 	err = ovl_copy_up(dentry->d_parent);
815 	if (err)
816 		goto out_drop_write;
817 
818 	err = ovl_nlink_start(dentry, &locked);
819 	if (err)
820 		goto out_drop_write;
821 
822 	old_cred = ovl_override_creds(dentry->d_sb);
823 	if (!lower_positive)
824 		err = ovl_remove_upper(dentry, is_dir, &list);
825 	else
826 		err = ovl_remove_and_whiteout(dentry, &list);
827 	revert_creds(old_cred);
828 	if (!err) {
829 		if (is_dir)
830 			clear_nlink(dentry->d_inode);
831 		else
832 			drop_nlink(dentry->d_inode);
833 	}
834 	ovl_nlink_end(dentry, locked);
835 out_drop_write:
836 	ovl_drop_write(dentry);
837 out:
838 	ovl_cache_free(&list);
839 	return err;
840 }
841 
842 static int ovl_unlink(struct inode *dir, struct dentry *dentry)
843 {
844 	return ovl_do_remove(dentry, false);
845 }
846 
847 static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
848 {
849 	return ovl_do_remove(dentry, true);
850 }
851 
852 static bool ovl_type_merge_or_lower(struct dentry *dentry)
853 {
854 	enum ovl_path_type type = ovl_path_type(dentry);
855 
856 	return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
857 }
858 
859 static bool ovl_can_move(struct dentry *dentry)
860 {
861 	return ovl_redirect_dir(dentry->d_sb) ||
862 		!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
863 }
864 
865 static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
866 {
867 	char *buf, *ret;
868 	struct dentry *d, *tmp;
869 	int buflen = ovl_redirect_max + 1;
870 
871 	if (samedir) {
872 		ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
873 			       GFP_KERNEL);
874 		goto out;
875 	}
876 
877 	buf = ret = kmalloc(buflen, GFP_KERNEL);
878 	if (!buf)
879 		goto out;
880 
881 	buflen--;
882 	buf[buflen] = '\0';
883 	for (d = dget(dentry); !IS_ROOT(d);) {
884 		const char *name;
885 		int thislen;
886 
887 		spin_lock(&d->d_lock);
888 		name = ovl_dentry_get_redirect(d);
889 		if (name) {
890 			thislen = strlen(name);
891 		} else {
892 			name = d->d_name.name;
893 			thislen = d->d_name.len;
894 		}
895 
896 		/* If path is too long, fall back to userspace move */
897 		if (thislen + (name[0] != '/') > buflen) {
898 			ret = ERR_PTR(-EXDEV);
899 			spin_unlock(&d->d_lock);
900 			goto out_put;
901 		}
902 
903 		buflen -= thislen;
904 		memcpy(&buf[buflen], name, thislen);
905 		tmp = dget_dlock(d->d_parent);
906 		spin_unlock(&d->d_lock);
907 
908 		dput(d);
909 		d = tmp;
910 
911 		/* Absolute redirect: finished */
912 		if (buf[buflen] == '/')
913 			break;
914 		buflen--;
915 		buf[buflen] = '/';
916 	}
917 	ret = kstrdup(&buf[buflen], GFP_KERNEL);
918 out_put:
919 	dput(d);
920 	kfree(buf);
921 out:
922 	return ret ? ret : ERR_PTR(-ENOMEM);
923 }
924 
925 static int ovl_set_redirect(struct dentry *dentry, bool samedir)
926 {
927 	int err;
928 	const char *redirect = ovl_dentry_get_redirect(dentry);
929 
930 	if (redirect && (samedir || redirect[0] == '/'))
931 		return 0;
932 
933 	redirect = ovl_get_redirect(dentry, samedir);
934 	if (IS_ERR(redirect))
935 		return PTR_ERR(redirect);
936 
937 	err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
938 				 OVL_XATTR_REDIRECT,
939 				 redirect, strlen(redirect), -EXDEV);
940 	if (!err) {
941 		spin_lock(&dentry->d_lock);
942 		ovl_dentry_set_redirect(dentry, redirect);
943 		spin_unlock(&dentry->d_lock);
944 	} else {
945 		kfree(redirect);
946 		pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
947 				    err);
948 		/* Fall back to userspace copy-up */
949 		err = -EXDEV;
950 	}
951 	return err;
952 }
953 
954 static int ovl_rename(struct inode *olddir, struct dentry *old,
955 		      struct inode *newdir, struct dentry *new,
956 		      unsigned int flags)
957 {
958 	int err;
959 	bool locked = false;
960 	struct dentry *old_upperdir;
961 	struct dentry *new_upperdir;
962 	struct dentry *olddentry;
963 	struct dentry *newdentry;
964 	struct dentry *trap;
965 	bool old_opaque;
966 	bool new_opaque;
967 	bool cleanup_whiteout = false;
968 	bool overwrite = !(flags & RENAME_EXCHANGE);
969 	bool is_dir = d_is_dir(old);
970 	bool new_is_dir = d_is_dir(new);
971 	bool samedir = olddir == newdir;
972 	struct dentry *opaquedir = NULL;
973 	const struct cred *old_cred = NULL;
974 	LIST_HEAD(list);
975 
976 	err = -EINVAL;
977 	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
978 		goto out;
979 
980 	flags &= ~RENAME_NOREPLACE;
981 
982 	/* Don't copy up directory trees */
983 	err = -EXDEV;
984 	if (!ovl_can_move(old))
985 		goto out;
986 	if (!overwrite && !ovl_can_move(new))
987 		goto out;
988 
989 	if (overwrite && new_is_dir && !ovl_pure_upper(new)) {
990 		err = ovl_check_empty_dir(new, &list);
991 		if (err)
992 			goto out;
993 	}
994 
995 	if (overwrite) {
996 		if (ovl_lower_positive(old)) {
997 			if (!ovl_dentry_is_whiteout(new)) {
998 				/* Whiteout source */
999 				flags |= RENAME_WHITEOUT;
1000 			} else {
1001 				/* Switch whiteouts */
1002 				flags |= RENAME_EXCHANGE;
1003 			}
1004 		} else if (is_dir && ovl_dentry_is_whiteout(new)) {
1005 			flags |= RENAME_EXCHANGE;
1006 			cleanup_whiteout = true;
1007 		}
1008 	}
1009 
1010 	err = ovl_want_write(old);
1011 	if (err)
1012 		goto out;
1013 
1014 	err = ovl_copy_up(old);
1015 	if (err)
1016 		goto out_drop_write;
1017 
1018 	err = ovl_copy_up(new->d_parent);
1019 	if (err)
1020 		goto out_drop_write;
1021 	if (!overwrite) {
1022 		err = ovl_copy_up(new);
1023 		if (err)
1024 			goto out_drop_write;
1025 	} else {
1026 		err = ovl_nlink_start(new, &locked);
1027 		if (err)
1028 			goto out_drop_write;
1029 	}
1030 
1031 	old_cred = ovl_override_creds(old->d_sb);
1032 
1033 	if (!list_empty(&list)) {
1034 		opaquedir = ovl_clear_empty(new, &list);
1035 		err = PTR_ERR(opaquedir);
1036 		if (IS_ERR(opaquedir)) {
1037 			opaquedir = NULL;
1038 			goto out_revert_creds;
1039 		}
1040 	}
1041 
1042 	old_upperdir = ovl_dentry_upper(old->d_parent);
1043 	new_upperdir = ovl_dentry_upper(new->d_parent);
1044 
1045 	if (!samedir) {
1046 		/*
1047 		 * When moving a merge dir or non-dir with copy up origin into
1048 		 * a new parent, we are marking the new parent dir "impure".
1049 		 * When ovl_iterate() iterates an "impure" upper dir, it will
1050 		 * lookup the origin inodes of the entries to fill d_ino.
1051 		 */
1052 		if (ovl_type_origin(old)) {
1053 			err = ovl_set_impure(new->d_parent, new_upperdir);
1054 			if (err)
1055 				goto out_revert_creds;
1056 		}
1057 		if (!overwrite && ovl_type_origin(new)) {
1058 			err = ovl_set_impure(old->d_parent, old_upperdir);
1059 			if (err)
1060 				goto out_revert_creds;
1061 		}
1062 	}
1063 
1064 	trap = lock_rename(new_upperdir, old_upperdir);
1065 
1066 	olddentry = lookup_one_len(old->d_name.name, old_upperdir,
1067 				   old->d_name.len);
1068 	err = PTR_ERR(olddentry);
1069 	if (IS_ERR(olddentry))
1070 		goto out_unlock;
1071 
1072 	err = -ESTALE;
1073 	if (!ovl_matches_upper(old, olddentry))
1074 		goto out_dput_old;
1075 
1076 	newdentry = lookup_one_len(new->d_name.name, new_upperdir,
1077 				   new->d_name.len);
1078 	err = PTR_ERR(newdentry);
1079 	if (IS_ERR(newdentry))
1080 		goto out_dput_old;
1081 
1082 	old_opaque = ovl_dentry_is_opaque(old);
1083 	new_opaque = ovl_dentry_is_opaque(new);
1084 
1085 	err = -ESTALE;
1086 	if (d_inode(new) && ovl_dentry_upper(new)) {
1087 		if (opaquedir) {
1088 			if (newdentry != opaquedir)
1089 				goto out_dput;
1090 		} else {
1091 			if (!ovl_matches_upper(new, newdentry))
1092 				goto out_dput;
1093 		}
1094 	} else {
1095 		if (!d_is_negative(newdentry) &&
1096 		    (!new_opaque || !ovl_is_whiteout(newdentry)))
1097 			goto out_dput;
1098 	}
1099 
1100 	if (olddentry == trap)
1101 		goto out_dput;
1102 	if (newdentry == trap)
1103 		goto out_dput;
1104 
1105 	if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
1106 		goto out_dput;
1107 
1108 	err = 0;
1109 	if (is_dir) {
1110 		if (ovl_type_merge_or_lower(old))
1111 			err = ovl_set_redirect(old, samedir);
1112 		else if (!old_opaque && ovl_type_merge(new->d_parent))
1113 			err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
1114 		if (err)
1115 			goto out_dput;
1116 	}
1117 	if (!overwrite && new_is_dir) {
1118 		if (ovl_type_merge_or_lower(new))
1119 			err = ovl_set_redirect(new, samedir);
1120 		else if (!new_opaque && ovl_type_merge(old->d_parent))
1121 			err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
1122 		if (err)
1123 			goto out_dput;
1124 	}
1125 
1126 	err = ovl_do_rename(old_upperdir->d_inode, olddentry,
1127 			    new_upperdir->d_inode, newdentry, flags);
1128 	if (err)
1129 		goto out_dput;
1130 
1131 	if (cleanup_whiteout)
1132 		ovl_cleanup(old_upperdir->d_inode, newdentry);
1133 
1134 	if (overwrite && d_inode(new)) {
1135 		if (new_is_dir)
1136 			clear_nlink(d_inode(new));
1137 		else
1138 			drop_nlink(d_inode(new));
1139 	}
1140 
1141 	ovl_dentry_version_inc(old->d_parent, ovl_type_origin(old) ||
1142 			       (!overwrite && ovl_type_origin(new)));
1143 	ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old) ||
1144 			       (d_inode(new) && ovl_type_origin(new)));
1145 
1146 out_dput:
1147 	dput(newdentry);
1148 out_dput_old:
1149 	dput(olddentry);
1150 out_unlock:
1151 	unlock_rename(new_upperdir, old_upperdir);
1152 out_revert_creds:
1153 	revert_creds(old_cred);
1154 	ovl_nlink_end(new, locked);
1155 out_drop_write:
1156 	ovl_drop_write(old);
1157 out:
1158 	dput(opaquedir);
1159 	ovl_cache_free(&list);
1160 	return err;
1161 }
1162 
1163 const struct inode_operations ovl_dir_inode_operations = {
1164 	.lookup		= ovl_lookup,
1165 	.mkdir		= ovl_mkdir,
1166 	.symlink	= ovl_symlink,
1167 	.unlink		= ovl_unlink,
1168 	.rmdir		= ovl_rmdir,
1169 	.rename		= ovl_rename,
1170 	.link		= ovl_link,
1171 	.setattr	= ovl_setattr,
1172 	.create		= ovl_create,
1173 	.mknod		= ovl_mknod,
1174 	.permission	= ovl_permission,
1175 	.getattr	= ovl_getattr,
1176 	.listxattr	= ovl_listxattr,
1177 	.get_acl	= ovl_get_acl,
1178 	.update_time	= ovl_update_time,
1179 };
1180