xref: /openbmc/linux/fs/overlayfs/dir.c (revision 0edff03d)
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/namei.h>
12 #include <linux/xattr.h>
13 #include <linux/security.h>
14 #include <linux/cred.h>
15 #include <linux/module.h>
16 #include <linux/posix_acl.h>
17 #include <linux/posix_acl_xattr.h>
18 #include <linux/atomic.h>
19 #include <linux/ratelimit.h>
20 #include "overlayfs.h"
21 
22 static unsigned short ovl_redirect_max = 256;
23 module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
24 MODULE_PARM_DESC(ovl_redirect_max,
25 		 "Maximum length of absolute redirect xattr value");
26 
/*
 * Remove @wdentry from the directory inode @wdir: rmdir for a directory,
 * unlink for anything else.  An extra reference on @wdentry is held across
 * the removal.  A failure is logged and its error code is returned.
 */
int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
	int ret;

	dget(wdentry);
	ret = d_is_dir(wdentry) ? ovl_do_rmdir(wdir, wdentry)
				: ovl_do_unlink(wdir, wdentry);
	dput(wdentry);

	if (!ret)
		return 0;

	pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", wdentry, ret);
	return ret;
}
45 
46 struct dentry *ovl_lookup_temp(struct dentry *workdir)
47 {
48 	struct dentry *temp;
49 	char name[20];
50 	static atomic_t temp_id = ATOMIC_INIT(0);
51 
52 	/* counter is allowed to wrap, since temp dentries are ephemeral */
53 	snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
54 
55 	temp = lookup_one_len(name, workdir, strlen(name));
56 	if (!IS_ERR(temp) && temp->d_inode) {
57 		pr_err("overlayfs: workdir/%s already exists\n", name);
58 		dput(temp);
59 		temp = ERR_PTR(-EIO);
60 	}
61 
62 	return temp;
63 }
64 
65 /* caller holds i_mutex on workdir */
66 static struct dentry *ovl_whiteout(struct dentry *workdir,
67 				   struct dentry *dentry)
68 {
69 	int err;
70 	struct dentry *whiteout;
71 	struct inode *wdir = workdir->d_inode;
72 
73 	whiteout = ovl_lookup_temp(workdir);
74 	if (IS_ERR(whiteout))
75 		return whiteout;
76 
77 	err = ovl_do_whiteout(wdir, whiteout);
78 	if (err) {
79 		dput(whiteout);
80 		whiteout = ERR_PTR(err);
81 	}
82 
83 	return whiteout;
84 }
85 
86 int ovl_create_real(struct inode *dir, struct dentry *newdentry,
87 		    struct cattr *attr, struct dentry *hardlink, bool debug)
88 {
89 	int err;
90 
91 	if (newdentry->d_inode)
92 		return -ESTALE;
93 
94 	if (hardlink) {
95 		err = ovl_do_link(hardlink, dir, newdentry, debug);
96 	} else {
97 		switch (attr->mode & S_IFMT) {
98 		case S_IFREG:
99 			err = ovl_do_create(dir, newdentry, attr->mode, debug);
100 			break;
101 
102 		case S_IFDIR:
103 			err = ovl_do_mkdir(dir, newdentry, attr->mode, debug);
104 			break;
105 
106 		case S_IFCHR:
107 		case S_IFBLK:
108 		case S_IFIFO:
109 		case S_IFSOCK:
110 			err = ovl_do_mknod(dir, newdentry,
111 					   attr->mode, attr->rdev, debug);
112 			break;
113 
114 		case S_IFLNK:
115 			err = ovl_do_symlink(dir, newdentry, attr->link, debug);
116 			break;
117 
118 		default:
119 			err = -EPERM;
120 		}
121 	}
122 	if (!err && WARN_ON(!newdentry->d_inode)) {
123 		/*
124 		 * Not quite sure if non-instantiated dentry is legal or not.
125 		 * VFS doesn't seem to care so check and warn here.
126 		 */
127 		err = -ENOENT;
128 	}
129 	return err;
130 }
131 
132 static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
133 			       int xerr)
134 {
135 	int err;
136 
137 	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
138 	if (!err)
139 		ovl_dentry_set_opaque(dentry);
140 
141 	return err;
142 }
143 
144 static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
145 {
146 	/*
147 	 * Fail with -EIO when trying to create opaque dir and upper doesn't
148 	 * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
149 	 * return a specific error for noxattr case.
150 	 */
151 	return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
152 }
153 
154 /* Common operations required to be done after creation of file on upper */
155 static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
156 			    struct dentry *newdentry, bool hardlink)
157 {
158 	ovl_dentry_version_inc(dentry->d_parent, false);
159 	ovl_dentry_set_upper_alias(dentry);
160 	if (!hardlink) {
161 		ovl_inode_update(inode, newdentry);
162 		ovl_copyattr(newdentry->d_inode, inode);
163 	} else {
164 		WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
165 		dput(newdentry);
166 		inc_nlink(inode);
167 	}
168 	d_instantiate(dentry, inode);
169 	/* Force lookup of new upper hardlink to find its lower */
170 	if (hardlink)
171 		d_drop(dentry);
172 }
173 
174 static bool ovl_type_merge(struct dentry *dentry)
175 {
176 	return OVL_TYPE_MERGE(ovl_path_type(dentry));
177 }
178 
179 static bool ovl_type_origin(struct dentry *dentry)
180 {
181 	return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
182 }
183 
184 static bool ovl_may_have_whiteouts(struct dentry *dentry)
185 {
186 	return ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
187 }
188 
/*
 * Create the upper object for @dentry directly in the parent's upper
 * directory, or link @hardlink there when it is non-NULL, and instantiate
 * @dentry with @inode.
 *
 * The parent's upper inode is locked (I_MUTEX_PARENT) around the lookup and
 * the creation.  Returns 0 on success or a negative errno.
 */
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
			    struct cattr *attr, struct dentry *hardlink)
{
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *newdentry;
	int err;

	/* Apply the umask here unless linking or udir is IS_POSIXACL() */
	if (!hardlink && !IS_POSIXACL(udir))
		attr->mode &= ~current_umask();

	inode_lock_nested(udir, I_MUTEX_PARENT);
	newdentry = lookup_one_len(dentry->d_name.name, upperdir,
				   dentry->d_name.len);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_unlock;
	err = ovl_create_real(udir, newdentry, attr, hardlink, false);
	if (err)
		goto out_dput;

	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
		/* Setting opaque here is just an optimization, allow to fail */
		ovl_set_opaque(dentry, newdentry);
	}

	ovl_instantiate(dentry, inode, newdentry, !!hardlink);
	/*
	 * newdentry has been handed to ovl_instantiate(); clear the local so
	 * the shared exit path below does not dput() it.
	 */
	newdentry = NULL;
out_dput:
	dput(newdentry);
out_unlock:
	inode_unlock(udir);
	return err;
}
223 
224 static struct dentry *ovl_clear_empty(struct dentry *dentry,
225 				      struct list_head *list)
226 {
227 	struct dentry *workdir = ovl_workdir(dentry);
228 	struct inode *wdir = workdir->d_inode;
229 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
230 	struct inode *udir = upperdir->d_inode;
231 	struct path upperpath;
232 	struct dentry *upper;
233 	struct dentry *opaquedir;
234 	struct kstat stat;
235 	int err;
236 
237 	if (WARN_ON(!workdir))
238 		return ERR_PTR(-EROFS);
239 
240 	err = ovl_lock_rename_workdir(workdir, upperdir);
241 	if (err)
242 		goto out;
243 
244 	ovl_path_upper(dentry, &upperpath);
245 	err = vfs_getattr(&upperpath, &stat,
246 			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
247 	if (err)
248 		goto out_unlock;
249 
250 	err = -ESTALE;
251 	if (!S_ISDIR(stat.mode))
252 		goto out_unlock;
253 	upper = upperpath.dentry;
254 	if (upper->d_parent->d_inode != udir)
255 		goto out_unlock;
256 
257 	opaquedir = ovl_lookup_temp(workdir);
258 	err = PTR_ERR(opaquedir);
259 	if (IS_ERR(opaquedir))
260 		goto out_unlock;
261 
262 	err = ovl_create_real(wdir, opaquedir,
263 			      &(struct cattr){.mode = stat.mode}, NULL, true);
264 	if (err)
265 		goto out_dput;
266 
267 	err = ovl_copy_xattr(upper, opaquedir);
268 	if (err)
269 		goto out_cleanup;
270 
271 	err = ovl_set_opaque(dentry, opaquedir);
272 	if (err)
273 		goto out_cleanup;
274 
275 	inode_lock(opaquedir->d_inode);
276 	err = ovl_set_attr(opaquedir, &stat);
277 	inode_unlock(opaquedir->d_inode);
278 	if (err)
279 		goto out_cleanup;
280 
281 	err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
282 	if (err)
283 		goto out_cleanup;
284 
285 	ovl_cleanup_whiteouts(upper, list);
286 	ovl_cleanup(wdir, upper);
287 	unlock_rename(workdir, upperdir);
288 
289 	/* dentry's upper doesn't match now, get rid of it */
290 	d_drop(dentry);
291 
292 	return opaquedir;
293 
294 out_cleanup:
295 	ovl_cleanup(wdir, opaquedir);
296 out_dput:
297 	dput(opaquedir);
298 out_unlock:
299 	unlock_rename(workdir, upperdir);
300 out:
301 	return ERR_PTR(err);
302 }
303 
304 static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
305 {
306 	int err;
307 	struct dentry *ret = NULL;
308 	LIST_HEAD(list);
309 
310 	err = ovl_check_empty_dir(dentry, &list);
311 	if (err) {
312 		ret = ERR_PTR(err);
313 		goto out_free;
314 	}
315 
316 	/*
317 	 * When removing an empty opaque directory, then it makes no sense to
318 	 * replace it with an exact replica of itself.
319 	 *
320 	 * If upperdentry has whiteouts, clear them.
321 	 *
322 	 * Can race with copy-up, since we don't hold the upperdir mutex.
323 	 * Doesn't matter, since copy-up can't create a non-empty directory
324 	 * from an empty one.
325 	 */
326 	if (!list_empty(&list))
327 		ret = ovl_clear_empty(dentry, &list);
328 
329 out_free:
330 	ovl_cache_free(&list);
331 
332 	return ret;
333 }
334 
335 static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
336 			     const struct posix_acl *acl)
337 {
338 	void *buffer;
339 	size_t size;
340 	int err;
341 
342 	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
343 		return 0;
344 
345 	size = posix_acl_to_xattr(NULL, acl, NULL, 0);
346 	buffer = kmalloc(size, GFP_KERNEL);
347 	if (!buffer)
348 		return -ENOMEM;
349 
350 	size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
351 	err = size;
352 	if (err < 0)
353 		goto out_free;
354 
355 	err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
356 out_free:
357 	kfree(buffer);
358 	return err;
359 }
360 
361 static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
362 				    struct cattr *cattr,
363 				    struct dentry *hardlink)
364 {
365 	struct dentry *workdir = ovl_workdir(dentry);
366 	struct inode *wdir = workdir->d_inode;
367 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
368 	struct inode *udir = upperdir->d_inode;
369 	struct dentry *upper;
370 	struct dentry *newdentry;
371 	int err;
372 	struct posix_acl *acl, *default_acl;
373 
374 	if (WARN_ON(!workdir))
375 		return -EROFS;
376 
377 	if (!hardlink) {
378 		err = posix_acl_create(dentry->d_parent->d_inode,
379 				       &cattr->mode, &default_acl, &acl);
380 		if (err)
381 			return err;
382 	}
383 
384 	err = ovl_lock_rename_workdir(workdir, upperdir);
385 	if (err)
386 		goto out;
387 
388 	newdentry = ovl_lookup_temp(workdir);
389 	err = PTR_ERR(newdentry);
390 	if (IS_ERR(newdentry))
391 		goto out_unlock;
392 
393 	upper = lookup_one_len(dentry->d_name.name, upperdir,
394 			       dentry->d_name.len);
395 	err = PTR_ERR(upper);
396 	if (IS_ERR(upper))
397 		goto out_dput;
398 
399 	err = ovl_create_real(wdir, newdentry, cattr, hardlink, true);
400 	if (err)
401 		goto out_dput2;
402 
403 	/*
404 	 * mode could have been mutilated due to umask (e.g. sgid directory)
405 	 */
406 	if (!hardlink &&
407 	    !S_ISLNK(cattr->mode) &&
408 	    newdentry->d_inode->i_mode != cattr->mode) {
409 		struct iattr attr = {
410 			.ia_valid = ATTR_MODE,
411 			.ia_mode = cattr->mode,
412 		};
413 		inode_lock(newdentry->d_inode);
414 		err = notify_change(newdentry, &attr, NULL);
415 		inode_unlock(newdentry->d_inode);
416 		if (err)
417 			goto out_cleanup;
418 	}
419 	if (!hardlink) {
420 		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
421 					acl);
422 		if (err)
423 			goto out_cleanup;
424 
425 		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
426 					default_acl);
427 		if (err)
428 			goto out_cleanup;
429 	}
430 
431 	if (!hardlink && S_ISDIR(cattr->mode)) {
432 		err = ovl_set_opaque(dentry, newdentry);
433 		if (err)
434 			goto out_cleanup;
435 
436 		err = ovl_do_rename(wdir, newdentry, udir, upper,
437 				    RENAME_EXCHANGE);
438 		if (err)
439 			goto out_cleanup;
440 
441 		ovl_cleanup(wdir, upper);
442 	} else {
443 		err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
444 		if (err)
445 			goto out_cleanup;
446 	}
447 	ovl_instantiate(dentry, inode, newdentry, !!hardlink);
448 	newdentry = NULL;
449 out_dput2:
450 	dput(upper);
451 out_dput:
452 	dput(newdentry);
453 out_unlock:
454 	unlock_rename(workdir, upperdir);
455 out:
456 	if (!hardlink) {
457 		posix_acl_release(acl);
458 		posix_acl_release(default_acl);
459 	}
460 	return err;
461 
462 out_cleanup:
463 	ovl_cleanup(wdir, newdentry);
464 	goto out_dput2;
465 }
466 
/*
 * Create a new upper object for @dentry, or link @hardlink to it when
 * @hardlink is non-NULL.
 *
 * The parent is copied up first.  The creation itself runs with the
 * overlay's override credentials, with fsuid/fsgid further switched to the
 * owner of @inode.  When @origin is set, the parent directory is marked
 * "impure" beforehand.
 *
 * Returns 0 on success or a negative errno.
 */
static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
			      struct cattr *attr, struct dentry *hardlink,
			      bool origin)
{
	int err;
	const struct cred *old_cred;
	struct cred *override_cred;
	struct dentry *parent = dentry->d_parent;

	err = ovl_copy_up(parent);
	if (err)
		return err;

	old_cred = ovl_override_creds(dentry->d_sb);

	/*
	 * When linking a file with copy up origin into a new parent, mark the
	 * new parent dir "impure".
	 */
	if (origin) {
		err = ovl_set_impure(parent, ovl_dentry_upper(parent));
		if (err)
			goto out_revert_creds;
	}

	/* -ENOMEM is the result if prepare_creds() fails below */
	err = -ENOMEM;
	override_cred = prepare_creds();
	if (override_cred) {
		/* Create the object as the owner recorded in the new inode */
		override_cred->fsuid = inode->i_uid;
		override_cred->fsgid = inode->i_gid;
		if (!hardlink) {
			err = security_dentry_create_files_as(dentry,
					attr->mode, &dentry->d_name, old_cred,
					override_cred);
			if (err) {
				put_cred(override_cred);
				goto out_revert_creds;
			}
		}
		/*
		 * Install override_cred, dropping a reference on the creds it
		 * replaces and then our own reference on override_cred.
		 */
		put_cred(override_creds(override_cred));
		put_cred(override_cred);

		if (!ovl_dentry_is_whiteout(dentry))
			err = ovl_create_upper(dentry, inode, attr,
						hardlink);
		else
			err = ovl_create_over_whiteout(dentry, inode, attr,
							hardlink);
	}
out_revert_creds:
	revert_creds(old_cred);
	if (!err) {
		struct inode *realinode = d_inode(ovl_dentry_upper(dentry));

		/* Sanity check: overlay inode must agree with the real one */
		WARN_ON(inode->i_mode != realinode->i_mode);
		WARN_ON(!uid_eq(inode->i_uid, realinode->i_uid));
		WARN_ON(!gid_eq(inode->i_gid, realinode->i_gid));
	}
	return err;
}
527 
528 static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
529 			     const char *link)
530 {
531 	int err;
532 	struct inode *inode;
533 	struct cattr attr = {
534 		.rdev = rdev,
535 		.link = link,
536 	};
537 
538 	err = ovl_want_write(dentry);
539 	if (err)
540 		goto out;
541 
542 	err = -ENOMEM;
543 	inode = ovl_new_inode(dentry->d_sb, mode, rdev);
544 	if (!inode)
545 		goto out_drop_write;
546 
547 	inode_init_owner(inode, dentry->d_parent->d_inode, mode);
548 	attr.mode = inode->i_mode;
549 
550 	err = ovl_create_or_link(dentry, inode, &attr, NULL, false);
551 	if (err)
552 		iput(inode);
553 
554 out_drop_write:
555 	ovl_drop_write(dentry);
556 out:
557 	return err;
558 }
559 
560 static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
561 		      bool excl)
562 {
563 	return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
564 }
565 
566 static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
567 {
568 	return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
569 }
570 
571 static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
572 		     dev_t rdev)
573 {
574 	/* Don't allow creation of "whiteout" on overlay */
575 	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
576 		return -EPERM;
577 
578 	return ovl_create_object(dentry, mode, rdev, NULL);
579 }
580 
581 static int ovl_symlink(struct inode *dir, struct dentry *dentry,
582 		       const char *link)
583 {
584 	return ovl_create_object(dentry, S_IFLNK, 0, link);
585 }
586 
587 static int ovl_link(struct dentry *old, struct inode *newdir,
588 		    struct dentry *new)
589 {
590 	int err;
591 	bool locked = false;
592 	struct inode *inode;
593 
594 	err = ovl_want_write(old);
595 	if (err)
596 		goto out;
597 
598 	err = ovl_copy_up(old);
599 	if (err)
600 		goto out_drop_write;
601 
602 	err = ovl_nlink_start(old, &locked);
603 	if (err)
604 		goto out_drop_write;
605 
606 	inode = d_inode(old);
607 	ihold(inode);
608 
609 	err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old),
610 				 ovl_type_origin(old));
611 	if (err)
612 		iput(inode);
613 
614 	ovl_nlink_end(old, locked);
615 out_drop_write:
616 	ovl_drop_write(old);
617 out:
618 	return err;
619 }
620 
621 static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
622 {
623 	return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
624 }
625 
626 static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
627 {
628 	struct dentry *workdir = ovl_workdir(dentry);
629 	struct inode *wdir = workdir->d_inode;
630 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
631 	struct inode *udir = upperdir->d_inode;
632 	struct dentry *whiteout;
633 	struct dentry *upper;
634 	struct dentry *opaquedir = NULL;
635 	int err;
636 	int flags = 0;
637 
638 	if (WARN_ON(!workdir))
639 		return -EROFS;
640 
641 	if (is_dir) {
642 		opaquedir = ovl_check_empty_and_clear(dentry);
643 		err = PTR_ERR(opaquedir);
644 		if (IS_ERR(opaquedir))
645 			goto out;
646 	}
647 
648 	err = ovl_lock_rename_workdir(workdir, upperdir);
649 	if (err)
650 		goto out_dput;
651 
652 	upper = lookup_one_len(dentry->d_name.name, upperdir,
653 			       dentry->d_name.len);
654 	err = PTR_ERR(upper);
655 	if (IS_ERR(upper))
656 		goto out_unlock;
657 
658 	err = -ESTALE;
659 	if ((opaquedir && upper != opaquedir) ||
660 	    (!opaquedir && ovl_dentry_upper(dentry) &&
661 	     !ovl_matches_upper(dentry, upper))) {
662 		goto out_dput_upper;
663 	}
664 
665 	whiteout = ovl_whiteout(workdir, dentry);
666 	err = PTR_ERR(whiteout);
667 	if (IS_ERR(whiteout))
668 		goto out_dput_upper;
669 
670 	if (d_is_dir(upper))
671 		flags = RENAME_EXCHANGE;
672 
673 	err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
674 	if (err)
675 		goto kill_whiteout;
676 	if (flags)
677 		ovl_cleanup(wdir, upper);
678 
679 	ovl_dentry_version_inc(dentry->d_parent, true);
680 out_d_drop:
681 	d_drop(dentry);
682 	dput(whiteout);
683 out_dput_upper:
684 	dput(upper);
685 out_unlock:
686 	unlock_rename(workdir, upperdir);
687 out_dput:
688 	dput(opaquedir);
689 out:
690 	return err;
691 
692 kill_whiteout:
693 	ovl_cleanup(wdir, whiteout);
694 	goto out_d_drop;
695 }
696 
/*
 * Remove @dentry by removing its upper object directly (vfs_rmdir() for a
 * directory, vfs_unlink() otherwise), without leaving a whiteout.
 *
 * For a directory that has a redirect or may contain whiteouts, emptiness is
 * checked first and the upper directory may be replaced by an opaque one.
 * The parent's upper inode is locked (I_MUTEX_PARENT) around the lookup and
 * removal.
 *
 * Returns 0 on success, -ESTALE if the upper name no longer refers to the
 * expected object, or the error of the step that failed.
 */
static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
{
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *dir = upperdir->d_inode;
	struct dentry *upper;
	struct dentry *opaquedir = NULL;
	int err;

	/* Redirect/origin dir can be !ovl_lower_positive && not clean */
	if (is_dir && (ovl_dentry_get_redirect(dentry) ||
		       ovl_may_have_whiteouts(dentry))) {
		opaquedir = ovl_check_empty_and_clear(dentry);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir))
			goto out;
	}

	inode_lock_nested(dir, I_MUTEX_PARENT);
	upper = lookup_one_len(dentry->d_name.name, upperdir,
			       dentry->d_name.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto out_unlock;

	/* The name must still refer to the object we intend to remove */
	err = -ESTALE;
	if ((opaquedir && upper != opaquedir) ||
	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
		goto out_dput_upper;

	if (is_dir)
		err = vfs_rmdir(dir, upper);
	else
		err = vfs_unlink(dir, upper, NULL);
	/* Note: the parent's version is bumped whether or not removal worked */
	ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry));

	/*
	 * Keeping this dentry hashed would mean having to release
	 * upperpath/lowerpath, which could only be done if we are the
	 * sole user of this dentry.  Too tricky...  Just unhash for
	 * now.
	 */
	if (!err)
		d_drop(dentry);
out_dput_upper:
	dput(upper);
out_unlock:
	inode_unlock(dir);
	/* opaquedir is NULL here unless ovl_check_empty_and_clear() set it */
	dput(opaquedir);
out:
	return err;
}
748 
749 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
750 {
751 	int err;
752 	bool locked = false;
753 	const struct cred *old_cred;
754 
755 	err = ovl_want_write(dentry);
756 	if (err)
757 		goto out;
758 
759 	err = ovl_copy_up(dentry->d_parent);
760 	if (err)
761 		goto out_drop_write;
762 
763 	err = ovl_nlink_start(dentry, &locked);
764 	if (err)
765 		goto out_drop_write;
766 
767 	old_cred = ovl_override_creds(dentry->d_sb);
768 	if (!ovl_lower_positive(dentry))
769 		err = ovl_remove_upper(dentry, is_dir);
770 	else
771 		err = ovl_remove_and_whiteout(dentry, is_dir);
772 	revert_creds(old_cred);
773 	if (!err) {
774 		if (is_dir)
775 			clear_nlink(dentry->d_inode);
776 		else
777 			drop_nlink(dentry->d_inode);
778 	}
779 	ovl_nlink_end(dentry, locked);
780 out_drop_write:
781 	ovl_drop_write(dentry);
782 out:
783 	return err;
784 }
785 
786 static int ovl_unlink(struct inode *dir, struct dentry *dentry)
787 {
788 	return ovl_do_remove(dentry, false);
789 }
790 
791 static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
792 {
793 	return ovl_do_remove(dentry, true);
794 }
795 
796 static bool ovl_type_merge_or_lower(struct dentry *dentry)
797 {
798 	enum ovl_path_type type = ovl_path_type(dentry);
799 
800 	return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
801 }
802 
803 static bool ovl_can_move(struct dentry *dentry)
804 {
805 	return ovl_redirect_dir(dentry->d_sb) ||
806 		!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
807 }
808 
/*
 * Build the redirect string for @dentry.
 *
 * With @samedir the result is simply a copy of the dentry's own name.
 * Otherwise a path is assembled back-to-front in a scratch buffer of
 * ovl_redirect_max + 1 bytes by walking d_parent links towards the root,
 * using a dentry's stored redirect in place of its name when it has one.
 * The walk stops early once an absolute (leading '/') redirect is copied in.
 *
 * Returns a newly allocated string (kstrndup()/kstrdup()),
 * ERR_PTR(-EXDEV) if the path does not fit in the buffer, or
 * ERR_PTR(-ENOMEM) if an allocation fails.
 */
static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
{
	char *buf, *ret;
	struct dentry *d, *tmp;
	int buflen = ovl_redirect_max + 1;

	if (samedir) {
		ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
			       GFP_KERNEL);
		goto out;
	}

	buf = ret = kmalloc(buflen, GFP_KERNEL);
	if (!buf)
		goto out;

	/* Terminate at the very end; buflen is now the write position */
	buflen--;
	buf[buflen] = '\0';
	for (d = dget(dentry); !IS_ROOT(d);) {
		const char *name;
		int thislen;

		spin_lock(&d->d_lock);
		name = ovl_dentry_get_redirect(d);
		if (name) {
			thislen = strlen(name);
		} else {
			name = d->d_name.name;
			thislen = d->d_name.len;
		}

		/* If path is too long, fall back to userspace move */
		/* (one extra byte is needed for '/' unless name has its own) */
		if (thislen + (name[0] != '/') > buflen) {
			ret = ERR_PTR(-EXDEV);
			spin_unlock(&d->d_lock);
			goto out_put;
		}

		/* Prepend this component in front of what was built so far */
		buflen -= thislen;
		memcpy(&buf[buflen], name, thislen);
		/* Take the parent reference while d_lock is still held */
		tmp = dget_dlock(d->d_parent);
		spin_unlock(&d->d_lock);

		dput(d);
		d = tmp;

		/* Absolute redirect: finished */
		if (buf[buflen] == '/')
			break;
		buflen--;
		buf[buflen] = '/';
	}
	/* Return a right-sized copy; the scratch buffer is freed below */
	ret = kstrdup(&buf[buflen], GFP_KERNEL);
out_put:
	dput(d);
	kfree(buf);
out:
	/* A NULL ret at this point means one of the allocations failed */
	return ret ? ret : ERR_PTR(-ENOMEM);
}
868 
869 static int ovl_set_redirect(struct dentry *dentry, bool samedir)
870 {
871 	int err;
872 	const char *redirect = ovl_dentry_get_redirect(dentry);
873 
874 	if (redirect && (samedir || redirect[0] == '/'))
875 		return 0;
876 
877 	redirect = ovl_get_redirect(dentry, samedir);
878 	if (IS_ERR(redirect))
879 		return PTR_ERR(redirect);
880 
881 	err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
882 				 OVL_XATTR_REDIRECT,
883 				 redirect, strlen(redirect), -EXDEV);
884 	if (!err) {
885 		spin_lock(&dentry->d_lock);
886 		ovl_dentry_set_redirect(dentry, redirect);
887 		spin_unlock(&dentry->d_lock);
888 	} else {
889 		kfree(redirect);
890 		pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
891 				    err);
892 		/* Fall back to userspace copy-up */
893 		err = -EXDEV;
894 	}
895 	return err;
896 }
897 
/*
 * ->rename: rename @old to @new, supporting RENAME_EXCHANGE and
 * RENAME_NOREPLACE (any other flag yields -EINVAL).
 *
 * Outline:
 *  - refuse with -EXDEV anything ovl_can_move() rejects;
 *  - copy up @old, the parent of @new and, for an exchange, @new itself;
 *  - when overwriting a directory that is merge/lower or may hold
 *    whiteouts, check that it is empty and possibly replace its upper;
 *  - choose extra rename flags (RENAME_WHITEOUT / RENAME_EXCHANGE);
 *  - mark parents "impure" for cross-directory moves of objects with origin;
 *  - lock both upper parents, re-lookup and validate both upper dentries;
 *  - set redirect or opaque xattrs on moved directories;
 *  - perform the rename on the upper layer and update overlay state.
 *
 * Returns 0 on success or a negative errno (-ESTALE when the upper dentries
 * do not match what the overlay dentries expect).
 */
static int ovl_rename(struct inode *olddir, struct dentry *old,
		      struct inode *newdir, struct dentry *new,
		      unsigned int flags)
{
	int err;
	bool locked = false;
	struct dentry *old_upperdir;
	struct dentry *new_upperdir;
	struct dentry *olddentry;
	struct dentry *newdentry;
	struct dentry *trap;
	bool old_opaque;
	bool new_opaque;
	bool cleanup_whiteout = false;
	bool overwrite = !(flags & RENAME_EXCHANGE);
	bool is_dir = d_is_dir(old);
	bool new_is_dir = d_is_dir(new);
	bool samedir = olddir == newdir;
	struct dentry *opaquedir = NULL;
	const struct cred *old_cred = NULL;

	err = -EINVAL;
	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
		goto out;

	/* RENAME_NOREPLACE is dropped here and not passed to the upper fs */
	flags &= ~RENAME_NOREPLACE;

	/* Don't copy up directory trees */
	err = -EXDEV;
	if (!ovl_can_move(old))
		goto out;
	if (!overwrite && !ovl_can_move(new))
		goto out;

	err = ovl_want_write(old);
	if (err)
		goto out;

	err = ovl_copy_up(old);
	if (err)
		goto out_drop_write;

	err = ovl_copy_up(new->d_parent);
	if (err)
		goto out_drop_write;
	if (!overwrite) {
		/* Exchange: both objects must exist on upper */
		err = ovl_copy_up(new);
		if (err)
			goto out_drop_write;
	} else {
		/* Overwrite: the link count of the target is going to change */
		err = ovl_nlink_start(new, &locked);
		if (err)
			goto out_drop_write;
	}

	old_cred = ovl_override_creds(old->d_sb);

	if (overwrite && new_is_dir && (ovl_type_merge_or_lower(new) ||
					ovl_may_have_whiteouts(new))) {
		opaquedir = ovl_check_empty_and_clear(new);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir)) {
			/* Reset so the final dput(opaquedir) sees NULL */
			opaquedir = NULL;
			goto out_revert_creds;
		}
	}

	if (overwrite) {
		if (ovl_lower_positive(old)) {
			if (!ovl_dentry_is_whiteout(new)) {
				/* Whiteout source */
				flags |= RENAME_WHITEOUT;
			} else {
				/* Switch whiteouts */
				flags |= RENAME_EXCHANGE;
			}
		} else if (is_dir && ovl_dentry_is_whiteout(new)) {
			/*
			 * Exchange the directory with the whiteout, then
			 * remove the whiteout after the rename (see the
			 * cleanup_whiteout handling below).
			 */
			flags |= RENAME_EXCHANGE;
			cleanup_whiteout = true;
		}
	}

	old_upperdir = ovl_dentry_upper(old->d_parent);
	new_upperdir = ovl_dentry_upper(new->d_parent);

	if (!samedir) {
		/*
		 * When moving a merge dir or non-dir with copy up origin into
		 * a new parent, we are marking the new parent dir "impure".
		 * When ovl_iterate() iterates an "impure" upper dir, it will
		 * lookup the origin inodes of the entries to fill d_ino.
		 */
		if (ovl_type_origin(old)) {
			err = ovl_set_impure(new->d_parent, new_upperdir);
			if (err)
				goto out_revert_creds;
		}
		if (!overwrite && ovl_type_origin(new)) {
			err = ovl_set_impure(old->d_parent, old_upperdir);
			if (err)
				goto out_revert_creds;
		}
	}

	trap = lock_rename(new_upperdir, old_upperdir);

	/* Re-lookup the source under the lock and check it is still ours */
	olddentry = lookup_one_len(old->d_name.name, old_upperdir,
				   old->d_name.len);
	err = PTR_ERR(olddentry);
	if (IS_ERR(olddentry))
		goto out_unlock;

	err = -ESTALE;
	if (!ovl_matches_upper(old, olddentry))
		goto out_dput_old;

	newdentry = lookup_one_len(new->d_name.name, new_upperdir,
				   new->d_name.len);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_dput_old;

	old_opaque = ovl_dentry_is_opaque(old);
	new_opaque = ovl_dentry_is_opaque(new);

	/* Validate that the upper target is what the overlay dentry expects */
	err = -ESTALE;
	if (d_inode(new) && ovl_dentry_upper(new)) {
		if (opaquedir) {
			if (newdentry != opaquedir)
				goto out_dput;
		} else {
			if (!ovl_matches_upper(new, newdentry))
				goto out_dput;
		}
	} else {
		/* No upper expected: accept only a negative or whiteout */
		if (!d_is_negative(newdentry) &&
		    (!new_opaque || !ovl_is_whiteout(newdentry)))
			goto out_dput;
	}

	/* Neither side may be the dentry returned by lock_rename() */
	if (olddentry == trap)
		goto out_dput;
	if (newdentry == trap)
		goto out_dput;

	if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
		goto out_dput;

	err = 0;
	if (is_dir) {
		if (ovl_type_merge_or_lower(old))
			err = ovl_set_redirect(old, samedir);
		else if (!old_opaque && ovl_type_merge(new->d_parent))
			err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
		if (err)
			goto out_dput;
	}
	if (!overwrite && new_is_dir) {
		/* Exchange: the same treatment for the other directory */
		if (ovl_type_merge_or_lower(new))
			err = ovl_set_redirect(new, samedir);
		else if (!new_opaque && ovl_type_merge(old->d_parent))
			err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
		if (err)
			goto out_dput;
	}

	err = ovl_do_rename(old_upperdir->d_inode, olddentry,
			    new_upperdir->d_inode, newdentry, flags);
	if (err)
		goto out_dput;

	/* After the exchange the whiteout sits in the old upper dir */
	if (cleanup_whiteout)
		ovl_cleanup(old_upperdir->d_inode, newdentry);

	/* The overwritten target lost a link (all of them for a directory) */
	if (overwrite && d_inode(new)) {
		if (new_is_dir)
			clear_nlink(d_inode(new));
		else
			drop_nlink(d_inode(new));
	}

	ovl_dentry_version_inc(old->d_parent, ovl_type_origin(old) ||
			       (!overwrite && ovl_type_origin(new)));
	ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old) ||
			       (d_inode(new) && ovl_type_origin(new)));

out_dput:
	dput(newdentry);
out_dput_old:
	dput(olddentry);
out_unlock:
	unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
	revert_creds(old_cred);
	ovl_nlink_end(new, locked);
out_drop_write:
	ovl_drop_write(old);
out:
	/* opaquedir is NULL unless ovl_check_empty_and_clear() returned one */
	dput(opaquedir);
	return err;
}
1099 
1100 const struct inode_operations ovl_dir_inode_operations = {
1101 	.lookup		= ovl_lookup,
1102 	.mkdir		= ovl_mkdir,
1103 	.symlink	= ovl_symlink,
1104 	.unlink		= ovl_unlink,
1105 	.rmdir		= ovl_rmdir,
1106 	.rename		= ovl_rename,
1107 	.link		= ovl_link,
1108 	.setattr	= ovl_setattr,
1109 	.create		= ovl_create,
1110 	.mknod		= ovl_mknod,
1111 	.permission	= ovl_permission,
1112 	.getattr	= ovl_getattr,
1113 	.listxattr	= ovl_listxattr,
1114 	.get_acl	= ovl_get_acl,
1115 	.update_time	= ovl_update_time,
1116 };
1117