xref: /openbmc/linux/fs/overlayfs/dir.c (revision 3e09b155)
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/namei.h>
12 #include <linux/xattr.h>
13 #include <linux/security.h>
14 #include <linux/cred.h>
15 #include <linux/module.h>
16 #include <linux/posix_acl.h>
17 #include <linux/posix_acl_xattr.h>
18 #include <linux/atomic.h>
19 #include <linux/ratelimit.h>
20 #include "overlayfs.h"
21 
22 static unsigned short ovl_redirect_max = 256;
23 module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
24 MODULE_PARM_DESC(ovl_redirect_max,
25 		 "Maximum length of absolute redirect xattr value");
26 
/*
 * Remove @wdentry from directory inode @wdir, using rmdir for directories and
 * unlink for everything else.  A failure is logged and its error code is
 * returned; 0 is returned on success.
 */
int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
	int ret;

	/* Hold an extra reference for the duration of the removal */
	dget(wdentry);
	ret = d_is_dir(wdentry) ? ovl_do_rmdir(wdir, wdentry) :
				  ovl_do_unlink(wdir, wdentry);
	dput(wdentry);

	if (!ret)
		return 0;

	pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", wdentry, ret);
	return ret;
}
45 
46 struct dentry *ovl_lookup_temp(struct dentry *workdir)
47 {
48 	struct dentry *temp;
49 	char name[20];
50 	static atomic_t temp_id = ATOMIC_INIT(0);
51 
52 	/* counter is allowed to wrap, since temp dentries are ephemeral */
53 	snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
54 
55 	temp = lookup_one_len(name, workdir, strlen(name));
56 	if (!IS_ERR(temp) && temp->d_inode) {
57 		pr_err("overlayfs: workdir/%s already exists\n", name);
58 		dput(temp);
59 		temp = ERR_PTR(-EIO);
60 	}
61 
62 	return temp;
63 }
64 
65 /* caller holds i_mutex on workdir */
66 static struct dentry *ovl_whiteout(struct dentry *workdir,
67 				   struct dentry *dentry)
68 {
69 	int err;
70 	struct dentry *whiteout;
71 	struct inode *wdir = workdir->d_inode;
72 
73 	whiteout = ovl_lookup_temp(workdir);
74 	if (IS_ERR(whiteout))
75 		return whiteout;
76 
77 	err = ovl_do_whiteout(wdir, whiteout);
78 	if (err) {
79 		dput(whiteout);
80 		whiteout = ERR_PTR(err);
81 	}
82 
83 	return whiteout;
84 }
85 
86 int ovl_create_real(struct inode *dir, struct dentry *newdentry,
87 		    struct cattr *attr, struct dentry *hardlink, bool debug)
88 {
89 	int err;
90 
91 	if (newdentry->d_inode)
92 		return -ESTALE;
93 
94 	if (hardlink) {
95 		err = ovl_do_link(hardlink, dir, newdentry, debug);
96 	} else {
97 		switch (attr->mode & S_IFMT) {
98 		case S_IFREG:
99 			err = ovl_do_create(dir, newdentry, attr->mode, debug);
100 			break;
101 
102 		case S_IFDIR:
103 			err = ovl_do_mkdir(dir, newdentry, attr->mode, debug);
104 			break;
105 
106 		case S_IFCHR:
107 		case S_IFBLK:
108 		case S_IFIFO:
109 		case S_IFSOCK:
110 			err = ovl_do_mknod(dir, newdentry,
111 					   attr->mode, attr->rdev, debug);
112 			break;
113 
114 		case S_IFLNK:
115 			err = ovl_do_symlink(dir, newdentry, attr->link, debug);
116 			break;
117 
118 		default:
119 			err = -EPERM;
120 		}
121 	}
122 	if (!err && WARN_ON(!newdentry->d_inode)) {
123 		/*
124 		 * Not quite sure if non-instantiated dentry is legal or not.
125 		 * VFS doesn't seem to care so check and warn here.
126 		 */
127 		err = -ENOENT;
128 	}
129 	return err;
130 }
131 
132 static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
133 			       int xerr)
134 {
135 	int err;
136 
137 	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
138 	if (!err)
139 		ovl_dentry_set_opaque(dentry);
140 
141 	return err;
142 }
143 
144 static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
145 {
146 	/*
147 	 * Fail with -EIO when trying to create opaque dir and upper doesn't
148 	 * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
149 	 * return a specific error for noxattr case.
150 	 */
151 	return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
152 }
153 
154 /* Common operations required to be done after creation of file on upper */
155 static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
156 			    struct dentry *newdentry, bool hardlink)
157 {
158 	ovl_dentry_version_inc(dentry->d_parent, false);
159 	ovl_dentry_set_upper_alias(dentry);
160 	if (!hardlink) {
161 		ovl_inode_update(inode, newdentry);
162 		ovl_copyattr(newdentry->d_inode, inode);
163 	} else {
164 		WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
165 		dput(newdentry);
166 		inc_nlink(inode);
167 	}
168 	d_instantiate(dentry, inode);
169 	/* Force lookup of new upper hardlink to find its lower */
170 	if (hardlink)
171 		d_drop(dentry);
172 }
173 
174 static bool ovl_type_merge(struct dentry *dentry)
175 {
176 	return OVL_TYPE_MERGE(ovl_path_type(dentry));
177 }
178 
179 static bool ovl_type_origin(struct dentry *dentry)
180 {
181 	return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
182 }
183 
184 static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
185 			    struct cattr *attr, struct dentry *hardlink)
186 {
187 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
188 	struct inode *udir = upperdir->d_inode;
189 	struct dentry *newdentry;
190 	int err;
191 
192 	if (!hardlink && !IS_POSIXACL(udir))
193 		attr->mode &= ~current_umask();
194 
195 	inode_lock_nested(udir, I_MUTEX_PARENT);
196 	newdentry = lookup_one_len(dentry->d_name.name, upperdir,
197 				   dentry->d_name.len);
198 	err = PTR_ERR(newdentry);
199 	if (IS_ERR(newdentry))
200 		goto out_unlock;
201 	err = ovl_create_real(udir, newdentry, attr, hardlink, false);
202 	if (err)
203 		goto out_dput;
204 
205 	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
206 		/* Setting opaque here is just an optimization, allow to fail */
207 		ovl_set_opaque(dentry, newdentry);
208 	}
209 
210 	ovl_instantiate(dentry, inode, newdentry, !!hardlink);
211 	newdentry = NULL;
212 out_dput:
213 	dput(newdentry);
214 out_unlock:
215 	inode_unlock(udir);
216 	return err;
217 }
218 
219 static int ovl_lock_rename_workdir(struct dentry *workdir,
220 				   struct dentry *upperdir)
221 {
222 	/* Workdir should not be the same as upperdir */
223 	if (workdir == upperdir)
224 		goto err;
225 
226 	/* Workdir should not be subdir of upperdir and vice versa */
227 	if (lock_rename(workdir, upperdir) != NULL)
228 		goto err_unlock;
229 
230 	return 0;
231 
232 err_unlock:
233 	unlock_rename(workdir, upperdir);
234 err:
235 	pr_err("overlayfs: failed to lock workdir+upperdir\n");
236 	return -EIO;
237 }
238 
239 static struct dentry *ovl_clear_empty(struct dentry *dentry,
240 				      struct list_head *list)
241 {
242 	struct dentry *workdir = ovl_workdir(dentry);
243 	struct inode *wdir = workdir->d_inode;
244 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
245 	struct inode *udir = upperdir->d_inode;
246 	struct path upperpath;
247 	struct dentry *upper;
248 	struct dentry *opaquedir;
249 	struct kstat stat;
250 	int err;
251 
252 	if (WARN_ON(!workdir))
253 		return ERR_PTR(-EROFS);
254 
255 	err = ovl_lock_rename_workdir(workdir, upperdir);
256 	if (err)
257 		goto out;
258 
259 	ovl_path_upper(dentry, &upperpath);
260 	err = vfs_getattr(&upperpath, &stat,
261 			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
262 	if (err)
263 		goto out_unlock;
264 
265 	err = -ESTALE;
266 	if (!S_ISDIR(stat.mode))
267 		goto out_unlock;
268 	upper = upperpath.dentry;
269 	if (upper->d_parent->d_inode != udir)
270 		goto out_unlock;
271 
272 	opaquedir = ovl_lookup_temp(workdir);
273 	err = PTR_ERR(opaquedir);
274 	if (IS_ERR(opaquedir))
275 		goto out_unlock;
276 
277 	err = ovl_create_real(wdir, opaquedir,
278 			      &(struct cattr){.mode = stat.mode}, NULL, true);
279 	if (err)
280 		goto out_dput;
281 
282 	err = ovl_copy_xattr(upper, opaquedir);
283 	if (err)
284 		goto out_cleanup;
285 
286 	err = ovl_set_opaque(dentry, opaquedir);
287 	if (err)
288 		goto out_cleanup;
289 
290 	inode_lock(opaquedir->d_inode);
291 	err = ovl_set_attr(opaquedir, &stat);
292 	inode_unlock(opaquedir->d_inode);
293 	if (err)
294 		goto out_cleanup;
295 
296 	err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
297 	if (err)
298 		goto out_cleanup;
299 
300 	ovl_cleanup_whiteouts(upper, list);
301 	ovl_cleanup(wdir, upper);
302 	unlock_rename(workdir, upperdir);
303 
304 	/* dentry's upper doesn't match now, get rid of it */
305 	d_drop(dentry);
306 
307 	return opaquedir;
308 
309 out_cleanup:
310 	ovl_cleanup(wdir, opaquedir);
311 out_dput:
312 	dput(opaquedir);
313 out_unlock:
314 	unlock_rename(workdir, upperdir);
315 out:
316 	return ERR_PTR(err);
317 }
318 
319 static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
320 {
321 	int err;
322 	struct dentry *ret = NULL;
323 	enum ovl_path_type type = ovl_path_type(dentry);
324 	LIST_HEAD(list);
325 
326 	err = ovl_check_empty_dir(dentry, &list);
327 	if (err) {
328 		ret = ERR_PTR(err);
329 		goto out_free;
330 	}
331 
332 	/*
333 	 * When removing an empty opaque directory, then it makes no sense to
334 	 * replace it with an exact replica of itself.
335 	 *
336 	 * If no upperdentry then skip clearing whiteouts.
337 	 *
338 	 * Can race with copy-up, since we don't hold the upperdir mutex.
339 	 * Doesn't matter, since copy-up can't create a non-empty directory
340 	 * from an empty one.
341 	 */
342 	if (OVL_TYPE_UPPER(type) && OVL_TYPE_MERGE(type))
343 		ret = ovl_clear_empty(dentry, &list);
344 
345 out_free:
346 	ovl_cache_free(&list);
347 
348 	return ret;
349 }
350 
351 static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
352 			     const struct posix_acl *acl)
353 {
354 	void *buffer;
355 	size_t size;
356 	int err;
357 
358 	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
359 		return 0;
360 
361 	size = posix_acl_to_xattr(NULL, acl, NULL, 0);
362 	buffer = kmalloc(size, GFP_KERNEL);
363 	if (!buffer)
364 		return -ENOMEM;
365 
366 	size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
367 	err = size;
368 	if (err < 0)
369 		goto out_free;
370 
371 	err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
372 out_free:
373 	kfree(buffer);
374 	return err;
375 }
376 
377 static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
378 				    struct cattr *cattr,
379 				    struct dentry *hardlink)
380 {
381 	struct dentry *workdir = ovl_workdir(dentry);
382 	struct inode *wdir = workdir->d_inode;
383 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
384 	struct inode *udir = upperdir->d_inode;
385 	struct dentry *upper;
386 	struct dentry *newdentry;
387 	int err;
388 	struct posix_acl *acl, *default_acl;
389 
390 	if (WARN_ON(!workdir))
391 		return -EROFS;
392 
393 	if (!hardlink) {
394 		err = posix_acl_create(dentry->d_parent->d_inode,
395 				       &cattr->mode, &default_acl, &acl);
396 		if (err)
397 			return err;
398 	}
399 
400 	err = ovl_lock_rename_workdir(workdir, upperdir);
401 	if (err)
402 		goto out;
403 
404 	newdentry = ovl_lookup_temp(workdir);
405 	err = PTR_ERR(newdentry);
406 	if (IS_ERR(newdentry))
407 		goto out_unlock;
408 
409 	upper = lookup_one_len(dentry->d_name.name, upperdir,
410 			       dentry->d_name.len);
411 	err = PTR_ERR(upper);
412 	if (IS_ERR(upper))
413 		goto out_dput;
414 
415 	err = ovl_create_real(wdir, newdentry, cattr, hardlink, true);
416 	if (err)
417 		goto out_dput2;
418 
419 	/*
420 	 * mode could have been mutilated due to umask (e.g. sgid directory)
421 	 */
422 	if (!hardlink &&
423 	    !S_ISLNK(cattr->mode) &&
424 	    newdentry->d_inode->i_mode != cattr->mode) {
425 		struct iattr attr = {
426 			.ia_valid = ATTR_MODE,
427 			.ia_mode = cattr->mode,
428 		};
429 		inode_lock(newdentry->d_inode);
430 		err = notify_change(newdentry, &attr, NULL);
431 		inode_unlock(newdentry->d_inode);
432 		if (err)
433 			goto out_cleanup;
434 	}
435 	if (!hardlink) {
436 		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
437 					acl);
438 		if (err)
439 			goto out_cleanup;
440 
441 		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
442 					default_acl);
443 		if (err)
444 			goto out_cleanup;
445 	}
446 
447 	if (!hardlink && S_ISDIR(cattr->mode)) {
448 		err = ovl_set_opaque(dentry, newdentry);
449 		if (err)
450 			goto out_cleanup;
451 
452 		err = ovl_do_rename(wdir, newdentry, udir, upper,
453 				    RENAME_EXCHANGE);
454 		if (err)
455 			goto out_cleanup;
456 
457 		ovl_cleanup(wdir, upper);
458 	} else {
459 		err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
460 		if (err)
461 			goto out_cleanup;
462 	}
463 	ovl_instantiate(dentry, inode, newdentry, !!hardlink);
464 	newdentry = NULL;
465 out_dput2:
466 	dput(upper);
467 out_dput:
468 	dput(newdentry);
469 out_unlock:
470 	unlock_rename(workdir, upperdir);
471 out:
472 	if (!hardlink) {
473 		posix_acl_release(acl);
474 		posix_acl_release(default_acl);
475 	}
476 	return err;
477 
478 out_cleanup:
479 	ovl_cleanup(wdir, newdentry);
480 	goto out_dput2;
481 }
482 
483 static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
484 			      struct cattr *attr, struct dentry *hardlink,
485 			      bool origin)
486 {
487 	int err;
488 	const struct cred *old_cred;
489 	struct cred *override_cred;
490 	struct dentry *parent = dentry->d_parent;
491 
492 	err = ovl_copy_up(parent);
493 	if (err)
494 		return err;
495 
496 	old_cred = ovl_override_creds(dentry->d_sb);
497 
498 	/*
499 	 * When linking a file with copy up origin into a new parent, mark the
500 	 * new parent dir "impure".
501 	 */
502 	if (origin) {
503 		err = ovl_set_impure(parent, ovl_dentry_upper(parent));
504 		if (err)
505 			goto out_revert_creds;
506 	}
507 
508 	err = -ENOMEM;
509 	override_cred = prepare_creds();
510 	if (override_cred) {
511 		override_cred->fsuid = inode->i_uid;
512 		override_cred->fsgid = inode->i_gid;
513 		if (!hardlink) {
514 			err = security_dentry_create_files_as(dentry,
515 					attr->mode, &dentry->d_name, old_cred,
516 					override_cred);
517 			if (err) {
518 				put_cred(override_cred);
519 				goto out_revert_creds;
520 			}
521 		}
522 		put_cred(override_creds(override_cred));
523 		put_cred(override_cred);
524 
525 		if (!ovl_dentry_is_whiteout(dentry))
526 			err = ovl_create_upper(dentry, inode, attr,
527 						hardlink);
528 		else
529 			err = ovl_create_over_whiteout(dentry, inode, attr,
530 							hardlink);
531 	}
532 out_revert_creds:
533 	revert_creds(old_cred);
534 	if (!err) {
535 		struct inode *realinode = d_inode(ovl_dentry_upper(dentry));
536 
537 		WARN_ON(inode->i_mode != realinode->i_mode);
538 		WARN_ON(!uid_eq(inode->i_uid, realinode->i_uid));
539 		WARN_ON(!gid_eq(inode->i_gid, realinode->i_gid));
540 	}
541 	return err;
542 }
543 
544 static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
545 			     const char *link)
546 {
547 	int err;
548 	struct inode *inode;
549 	struct cattr attr = {
550 		.rdev = rdev,
551 		.link = link,
552 	};
553 
554 	err = ovl_want_write(dentry);
555 	if (err)
556 		goto out;
557 
558 	err = -ENOMEM;
559 	inode = ovl_new_inode(dentry->d_sb, mode, rdev);
560 	if (!inode)
561 		goto out_drop_write;
562 
563 	inode_init_owner(inode, dentry->d_parent->d_inode, mode);
564 	attr.mode = inode->i_mode;
565 
566 	err = ovl_create_or_link(dentry, inode, &attr, NULL, false);
567 	if (err)
568 		iput(inode);
569 
570 out_drop_write:
571 	ovl_drop_write(dentry);
572 out:
573 	return err;
574 }
575 
576 static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
577 		      bool excl)
578 {
579 	return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
580 }
581 
582 static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
583 {
584 	return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
585 }
586 
587 static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
588 		     dev_t rdev)
589 {
590 	/* Don't allow creation of "whiteout" on overlay */
591 	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
592 		return -EPERM;
593 
594 	return ovl_create_object(dentry, mode, rdev, NULL);
595 }
596 
597 static int ovl_symlink(struct inode *dir, struct dentry *dentry,
598 		       const char *link)
599 {
600 	return ovl_create_object(dentry, S_IFLNK, 0, link);
601 }
602 
603 static int ovl_link(struct dentry *old, struct inode *newdir,
604 		    struct dentry *new)
605 {
606 	int err;
607 	bool locked = false;
608 	struct inode *inode;
609 
610 	err = ovl_want_write(old);
611 	if (err)
612 		goto out;
613 
614 	err = ovl_copy_up(old);
615 	if (err)
616 		goto out_drop_write;
617 
618 	err = ovl_nlink_start(old, &locked);
619 	if (err)
620 		goto out_drop_write;
621 
622 	inode = d_inode(old);
623 	ihold(inode);
624 
625 	err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old),
626 				 ovl_type_origin(old));
627 	if (err)
628 		iput(inode);
629 
630 	ovl_nlink_end(old, locked);
631 out_drop_write:
632 	ovl_drop_write(old);
633 out:
634 	return err;
635 }
636 
637 static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
638 {
639 	return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
640 }
641 
642 static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
643 {
644 	struct dentry *workdir = ovl_workdir(dentry);
645 	struct inode *wdir = workdir->d_inode;
646 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
647 	struct inode *udir = upperdir->d_inode;
648 	struct dentry *whiteout;
649 	struct dentry *upper;
650 	struct dentry *opaquedir = NULL;
651 	int err;
652 	int flags = 0;
653 
654 	if (WARN_ON(!workdir))
655 		return -EROFS;
656 
657 	if (is_dir) {
658 		opaquedir = ovl_check_empty_and_clear(dentry);
659 		err = PTR_ERR(opaquedir);
660 		if (IS_ERR(opaquedir))
661 			goto out;
662 	}
663 
664 	err = ovl_lock_rename_workdir(workdir, upperdir);
665 	if (err)
666 		goto out_dput;
667 
668 	upper = lookup_one_len(dentry->d_name.name, upperdir,
669 			       dentry->d_name.len);
670 	err = PTR_ERR(upper);
671 	if (IS_ERR(upper))
672 		goto out_unlock;
673 
674 	err = -ESTALE;
675 	if ((opaquedir && upper != opaquedir) ||
676 	    (!opaquedir && ovl_dentry_upper(dentry) &&
677 	     !ovl_matches_upper(dentry, upper))) {
678 		goto out_dput_upper;
679 	}
680 
681 	whiteout = ovl_whiteout(workdir, dentry);
682 	err = PTR_ERR(whiteout);
683 	if (IS_ERR(whiteout))
684 		goto out_dput_upper;
685 
686 	if (d_is_dir(upper))
687 		flags = RENAME_EXCHANGE;
688 
689 	err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
690 	if (err)
691 		goto kill_whiteout;
692 	if (flags)
693 		ovl_cleanup(wdir, upper);
694 
695 	ovl_dentry_version_inc(dentry->d_parent, true);
696 out_d_drop:
697 	d_drop(dentry);
698 	dput(whiteout);
699 out_dput_upper:
700 	dput(upper);
701 out_unlock:
702 	unlock_rename(workdir, upperdir);
703 out_dput:
704 	dput(opaquedir);
705 out:
706 	return err;
707 
708 kill_whiteout:
709 	ovl_cleanup(wdir, whiteout);
710 	goto out_d_drop;
711 }
712 
713 static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
714 {
715 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
716 	struct inode *dir = upperdir->d_inode;
717 	struct dentry *upper;
718 	struct dentry *opaquedir = NULL;
719 	int err;
720 
721 	/* Redirect dir can be !ovl_lower_positive && OVL_TYPE_MERGE */
722 	if (is_dir && ovl_dentry_get_redirect(dentry)) {
723 		opaquedir = ovl_check_empty_and_clear(dentry);
724 		err = PTR_ERR(opaquedir);
725 		if (IS_ERR(opaquedir))
726 			goto out;
727 	}
728 
729 	inode_lock_nested(dir, I_MUTEX_PARENT);
730 	upper = lookup_one_len(dentry->d_name.name, upperdir,
731 			       dentry->d_name.len);
732 	err = PTR_ERR(upper);
733 	if (IS_ERR(upper))
734 		goto out_unlock;
735 
736 	err = -ESTALE;
737 	if ((opaquedir && upper != opaquedir) ||
738 	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
739 		goto out_dput_upper;
740 
741 	if (is_dir)
742 		err = vfs_rmdir(dir, upper);
743 	else
744 		err = vfs_unlink(dir, upper, NULL);
745 	ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry));
746 
747 	/*
748 	 * Keeping this dentry hashed would mean having to release
749 	 * upperpath/lowerpath, which could only be done if we are the
750 	 * sole user of this dentry.  Too tricky...  Just unhash for
751 	 * now.
752 	 */
753 	if (!err)
754 		d_drop(dentry);
755 out_dput_upper:
756 	dput(upper);
757 out_unlock:
758 	inode_unlock(dir);
759 	dput(opaquedir);
760 out:
761 	return err;
762 }
763 
764 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
765 {
766 	int err;
767 	bool locked = false;
768 	const struct cred *old_cred;
769 
770 	err = ovl_want_write(dentry);
771 	if (err)
772 		goto out;
773 
774 	err = ovl_copy_up(dentry->d_parent);
775 	if (err)
776 		goto out_drop_write;
777 
778 	err = ovl_nlink_start(dentry, &locked);
779 	if (err)
780 		goto out_drop_write;
781 
782 	old_cred = ovl_override_creds(dentry->d_sb);
783 	if (!ovl_lower_positive(dentry))
784 		err = ovl_remove_upper(dentry, is_dir);
785 	else
786 		err = ovl_remove_and_whiteout(dentry, is_dir);
787 	revert_creds(old_cred);
788 	if (!err) {
789 		if (is_dir)
790 			clear_nlink(dentry->d_inode);
791 		else
792 			drop_nlink(dentry->d_inode);
793 	}
794 	ovl_nlink_end(dentry, locked);
795 out_drop_write:
796 	ovl_drop_write(dentry);
797 out:
798 	return err;
799 }
800 
801 static int ovl_unlink(struct inode *dir, struct dentry *dentry)
802 {
803 	return ovl_do_remove(dentry, false);
804 }
805 
806 static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
807 {
808 	return ovl_do_remove(dentry, true);
809 }
810 
811 static bool ovl_type_merge_or_lower(struct dentry *dentry)
812 {
813 	enum ovl_path_type type = ovl_path_type(dentry);
814 
815 	return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
816 }
817 
818 static bool ovl_can_move(struct dentry *dentry)
819 {
820 	return ovl_redirect_dir(dentry->d_sb) ||
821 		!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
822 }
823 
824 static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
825 {
826 	char *buf, *ret;
827 	struct dentry *d, *tmp;
828 	int buflen = ovl_redirect_max + 1;
829 
830 	if (samedir) {
831 		ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
832 			       GFP_KERNEL);
833 		goto out;
834 	}
835 
836 	buf = ret = kmalloc(buflen, GFP_KERNEL);
837 	if (!buf)
838 		goto out;
839 
840 	buflen--;
841 	buf[buflen] = '\0';
842 	for (d = dget(dentry); !IS_ROOT(d);) {
843 		const char *name;
844 		int thislen;
845 
846 		spin_lock(&d->d_lock);
847 		name = ovl_dentry_get_redirect(d);
848 		if (name) {
849 			thislen = strlen(name);
850 		} else {
851 			name = d->d_name.name;
852 			thislen = d->d_name.len;
853 		}
854 
855 		/* If path is too long, fall back to userspace move */
856 		if (thislen + (name[0] != '/') > buflen) {
857 			ret = ERR_PTR(-EXDEV);
858 			spin_unlock(&d->d_lock);
859 			goto out_put;
860 		}
861 
862 		buflen -= thislen;
863 		memcpy(&buf[buflen], name, thislen);
864 		tmp = dget_dlock(d->d_parent);
865 		spin_unlock(&d->d_lock);
866 
867 		dput(d);
868 		d = tmp;
869 
870 		/* Absolute redirect: finished */
871 		if (buf[buflen] == '/')
872 			break;
873 		buflen--;
874 		buf[buflen] = '/';
875 	}
876 	ret = kstrdup(&buf[buflen], GFP_KERNEL);
877 out_put:
878 	dput(d);
879 	kfree(buf);
880 out:
881 	return ret ? ret : ERR_PTR(-ENOMEM);
882 }
883 
884 static int ovl_set_redirect(struct dentry *dentry, bool samedir)
885 {
886 	int err;
887 	const char *redirect = ovl_dentry_get_redirect(dentry);
888 
889 	if (redirect && (samedir || redirect[0] == '/'))
890 		return 0;
891 
892 	redirect = ovl_get_redirect(dentry, samedir);
893 	if (IS_ERR(redirect))
894 		return PTR_ERR(redirect);
895 
896 	err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
897 				 OVL_XATTR_REDIRECT,
898 				 redirect, strlen(redirect), -EXDEV);
899 	if (!err) {
900 		spin_lock(&dentry->d_lock);
901 		ovl_dentry_set_redirect(dentry, redirect);
902 		spin_unlock(&dentry->d_lock);
903 	} else {
904 		kfree(redirect);
905 		pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
906 		/* Fall back to userspace copy-up */
907 		err = -EXDEV;
908 	}
909 	return err;
910 }
911 
912 static int ovl_rename(struct inode *olddir, struct dentry *old,
913 		      struct inode *newdir, struct dentry *new,
914 		      unsigned int flags)
915 {
916 	int err;
917 	bool locked = false;
918 	struct dentry *old_upperdir;
919 	struct dentry *new_upperdir;
920 	struct dentry *olddentry;
921 	struct dentry *newdentry;
922 	struct dentry *trap;
923 	bool old_opaque;
924 	bool new_opaque;
925 	bool cleanup_whiteout = false;
926 	bool overwrite = !(flags & RENAME_EXCHANGE);
927 	bool is_dir = d_is_dir(old);
928 	bool new_is_dir = d_is_dir(new);
929 	bool samedir = olddir == newdir;
930 	struct dentry *opaquedir = NULL;
931 	const struct cred *old_cred = NULL;
932 
933 	err = -EINVAL;
934 	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
935 		goto out;
936 
937 	flags &= ~RENAME_NOREPLACE;
938 
939 	/* Don't copy up directory trees */
940 	err = -EXDEV;
941 	if (!ovl_can_move(old))
942 		goto out;
943 	if (!overwrite && !ovl_can_move(new))
944 		goto out;
945 
946 	err = ovl_want_write(old);
947 	if (err)
948 		goto out;
949 
950 	err = ovl_copy_up(old);
951 	if (err)
952 		goto out_drop_write;
953 
954 	err = ovl_copy_up(new->d_parent);
955 	if (err)
956 		goto out_drop_write;
957 	if (!overwrite) {
958 		err = ovl_copy_up(new);
959 		if (err)
960 			goto out_drop_write;
961 	} else {
962 		err = ovl_nlink_start(new, &locked);
963 		if (err)
964 			goto out_drop_write;
965 	}
966 
967 	old_cred = ovl_override_creds(old->d_sb);
968 
969 	if (overwrite && new_is_dir && ovl_type_merge_or_lower(new)) {
970 		opaquedir = ovl_check_empty_and_clear(new);
971 		err = PTR_ERR(opaquedir);
972 		if (IS_ERR(opaquedir)) {
973 			opaquedir = NULL;
974 			goto out_revert_creds;
975 		}
976 	}
977 
978 	if (overwrite) {
979 		if (ovl_lower_positive(old)) {
980 			if (!ovl_dentry_is_whiteout(new)) {
981 				/* Whiteout source */
982 				flags |= RENAME_WHITEOUT;
983 			} else {
984 				/* Switch whiteouts */
985 				flags |= RENAME_EXCHANGE;
986 			}
987 		} else if (is_dir && ovl_dentry_is_whiteout(new)) {
988 			flags |= RENAME_EXCHANGE;
989 			cleanup_whiteout = true;
990 		}
991 	}
992 
993 	old_upperdir = ovl_dentry_upper(old->d_parent);
994 	new_upperdir = ovl_dentry_upper(new->d_parent);
995 
996 	if (!samedir) {
997 		/*
998 		 * When moving a merge dir or non-dir with copy up origin into
999 		 * a new parent, we are marking the new parent dir "impure".
1000 		 * When ovl_iterate() iterates an "impure" upper dir, it will
1001 		 * lookup the origin inodes of the entries to fill d_ino.
1002 		 */
1003 		if (ovl_type_origin(old)) {
1004 			err = ovl_set_impure(new->d_parent, new_upperdir);
1005 			if (err)
1006 				goto out_revert_creds;
1007 		}
1008 		if (!overwrite && ovl_type_origin(new)) {
1009 			err = ovl_set_impure(old->d_parent, old_upperdir);
1010 			if (err)
1011 				goto out_revert_creds;
1012 		}
1013 	}
1014 
1015 	trap = lock_rename(new_upperdir, old_upperdir);
1016 
1017 	olddentry = lookup_one_len(old->d_name.name, old_upperdir,
1018 				   old->d_name.len);
1019 	err = PTR_ERR(olddentry);
1020 	if (IS_ERR(olddentry))
1021 		goto out_unlock;
1022 
1023 	err = -ESTALE;
1024 	if (!ovl_matches_upper(old, olddentry))
1025 		goto out_dput_old;
1026 
1027 	newdentry = lookup_one_len(new->d_name.name, new_upperdir,
1028 				   new->d_name.len);
1029 	err = PTR_ERR(newdentry);
1030 	if (IS_ERR(newdentry))
1031 		goto out_dput_old;
1032 
1033 	old_opaque = ovl_dentry_is_opaque(old);
1034 	new_opaque = ovl_dentry_is_opaque(new);
1035 
1036 	err = -ESTALE;
1037 	if (d_inode(new) && ovl_dentry_upper(new)) {
1038 		if (opaquedir) {
1039 			if (newdentry != opaquedir)
1040 				goto out_dput;
1041 		} else {
1042 			if (!ovl_matches_upper(new, newdentry))
1043 				goto out_dput;
1044 		}
1045 	} else {
1046 		if (!d_is_negative(newdentry) &&
1047 		    (!new_opaque || !ovl_is_whiteout(newdentry)))
1048 			goto out_dput;
1049 	}
1050 
1051 	if (olddentry == trap)
1052 		goto out_dput;
1053 	if (newdentry == trap)
1054 		goto out_dput;
1055 
1056 	if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
1057 		goto out_dput;
1058 
1059 	err = 0;
1060 	if (is_dir) {
1061 		if (ovl_type_merge_or_lower(old))
1062 			err = ovl_set_redirect(old, samedir);
1063 		else if (!old_opaque && ovl_type_merge(new->d_parent))
1064 			err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
1065 		if (err)
1066 			goto out_dput;
1067 	}
1068 	if (!overwrite && new_is_dir) {
1069 		if (ovl_type_merge_or_lower(new))
1070 			err = ovl_set_redirect(new, samedir);
1071 		else if (!new_opaque && ovl_type_merge(old->d_parent))
1072 			err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
1073 		if (err)
1074 			goto out_dput;
1075 	}
1076 
1077 	err = ovl_do_rename(old_upperdir->d_inode, olddentry,
1078 			    new_upperdir->d_inode, newdentry, flags);
1079 	if (err)
1080 		goto out_dput;
1081 
1082 	if (cleanup_whiteout)
1083 		ovl_cleanup(old_upperdir->d_inode, newdentry);
1084 
1085 	if (overwrite && d_inode(new)) {
1086 		if (new_is_dir)
1087 			clear_nlink(d_inode(new));
1088 		else
1089 			drop_nlink(d_inode(new));
1090 	}
1091 
1092 	ovl_dentry_version_inc(old->d_parent,
1093 			       !overwrite && ovl_type_origin(new));
1094 	ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old));
1095 
1096 out_dput:
1097 	dput(newdentry);
1098 out_dput_old:
1099 	dput(olddentry);
1100 out_unlock:
1101 	unlock_rename(new_upperdir, old_upperdir);
1102 out_revert_creds:
1103 	revert_creds(old_cred);
1104 	ovl_nlink_end(new, locked);
1105 out_drop_write:
1106 	ovl_drop_write(old);
1107 out:
1108 	dput(opaquedir);
1109 	return err;
1110 }
1111 
1112 const struct inode_operations ovl_dir_inode_operations = {
1113 	.lookup		= ovl_lookup,
1114 	.mkdir		= ovl_mkdir,
1115 	.symlink	= ovl_symlink,
1116 	.unlink		= ovl_unlink,
1117 	.rmdir		= ovl_rmdir,
1118 	.rename		= ovl_rename,
1119 	.link		= ovl_link,
1120 	.setattr	= ovl_setattr,
1121 	.create		= ovl_create,
1122 	.mknod		= ovl_mknod,
1123 	.permission	= ovl_permission,
1124 	.getattr	= ovl_getattr,
1125 	.listxattr	= ovl_listxattr,
1126 	.get_acl	= ovl_get_acl,
1127 	.update_time	= ovl_update_time,
1128 };
1129