xref: /openbmc/linux/fs/overlayfs/dir.c (revision 8730046c)
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/namei.h>
12 #include <linux/xattr.h>
13 #include <linux/security.h>
14 #include <linux/cred.h>
15 #include <linux/module.h>
16 #include <linux/posix_acl.h>
17 #include <linux/posix_acl_xattr.h>
18 #include <linux/atomic.h>
19 #include <linux/ratelimit.h>
20 #include "overlayfs.h"
21 
22 static unsigned short ovl_redirect_max = 256;
23 module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
24 MODULE_PARM_DESC(ovl_redirect_max,
25 		 "Maximum length of absolute redirect xattr value");
26 
/*
 * Remove @wdentry from the directory inode @wdir: rmdir for a directory,
 * unlink for anything else.  A failure is only logged; nothing is reported
 * back to the caller.
 */
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
	int ret;

	/* Keep an extra reference on the dentry across the removal */
	dget(wdentry);
	ret = d_is_dir(wdentry) ? ovl_do_rmdir(wdir, wdentry) :
				  ovl_do_unlink(wdir, wdentry);
	dput(wdentry);

	if (!ret)
		return;

	pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", wdentry, ret);
}
43 
44 struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
45 {
46 	struct dentry *temp;
47 	char name[20];
48 	static atomic_t temp_id = ATOMIC_INIT(0);
49 
50 	/* counter is allowed to wrap, since temp dentries are ephemeral */
51 	snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
52 
53 	temp = lookup_one_len(name, workdir, strlen(name));
54 	if (!IS_ERR(temp) && temp->d_inode) {
55 		pr_err("overlayfs: workdir/%s already exists\n", name);
56 		dput(temp);
57 		temp = ERR_PTR(-EIO);
58 	}
59 
60 	return temp;
61 }
62 
63 /* caller holds i_mutex on workdir */
64 static struct dentry *ovl_whiteout(struct dentry *workdir,
65 				   struct dentry *dentry)
66 {
67 	int err;
68 	struct dentry *whiteout;
69 	struct inode *wdir = workdir->d_inode;
70 
71 	whiteout = ovl_lookup_temp(workdir, dentry);
72 	if (IS_ERR(whiteout))
73 		return whiteout;
74 
75 	err = ovl_do_whiteout(wdir, whiteout);
76 	if (err) {
77 		dput(whiteout);
78 		whiteout = ERR_PTR(err);
79 	}
80 
81 	return whiteout;
82 }
83 
84 int ovl_create_real(struct inode *dir, struct dentry *newdentry,
85 		    struct cattr *attr, struct dentry *hardlink, bool debug)
86 {
87 	int err;
88 
89 	if (newdentry->d_inode)
90 		return -ESTALE;
91 
92 	if (hardlink) {
93 		err = ovl_do_link(hardlink, dir, newdentry, debug);
94 	} else {
95 		switch (attr->mode & S_IFMT) {
96 		case S_IFREG:
97 			err = ovl_do_create(dir, newdentry, attr->mode, debug);
98 			break;
99 
100 		case S_IFDIR:
101 			err = ovl_do_mkdir(dir, newdentry, attr->mode, debug);
102 			break;
103 
104 		case S_IFCHR:
105 		case S_IFBLK:
106 		case S_IFIFO:
107 		case S_IFSOCK:
108 			err = ovl_do_mknod(dir, newdentry,
109 					   attr->mode, attr->rdev, debug);
110 			break;
111 
112 		case S_IFLNK:
113 			err = ovl_do_symlink(dir, newdentry, attr->link, debug);
114 			break;
115 
116 		default:
117 			err = -EPERM;
118 		}
119 	}
120 	if (!err && WARN_ON(!newdentry->d_inode)) {
121 		/*
122 		 * Not quite sure if non-instantiated dentry is legal or not.
123 		 * VFS doesn't seem to care so check and warn here.
124 		 */
125 		err = -ENOENT;
126 	}
127 	return err;
128 }
129 
130 static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
131 {
132 	int err;
133 
134 	err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
135 	if (!err)
136 		ovl_dentry_set_opaque(dentry);
137 
138 	return err;
139 }
140 
141 static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
142 			 struct kstat *stat)
143 {
144 	int err;
145 	enum ovl_path_type type;
146 	struct path realpath;
147 	const struct cred *old_cred;
148 
149 	type = ovl_path_real(dentry, &realpath);
150 	old_cred = ovl_override_creds(dentry->d_sb);
151 	err = vfs_getattr(&realpath, stat);
152 	revert_creds(old_cred);
153 	if (err)
154 		return err;
155 
156 	stat->dev = dentry->d_sb->s_dev;
157 	stat->ino = dentry->d_inode->i_ino;
158 
159 	/*
160 	 * It's probably not worth it to count subdirs to get the
161 	 * correct link count.  nlink=1 seems to pacify 'find' and
162 	 * other utilities.
163 	 */
164 	if (OVL_TYPE_MERGE(type))
165 		stat->nlink = 1;
166 
167 	return 0;
168 }
169 
170 /* Common operations required to be done after creation of file on upper */
171 static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
172 			    struct dentry *newdentry, bool hardlink)
173 {
174 	ovl_dentry_version_inc(dentry->d_parent);
175 	ovl_dentry_update(dentry, newdentry);
176 	if (!hardlink) {
177 		ovl_inode_update(inode, d_inode(newdentry));
178 		ovl_copyattr(newdentry->d_inode, inode);
179 	} else {
180 		WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry));
181 		inc_nlink(inode);
182 	}
183 	d_instantiate(dentry, inode);
184 }
185 
186 static bool ovl_type_merge(struct dentry *dentry)
187 {
188 	return OVL_TYPE_MERGE(ovl_path_type(dentry));
189 }
190 
191 static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
192 			    struct cattr *attr, struct dentry *hardlink)
193 {
194 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
195 	struct inode *udir = upperdir->d_inode;
196 	struct dentry *newdentry;
197 	int err;
198 
199 	if (!hardlink && !IS_POSIXACL(udir))
200 		attr->mode &= ~current_umask();
201 
202 	inode_lock_nested(udir, I_MUTEX_PARENT);
203 	newdentry = lookup_one_len(dentry->d_name.name, upperdir,
204 				   dentry->d_name.len);
205 	err = PTR_ERR(newdentry);
206 	if (IS_ERR(newdentry))
207 		goto out_unlock;
208 	err = ovl_create_real(udir, newdentry, attr, hardlink, false);
209 	if (err)
210 		goto out_dput;
211 
212 	if (ovl_type_merge(dentry->d_parent)) {
213 		/* Setting opaque here is just an optimization, allow to fail */
214 		ovl_set_opaque(dentry, newdentry);
215 	}
216 
217 	ovl_instantiate(dentry, inode, newdentry, !!hardlink);
218 	newdentry = NULL;
219 out_dput:
220 	dput(newdentry);
221 out_unlock:
222 	inode_unlock(udir);
223 	return err;
224 }
225 
226 static int ovl_lock_rename_workdir(struct dentry *workdir,
227 				   struct dentry *upperdir)
228 {
229 	/* Workdir should not be the same as upperdir */
230 	if (workdir == upperdir)
231 		goto err;
232 
233 	/* Workdir should not be subdir of upperdir and vice versa */
234 	if (lock_rename(workdir, upperdir) != NULL)
235 		goto err_unlock;
236 
237 	return 0;
238 
239 err_unlock:
240 	unlock_rename(workdir, upperdir);
241 err:
242 	pr_err("overlayfs: failed to lock workdir+upperdir\n");
243 	return -EIO;
244 }
245 
246 static struct dentry *ovl_clear_empty(struct dentry *dentry,
247 				      struct list_head *list)
248 {
249 	struct dentry *workdir = ovl_workdir(dentry);
250 	struct inode *wdir = workdir->d_inode;
251 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
252 	struct inode *udir = upperdir->d_inode;
253 	struct path upperpath;
254 	struct dentry *upper;
255 	struct dentry *opaquedir;
256 	struct kstat stat;
257 	int err;
258 
259 	if (WARN_ON(!workdir))
260 		return ERR_PTR(-EROFS);
261 
262 	err = ovl_lock_rename_workdir(workdir, upperdir);
263 	if (err)
264 		goto out;
265 
266 	ovl_path_upper(dentry, &upperpath);
267 	err = vfs_getattr(&upperpath, &stat);
268 	if (err)
269 		goto out_unlock;
270 
271 	err = -ESTALE;
272 	if (!S_ISDIR(stat.mode))
273 		goto out_unlock;
274 	upper = upperpath.dentry;
275 	if (upper->d_parent->d_inode != udir)
276 		goto out_unlock;
277 
278 	opaquedir = ovl_lookup_temp(workdir, dentry);
279 	err = PTR_ERR(opaquedir);
280 	if (IS_ERR(opaquedir))
281 		goto out_unlock;
282 
283 	err = ovl_create_real(wdir, opaquedir,
284 			      &(struct cattr){.mode = stat.mode}, NULL, true);
285 	if (err)
286 		goto out_dput;
287 
288 	err = ovl_copy_xattr(upper, opaquedir);
289 	if (err)
290 		goto out_cleanup;
291 
292 	err = ovl_set_opaque(dentry, opaquedir);
293 	if (err)
294 		goto out_cleanup;
295 
296 	inode_lock(opaquedir->d_inode);
297 	err = ovl_set_attr(opaquedir, &stat);
298 	inode_unlock(opaquedir->d_inode);
299 	if (err)
300 		goto out_cleanup;
301 
302 	err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
303 	if (err)
304 		goto out_cleanup;
305 
306 	ovl_cleanup_whiteouts(upper, list);
307 	ovl_cleanup(wdir, upper);
308 	unlock_rename(workdir, upperdir);
309 
310 	/* dentry's upper doesn't match now, get rid of it */
311 	d_drop(dentry);
312 
313 	return opaquedir;
314 
315 out_cleanup:
316 	ovl_cleanup(wdir, opaquedir);
317 out_dput:
318 	dput(opaquedir);
319 out_unlock:
320 	unlock_rename(workdir, upperdir);
321 out:
322 	return ERR_PTR(err);
323 }
324 
325 static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
326 {
327 	int err;
328 	struct dentry *ret = NULL;
329 	enum ovl_path_type type = ovl_path_type(dentry);
330 	LIST_HEAD(list);
331 
332 	err = ovl_check_empty_dir(dentry, &list);
333 	if (err) {
334 		ret = ERR_PTR(err);
335 		goto out_free;
336 	}
337 
338 	/*
339 	 * When removing an empty opaque directory, then it makes no sense to
340 	 * replace it with an exact replica of itself.
341 	 *
342 	 * If no upperdentry then skip clearing whiteouts.
343 	 *
344 	 * Can race with copy-up, since we don't hold the upperdir mutex.
345 	 * Doesn't matter, since copy-up can't create a non-empty directory
346 	 * from an empty one.
347 	 */
348 	if (OVL_TYPE_UPPER(type) && OVL_TYPE_MERGE(type))
349 		ret = ovl_clear_empty(dentry, &list);
350 
351 out_free:
352 	ovl_cache_free(&list);
353 
354 	return ret;
355 }
356 
357 static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
358 			     const struct posix_acl *acl)
359 {
360 	void *buffer;
361 	size_t size;
362 	int err;
363 
364 	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
365 		return 0;
366 
367 	size = posix_acl_to_xattr(NULL, acl, NULL, 0);
368 	buffer = kmalloc(size, GFP_KERNEL);
369 	if (!buffer)
370 		return -ENOMEM;
371 
372 	size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
373 	err = size;
374 	if (err < 0)
375 		goto out_free;
376 
377 	err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
378 out_free:
379 	kfree(buffer);
380 	return err;
381 }
382 
383 static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
384 				    struct cattr *cattr,
385 				    struct dentry *hardlink)
386 {
387 	struct dentry *workdir = ovl_workdir(dentry);
388 	struct inode *wdir = workdir->d_inode;
389 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
390 	struct inode *udir = upperdir->d_inode;
391 	struct dentry *upper;
392 	struct dentry *newdentry;
393 	int err;
394 	struct posix_acl *acl, *default_acl;
395 
396 	if (WARN_ON(!workdir))
397 		return -EROFS;
398 
399 	if (!hardlink) {
400 		err = posix_acl_create(dentry->d_parent->d_inode,
401 				       &cattr->mode, &default_acl, &acl);
402 		if (err)
403 			return err;
404 	}
405 
406 	err = ovl_lock_rename_workdir(workdir, upperdir);
407 	if (err)
408 		goto out;
409 
410 	newdentry = ovl_lookup_temp(workdir, dentry);
411 	err = PTR_ERR(newdentry);
412 	if (IS_ERR(newdentry))
413 		goto out_unlock;
414 
415 	upper = lookup_one_len(dentry->d_name.name, upperdir,
416 			       dentry->d_name.len);
417 	err = PTR_ERR(upper);
418 	if (IS_ERR(upper))
419 		goto out_dput;
420 
421 	err = ovl_create_real(wdir, newdentry, cattr, hardlink, true);
422 	if (err)
423 		goto out_dput2;
424 
425 	/*
426 	 * mode could have been mutilated due to umask (e.g. sgid directory)
427 	 */
428 	if (!hardlink &&
429 	    !S_ISLNK(cattr->mode) &&
430 	    newdentry->d_inode->i_mode != cattr->mode) {
431 		struct iattr attr = {
432 			.ia_valid = ATTR_MODE,
433 			.ia_mode = cattr->mode,
434 		};
435 		inode_lock(newdentry->d_inode);
436 		err = notify_change(newdentry, &attr, NULL);
437 		inode_unlock(newdentry->d_inode);
438 		if (err)
439 			goto out_cleanup;
440 	}
441 	if (!hardlink) {
442 		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
443 					acl);
444 		if (err)
445 			goto out_cleanup;
446 
447 		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
448 					default_acl);
449 		if (err)
450 			goto out_cleanup;
451 	}
452 
453 	if (!hardlink && S_ISDIR(cattr->mode)) {
454 		err = ovl_set_opaque(dentry, newdentry);
455 		if (err)
456 			goto out_cleanup;
457 
458 		err = ovl_do_rename(wdir, newdentry, udir, upper,
459 				    RENAME_EXCHANGE);
460 		if (err)
461 			goto out_cleanup;
462 
463 		ovl_cleanup(wdir, upper);
464 	} else {
465 		err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
466 		if (err)
467 			goto out_cleanup;
468 	}
469 	ovl_instantiate(dentry, inode, newdentry, !!hardlink);
470 	newdentry = NULL;
471 out_dput2:
472 	dput(upper);
473 out_dput:
474 	dput(newdentry);
475 out_unlock:
476 	unlock_rename(workdir, upperdir);
477 out:
478 	if (!hardlink) {
479 		posix_acl_release(acl);
480 		posix_acl_release(default_acl);
481 	}
482 	return err;
483 
484 out_cleanup:
485 	ovl_cleanup(wdir, newdentry);
486 	goto out_dput2;
487 }
488 
/*
 * Create a new object for @dentry, or link @hardlink to it when that is
 * non-NULL (in which case @attr is not dereferenced here).
 *
 * The parent is copied up first.  The actual creation runs with the
 * overlay's override credentials, further adjusted so that fsuid/fsgid
 * match the new overlay @inode.  Depending on whether @dentry is a
 * whiteout, creation goes through ovl_create_over_whiteout() or
 * ovl_create_upper().
 *
 * Returns 0 on success, -ENOMEM if the credentials cannot be prepared, or
 * the error of the failing step.
 */
static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
			      struct cattr *attr, struct dentry *hardlink)
{
	int err;
	const struct cred *old_cred;
	struct cred *override_cred;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		return err;

	old_cred = ovl_override_creds(dentry->d_sb);
	/* Preset the result for a prepare_creds() failure */
	err = -ENOMEM;
	override_cred = prepare_creds();
	if (override_cred) {
		override_cred->fsuid = inode->i_uid;
		override_cred->fsgid = inode->i_gid;
		if (!hardlink) {
			err = security_dentry_create_files_as(dentry,
					attr->mode, &dentry->d_name, old_cred,
					override_cred);
			if (err) {
				put_cred(override_cred);
				goto out_revert_creds;
			}
		}
		/*
		 * Install the adjusted creds; drop both the reference on the
		 * creds they replace and our own reference on the new ones.
		 */
		put_cred(override_creds(override_cred));
		put_cred(override_cred);

		if (!ovl_dentry_is_whiteout(dentry))
			err = ovl_create_upper(dentry, inode, attr,
						hardlink);
		else
			err = ovl_create_over_whiteout(dentry, inode, attr,
							hardlink);
	}
out_revert_creds:
	/* Restore the credentials that were in effect on entry */
	revert_creds(old_cred);
	if (!err) {
		struct inode *realinode = d_inode(ovl_dentry_upper(dentry));

		/* The overlay inode is expected to mirror the real upper inode */
		WARN_ON(inode->i_mode != realinode->i_mode);
		WARN_ON(!uid_eq(inode->i_uid, realinode->i_uid));
		WARN_ON(!gid_eq(inode->i_gid, realinode->i_gid));
	}
	return err;
}
536 
537 static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
538 			     const char *link)
539 {
540 	int err;
541 	struct inode *inode;
542 	struct cattr attr = {
543 		.rdev = rdev,
544 		.link = link,
545 	};
546 
547 	err = ovl_want_write(dentry);
548 	if (err)
549 		goto out;
550 
551 	err = -ENOMEM;
552 	inode = ovl_new_inode(dentry->d_sb, mode, rdev);
553 	if (!inode)
554 		goto out_drop_write;
555 
556 	inode_init_owner(inode, dentry->d_parent->d_inode, mode);
557 	attr.mode = inode->i_mode;
558 
559 	err = ovl_create_or_link(dentry, inode, &attr, NULL);
560 	if (err)
561 		iput(inode);
562 
563 out_drop_write:
564 	ovl_drop_write(dentry);
565 out:
566 	return err;
567 }
568 
569 static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
570 		      bool excl)
571 {
572 	return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
573 }
574 
575 static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
576 {
577 	return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
578 }
579 
580 static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
581 		     dev_t rdev)
582 {
583 	/* Don't allow creation of "whiteout" on overlay */
584 	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
585 		return -EPERM;
586 
587 	return ovl_create_object(dentry, mode, rdev, NULL);
588 }
589 
590 static int ovl_symlink(struct inode *dir, struct dentry *dentry,
591 		       const char *link)
592 {
593 	return ovl_create_object(dentry, S_IFLNK, 0, link);
594 }
595 
596 static int ovl_link(struct dentry *old, struct inode *newdir,
597 		    struct dentry *new)
598 {
599 	int err;
600 	struct inode *inode;
601 
602 	err = ovl_want_write(old);
603 	if (err)
604 		goto out;
605 
606 	err = ovl_copy_up(old);
607 	if (err)
608 		goto out_drop_write;
609 
610 	inode = d_inode(old);
611 	ihold(inode);
612 
613 	err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old));
614 	if (err)
615 		iput(inode);
616 
617 out_drop_write:
618 	ovl_drop_write(old);
619 out:
620 	return err;
621 }
622 
623 static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
624 {
625 	struct dentry *workdir = ovl_workdir(dentry);
626 	struct inode *wdir = workdir->d_inode;
627 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
628 	struct inode *udir = upperdir->d_inode;
629 	struct dentry *whiteout;
630 	struct dentry *upper;
631 	struct dentry *opaquedir = NULL;
632 	int err;
633 	int flags = 0;
634 
635 	if (WARN_ON(!workdir))
636 		return -EROFS;
637 
638 	if (is_dir) {
639 		opaquedir = ovl_check_empty_and_clear(dentry);
640 		err = PTR_ERR(opaquedir);
641 		if (IS_ERR(opaquedir))
642 			goto out;
643 	}
644 
645 	err = ovl_lock_rename_workdir(workdir, upperdir);
646 	if (err)
647 		goto out_dput;
648 
649 	upper = lookup_one_len(dentry->d_name.name, upperdir,
650 			       dentry->d_name.len);
651 	err = PTR_ERR(upper);
652 	if (IS_ERR(upper))
653 		goto out_unlock;
654 
655 	err = -ESTALE;
656 	if ((opaquedir && upper != opaquedir) ||
657 	    (!opaquedir && ovl_dentry_upper(dentry) &&
658 	     upper != ovl_dentry_upper(dentry))) {
659 		goto out_dput_upper;
660 	}
661 
662 	whiteout = ovl_whiteout(workdir, dentry);
663 	err = PTR_ERR(whiteout);
664 	if (IS_ERR(whiteout))
665 		goto out_dput_upper;
666 
667 	if (d_is_dir(upper))
668 		flags = RENAME_EXCHANGE;
669 
670 	err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
671 	if (err)
672 		goto kill_whiteout;
673 	if (flags)
674 		ovl_cleanup(wdir, upper);
675 
676 	ovl_dentry_version_inc(dentry->d_parent);
677 out_d_drop:
678 	d_drop(dentry);
679 	dput(whiteout);
680 out_dput_upper:
681 	dput(upper);
682 out_unlock:
683 	unlock_rename(workdir, upperdir);
684 out_dput:
685 	dput(opaquedir);
686 out:
687 	return err;
688 
689 kill_whiteout:
690 	ovl_cleanup(wdir, whiteout);
691 	goto out_d_drop;
692 }
693 
694 static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
695 {
696 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
697 	struct inode *dir = upperdir->d_inode;
698 	struct dentry *upper;
699 	struct dentry *opaquedir = NULL;
700 	int err;
701 
702 	/* Redirect dir can be !ovl_lower_positive && OVL_TYPE_MERGE */
703 	if (is_dir && ovl_dentry_get_redirect(dentry)) {
704 		opaquedir = ovl_check_empty_and_clear(dentry);
705 		err = PTR_ERR(opaquedir);
706 		if (IS_ERR(opaquedir))
707 			goto out;
708 	}
709 
710 	inode_lock_nested(dir, I_MUTEX_PARENT);
711 	upper = lookup_one_len(dentry->d_name.name, upperdir,
712 			       dentry->d_name.len);
713 	err = PTR_ERR(upper);
714 	if (IS_ERR(upper))
715 		goto out_unlock;
716 
717 	err = -ESTALE;
718 	if ((opaquedir && upper != opaquedir) ||
719 	    (!opaquedir && upper != ovl_dentry_upper(dentry)))
720 		goto out_dput_upper;
721 
722 	if (is_dir)
723 		err = vfs_rmdir(dir, upper);
724 	else
725 		err = vfs_unlink(dir, upper, NULL);
726 	ovl_dentry_version_inc(dentry->d_parent);
727 
728 	/*
729 	 * Keeping this dentry hashed would mean having to release
730 	 * upperpath/lowerpath, which could only be done if we are the
731 	 * sole user of this dentry.  Too tricky...  Just unhash for
732 	 * now.
733 	 */
734 	if (!err)
735 		d_drop(dentry);
736 out_dput_upper:
737 	dput(upper);
738 out_unlock:
739 	inode_unlock(dir);
740 	dput(opaquedir);
741 out:
742 	return err;
743 }
744 
745 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
746 {
747 	enum ovl_path_type type;
748 	int err;
749 	const struct cred *old_cred;
750 
751 	err = ovl_want_write(dentry);
752 	if (err)
753 		goto out;
754 
755 	err = ovl_copy_up(dentry->d_parent);
756 	if (err)
757 		goto out_drop_write;
758 
759 	type = ovl_path_type(dentry);
760 
761 	old_cred = ovl_override_creds(dentry->d_sb);
762 	if (!ovl_lower_positive(dentry))
763 		err = ovl_remove_upper(dentry, is_dir);
764 	else
765 		err = ovl_remove_and_whiteout(dentry, is_dir);
766 	revert_creds(old_cred);
767 	if (!err) {
768 		if (is_dir)
769 			clear_nlink(dentry->d_inode);
770 		else
771 			drop_nlink(dentry->d_inode);
772 	}
773 out_drop_write:
774 	ovl_drop_write(dentry);
775 out:
776 	return err;
777 }
778 
779 static int ovl_unlink(struct inode *dir, struct dentry *dentry)
780 {
781 	return ovl_do_remove(dentry, false);
782 }
783 
784 static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
785 {
786 	return ovl_do_remove(dentry, true);
787 }
788 
789 static bool ovl_type_merge_or_lower(struct dentry *dentry)
790 {
791 	enum ovl_path_type type = ovl_path_type(dentry);
792 
793 	return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
794 }
795 
796 static bool ovl_can_move(struct dentry *dentry)
797 {
798 	return ovl_redirect_dir(dentry->d_sb) ||
799 		!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
800 }
801 
/*
 * Build the redirect string to store for @dentry.
 *
 * With @samedir set, the result is simply a copy of the dentry's own name.
 * Otherwise a path is assembled back-to-front in a temporary buffer of
 * ovl_redirect_max + 1 bytes by walking up through d_parent.  Each step
 * contributes the ancestor's existing redirect value if it has one and its
 * name otherwise.  The walk ends at the root or as soon as a component
 * starting with '/' has been copied.
 *
 * Returns a newly allocated string on success, ERR_PTR(-EXDEV) if the path
 * does not fit into the buffer, or ERR_PTR(-ENOMEM) if an allocation fails.
 */
static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
{
	char *buf, *ret;
	struct dentry *d, *tmp;
	int buflen = ovl_redirect_max + 1;

	if (samedir) {
		ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
			       GFP_KERNEL);
		goto out;
	}

	buf = ret = kmalloc(buflen, GFP_TEMPORARY);
	if (!buf)
		goto out;

	/* Terminate the buffer; buflen is now the index of the string start */
	buflen--;
	buf[buflen] = '\0';
	for (d = dget(dentry); !IS_ROOT(d);) {
		const char *name;
		int thislen;

		/* Name, redirect and parent are all read under d_lock */
		spin_lock(&d->d_lock);
		name = ovl_dentry_get_redirect(d);
		if (name) {
			thislen = strlen(name);
		} else {
			name = d->d_name.name;
			thislen = d->d_name.len;
		}

		/* If path is too long, fall back to userspace move */
		if (thislen + (name[0] != '/') > buflen) {
			ret = ERR_PTR(-EXDEV);
			spin_unlock(&d->d_lock);
			goto out_put;
		}

		/* Prepend this component in front of what is already there */
		buflen -= thislen;
		memcpy(&buf[buflen], name, thislen);
		tmp = dget_dlock(d->d_parent);
		spin_unlock(&d->d_lock);

		/* Move the held reference from d up to its parent */
		dput(d);
		d = tmp;

		/* Absolute redirect: finished */
		if (buf[buflen] == '/')
			break;
		buflen--;
		buf[buflen] = '/';
	}
	/* Hand back a right-sized copy; the work buffer is freed below */
	ret = kstrdup(&buf[buflen], GFP_KERNEL);
out_put:
	dput(d);
	kfree(buf);
out:
	/* A NULL ret here means one of the allocations above failed */
	return ret ? ret : ERR_PTR(-ENOMEM);
}
861 
862 static int ovl_set_redirect(struct dentry *dentry, bool samedir)
863 {
864 	int err;
865 	const char *redirect = ovl_dentry_get_redirect(dentry);
866 
867 	if (redirect && (samedir || redirect[0] == '/'))
868 		return 0;
869 
870 	redirect = ovl_get_redirect(dentry, samedir);
871 	if (IS_ERR(redirect))
872 		return PTR_ERR(redirect);
873 
874 	err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT,
875 			      redirect, strlen(redirect), 0);
876 	if (!err) {
877 		spin_lock(&dentry->d_lock);
878 		ovl_dentry_set_redirect(dentry, redirect);
879 		spin_unlock(&dentry->d_lock);
880 	} else {
881 		kfree(redirect);
882 		if (err == -EOPNOTSUPP)
883 			ovl_clear_redirect_dir(dentry->d_sb);
884 		else
885 			pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
886 		/* Fall back to userspace copy-up */
887 		err = -EXDEV;
888 	}
889 	return err;
890 }
891 
/*
 * ->rename for overlay directories.
 *
 * Only RENAME_EXCHANGE and RENAME_NOREPLACE are accepted in @flags
 * (-EINVAL otherwise); RENAME_NOREPLACE is stripped before the real rename.
 * Objects for which ovl_can_move() is false are refused with -EXDEV.
 *
 * After copying up @old, the new parent and (for an exchange) @new, the
 * operation runs with the overlay's override credentials: an overwritten
 * directory is checked for emptiness, the rename flags are extended with
 * RENAME_WHITEOUT/RENAME_EXCHANGE as needed, the upper dentries are looked
 * up and validated under lock_rename(), redirect/opaque xattrs are set on
 * moved directories, and finally the rename is performed on the upper
 * layer.
 *
 * Returns 0 on success, -ESTALE if the upper dentries found under lock are
 * not the expected ones, or the error of the failing step.
 */
static int ovl_rename(struct inode *olddir, struct dentry *old,
		      struct inode *newdir, struct dentry *new,
		      unsigned int flags)
{
	int err;
	struct dentry *old_upperdir;
	struct dentry *new_upperdir;
	struct dentry *olddentry;
	struct dentry *newdentry;
	struct dentry *trap;
	bool old_opaque;
	bool new_opaque;
	bool cleanup_whiteout = false;
	bool overwrite = !(flags & RENAME_EXCHANGE);
	bool is_dir = d_is_dir(old);
	bool new_is_dir = d_is_dir(new);
	bool samedir = olddir == newdir;
	struct dentry *opaquedir = NULL;
	const struct cred *old_cred = NULL;

	err = -EINVAL;
	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
		goto out;

	flags &= ~RENAME_NOREPLACE;

	/* Don't copy up directory trees */
	err = -EXDEV;
	if (!ovl_can_move(old))
		goto out;
	if (!overwrite && !ovl_can_move(new))
		goto out;

	err = ovl_want_write(old);
	if (err)
		goto out;

	err = ovl_copy_up(old);
	if (err)
		goto out_drop_write;

	err = ovl_copy_up(new->d_parent);
	if (err)
		goto out_drop_write;
	/* For an exchange the target moves too, so it needs an upper as well */
	if (!overwrite) {
		err = ovl_copy_up(new);
		if (err)
			goto out_drop_write;
	}

	old_cred = ovl_override_creds(old->d_sb);

	if (overwrite && new_is_dir && ovl_type_merge_or_lower(new)) {
		opaquedir = ovl_check_empty_and_clear(new);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir)) {
			/* Keep the final dput(opaquedir) a no-op */
			opaquedir = NULL;
			goto out_revert_creds;
		}
	}

	if (overwrite) {
		if (ovl_lower_positive(old)) {
			if (!ovl_dentry_is_whiteout(new)) {
				/* Whiteout source */
				flags |= RENAME_WHITEOUT;
			} else {
				/* Switch whiteouts */
				flags |= RENAME_EXCHANGE;
			}
		} else if (is_dir && ovl_dentry_is_whiteout(new)) {
			/* Exchange, then remove the whiteout after the rename */
			flags |= RENAME_EXCHANGE;
			cleanup_whiteout = true;
		}
	}

	old_upperdir = ovl_dentry_upper(old->d_parent);
	new_upperdir = ovl_dentry_upper(new->d_parent);

	trap = lock_rename(new_upperdir, old_upperdir);

	olddentry = lookup_one_len(old->d_name.name, old_upperdir,
				   old->d_name.len);
	err = PTR_ERR(olddentry);
	if (IS_ERR(olddentry))
		goto out_unlock;

	/* The source found under lock must be the known upper dentry */
	err = -ESTALE;
	if (olddentry != ovl_dentry_upper(old))
		goto out_dput_old;

	newdentry = lookup_one_len(new->d_name.name, new_upperdir,
				   new->d_name.len);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_dput_old;

	old_opaque = ovl_dentry_is_opaque(old);
	new_opaque = ovl_dentry_is_opaque(new);

	/* Likewise validate what was found at the target name */
	err = -ESTALE;
	if (ovl_dentry_upper(new)) {
		if (opaquedir) {
			if (newdentry != opaquedir)
				goto out_dput;
		} else {
			if (newdentry != ovl_dentry_upper(new))
				goto out_dput;
		}
	} else {
		/* No upper known: only a negative dentry or whiteout is fine */
		if (!d_is_negative(newdentry) &&
		    (!new_opaque || !ovl_is_whiteout(newdentry)))
			goto out_dput;
	}

	/* Neither end may be the dentry returned by lock_rename() */
	if (olddentry == trap)
		goto out_dput;
	if (newdentry == trap)
		goto out_dput;

	if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
		goto out_dput;

	err = 0;
	if (is_dir) {
		if (ovl_type_merge_or_lower(old))
			err = ovl_set_redirect(old, samedir);
		else if (!old_opaque && ovl_type_merge(new->d_parent))
			err = ovl_set_opaque(old, olddentry);
		if (err)
			goto out_dput;
	}
	/* In an exchange the target directory moves as well */
	if (!overwrite && new_is_dir) {
		if (ovl_type_merge_or_lower(new))
			err = ovl_set_redirect(new, samedir);
		else if (!new_opaque && ovl_type_merge(old->d_parent))
			err = ovl_set_opaque(new, newdentry);
		if (err)
			goto out_dput;
	}

	err = ovl_do_rename(old_upperdir->d_inode, olddentry,
			    new_upperdir->d_inode, newdentry, flags);
	if (err)
		goto out_dput;

	/* After the exchange the whiteout sits in the old upper directory */
	if (cleanup_whiteout)
		ovl_cleanup(old_upperdir->d_inode, newdentry);

	ovl_dentry_version_inc(old->d_parent);
	ovl_dentry_version_inc(new->d_parent);

out_dput:
	dput(newdentry);
out_dput_old:
	dput(olddentry);
out_unlock:
	unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
	revert_creds(old_cred);
out_drop_write:
	ovl_drop_write(old);
out:
	dput(opaquedir);
	return err;
}
1058 
1059 const struct inode_operations ovl_dir_inode_operations = {
1060 	.lookup		= ovl_lookup,
1061 	.mkdir		= ovl_mkdir,
1062 	.symlink	= ovl_symlink,
1063 	.unlink		= ovl_unlink,
1064 	.rmdir		= ovl_rmdir,
1065 	.rename		= ovl_rename,
1066 	.link		= ovl_link,
1067 	.setattr	= ovl_setattr,
1068 	.create		= ovl_create,
1069 	.mknod		= ovl_mknod,
1070 	.permission	= ovl_permission,
1071 	.getattr	= ovl_dir_getattr,
1072 	.listxattr	= ovl_listxattr,
1073 	.get_acl	= ovl_get_acl,
1074 	.update_time	= ovl_update_time,
1075 };
1076