xref: /openbmc/linux/fs/overlayfs/dir.c (revision f7d84fa7)
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/namei.h>
12 #include <linux/xattr.h>
13 #include <linux/security.h>
14 #include <linux/cred.h>
15 #include <linux/module.h>
16 #include <linux/posix_acl.h>
17 #include <linux/posix_acl_xattr.h>
18 #include <linux/atomic.h>
19 #include <linux/ratelimit.h>
20 #include "overlayfs.h"
21 
22 static unsigned short ovl_redirect_max = 256;
23 module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
24 MODULE_PARM_DESC(ovl_redirect_max,
25 		 "Maximum length of absolute redirect xattr value");
26 
/*
 * Remove @wdentry from the directory inode @wdir, using rmdir for a
 * directory and unlink for anything else.  A failure is only logged; no
 * status is returned to the caller.
 */
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
	int ret;

	/* Hold an extra reference across the removal */
	dget(wdentry);
	ret = d_is_dir(wdentry) ? ovl_do_rmdir(wdir, wdentry) :
				  ovl_do_unlink(wdir, wdentry);
	dput(wdentry);

	if (!ret)
		return;

	pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", wdentry, ret);
}
43 
44 struct dentry *ovl_lookup_temp(struct dentry *workdir)
45 {
46 	struct dentry *temp;
47 	char name[20];
48 	static atomic_t temp_id = ATOMIC_INIT(0);
49 
50 	/* counter is allowed to wrap, since temp dentries are ephemeral */
51 	snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
52 
53 	temp = lookup_one_len(name, workdir, strlen(name));
54 	if (!IS_ERR(temp) && temp->d_inode) {
55 		pr_err("overlayfs: workdir/%s already exists\n", name);
56 		dput(temp);
57 		temp = ERR_PTR(-EIO);
58 	}
59 
60 	return temp;
61 }
62 
63 /* caller holds i_mutex on workdir */
64 static struct dentry *ovl_whiteout(struct dentry *workdir,
65 				   struct dentry *dentry)
66 {
67 	int err;
68 	struct dentry *whiteout;
69 	struct inode *wdir = workdir->d_inode;
70 
71 	whiteout = ovl_lookup_temp(workdir);
72 	if (IS_ERR(whiteout))
73 		return whiteout;
74 
75 	err = ovl_do_whiteout(wdir, whiteout);
76 	if (err) {
77 		dput(whiteout);
78 		whiteout = ERR_PTR(err);
79 	}
80 
81 	return whiteout;
82 }
83 
84 int ovl_create_real(struct inode *dir, struct dentry *newdentry,
85 		    struct cattr *attr, struct dentry *hardlink, bool debug)
86 {
87 	int err;
88 
89 	if (newdentry->d_inode)
90 		return -ESTALE;
91 
92 	if (hardlink) {
93 		err = ovl_do_link(hardlink, dir, newdentry, debug);
94 	} else {
95 		switch (attr->mode & S_IFMT) {
96 		case S_IFREG:
97 			err = ovl_do_create(dir, newdentry, attr->mode, debug);
98 			break;
99 
100 		case S_IFDIR:
101 			err = ovl_do_mkdir(dir, newdentry, attr->mode, debug);
102 			break;
103 
104 		case S_IFCHR:
105 		case S_IFBLK:
106 		case S_IFIFO:
107 		case S_IFSOCK:
108 			err = ovl_do_mknod(dir, newdentry,
109 					   attr->mode, attr->rdev, debug);
110 			break;
111 
112 		case S_IFLNK:
113 			err = ovl_do_symlink(dir, newdentry, attr->link, debug);
114 			break;
115 
116 		default:
117 			err = -EPERM;
118 		}
119 	}
120 	if (!err && WARN_ON(!newdentry->d_inode)) {
121 		/*
122 		 * Not quite sure if non-instantiated dentry is legal or not.
123 		 * VFS doesn't seem to care so check and warn here.
124 		 */
125 		err = -ENOENT;
126 	}
127 	return err;
128 }
129 
130 static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
131 			       int xerr)
132 {
133 	int err;
134 
135 	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
136 	if (!err)
137 		ovl_dentry_set_opaque(dentry);
138 
139 	return err;
140 }
141 
142 static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
143 {
144 	/*
145 	 * Fail with -EIO when trying to create opaque dir and upper doesn't
146 	 * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
147 	 * return a specific error for noxattr case.
148 	 */
149 	return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
150 }
151 
152 /* Common operations required to be done after creation of file on upper */
153 static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
154 			    struct dentry *newdentry, bool hardlink)
155 {
156 	ovl_dentry_version_inc(dentry->d_parent);
157 	ovl_dentry_update(dentry, newdentry);
158 	if (!hardlink) {
159 		ovl_inode_update(inode, d_inode(newdentry));
160 		ovl_copyattr(newdentry->d_inode, inode);
161 	} else {
162 		WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry));
163 		inc_nlink(inode);
164 	}
165 	d_instantiate(dentry, inode);
166 	/* Force lookup of new upper hardlink to find its lower */
167 	if (hardlink)
168 		d_drop(dentry);
169 }
170 
171 static bool ovl_type_merge(struct dentry *dentry)
172 {
173 	return OVL_TYPE_MERGE(ovl_path_type(dentry));
174 }
175 
176 static bool ovl_type_origin(struct dentry *dentry)
177 {
178 	return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
179 }
180 
181 static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
182 			    struct cattr *attr, struct dentry *hardlink)
183 {
184 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
185 	struct inode *udir = upperdir->d_inode;
186 	struct dentry *newdentry;
187 	int err;
188 
189 	if (!hardlink && !IS_POSIXACL(udir))
190 		attr->mode &= ~current_umask();
191 
192 	inode_lock_nested(udir, I_MUTEX_PARENT);
193 	newdentry = lookup_one_len(dentry->d_name.name, upperdir,
194 				   dentry->d_name.len);
195 	err = PTR_ERR(newdentry);
196 	if (IS_ERR(newdentry))
197 		goto out_unlock;
198 	err = ovl_create_real(udir, newdentry, attr, hardlink, false);
199 	if (err)
200 		goto out_dput;
201 
202 	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
203 		/* Setting opaque here is just an optimization, allow to fail */
204 		ovl_set_opaque(dentry, newdentry);
205 	}
206 
207 	ovl_instantiate(dentry, inode, newdentry, !!hardlink);
208 	newdentry = NULL;
209 out_dput:
210 	dput(newdentry);
211 out_unlock:
212 	inode_unlock(udir);
213 	return err;
214 }
215 
216 static int ovl_lock_rename_workdir(struct dentry *workdir,
217 				   struct dentry *upperdir)
218 {
219 	/* Workdir should not be the same as upperdir */
220 	if (workdir == upperdir)
221 		goto err;
222 
223 	/* Workdir should not be subdir of upperdir and vice versa */
224 	if (lock_rename(workdir, upperdir) != NULL)
225 		goto err_unlock;
226 
227 	return 0;
228 
229 err_unlock:
230 	unlock_rename(workdir, upperdir);
231 err:
232 	pr_err("overlayfs: failed to lock workdir+upperdir\n");
233 	return -EIO;
234 }
235 
236 static struct dentry *ovl_clear_empty(struct dentry *dentry,
237 				      struct list_head *list)
238 {
239 	struct dentry *workdir = ovl_workdir(dentry);
240 	struct inode *wdir = workdir->d_inode;
241 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
242 	struct inode *udir = upperdir->d_inode;
243 	struct path upperpath;
244 	struct dentry *upper;
245 	struct dentry *opaquedir;
246 	struct kstat stat;
247 	int err;
248 
249 	if (WARN_ON(!workdir))
250 		return ERR_PTR(-EROFS);
251 
252 	err = ovl_lock_rename_workdir(workdir, upperdir);
253 	if (err)
254 		goto out;
255 
256 	ovl_path_upper(dentry, &upperpath);
257 	err = vfs_getattr(&upperpath, &stat,
258 			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
259 	if (err)
260 		goto out_unlock;
261 
262 	err = -ESTALE;
263 	if (!S_ISDIR(stat.mode))
264 		goto out_unlock;
265 	upper = upperpath.dentry;
266 	if (upper->d_parent->d_inode != udir)
267 		goto out_unlock;
268 
269 	opaquedir = ovl_lookup_temp(workdir);
270 	err = PTR_ERR(opaquedir);
271 	if (IS_ERR(opaquedir))
272 		goto out_unlock;
273 
274 	err = ovl_create_real(wdir, opaquedir,
275 			      &(struct cattr){.mode = stat.mode}, NULL, true);
276 	if (err)
277 		goto out_dput;
278 
279 	err = ovl_copy_xattr(upper, opaquedir);
280 	if (err)
281 		goto out_cleanup;
282 
283 	err = ovl_set_opaque(dentry, opaquedir);
284 	if (err)
285 		goto out_cleanup;
286 
287 	inode_lock(opaquedir->d_inode);
288 	err = ovl_set_attr(opaquedir, &stat);
289 	inode_unlock(opaquedir->d_inode);
290 	if (err)
291 		goto out_cleanup;
292 
293 	err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
294 	if (err)
295 		goto out_cleanup;
296 
297 	ovl_cleanup_whiteouts(upper, list);
298 	ovl_cleanup(wdir, upper);
299 	unlock_rename(workdir, upperdir);
300 
301 	/* dentry's upper doesn't match now, get rid of it */
302 	d_drop(dentry);
303 
304 	return opaquedir;
305 
306 out_cleanup:
307 	ovl_cleanup(wdir, opaquedir);
308 out_dput:
309 	dput(opaquedir);
310 out_unlock:
311 	unlock_rename(workdir, upperdir);
312 out:
313 	return ERR_PTR(err);
314 }
315 
316 static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
317 {
318 	int err;
319 	struct dentry *ret = NULL;
320 	enum ovl_path_type type = ovl_path_type(dentry);
321 	LIST_HEAD(list);
322 
323 	err = ovl_check_empty_dir(dentry, &list);
324 	if (err) {
325 		ret = ERR_PTR(err);
326 		goto out_free;
327 	}
328 
329 	/*
330 	 * When removing an empty opaque directory, then it makes no sense to
331 	 * replace it with an exact replica of itself.
332 	 *
333 	 * If no upperdentry then skip clearing whiteouts.
334 	 *
335 	 * Can race with copy-up, since we don't hold the upperdir mutex.
336 	 * Doesn't matter, since copy-up can't create a non-empty directory
337 	 * from an empty one.
338 	 */
339 	if (OVL_TYPE_UPPER(type) && OVL_TYPE_MERGE(type))
340 		ret = ovl_clear_empty(dentry, &list);
341 
342 out_free:
343 	ovl_cache_free(&list);
344 
345 	return ret;
346 }
347 
348 static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
349 			     const struct posix_acl *acl)
350 {
351 	void *buffer;
352 	size_t size;
353 	int err;
354 
355 	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
356 		return 0;
357 
358 	size = posix_acl_to_xattr(NULL, acl, NULL, 0);
359 	buffer = kmalloc(size, GFP_KERNEL);
360 	if (!buffer)
361 		return -ENOMEM;
362 
363 	size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
364 	err = size;
365 	if (err < 0)
366 		goto out_free;
367 
368 	err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
369 out_free:
370 	kfree(buffer);
371 	return err;
372 }
373 
374 static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
375 				    struct cattr *cattr,
376 				    struct dentry *hardlink)
377 {
378 	struct dentry *workdir = ovl_workdir(dentry);
379 	struct inode *wdir = workdir->d_inode;
380 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
381 	struct inode *udir = upperdir->d_inode;
382 	struct dentry *upper;
383 	struct dentry *newdentry;
384 	int err;
385 	struct posix_acl *acl, *default_acl;
386 
387 	if (WARN_ON(!workdir))
388 		return -EROFS;
389 
390 	if (!hardlink) {
391 		err = posix_acl_create(dentry->d_parent->d_inode,
392 				       &cattr->mode, &default_acl, &acl);
393 		if (err)
394 			return err;
395 	}
396 
397 	err = ovl_lock_rename_workdir(workdir, upperdir);
398 	if (err)
399 		goto out;
400 
401 	newdentry = ovl_lookup_temp(workdir);
402 	err = PTR_ERR(newdentry);
403 	if (IS_ERR(newdentry))
404 		goto out_unlock;
405 
406 	upper = lookup_one_len(dentry->d_name.name, upperdir,
407 			       dentry->d_name.len);
408 	err = PTR_ERR(upper);
409 	if (IS_ERR(upper))
410 		goto out_dput;
411 
412 	err = ovl_create_real(wdir, newdentry, cattr, hardlink, true);
413 	if (err)
414 		goto out_dput2;
415 
416 	/*
417 	 * mode could have been mutilated due to umask (e.g. sgid directory)
418 	 */
419 	if (!hardlink &&
420 	    !S_ISLNK(cattr->mode) &&
421 	    newdentry->d_inode->i_mode != cattr->mode) {
422 		struct iattr attr = {
423 			.ia_valid = ATTR_MODE,
424 			.ia_mode = cattr->mode,
425 		};
426 		inode_lock(newdentry->d_inode);
427 		err = notify_change(newdentry, &attr, NULL);
428 		inode_unlock(newdentry->d_inode);
429 		if (err)
430 			goto out_cleanup;
431 	}
432 	if (!hardlink) {
433 		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
434 					acl);
435 		if (err)
436 			goto out_cleanup;
437 
438 		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
439 					default_acl);
440 		if (err)
441 			goto out_cleanup;
442 	}
443 
444 	if (!hardlink && S_ISDIR(cattr->mode)) {
445 		err = ovl_set_opaque(dentry, newdentry);
446 		if (err)
447 			goto out_cleanup;
448 
449 		err = ovl_do_rename(wdir, newdentry, udir, upper,
450 				    RENAME_EXCHANGE);
451 		if (err)
452 			goto out_cleanup;
453 
454 		ovl_cleanup(wdir, upper);
455 	} else {
456 		err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
457 		if (err)
458 			goto out_cleanup;
459 	}
460 	ovl_instantiate(dentry, inode, newdentry, !!hardlink);
461 	newdentry = NULL;
462 out_dput2:
463 	dput(upper);
464 out_dput:
465 	dput(newdentry);
466 out_unlock:
467 	unlock_rename(workdir, upperdir);
468 out:
469 	if (!hardlink) {
470 		posix_acl_release(acl);
471 		posix_acl_release(default_acl);
472 	}
473 	return err;
474 
475 out_cleanup:
476 	ovl_cleanup(wdir, newdentry);
477 	goto out_dput2;
478 }
479 
/*
 * Common back end for creating a new object or a hard link at @dentry.
 *
 * @inode:    overlay inode that @dentry is instantiated with on success
 * @attr:     creation attributes; only dereferenced here when !@hardlink
 * @hardlink: upper dentry to link to, or NULL to create a new object
 *
 * Copies up the parent, then performs the creation with overridden
 * credentials whose fsuid/fsgid are taken from @inode.  Returns 0 or a
 * negative errno (-ENOMEM if prepare_creds() fails).
 */
static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
			      struct cattr *attr, struct dentry *hardlink)
{
	int err;
	const struct cred *old_cred;
	struct cred *override_cred;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		return err;

	old_cred = ovl_override_creds(dentry->d_sb);
	/* Preset the result for the case where prepare_creds() fails */
	err = -ENOMEM;
	override_cred = prepare_creds();
	if (override_cred) {
		/* The new object is created with the overlay inode's owner */
		override_cred->fsuid = inode->i_uid;
		override_cred->fsgid = inode->i_gid;
		if (!hardlink) {
			err = security_dentry_create_files_as(dentry,
					attr->mode, &dentry->d_name, old_cred,
					override_cred);
			if (err) {
				put_cred(override_cred);
				goto out_revert_creds;
			}
		}
		/*
		 * Install override_cred.  The creds returned by
		 * override_creds() are put right away, as is our own
		 * reference to override_cred.
		 * NOTE(review): presumably revert_creds(old_cred) below is
		 * what finally releases the installed creds - confirm.
		 */
		put_cred(override_creds(override_cred));
		put_cred(override_cred);

		/* An existing whiteout takes the workdir-based path */
		if (!ovl_dentry_is_whiteout(dentry))
			err = ovl_create_upper(dentry, inode, attr,
						hardlink);
		else
			err = ovl_create_over_whiteout(dentry, inode, attr,
							hardlink);
	}
out_revert_creds:
	revert_creds(old_cred);
	if (!err) {
		struct inode *realinode = d_inode(ovl_dentry_upper(dentry));

		/* Sanity: overlay inode must agree with the real upper inode */
		WARN_ON(inode->i_mode != realinode->i_mode);
		WARN_ON(!uid_eq(inode->i_uid, realinode->i_uid));
		WARN_ON(!gid_eq(inode->i_gid, realinode->i_gid));
	}
	return err;
}
527 
528 static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
529 			     const char *link)
530 {
531 	int err;
532 	struct inode *inode;
533 	struct cattr attr = {
534 		.rdev = rdev,
535 		.link = link,
536 	};
537 
538 	err = ovl_want_write(dentry);
539 	if (err)
540 		goto out;
541 
542 	err = -ENOMEM;
543 	inode = ovl_new_inode(dentry->d_sb, mode, rdev);
544 	if (!inode)
545 		goto out_drop_write;
546 
547 	inode_init_owner(inode, dentry->d_parent->d_inode, mode);
548 	attr.mode = inode->i_mode;
549 
550 	err = ovl_create_or_link(dentry, inode, &attr, NULL);
551 	if (err)
552 		iput(inode);
553 
554 out_drop_write:
555 	ovl_drop_write(dentry);
556 out:
557 	return err;
558 }
559 
560 static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
561 		      bool excl)
562 {
563 	return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
564 }
565 
566 static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
567 {
568 	return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
569 }
570 
571 static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
572 		     dev_t rdev)
573 {
574 	/* Don't allow creation of "whiteout" on overlay */
575 	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
576 		return -EPERM;
577 
578 	return ovl_create_object(dentry, mode, rdev, NULL);
579 }
580 
581 static int ovl_symlink(struct inode *dir, struct dentry *dentry,
582 		       const char *link)
583 {
584 	return ovl_create_object(dentry, S_IFLNK, 0, link);
585 }
586 
587 static int ovl_link(struct dentry *old, struct inode *newdir,
588 		    struct dentry *new)
589 {
590 	int err;
591 	struct inode *inode;
592 
593 	err = ovl_want_write(old);
594 	if (err)
595 		goto out;
596 
597 	err = ovl_copy_up(old);
598 	if (err)
599 		goto out_drop_write;
600 
601 	inode = d_inode(old);
602 	ihold(inode);
603 
604 	err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old));
605 	if (err)
606 		iput(inode);
607 
608 out_drop_write:
609 	ovl_drop_write(old);
610 out:
611 	return err;
612 }
613 
614 static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
615 {
616 	struct dentry *workdir = ovl_workdir(dentry);
617 	struct inode *wdir = workdir->d_inode;
618 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
619 	struct inode *udir = upperdir->d_inode;
620 	struct dentry *whiteout;
621 	struct dentry *upper;
622 	struct dentry *opaquedir = NULL;
623 	int err;
624 	int flags = 0;
625 
626 	if (WARN_ON(!workdir))
627 		return -EROFS;
628 
629 	if (is_dir) {
630 		opaquedir = ovl_check_empty_and_clear(dentry);
631 		err = PTR_ERR(opaquedir);
632 		if (IS_ERR(opaquedir))
633 			goto out;
634 	}
635 
636 	err = ovl_lock_rename_workdir(workdir, upperdir);
637 	if (err)
638 		goto out_dput;
639 
640 	upper = lookup_one_len(dentry->d_name.name, upperdir,
641 			       dentry->d_name.len);
642 	err = PTR_ERR(upper);
643 	if (IS_ERR(upper))
644 		goto out_unlock;
645 
646 	err = -ESTALE;
647 	if ((opaquedir && upper != opaquedir) ||
648 	    (!opaquedir && ovl_dentry_upper(dentry) &&
649 	     upper != ovl_dentry_upper(dentry))) {
650 		goto out_dput_upper;
651 	}
652 
653 	whiteout = ovl_whiteout(workdir, dentry);
654 	err = PTR_ERR(whiteout);
655 	if (IS_ERR(whiteout))
656 		goto out_dput_upper;
657 
658 	if (d_is_dir(upper))
659 		flags = RENAME_EXCHANGE;
660 
661 	err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
662 	if (err)
663 		goto kill_whiteout;
664 	if (flags)
665 		ovl_cleanup(wdir, upper);
666 
667 	ovl_dentry_version_inc(dentry->d_parent);
668 out_d_drop:
669 	d_drop(dentry);
670 	dput(whiteout);
671 out_dput_upper:
672 	dput(upper);
673 out_unlock:
674 	unlock_rename(workdir, upperdir);
675 out_dput:
676 	dput(opaquedir);
677 out:
678 	return err;
679 
680 kill_whiteout:
681 	ovl_cleanup(wdir, whiteout);
682 	goto out_d_drop;
683 }
684 
685 static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
686 {
687 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
688 	struct inode *dir = upperdir->d_inode;
689 	struct dentry *upper;
690 	struct dentry *opaquedir = NULL;
691 	int err;
692 
693 	/* Redirect dir can be !ovl_lower_positive && OVL_TYPE_MERGE */
694 	if (is_dir && ovl_dentry_get_redirect(dentry)) {
695 		opaquedir = ovl_check_empty_and_clear(dentry);
696 		err = PTR_ERR(opaquedir);
697 		if (IS_ERR(opaquedir))
698 			goto out;
699 	}
700 
701 	inode_lock_nested(dir, I_MUTEX_PARENT);
702 	upper = lookup_one_len(dentry->d_name.name, upperdir,
703 			       dentry->d_name.len);
704 	err = PTR_ERR(upper);
705 	if (IS_ERR(upper))
706 		goto out_unlock;
707 
708 	err = -ESTALE;
709 	if ((opaquedir && upper != opaquedir) ||
710 	    (!opaquedir && upper != ovl_dentry_upper(dentry)))
711 		goto out_dput_upper;
712 
713 	if (is_dir)
714 		err = vfs_rmdir(dir, upper);
715 	else
716 		err = vfs_unlink(dir, upper, NULL);
717 	ovl_dentry_version_inc(dentry->d_parent);
718 
719 	/*
720 	 * Keeping this dentry hashed would mean having to release
721 	 * upperpath/lowerpath, which could only be done if we are the
722 	 * sole user of this dentry.  Too tricky...  Just unhash for
723 	 * now.
724 	 */
725 	if (!err)
726 		d_drop(dentry);
727 out_dput_upper:
728 	dput(upper);
729 out_unlock:
730 	inode_unlock(dir);
731 	dput(opaquedir);
732 out:
733 	return err;
734 }
735 
736 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
737 {
738 	enum ovl_path_type type;
739 	int err;
740 	const struct cred *old_cred;
741 
742 	err = ovl_want_write(dentry);
743 	if (err)
744 		goto out;
745 
746 	err = ovl_copy_up(dentry->d_parent);
747 	if (err)
748 		goto out_drop_write;
749 
750 	type = ovl_path_type(dentry);
751 
752 	old_cred = ovl_override_creds(dentry->d_sb);
753 	if (!ovl_lower_positive(dentry))
754 		err = ovl_remove_upper(dentry, is_dir);
755 	else
756 		err = ovl_remove_and_whiteout(dentry, is_dir);
757 	revert_creds(old_cred);
758 	if (!err) {
759 		if (is_dir)
760 			clear_nlink(dentry->d_inode);
761 		else
762 			drop_nlink(dentry->d_inode);
763 	}
764 out_drop_write:
765 	ovl_drop_write(dentry);
766 out:
767 	return err;
768 }
769 
770 static int ovl_unlink(struct inode *dir, struct dentry *dentry)
771 {
772 	return ovl_do_remove(dentry, false);
773 }
774 
775 static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
776 {
777 	return ovl_do_remove(dentry, true);
778 }
779 
780 static bool ovl_type_merge_or_lower(struct dentry *dentry)
781 {
782 	enum ovl_path_type type = ovl_path_type(dentry);
783 
784 	return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
785 }
786 
787 static bool ovl_can_move(struct dentry *dentry)
788 {
789 	return ovl_redirect_dir(dentry->d_sb) ||
790 		!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
791 }
792 
/*
 * Build the redirect string for @dentry.
 *
 * With @samedir the result is simply a copy of the dentry's own name.
 * Otherwise a path is assembled back to front in a buffer of
 * ovl_redirect_max + 1 bytes, walking from @dentry towards the root and
 * using each dentry's existing redirect value in preference to its name.
 *
 * Returns a newly allocated string, ERR_PTR(-EXDEV) if the path does not
 * fit, or ERR_PTR(-ENOMEM) if an allocation fails.
 */
static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
{
	char *buf, *ret;
	struct dentry *d, *tmp;
	int buflen = ovl_redirect_max + 1;

	if (samedir) {
		ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
			       GFP_KERNEL);
		goto out;
	}

	buf = ret = kmalloc(buflen, GFP_TEMPORARY);
	if (!buf)
		goto out;

	/* Terminate at the end; components are prepended before it */
	buflen--;
	buf[buflen] = '\0';
	for (d = dget(dentry); !IS_ROOT(d);) {
		const char *name;
		int thislen;

		/* d_lock is held while the name is read and copied */
		spin_lock(&d->d_lock);
		name = ovl_dentry_get_redirect(d);
		if (name) {
			thislen = strlen(name);
		} else {
			name = d->d_name.name;
			thislen = d->d_name.len;
		}

		/* If path is too long, fall back to userspace move */
		/* (one extra byte for '/' unless name already starts with it) */
		if (thislen + (name[0] != '/') > buflen) {
			ret = ERR_PTR(-EXDEV);
			spin_unlock(&d->d_lock);
			goto out_put;
		}

		buflen -= thislen;
		memcpy(&buf[buflen], name, thislen);
		/* Take the parent reference before dropping d_lock */
		tmp = dget_dlock(d->d_parent);
		spin_unlock(&d->d_lock);

		dput(d);
		d = tmp;

		/* Absolute redirect: finished */
		if (buf[buflen] == '/')
			break;
		buflen--;
		buf[buflen] = '/';
	}
	/* The result is copied out of the scratch buffer */
	ret = kstrdup(&buf[buflen], GFP_KERNEL);
out_put:
	dput(d);
	kfree(buf);
out:
	/* A NULL ret here means one of the allocations above failed */
	return ret ? ret : ERR_PTR(-ENOMEM);
}
852 
853 static int ovl_set_redirect(struct dentry *dentry, bool samedir)
854 {
855 	int err;
856 	const char *redirect = ovl_dentry_get_redirect(dentry);
857 
858 	if (redirect && (samedir || redirect[0] == '/'))
859 		return 0;
860 
861 	redirect = ovl_get_redirect(dentry, samedir);
862 	if (IS_ERR(redirect))
863 		return PTR_ERR(redirect);
864 
865 	err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
866 				 OVL_XATTR_REDIRECT,
867 				 redirect, strlen(redirect), -EXDEV);
868 	if (!err) {
869 		spin_lock(&dentry->d_lock);
870 		ovl_dentry_set_redirect(dentry, redirect);
871 		spin_unlock(&dentry->d_lock);
872 	} else {
873 		kfree(redirect);
874 		pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
875 		/* Fall back to userspace copy-up */
876 		err = -EXDEV;
877 	}
878 	return err;
879 }
880 
/*
 * Rename @old to @new on the overlay.
 *
 * Only RENAME_EXCHANGE and RENAME_NOREPLACE are accepted in @flags
 * (-EINVAL otherwise); RENAME_NOREPLACE is cleared before the flags are
 * passed on to the upper rename.  Returns -EXDEV when a directory cannot
 * be moved (see ovl_can_move()) and -ESTALE when the upper dentries looked
 * up under the rename lock are not the ones the overlay dentries expect.
 */
static int ovl_rename(struct inode *olddir, struct dentry *old,
		      struct inode *newdir, struct dentry *new,
		      unsigned int flags)
{
	int err;
	struct dentry *old_upperdir;
	struct dentry *new_upperdir;
	struct dentry *olddentry;
	struct dentry *newdentry;
	struct dentry *trap;
	bool old_opaque;
	bool new_opaque;
	bool cleanup_whiteout = false;
	bool overwrite = !(flags & RENAME_EXCHANGE);
	bool is_dir = d_is_dir(old);
	bool new_is_dir = d_is_dir(new);
	bool samedir = olddir == newdir;
	struct dentry *opaquedir = NULL;
	const struct cred *old_cred = NULL;

	err = -EINVAL;
	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
		goto out;

	flags &= ~RENAME_NOREPLACE;

	/* Don't copy up directory trees */
	err = -EXDEV;
	if (!ovl_can_move(old))
		goto out;
	if (!overwrite && !ovl_can_move(new))
		goto out;

	err = ovl_want_write(old);
	if (err)
		goto out;

	err = ovl_copy_up(old);
	if (err)
		goto out_drop_write;

	err = ovl_copy_up(new->d_parent);
	if (err)
		goto out_drop_write;
	/* For an exchange the target itself must exist on upper as well */
	if (!overwrite) {
		err = ovl_copy_up(new);
		if (err)
			goto out_drop_write;
	}

	old_cred = ovl_override_creds(old->d_sb);

	if (overwrite && new_is_dir && ovl_type_merge_or_lower(new)) {
		opaquedir = ovl_check_empty_and_clear(new);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir)) {
			/* Reset so that the dput() at "out" sees NULL */
			opaquedir = NULL;
			goto out_revert_creds;
		}
	}

	/* Choose extra rename flags depending on what must be left behind */
	if (overwrite) {
		if (ovl_lower_positive(old)) {
			if (!ovl_dentry_is_whiteout(new)) {
				/* Whiteout source */
				flags |= RENAME_WHITEOUT;
			} else {
				/* Switch whiteouts */
				flags |= RENAME_EXCHANGE;
			}
		} else if (is_dir && ovl_dentry_is_whiteout(new)) {
			flags |= RENAME_EXCHANGE;
			cleanup_whiteout = true;
		}
	}

	old_upperdir = ovl_dentry_upper(old->d_parent);
	new_upperdir = ovl_dentry_upper(new->d_parent);

	if (!samedir) {
		/*
		 * When moving a merge dir or non-dir with copy up origin into
		 * a new parent, we are marking the new parent dir "impure".
		 * When ovl_iterate() iterates an "impure" upper dir, it will
		 * lookup the origin inodes of the entries to fill d_ino.
		 */
		if (ovl_type_origin(old)) {
			err = ovl_set_impure(new->d_parent, new_upperdir);
			if (err)
				goto out_revert_creds;
		}
		if (!overwrite && ovl_type_origin(new)) {
			err = ovl_set_impure(old->d_parent, old_upperdir);
			if (err)
				goto out_revert_creds;
		}
	}

	trap = lock_rename(new_upperdir, old_upperdir);

	olddentry = lookup_one_len(old->d_name.name, old_upperdir,
				   old->d_name.len);
	err = PTR_ERR(olddentry);
	if (IS_ERR(olddentry))
		goto out_unlock;

	/* The source found under the lock must be the recorded upper */
	err = -ESTALE;
	if (olddentry != ovl_dentry_upper(old))
		goto out_dput_old;

	newdentry = lookup_one_len(new->d_name.name, new_upperdir,
				   new->d_name.len);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_dput_old;

	old_opaque = ovl_dentry_is_opaque(old);
	new_opaque = ovl_dentry_is_opaque(new);

	/* All checks down to "err = 0" fail with -ESTALE */
	err = -ESTALE;
	if (ovl_dentry_upper(new)) {
		if (opaquedir) {
			if (newdentry != opaquedir)
				goto out_dput;
		} else {
			if (newdentry != ovl_dentry_upper(new))
				goto out_dput;
		}
	} else {
		/* No upper: accept only a negative entry or a whiteout */
		if (!d_is_negative(newdentry) &&
		    (!new_opaque || !ovl_is_whiteout(newdentry)))
			goto out_dput;
	}

	/* Neither side may be the dentry returned by lock_rename() */
	if (olddentry == trap)
		goto out_dput;
	if (newdentry == trap)
		goto out_dput;

	if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
		goto out_dput;

	err = 0;
	if (is_dir) {
		if (ovl_type_merge_or_lower(old))
			err = ovl_set_redirect(old, samedir);
		else if (!old_opaque && ovl_type_merge(new->d_parent))
			err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
		if (err)
			goto out_dput;
	}
	/* For an exchange, the same applies to the other side */
	if (!overwrite && new_is_dir) {
		if (ovl_type_merge_or_lower(new))
			err = ovl_set_redirect(new, samedir);
		else if (!new_opaque && ovl_type_merge(old->d_parent))
			err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
		if (err)
			goto out_dput;
	}

	err = ovl_do_rename(old_upperdir->d_inode, olddentry,
			    new_upperdir->d_inode, newdentry, flags);
	if (err)
		goto out_dput;

	/* After the exchange, newdentry is removed via the old upper dir */
	if (cleanup_whiteout)
		ovl_cleanup(old_upperdir->d_inode, newdentry);

	ovl_dentry_version_inc(old->d_parent);
	ovl_dentry_version_inc(new->d_parent);

out_dput:
	dput(newdentry);
out_dput_old:
	dput(olddentry);
out_unlock:
	unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
	revert_creds(old_cred);
out_drop_write:
	ovl_drop_write(old);
out:
	dput(opaquedir);
	return err;
}
1066 
1067 const struct inode_operations ovl_dir_inode_operations = {
1068 	.lookup		= ovl_lookup,
1069 	.mkdir		= ovl_mkdir,
1070 	.symlink	= ovl_symlink,
1071 	.unlink		= ovl_unlink,
1072 	.rmdir		= ovl_rmdir,
1073 	.rename		= ovl_rename,
1074 	.link		= ovl_link,
1075 	.setattr	= ovl_setattr,
1076 	.create		= ovl_create,
1077 	.mknod		= ovl_mknod,
1078 	.permission	= ovl_permission,
1079 	.getattr	= ovl_getattr,
1080 	.listxattr	= ovl_listxattr,
1081 	.get_acl	= ovl_get_acl,
1082 	.update_time	= ovl_update_time,
1083 };
1084