xref: /openbmc/linux/fs/overlayfs/copy_up.c (revision adb57164)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *
4  * Copyright (C) 2011 Novell Inc.
5  */
6 
7 #include <linux/module.h>
8 #include <linux/fs.h>
9 #include <linux/slab.h>
10 #include <linux/file.h>
11 #include <linux/splice.h>
12 #include <linux/xattr.h>
13 #include <linux/security.h>
14 #include <linux/uaccess.h>
15 #include <linux/sched/signal.h>
16 #include <linux/cred.h>
17 #include <linux/namei.h>
18 #include <linux/fdtable.h>
19 #include <linux/ratelimit.h>
20 #include <linux/exportfs.h>
21 #include "overlayfs.h"
22 
/* Upper bound, in bytes (1 MiB), on the amount of data spliced per copy-up loop iteration. */
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
24 
/*
 * Setter for the obsolete "check_copy_up" module parameter.
 *
 * The value written in @buf is ignored: a warning is logged and success (0)
 * is reported so that writes to the parameter do not fail.
 */
static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
{
	const int ret = 0;

	pr_warn("\"check_copy_up\" module option is obsolete\n");

	return ret;
}
30 
/*
 * Getter for the obsolete "check_copy_up" module parameter.
 *
 * Always writes "N\n" into @buf and returns the number of characters written.
 */
static int ovl_ccup_get(char *buf, const struct kernel_param *param)
{
	int written;

	written = sprintf(buf, "N\n");

	return written;
}
35 
/*
 * Register the obsolete "check_copy_up" parameter (mode 0644) with
 * ovl_ccup_set()/ovl_ccup_get() as its handlers.
 */
module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing");
38 
39 static bool ovl_must_copy_xattr(const char *name)
40 {
41 	return !strcmp(name, XATTR_POSIX_ACL_ACCESS) ||
42 	       !strcmp(name, XATTR_POSIX_ACL_DEFAULT) ||
43 	       !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN);
44 }
45 
46 int ovl_copy_xattr(struct dentry *old, struct dentry *new)
47 {
48 	ssize_t list_size, size, value_size = 0;
49 	char *buf, *name, *value = NULL;
50 	int uninitialized_var(error);
51 	size_t slen;
52 
53 	if (!(old->d_inode->i_opflags & IOP_XATTR) ||
54 	    !(new->d_inode->i_opflags & IOP_XATTR))
55 		return 0;
56 
57 	list_size = vfs_listxattr(old, NULL, 0);
58 	if (list_size <= 0) {
59 		if (list_size == -EOPNOTSUPP)
60 			return 0;
61 		return list_size;
62 	}
63 
64 	buf = kzalloc(list_size, GFP_KERNEL);
65 	if (!buf)
66 		return -ENOMEM;
67 
68 	list_size = vfs_listxattr(old, buf, list_size);
69 	if (list_size <= 0) {
70 		error = list_size;
71 		goto out;
72 	}
73 
74 	for (name = buf; list_size; name += slen) {
75 		slen = strnlen(name, list_size) + 1;
76 
77 		/* underlying fs providing us with an broken xattr list? */
78 		if (WARN_ON(slen > list_size)) {
79 			error = -EIO;
80 			break;
81 		}
82 		list_size -= slen;
83 
84 		if (ovl_is_private_xattr(name))
85 			continue;
86 retry:
87 		size = vfs_getxattr(old, name, value, value_size);
88 		if (size == -ERANGE)
89 			size = vfs_getxattr(old, name, NULL, 0);
90 
91 		if (size < 0) {
92 			error = size;
93 			break;
94 		}
95 
96 		if (size > value_size) {
97 			void *new;
98 
99 			new = krealloc(value, size, GFP_KERNEL);
100 			if (!new) {
101 				error = -ENOMEM;
102 				break;
103 			}
104 			value = new;
105 			value_size = size;
106 			goto retry;
107 		}
108 
109 		error = security_inode_copy_up_xattr(name);
110 		if (error < 0 && error != -EOPNOTSUPP)
111 			break;
112 		if (error == 1) {
113 			error = 0;
114 			continue; /* Discard */
115 		}
116 		error = vfs_setxattr(new, name, value, size, 0);
117 		if (error) {
118 			if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
119 				break;
120 
121 			/* Ignore failure to copy unknown xattrs */
122 			error = 0;
123 		}
124 	}
125 	kfree(value);
126 out:
127 	kfree(buf);
128 	return error;
129 }
130 
/*
 * Copy @len bytes of file data from the file at @old to the file at @new.
 *
 * A whole-range clone via do_clone_file_range() is attempted first.  If that
 * does not cover @len bytes, the data is spliced across in chunks of at most
 * OVL_COPY_UP_CHUNK_SIZE, skipping holes located with SEEK_DATA when the
 * source file supports seeking.  When no error occurred the new file is
 * fsync'ed before returning.
 *
 * Returns 0 on success (immediately when @len is 0) or a negative errno;
 * -EINTR is returned when signal_pending_state(TASK_KILLABLE, current)
 * reports a pending signal between chunks.
 */
static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
{
	struct file *old_file;
	struct file *new_file;
	loff_t old_pos = 0;
	loff_t new_pos = 0;
	loff_t cloned;
	loff_t data_pos = -1;
	loff_t hole_len;
	bool skip_hole = false;
	int error = 0;

	if (len == 0)
		return 0;

	old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
	if (IS_ERR(old_file))
		return PTR_ERR(old_file);

	new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
	if (IS_ERR(new_file)) {
		error = PTR_ERR(new_file);
		goto out_fput;
	}

	/* Try to use clone_file_range to clone up within the same fs */
	cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
	if (cloned == len)
		goto out;
	/* Couldn't clone, so now we try to copy the data */

	/* Check if lower fs supports seek operation */
	if (old_file->f_mode & FMODE_LSEEK &&
	    old_file->f_op->llseek)
		skip_hole = true;

	while (len) {
		size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
		long bytes;

		/* Last chunk may be shorter than the chunk size */
		if (len < this_len)
			this_len = len;

		if (signal_pending_state(TASK_KILLABLE, current)) {
			error = -EINTR;
			break;
		}

		/*
		 * Fill zero for hole will cost unnecessary disk space
		 * and meanwhile slow down the copy-up speed, so we do
		 * an optimization for hole during copy-up, it relies
		 * on SEEK_DATA implementation in lower fs so if lower
		 * fs does not support it, copy-up will behave as before.
		 *
		 * Detail logic of hole detection as below:
		 * When we detect next data position is larger than current
		 * position we will skip that hole, otherwise we copy
		 * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually,
		 * it may not recognize all kind of holes and sometimes
		 * only skips partial of hole area. However, it will be
		 * enough for most of the use cases.
		 */

		if (skip_hole && data_pos < old_pos) {
			data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
			if (data_pos > old_pos) {
				/* Hole ahead: advance both positions past it */
				hole_len = data_pos - old_pos;
				len -= hole_len;
				old_pos = new_pos = data_pos;
				continue;
			} else if (data_pos == -ENXIO) {
				/* Stop copying without setting an error */
				break;
			} else if (data_pos < 0) {
				/* Any other seek failure: stop trying to skip holes */
				skip_hole = false;
			}
		}

		bytes = do_splice_direct(old_file, &old_pos,
					 new_file, &new_pos,
					 this_len, SPLICE_F_MOVE);
		/* 0 ends the loop with error == 0; negative is an errno */
		if (bytes <= 0) {
			error = bytes;
			break;
		}
		WARN_ON(old_pos != new_pos);

		len -= bytes;
	}
out:
	if (!error)
		error = vfs_fsync(new_file, 0);
	fput(new_file);
out_fput:
	fput(old_file);
	return error;
}
228 
229 static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
230 {
231 	struct iattr attr = {
232 		.ia_valid = ATTR_SIZE,
233 		.ia_size = stat->size,
234 	};
235 
236 	return notify_change(upperdentry, &attr, NULL);
237 }
238 
239 static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
240 {
241 	struct iattr attr = {
242 		.ia_valid =
243 		     ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
244 		.ia_atime = stat->atime,
245 		.ia_mtime = stat->mtime,
246 	};
247 
248 	return notify_change(upperdentry, &attr, NULL);
249 }
250 
251 int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
252 {
253 	int err = 0;
254 
255 	if (!S_ISLNK(stat->mode)) {
256 		struct iattr attr = {
257 			.ia_valid = ATTR_MODE,
258 			.ia_mode = stat->mode,
259 		};
260 		err = notify_change(upperdentry, &attr, NULL);
261 	}
262 	if (!err) {
263 		struct iattr attr = {
264 			.ia_valid = ATTR_UID | ATTR_GID,
265 			.ia_uid = stat->uid,
266 			.ia_gid = stat->gid,
267 		};
268 		err = notify_change(upperdentry, &attr, NULL);
269 	}
270 	if (!err)
271 		ovl_set_timestamps(upperdentry, stat);
272 
273 	return err;
274 }
275 
276 struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
277 {
278 	struct ovl_fh *fh;
279 	int fh_type, dwords;
280 	int buflen = MAX_HANDLE_SZ;
281 	uuid_t *uuid = &real->d_sb->s_uuid;
282 	int err;
283 
284 	/* Make sure the real fid stays 32bit aligned */
285 	BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4);
286 	BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255);
287 
288 	fh = kzalloc(buflen + OVL_FH_FID_OFFSET, GFP_KERNEL);
289 	if (!fh)
290 		return ERR_PTR(-ENOMEM);
291 
292 	/*
293 	 * We encode a non-connectable file handle for non-dir, because we
294 	 * only need to find the lower inode number and we don't want to pay
295 	 * the price or reconnecting the dentry.
296 	 */
297 	dwords = buflen >> 2;
298 	fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0);
299 	buflen = (dwords << 2);
300 
301 	err = -EIO;
302 	if (WARN_ON(fh_type < 0) ||
303 	    WARN_ON(buflen > MAX_HANDLE_SZ) ||
304 	    WARN_ON(fh_type == FILEID_INVALID))
305 		goto out_err;
306 
307 	fh->fb.version = OVL_FH_VERSION;
308 	fh->fb.magic = OVL_FH_MAGIC;
309 	fh->fb.type = fh_type;
310 	fh->fb.flags = OVL_FH_FLAG_CPU_ENDIAN;
311 	/*
312 	 * When we will want to decode an overlay dentry from this handle
313 	 * and all layers are on the same fs, if we get a disconncted real
314 	 * dentry when we decode fid, the only way to tell if we should assign
315 	 * it to upperdentry or to lowerstack is by checking this flag.
316 	 */
317 	if (is_upper)
318 		fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
319 	fh->fb.len = sizeof(fh->fb) + buflen;
320 	fh->fb.uuid = *uuid;
321 
322 	return fh;
323 
324 out_err:
325 	kfree(fh);
326 	return ERR_PTR(err);
327 }
328 
329 int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
330 		   struct dentry *upper)
331 {
332 	const struct ovl_fh *fh = NULL;
333 	int err;
334 
335 	/*
336 	 * When lower layer doesn't support export operations store a 'null' fh,
337 	 * so we can use the overlay.origin xattr to distignuish between a copy
338 	 * up and a pure upper inode.
339 	 */
340 	if (ovl_can_decode_fh(lower->d_sb)) {
341 		fh = ovl_encode_real_fh(lower, false);
342 		if (IS_ERR(fh))
343 			return PTR_ERR(fh);
344 	}
345 
346 	/*
347 	 * Do not fail when upper doesn't support xattrs.
348 	 */
349 	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf,
350 				 fh ? fh->fb.len : 0, 0);
351 	kfree(fh);
352 
353 	return err;
354 }
355 
356 /* Store file handle of @upper dir in @index dir entry */
357 static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
358 {
359 	const struct ovl_fh *fh;
360 	int err;
361 
362 	fh = ovl_encode_real_fh(upper, true);
363 	if (IS_ERR(fh))
364 		return PTR_ERR(fh);
365 
366 	err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh->buf, fh->fb.len, 0);
367 
368 	kfree(fh);
369 	return err;
370 }
371 
/*
 * Create and install index entry.
 *
 * A temporary directory is created in the index dir, tagged with a file
 * handle of @upper, and then renamed to the index name derived from @origin.
 * On failure after the temp dir was created, it is removed again.
 *
 * Caller must hold i_mutex on indexdir.
 *
 * Returns 0 on success or a negative errno (-EIO when @dentry is not a
 * directory or is already flagged OVL_INDEX).
 */
static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
			    struct dentry *upper)
{
	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
	struct inode *dir = d_inode(indexdir);
	struct dentry *index = NULL;
	struct dentry *temp = NULL;
	struct qstr name = { };
	int err;

	/*
	 * For now this is only used for creating index entry for directories,
	 * because non-dir are copied up directly to index and then hardlinked
	 * to upper dir.
	 *
	 * TODO: implement create index for non-dir, so we can call it when
	 * encoding file handle for non-dir in case index does not exist.
	 */
	if (WARN_ON(!d_is_dir(dentry)))
		return -EIO;

	/* Directory not expected to be indexed before copy up */
	if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
		return -EIO;

	/* name.name is allocated here and freed at free_name */
	err = ovl_get_index_name(origin, &name);
	if (err)
		return err;

	temp = ovl_create_temp(indexdir, OVL_CATTR(S_IFDIR | 0));
	err = PTR_ERR(temp);
	if (IS_ERR(temp))
		goto free_name;

	err = ovl_set_upper_fh(upper, temp);
	if (err)
		goto out;

	/* Move the prepared temp dir into place under the index name */
	index = lookup_one_len(name.name, indexdir, name.len);
	if (IS_ERR(index)) {
		err = PTR_ERR(index);
	} else {
		err = ovl_do_rename(dir, temp, dir, index, 0);
		dput(index);
	}
out:
	if (err)
		ovl_cleanup(dir, temp);
	dput(temp);
free_name:
	kfree(name.name);
	return err;
}
430 
/* State shared by the helpers that carry out one copy up operation. */
struct ovl_copy_up_ctx {
	/* Overlay parent of @dentry; NULL when copying up without a parent */
	struct dentry *parent;
	/* Overlay dentry being copied up */
	struct dentry *dentry;
	/* Lower path of @dentry, filled in by ovl_path_lower() */
	struct path lowerpath;
	/* Attributes of @lowerpath (size is zeroed for O_TRUNC) */
	struct kstat stat;
	/* atime/mtime of the parent's upper dir, restored after copy up */
	struct kstat pstat;
	/* Symlink target obtained with vfs_get_link(), for symlinks only */
	const char *link;
	/* Directory receiving the result: parent's upper dir or the index dir */
	struct dentry *destdir;
	/* Name of the result in @destdir: dentry name or index name */
	struct qstr destname;
	/* Directory in which the temporary copy is prepared */
	struct dentry *workdir;
	/* Store the origin xattr on the copy */
	bool origin;
	/* Set when ovl_need_index() is true for @dentry */
	bool indexed;
	/* Copy metadata only; data copy is skipped and the metacopy xattr set */
	bool metacopy;
};
445 
446 static int ovl_link_up(struct ovl_copy_up_ctx *c)
447 {
448 	int err;
449 	struct dentry *upper;
450 	struct dentry *upperdir = ovl_dentry_upper(c->parent);
451 	struct inode *udir = d_inode(upperdir);
452 
453 	/* Mark parent "impure" because it may now contain non-pure upper */
454 	err = ovl_set_impure(c->parent, upperdir);
455 	if (err)
456 		return err;
457 
458 	err = ovl_set_nlink_lower(c->dentry);
459 	if (err)
460 		return err;
461 
462 	inode_lock_nested(udir, I_MUTEX_PARENT);
463 	upper = lookup_one_len(c->dentry->d_name.name, upperdir,
464 			       c->dentry->d_name.len);
465 	err = PTR_ERR(upper);
466 	if (!IS_ERR(upper)) {
467 		err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper);
468 		dput(upper);
469 
470 		if (!err) {
471 			/* Restore timestamps on parent (best effort) */
472 			ovl_set_timestamps(upperdir, &c->pstat);
473 			ovl_dentry_set_upper_alias(c->dentry);
474 		}
475 	}
476 	inode_unlock(udir);
477 	if (err)
478 		return err;
479 
480 	err = ovl_set_nlink_upper(c->dentry);
481 
482 	return err;
483 }
484 
485 static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
486 {
487 	int err;
488 
489 	/*
490 	 * Copy up data first and then xattrs. Writing data after
491 	 * xattrs will remove security.capability xattr automatically.
492 	 */
493 	if (S_ISREG(c->stat.mode) && !c->metacopy) {
494 		struct path upperpath, datapath;
495 
496 		ovl_path_upper(c->dentry, &upperpath);
497 		if (WARN_ON(upperpath.dentry != NULL))
498 			return -EIO;
499 		upperpath.dentry = temp;
500 
501 		ovl_path_lowerdata(c->dentry, &datapath);
502 		err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
503 		if (err)
504 			return err;
505 	}
506 
507 	err = ovl_copy_xattr(c->lowerpath.dentry, temp);
508 	if (err)
509 		return err;
510 
511 	/*
512 	 * Store identifier of lower inode in upper inode xattr to
513 	 * allow lookup of the copy up origin inode.
514 	 *
515 	 * Don't set origin when we are breaking the association with a lower
516 	 * hard link.
517 	 */
518 	if (c->origin) {
519 		err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
520 		if (err)
521 			return err;
522 	}
523 
524 	if (c->metacopy) {
525 		err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
526 					 NULL, 0, -EOPNOTSUPP);
527 		if (err)
528 			return err;
529 	}
530 
531 	inode_lock(temp->d_inode);
532 	if (S_ISREG(c->stat.mode))
533 		err = ovl_set_size(temp, &c->stat);
534 	if (!err)
535 		err = ovl_set_attr(temp, &c->stat);
536 	inode_unlock(temp->d_inode);
537 
538 	return err;
539 }
540 
/* Credentials bookkeeping for the creation step of a copy up. */
struct ovl_cu_creds {
	/* Credentials returned by override_creds(); only set when @new is */
	const struct cred *old;
	/* Credentials supplied by security_inode_copy_up(); may be NULL */
	struct cred *new;
};
545 
546 static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc)
547 {
548 	int err;
549 
550 	cc->old = cc->new = NULL;
551 	err = security_inode_copy_up(dentry, &cc->new);
552 	if (err < 0)
553 		return err;
554 
555 	if (cc->new)
556 		cc->old = override_creds(cc->new);
557 
558 	return 0;
559 }
560 
561 static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
562 {
563 	if (cc->new) {
564 		revert_creds(cc->old);
565 		put_cred(cc->new);
566 	}
567 }
568 
/*
 * Copyup using workdir to prepare temp file.  Used when copying up directories,
 * special files or when upper fs doesn't support O_TMPFILE.
 *
 * The temp object is created in c->workdir, populated, and then renamed to
 * c->destname in c->destdir while both directories are locked for rename.
 * On failure after creation the temp object is removed.
 *
 * Returns 0 on success or a negative errno.
 */
static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
{
	struct inode *inode;
	struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
	struct dentry *temp, *upper;
	struct ovl_cu_creds cc;
	int err;
	struct ovl_cattr cattr = {
		/* Can't properly set mode on creation because of the umask */
		.mode = c->stat.mode & S_IFMT,
		.rdev = c->stat.rdev,
		.link = c->link
	};

	err = ovl_lock_rename_workdir(c->workdir, c->destdir);
	if (err)
		return err;

	err = ovl_prep_cu_creds(c->dentry, &cc);
	if (err)
		goto unlock;

	/* Only the creation itself runs with the copy-up credentials */
	temp = ovl_create_temp(c->workdir, &cattr);
	ovl_revert_cu_creds(&cc);

	err = PTR_ERR(temp);
	if (IS_ERR(temp))
		goto unlock;

	err = ovl_copy_up_inode(c, temp);
	if (err)
		goto cleanup;

	if (S_ISDIR(c->stat.mode) && c->indexed) {
		err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
		if (err)
			goto cleanup;
	}

	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto cleanup;

	/* Move the finished copy from the work dir into its destination */
	err = ovl_do_rename(wdir, temp, udir, upper, 0);
	dput(upper);
	if (err)
		goto cleanup;

	if (!c->metacopy)
		ovl_set_upperdata(d_inode(c->dentry));
	inode = d_inode(c->dentry);
	/* temp is handed to ovl_inode_update() and not dput() on this path */
	ovl_inode_update(inode, temp);
	if (S_ISDIR(inode->i_mode))
		ovl_set_flag(OVL_WHITEOUTS, inode);
unlock:
	unlock_rename(c->workdir, c->destdir);

	return err;

cleanup:
	ovl_cleanup(wdir, temp);
	dput(temp);
	goto unlock;
}
638 
639 /* Copyup using O_TMPFILE which does not require cross dir locking */
640 static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
641 {
642 	struct inode *udir = d_inode(c->destdir);
643 	struct dentry *temp, *upper;
644 	struct ovl_cu_creds cc;
645 	int err;
646 
647 	err = ovl_prep_cu_creds(c->dentry, &cc);
648 	if (err)
649 		return err;
650 
651 	temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
652 	ovl_revert_cu_creds(&cc);
653 
654 	if (IS_ERR(temp))
655 		return PTR_ERR(temp);
656 
657 	err = ovl_copy_up_inode(c, temp);
658 	if (err)
659 		goto out_dput;
660 
661 	inode_lock_nested(udir, I_MUTEX_PARENT);
662 
663 	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
664 	err = PTR_ERR(upper);
665 	if (!IS_ERR(upper)) {
666 		err = ovl_do_link(temp, udir, upper);
667 		dput(upper);
668 	}
669 	inode_unlock(udir);
670 
671 	if (err)
672 		goto out_dput;
673 
674 	if (!c->metacopy)
675 		ovl_set_upperdata(d_inode(c->dentry));
676 	ovl_inode_update(d_inode(c->dentry), temp);
677 
678 	return 0;
679 
680 out_dput:
681 	dput(temp);
682 	return err;
683 }
684 
/*
 * Copy up a single dentry
 *
 * All renames start with copy up of source if necessary.  The actual
 * rename will only proceed once the copy up was successful.  Copy up uses
 * upper parent i_mutex for exclusion.  Since rename can change d_parent it
 * is possible that the copy up will lock the old parent.  At that point
 * the file will have already been copied up anyway.
 *
 * Decides where the copy goes (index dir or the parent's upper dir), whether
 * the origin xattr is stored, and which copy-up method is used, then runs it.
 *
 * Returns 0 on success or a negative errno (-EIO when there is no parent and
 * the copy is not going to the index dir).
 */
static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
{
	int err;
	struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
	bool to_index = false;

	/*
	 * Indexed non-dir is copied up directly to the index entry and then
	 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
	 * then index entry is created and then copied up dir installed.
	 * Copying dir up to indexdir instead of workdir simplifies locking.
	 */
	if (ovl_need_index(c->dentry)) {
		c->indexed = true;
		if (S_ISDIR(c->stat.mode))
			c->workdir = ovl_indexdir(c->dentry->d_sb);
		else
			to_index = true;
	}

	/* Origin is stored for dirs, single-link files and indexed non-dirs */
	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
		c->origin = true;

	if (to_index) {
		c->destdir = ovl_indexdir(c->dentry->d_sb);
		/* destname.name is allocated here and freed at out */
		err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
		if (err)
			return err;
	} else if (WARN_ON(!c->parent)) {
		/* Disconnected dentry must be copied up to index dir */
		return -EIO;
	} else {
		/*
		 * Mark parent "impure" because it may now contain non-pure
		 * upper
		 */
		err = ovl_set_impure(c->parent, c->destdir);
		if (err)
			return err;
	}

	/* Should we copyup with O_TMPFILE or with workdir? */
	if (S_ISREG(c->stat.mode) && ofs->tmpfile)
		err = ovl_copy_up_tmpfile(c);
	else
		err = ovl_copy_up_workdir(c);
	if (err)
		goto out;

	if (c->indexed)
		ovl_set_flag(OVL_INDEX, d_inode(c->dentry));

	if (to_index) {
		/* Initialize nlink for copy up of disconnected dentry */
		err = ovl_set_nlink_upper(c->dentry);
	} else {
		struct inode *udir = d_inode(c->destdir);

		/* Restore timestamps on parent (best effort) */
		inode_lock(udir);
		ovl_set_timestamps(c->destdir, &c->pstat);
		inode_unlock(udir);

		ovl_dentry_set_upper_alias(c->dentry);
	}

out:
	if (to_index)
		kfree(c->destname.name);
	return err;
}
765 
766 static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
767 				  int flags)
768 {
769 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
770 
771 	if (!ofs->config.metacopy)
772 		return false;
773 
774 	if (!S_ISREG(mode))
775 		return false;
776 
777 	if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
778 		return false;
779 
780 	return true;
781 }
782 
/*
 * Copy up data of an inode which was copied up metadata only in the past.
 *
 * The upper file's security.capability xattr, if present, is read before the
 * data copy and written back afterwards.  On success the metacopy xattr is
 * removed and the inode is flagged as having upper data.
 *
 * Returns 0 on success or a negative errno (-EIO when the upper or lower
 * data path is missing).
 */
static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
{
	struct path upperpath, datapath;
	int err;
	char *capability = NULL;
	ssize_t uninitialized_var(cap_size);

	ovl_path_upper(c->dentry, &upperpath);
	if (WARN_ON(upperpath.dentry == NULL))
		return -EIO;

	ovl_path_lowerdata(c->dentry, &datapath);
	if (WARN_ON(datapath.dentry == NULL))
		return -EIO;

	/* Only look up the capability xattr when there is data to copy */
	if (c->stat.size) {
		err = cap_size = ovl_getxattr(upperpath.dentry, XATTR_NAME_CAPS,
					      &capability, 0);
		/* A missing xattr (-ENODATA) is not an error */
		if (err < 0 && err != -ENODATA)
			goto out;
	}

	err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
	if (err)
		goto out_free;

	/*
	 * Writing to upper file will clear security.capability xattr. We
	 * don't want that to happen for normal copy-up operation.
	 */
	if (capability) {
		err = ovl_do_setxattr(upperpath.dentry, XATTR_NAME_CAPS,
				      capability, cap_size, 0);
		if (err)
			goto out_free;
	}


	err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY);
	if (err)
		goto out_free;

	ovl_set_upperdata(d_inode(c->dentry));
out_free:
	kfree(capability);
out:
	return err;
}
832 
/*
 * Copy up @dentry, whose overlay parent is @parent (NULL when there is no
 * parent to link into).
 *
 * Gathers the lower attributes, the parent's timestamps and, for symlinks,
 * the link target, then performs whichever of these steps are still needed
 * between ovl_copy_up_start() and ovl_copy_up_end(): the copy up itself,
 * linking into the parent, and copying data for a metadata-only copy.
 *
 * Returns 0 on success (also when ovl_copy_up_start() returns a positive
 * value) or a negative errno; -EROFS when there is no work dir.
 */
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
			   int flags)
{
	int err;
	DEFINE_DELAYED_CALL(done);
	struct path parentpath;
	struct ovl_copy_up_ctx ctx = {
		.parent = parent,
		.dentry = dentry,
		.workdir = ovl_workdir(dentry),
	};

	if (WARN_ON(!ctx.workdir))
		return -EROFS;

	ovl_path_lower(dentry, &ctx.lowerpath);
	err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
	if (err)
		return err;

	ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);

	if (parent) {
		ovl_path_upper(parent, &parentpath);
		ctx.destdir = parentpath.dentry;
		ctx.destname = dentry->d_name;

		/* Remember parent timestamps so they can be restored later */
		err = vfs_getattr(&parentpath, &ctx.pstat,
				  STATX_ATIME | STATX_MTIME,
				  AT_STATX_SYNC_AS_STAT);
		if (err)
			return err;
	}

	/* maybe truncate regular file. this has no effect on dirs */
	if (flags & O_TRUNC)
		ctx.stat.size = 0;

	if (S_ISLNK(ctx.stat.mode)) {
		/* Link target stays valid until do_delayed_call(&done) */
		ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
		if (IS_ERR(ctx.link))
			return PTR_ERR(ctx.link);
	}

	err = ovl_copy_up_start(dentry, flags);
	/* err < 0: interrupted, err > 0: raced with another copy-up */
	if (unlikely(err)) {
		if (err > 0)
			err = 0;
	} else {
		if (!ovl_dentry_upper(dentry))
			err = ovl_do_copy_up(&ctx);
		if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
			err = ovl_link_up(&ctx);
		if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
			err = ovl_copy_up_meta_inode_data(&ctx);
		ovl_copy_up_end(dentry);
	}
	do_delayed_call(&done);

	return err;
}
896 
897 int ovl_copy_up_flags(struct dentry *dentry, int flags)
898 {
899 	int err = 0;
900 	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
901 	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
902 
903 	/*
904 	 * With NFS export, copy up can get called for a disconnected non-dir.
905 	 * In this case, we will copy up lower inode to index dir without
906 	 * linking it to upper dir.
907 	 */
908 	if (WARN_ON(disconnected && d_is_dir(dentry)))
909 		return -EIO;
910 
911 	while (!err) {
912 		struct dentry *next;
913 		struct dentry *parent = NULL;
914 
915 		if (ovl_already_copied_up(dentry, flags))
916 			break;
917 
918 		next = dget(dentry);
919 		/* find the topmost dentry not yet copied up */
920 		for (; !disconnected;) {
921 			parent = dget_parent(next);
922 
923 			if (ovl_dentry_upper(parent))
924 				break;
925 
926 			dput(next);
927 			next = parent;
928 		}
929 
930 		err = ovl_copy_up_one(parent, next, flags);
931 
932 		dput(parent);
933 		dput(next);
934 	}
935 	revert_creds(old_cred);
936 
937 	return err;
938 }
939 
940 static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
941 {
942 	/* Copy up of disconnected dentry does not set upper alias */
943 	if (ovl_already_copied_up(dentry, flags))
944 		return false;
945 
946 	if (special_file(d_inode(dentry)->i_mode))
947 		return false;
948 
949 	if (!ovl_open_flags_need_copy_up(flags))
950 		return false;
951 
952 	return true;
953 }
954 
/*
 * Copy up @dentry if opening it with @flags requires that.
 *
 * Write access is taken with ovl_want_write() for the duration of the copy
 * up.  Returns 0 when nothing needed doing or the copy up succeeded,
 * otherwise a negative errno.
 */
int ovl_maybe_copy_up(struct dentry *dentry, int flags)
{
	int err;

	if (!ovl_open_need_copy_up(dentry, flags))
		return 0;

	err = ovl_want_write(dentry);
	if (err)
		return err;

	err = ovl_copy_up_flags(dentry, flags);
	ovl_drop_write(dentry);

	return err;
}
969 
970 int ovl_copy_up_with_data(struct dentry *dentry)
971 {
972 	return ovl_copy_up_flags(dentry, O_WRONLY);
973 }
974 
/* Copy up @dentry with no open flags. */
int ovl_copy_up(struct dentry *dentry)
{
	const int open_flags = 0;

	return ovl_copy_up_flags(dentry, open_flags);
}
979