xref: /openbmc/linux/fs/overlayfs/namei.c (revision 4c5a116a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2011 Novell Inc.
4  * Copyright (C) 2016 Red Hat, Inc.
5  */
6 
7 #include <linux/fs.h>
8 #include <linux/cred.h>
9 #include <linux/ctype.h>
10 #include <linux/namei.h>
11 #include <linux/xattr.h>
12 #include <linux/ratelimit.h>
13 #include <linux/mount.h>
14 #include <linux/exportfs.h>
15 #include "overlayfs.h"
16 
/*
 * State shared between ovl_lookup() and its per-layer helpers
 * (ovl_lookup_layer(), ovl_lookup_single(), ovl_check_redirect()) while a
 * name is being looked up across the layers.
 */
struct ovl_lookup_data {
	/* Overlay super block; handed to ovl_lookup_trap_inode() */
	struct super_block *sb;
	/* Name being looked up; replaced by the redirect path once one is found */
	struct qstr name;
	/* Set when the last path element was found and can be looked up in */
	bool is_dir;
	/* Set when a whiteout, or an opaque dir as last element, was found */
	bool opaque;
	/* When set, the caller does not search any further layers */
	bool stop;
	/* Set by the caller for the final layer that will be searched */
	bool last;
	/* Current redirect path (released with kfree()), or NULL */
	char *redirect;
	/* Result of ovl_check_metacopy_xattr() on the non-dir entry found */
	bool metacopy;
};
27 
28 static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
29 			      size_t prelen, const char *post)
30 {
31 	int res;
32 	char *buf;
33 
34 	buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
35 	if (IS_ERR_OR_NULL(buf))
36 		return PTR_ERR(buf);
37 
38 	if (buf[0] == '/') {
39 		/*
40 		 * One of the ancestor path elements in an absolute path
41 		 * lookup in ovl_lookup_layer() could have been opaque and
42 		 * that will stop further lookup in lower layers (d->stop=true)
43 		 * But we have found an absolute redirect in decendant path
44 		 * element and that should force continue lookup in lower
45 		 * layers (reset d->stop).
46 		 */
47 		d->stop = false;
48 	} else {
49 		res = strlen(buf) + 1;
50 		memmove(buf + prelen, buf, res);
51 		memcpy(buf, d->name.name, prelen);
52 	}
53 
54 	strcat(buf, post);
55 	kfree(d->redirect);
56 	d->redirect = buf;
57 	d->name.name = d->redirect;
58 	d->name.len = strlen(d->redirect);
59 
60 	return 0;
61 }
62 
63 static int ovl_acceptable(void *ctx, struct dentry *dentry)
64 {
65 	/*
66 	 * A non-dir origin may be disconnected, which is fine, because
67 	 * we only need it for its unique inode number.
68 	 */
69 	if (!d_is_dir(dentry))
70 		return 1;
71 
72 	/* Don't decode a deleted empty directory */
73 	if (d_unhashed(dentry))
74 		return 0;
75 
76 	/* Check if directory belongs to the layer we are decoding from */
77 	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
78 }
79 
80 /*
81  * Check validity of an overlay file handle buffer.
82  *
83  * Return 0 for a valid file handle.
84  * Return -ENODATA for "origin unknown".
85  * Return <0 for an invalid file handle.
86  */
87 int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
88 {
89 	if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
90 		return -EINVAL;
91 
92 	if (fb->magic != OVL_FH_MAGIC)
93 		return -EINVAL;
94 
95 	/* Treat larger version and unknown flags as "origin unknown" */
96 	if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
97 		return -ENODATA;
98 
99 	/* Treat endianness mismatch as "origin unknown" */
100 	if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
101 	    (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
102 		return -ENODATA;
103 
104 	return 0;
105 }
106 
107 static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
108 {
109 	int res, err;
110 	struct ovl_fh *fh = NULL;
111 
112 	res = vfs_getxattr(dentry, name, NULL, 0);
113 	if (res < 0) {
114 		if (res == -ENODATA || res == -EOPNOTSUPP)
115 			return NULL;
116 		goto fail;
117 	}
118 	/* Zero size value means "copied up but origin unknown" */
119 	if (res == 0)
120 		return NULL;
121 
122 	fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
123 	if (!fh)
124 		return ERR_PTR(-ENOMEM);
125 
126 	res = vfs_getxattr(dentry, name, fh->buf, res);
127 	if (res < 0)
128 		goto fail;
129 
130 	err = ovl_check_fb_len(&fh->fb, res);
131 	if (err < 0) {
132 		if (err == -ENODATA)
133 			goto out;
134 		goto invalid;
135 	}
136 
137 	return fh;
138 
139 out:
140 	kfree(fh);
141 	return NULL;
142 
143 fail:
144 	pr_warn_ratelimited("failed to get origin (%i)\n", res);
145 	goto out;
146 invalid:
147 	pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
148 	goto out;
149 }
150 
151 struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
152 				  bool connected)
153 {
154 	struct dentry *real;
155 	int bytes;
156 
157 	/*
158 	 * Make sure that the stored uuid matches the uuid of the lower
159 	 * layer where file handle will be decoded.
160 	 */
161 	if (!uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid))
162 		return NULL;
163 
164 	bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
165 	real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
166 				  bytes >> 2, (int)fh->fb.type,
167 				  connected ? ovl_acceptable : NULL, mnt);
168 	if (IS_ERR(real)) {
169 		/*
170 		 * Treat stale file handle to lower file as "origin unknown".
171 		 * upper file handle could become stale when upper file is
172 		 * unlinked and this information is needed to handle stale
173 		 * index entries correctly.
174 		 */
175 		if (real == ERR_PTR(-ESTALE) &&
176 		    !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
177 			real = NULL;
178 		return real;
179 	}
180 
181 	if (ovl_dentry_weird(real)) {
182 		dput(real);
183 		return NULL;
184 	}
185 
186 	return real;
187 }
188 
189 static bool ovl_is_opaquedir(struct dentry *dentry)
190 {
191 	return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
192 }
193 
194 static struct dentry *ovl_lookup_positive_unlocked(const char *name,
195 						   struct dentry *base, int len,
196 						   bool drop_negative)
197 {
198 	struct dentry *ret = lookup_one_len_unlocked(name, base, len);
199 
200 	if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
201 		if (drop_negative && ret->d_lockref.count == 1) {
202 			spin_lock(&ret->d_lock);
203 			/* Recheck condition under lock */
204 			if (d_is_negative(ret) && ret->d_lockref.count == 1)
205 				__d_drop(ret);
206 			spin_unlock(&ret->d_lock);
207 		}
208 		dput(ret);
209 		ret = ERR_PTR(-ENOENT);
210 	}
211 	return ret;
212 }
213 
/*
 * Look up one path element @name (of @namelen bytes) under @base and update
 * the lookup state @d according to what was found.
 *
 * @prelen and @post are handed on to ovl_check_redirect(); an empty @post
 * marks @name as the last element of the path being looked up.
 *
 * On success returns 0 and stores in *@ret either the dentry found (with a
 * reference held) or NULL if nothing usable was found (no such entry, name
 * too long, whiteout, or an entry of a type that ends the lookup).
 * On failure returns a negative error and leaves *@ret untouched.
 */
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
			     const char *name, unsigned int namelen,
			     size_t prelen, const char *post,
			     struct dentry **ret, bool drop_negative)
{
	struct dentry *this;
	int err;
	bool last_element = !post[0];

	this = ovl_lookup_positive_unlocked(name, base, namelen, drop_negative);
	if (IS_ERR(this)) {
		err = PTR_ERR(this);
		this = NULL;
		/* Not finding the entry is not an error: report NULL */
		if (err == -ENOENT || err == -ENAMETOOLONG)
			goto out;
		goto out_err;
	}

	if (ovl_dentry_weird(this)) {
		/* Don't support traversing automounts and other weirdness */
		err = -EREMOTE;
		goto out_err;
	}
	if (ovl_is_whiteout(this)) {
		/* A whiteout hides everything below it */
		d->stop = d->opaque = true;
		goto put_and_out;
	}
	/*
	 * This dentry should be a regular file if previous layer lookup
	 * found a metacopy dentry.
	 */
	if (last_element && d->metacopy && !d_is_reg(this)) {
		d->stop = true;
		goto put_and_out;
	}
	if (!d_can_lookup(this)) {
		/*
		 * Not a directory: unusable if a directory was already found
		 * in a layer above or if more path elements follow.
		 */
		if (d->is_dir || !last_element) {
			d->stop = true;
			goto put_and_out;
		}
		err = ovl_check_metacopy_xattr(this);
		if (err < 0)
			goto out_err;

		/* Only a metacopy entry lets the lookup go on in lower layers */
		d->metacopy = err;
		d->stop = !d->metacopy;
		if (!d->metacopy || d->last)
			goto out;
	} else {
		if (ovl_lookup_trap_inode(d->sb, this)) {
			/* Caught in a trap of overlapping layers */
			err = -ELOOP;
			goto out_err;
		}

		if (last_element)
			d->is_dir = true;
		/* No layer left to search: opaque/redirect are irrelevant */
		if (d->last)
			goto out;

		if (ovl_is_opaquedir(this)) {
			d->stop = true;
			if (last_element)
				d->opaque = true;
			goto out;
		}
	}
	/* Pick up a redirect to follow in the next layer, if there is one */
	err = ovl_check_redirect(this, d, prelen, post);
	if (err)
		goto out_err;
out:
	*ret = this;
	return 0;

put_and_out:
	dput(this);
	this = NULL;
	goto out;

out_err:
	dput(this);
	return err;
}
297 
298 static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
299 			    struct dentry **ret, bool drop_negative)
300 {
301 	/* Counting down from the end, since the prefix can change */
302 	size_t rem = d->name.len - 1;
303 	struct dentry *dentry = NULL;
304 	int err;
305 
306 	if (d->name.name[0] != '/')
307 		return ovl_lookup_single(base, d, d->name.name, d->name.len,
308 					 0, "", ret, drop_negative);
309 
310 	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
311 		const char *s = d->name.name + d->name.len - rem;
312 		const char *next = strchrnul(s, '/');
313 		size_t thislen = next - s;
314 		bool end = !next[0];
315 
316 		/* Verify we did not go off the rails */
317 		if (WARN_ON(s[-1] != '/'))
318 			return -EIO;
319 
320 		err = ovl_lookup_single(base, d, s, thislen,
321 					d->name.len - rem, next, &base,
322 					drop_negative);
323 		dput(dentry);
324 		if (err)
325 			return err;
326 		dentry = base;
327 		if (end)
328 			break;
329 
330 		rem -= thislen + 1;
331 
332 		if (WARN_ON(rem >= d->name.len))
333 			return -EIO;
334 	}
335 	*ret = dentry;
336 	return 0;
337 }
338 
339 
340 int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
341 			struct dentry *upperdentry, struct ovl_path **stackp)
342 {
343 	struct dentry *origin = NULL;
344 	int i;
345 
346 	for (i = 1; i < ofs->numlayer; i++) {
347 		/*
348 		 * If lower fs uuid is not unique among lower fs we cannot match
349 		 * fh->uuid to layer.
350 		 */
351 		if (ofs->layers[i].fsid &&
352 		    ofs->layers[i].fs->bad_uuid)
353 			continue;
354 
355 		origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt,
356 					    connected);
357 		if (origin)
358 			break;
359 	}
360 
361 	if (!origin)
362 		return -ESTALE;
363 	else if (IS_ERR(origin))
364 		return PTR_ERR(origin);
365 
366 	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
367 	    ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
368 		goto invalid;
369 
370 	if (!*stackp)
371 		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
372 	if (!*stackp) {
373 		dput(origin);
374 		return -ENOMEM;
375 	}
376 	**stackp = (struct ovl_path){
377 		.dentry = origin,
378 		.layer = &ofs->layers[i]
379 	};
380 
381 	return 0;
382 
383 invalid:
384 	pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
385 			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
386 			    d_inode(origin)->i_mode & S_IFMT);
387 	dput(origin);
388 	return -EIO;
389 }
390 
391 static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
392 			    struct ovl_path **stackp)
393 {
394 	struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
395 	int err;
396 
397 	if (IS_ERR_OR_NULL(fh))
398 		return PTR_ERR(fh);
399 
400 	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
401 	kfree(fh);
402 
403 	if (err) {
404 		if (err == -ESTALE)
405 			return 0;
406 		return err;
407 	}
408 
409 	return 0;
410 }
411 
412 /*
413  * Verify that @fh matches the file handle stored in xattr @name.
414  * Return 0 on match, -ESTALE on mismatch, < 0 on error.
415  */
416 static int ovl_verify_fh(struct dentry *dentry, const char *name,
417 			 const struct ovl_fh *fh)
418 {
419 	struct ovl_fh *ofh = ovl_get_fh(dentry, name);
420 	int err = 0;
421 
422 	if (!ofh)
423 		return -ENODATA;
424 
425 	if (IS_ERR(ofh))
426 		return PTR_ERR(ofh);
427 
428 	if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
429 		err = -ESTALE;
430 
431 	kfree(ofh);
432 	return err;
433 }
434 
435 /*
436  * Verify that @real dentry matches the file handle stored in xattr @name.
437  *
438  * If @set is true and there is no stored file handle, encode @real and store
439  * file handle in xattr @name.
440  *
441  * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
442  */
443 int ovl_verify_set_fh(struct dentry *dentry, const char *name,
444 		      struct dentry *real, bool is_upper, bool set)
445 {
446 	struct inode *inode;
447 	struct ovl_fh *fh;
448 	int err;
449 
450 	fh = ovl_encode_real_fh(real, is_upper);
451 	err = PTR_ERR(fh);
452 	if (IS_ERR(fh)) {
453 		fh = NULL;
454 		goto fail;
455 	}
456 
457 	err = ovl_verify_fh(dentry, name, fh);
458 	if (set && err == -ENODATA)
459 		err = ovl_do_setxattr(dentry, name, fh->buf, fh->fb.len, 0);
460 	if (err)
461 		goto fail;
462 
463 out:
464 	kfree(fh);
465 	return err;
466 
467 fail:
468 	inode = d_inode(real);
469 	pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
470 			    is_upper ? "upper" : "origin", real,
471 			    inode ? inode->i_ino : 0, err);
472 	goto out;
473 }
474 
475 /* Get upper dentry from index */
476 struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
477 {
478 	struct ovl_fh *fh;
479 	struct dentry *upper;
480 
481 	if (!d_is_dir(index))
482 		return dget(index);
483 
484 	fh = ovl_get_fh(index, OVL_XATTR_UPPER);
485 	if (IS_ERR_OR_NULL(fh))
486 		return ERR_CAST(fh);
487 
488 	upper = ovl_decode_real_fh(fh, ovl_upper_mnt(ofs), true);
489 	kfree(fh);
490 
491 	if (IS_ERR_OR_NULL(upper))
492 		return upper ?: ERR_PTR(-ESTALE);
493 
494 	if (!d_is_dir(upper)) {
495 		pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
496 				    index, upper);
497 		dput(upper);
498 		return ERR_PTR(-EIO);
499 	}
500 
501 	return upper;
502 }
503 
/*
 * Verify that an index entry name matches the origin file handle stored in
 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
 *
 * A negative @index is accepted without any check.  -ENOENT is returned for
 * an orphan index entry; orphans are logged with a message of their own.
 */
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
{
	struct ovl_fh *fh = NULL;
	size_t len;
	/* On-stack slot that ovl_check_origin_fh() fills in through 'stack' */
	struct ovl_path origin = { };
	struct ovl_path *stack = &origin;
	struct dentry *upper = NULL;
	int err;

	if (!d_inode(index))
		return 0;

	/* The name is a hex string: two characters per file handle byte */
	err = -EINVAL;
	if (index->d_name.len < sizeof(struct ovl_fb)*2)
		goto fail;

	err = -ENOMEM;
	len = index->d_name.len / 2;
	fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
	if (!fh)
		goto fail;

	err = -EINVAL;
	if (hex2bin(fh->buf, index->d_name.name, len))
		goto fail;

	err = ovl_check_fb_len(&fh->fb, len);
	if (err)
		goto fail;

	/*
	 * Whiteout index entries are used as an indication that an exported
	 * overlay file handle should be treated as stale (i.e. after unlink
	 * of the overlay inode). These entries contain no origin xattr.
	 */
	if (ovl_is_whiteout(index))
		goto out;

	/*
	 * Verifying directory index entries are not stale is expensive, so
	 * only verify stale dir index if NFS export is enabled.
	 */
	if (d_is_dir(index) && !ofs->config.nfs_export)
		goto out;

	/*
	 * Directory index entries should have 'upper' xattr pointing to the
	 * real upper dir. Non-dir index entries are hardlinks to the upper
	 * real inode. For non-dir index, we can read the copy up origin xattr
	 * directly from the index dentry, but for dir index we first need to
	 * decode the upper directory.
	 */
	upper = ovl_index_upper(ofs, index);
	if (IS_ERR_OR_NULL(upper)) {
		/* PTR_ERR(NULL) is 0, which is turned into -ESTALE below */
		err = PTR_ERR(upper);
		/*
		 * Directory index entries with no 'upper' xattr need to be
		 * removed. When dir index entry has a stale 'upper' xattr,
		 * we assume that upper dir was removed and we treat the dir
		 * index as orphan entry that needs to be whited out.
		 */
		if (err == -ESTALE)
			goto orphan;
		else if (!err)
			err = -ESTALE;
		goto fail;
	}

	/* The file handle decoded from the name must equal the stored origin */
	err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
	dput(upper);
	if (err)
		goto fail;

	/* Check if non-dir index is orphan and don't warn before cleaning it */
	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
		if (err)
			goto fail;

		if (ovl_get_nlink(origin.dentry, index, 0) == 0)
			goto orphan;
	}

out:
	/* origin.dentry is still NULL unless the origin was decoded above */
	dput(origin.dentry);
	kfree(fh);
	return err;

fail:
	pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
			    index, d_inode(index)->i_mode & S_IFMT, err);
	goto out;

orphan:
	pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
			    index, d_inode(index)->i_mode & S_IFMT,
			    d_inode(index)->i_nlink);
	err = -ENOENT;
	goto out;
}
609 
610 static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
611 {
612 	char *n, *s;
613 
614 	n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
615 	if (!n)
616 		return -ENOMEM;
617 
618 	s  = bin2hex(n, fh->buf, fh->fb.len);
619 	*name = (struct qstr) QSTR_INIT(n, s - n);
620 
621 	return 0;
622 
623 }
624 
625 /*
626  * Lookup in indexdir for the index entry of a lower real inode or a copy up
627  * origin inode. The index entry name is the hex representation of the lower
628  * inode file handle.
629  *
630  * If the index dentry in negative, then either no lower aliases have been
631  * copied up yet, or aliases have been copied up in older kernels and are
632  * not indexed.
633  *
634  * If the index dentry for a copy up origin inode is positive, but points
635  * to an inode different than the upper inode, then either the upper inode
636  * has been copied up and not indexed or it was indexed, but since then
637  * index dir was cleared. Either way, that index cannot be used to indentify
638  * the overlay inode.
639  */
640 int ovl_get_index_name(struct dentry *origin, struct qstr *name)
641 {
642 	struct ovl_fh *fh;
643 	int err;
644 
645 	fh = ovl_encode_real_fh(origin, false);
646 	if (IS_ERR(fh))
647 		return PTR_ERR(fh);
648 
649 	err = ovl_get_index_name_fh(fh, name);
650 
651 	kfree(fh);
652 	return err;
653 }
654 
655 /* Lookup index by file handle for NFS export */
656 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
657 {
658 	struct dentry *index;
659 	struct qstr name;
660 	int err;
661 
662 	err = ovl_get_index_name_fh(fh, &name);
663 	if (err)
664 		return ERR_PTR(err);
665 
666 	index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
667 	kfree(name.name);
668 	if (IS_ERR(index)) {
669 		if (PTR_ERR(index) == -ENOENT)
670 			index = NULL;
671 		return index;
672 	}
673 
674 	if (ovl_is_whiteout(index))
675 		err = -ESTALE;
676 	else if (ovl_dentry_weird(index))
677 		err = -EIO;
678 	else
679 		return index;
680 
681 	dput(index);
682 	return ERR_PTR(err);
683 }
684 
/*
 * Look up the index entry of @origin in the index directory and check that
 * it can be used for the overlay inode.
 *
 * @upper is the upper dentry of the overlay object, if it has one.  With
 * @verify, a directory index additionally has to point back to @upper and a
 * whiteout index is treated as a bad index.
 *
 * Returns the index dentry, NULL if there is no index entry or if a non-dir
 * index entry belongs to a different inode than @upper,
 * ERR_PTR(-ESTALE) for a whiteout index when !@verify, ERR_PTR(-EIO) for a
 * bad index, or the ERR_PTR from the name encoding or the lookup.
 */
struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
				struct dentry *origin, bool verify)
{
	struct dentry *index;
	struct inode *inode;
	struct qstr name;
	bool is_dir = d_is_dir(origin);
	int err;

	err = ovl_get_index_name(origin, &name);
	if (err)
		return ERR_PTR(err);

	index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
	if (IS_ERR(index)) {
		err = PTR_ERR(index);
		/* No index entry is not an error */
		if (err == -ENOENT) {
			index = NULL;
			goto out;
		}
		pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
				    d_inode(origin)->i_ino, name.len, name.name,
				    err);
		/* index still holds the ERR_PTR, which is what gets returned */
		goto out;
	}

	inode = d_inode(index);
	if (ovl_is_whiteout(index) && !verify) {
		/*
		 * When index lookup is called with !verify for decoding an
		 * overlay file handle, a whiteout index implies that decode
		 * should treat file handle as stale and no need to print a
		 * warning about it.
		 */
		dput(index);
		index = ERR_PTR(-ESTALE);
		goto out;
	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
		   ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
		/*
		 * Index should always be of the same file type as origin
		 * except for the case of a whiteout index. A whiteout
		 * index should only exist if all lower aliases have been
		 * unlinked, which means that finding a lower origin on lookup
		 * whose index is a whiteout should be treated as an error.
		 */
		pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
				    index, d_inode(index)->i_mode & S_IFMT,
				    d_inode(origin)->i_mode & S_IFMT);
		goto fail;
	} else if (is_dir && verify) {
		if (!upper) {
			pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
					    origin, index);
			goto fail;
		}

		/* Verify that dir index 'upper' xattr points to upper dir */
		err = ovl_verify_upper(index, upper, false);
		if (err) {
			if (err == -ESTALE) {
				pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
						    upper, origin, index);
			}
			goto fail;
		}
	} else if (upper && d_inode(upper) != inode) {
		/* The index entry belongs to another inode: ignore it */
		goto out_dput;
	}
out:
	kfree(name.name);
	return index;

out_dput:
	dput(index);
	index = NULL;
	goto out;

fail:
	dput(index);
	index = ERR_PTR(-EIO);
	goto out;
}
769 
770 /*
771  * Returns next layer in stack starting from top.
772  * Returns -1 if this is the last layer.
773  */
774 int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
775 {
776 	struct ovl_entry *oe = dentry->d_fsdata;
777 
778 	BUG_ON(idx < 0);
779 	if (idx == 0) {
780 		ovl_path_upper(dentry, path);
781 		if (path->dentry)
782 			return oe->numlower ? 1 : -1;
783 		idx++;
784 	}
785 	BUG_ON(idx > oe->numlower);
786 	path->dentry = oe->lowerstack[idx - 1].dentry;
787 	path->mnt = oe->lowerstack[idx - 1].layer->mnt;
788 
789 	return (idx < oe->numlower) ? idx + 1 : -1;
790 }
791 
792 /* Fix missing 'origin' xattr */
793 static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
794 			  struct dentry *upper)
795 {
796 	int err;
797 
798 	if (ovl_check_origin_xattr(upper))
799 		return 0;
800 
801 	err = ovl_want_write(dentry);
802 	if (err)
803 		return err;
804 
805 	err = ovl_set_origin(dentry, lower, upper);
806 	if (!err)
807 		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
808 
809 	ovl_drop_write(dentry);
810 	return err;
811 }
812 
813 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
814 			  unsigned int flags)
815 {
816 	struct ovl_entry *oe;
817 	const struct cred *old_cred;
818 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
819 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
820 	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
821 	struct ovl_path *stack = NULL, *origin_path = NULL;
822 	struct dentry *upperdir, *upperdentry = NULL;
823 	struct dentry *origin = NULL;
824 	struct dentry *index = NULL;
825 	unsigned int ctr = 0;
826 	struct inode *inode = NULL;
827 	bool upperopaque = false;
828 	char *upperredirect = NULL;
829 	struct dentry *this;
830 	unsigned int i;
831 	int err;
832 	bool uppermetacopy = false;
833 	struct ovl_lookup_data d = {
834 		.sb = dentry->d_sb,
835 		.name = dentry->d_name,
836 		.is_dir = false,
837 		.opaque = false,
838 		.stop = false,
839 		.last = ofs->config.redirect_follow ? false : !poe->numlower,
840 		.redirect = NULL,
841 		.metacopy = false,
842 	};
843 
844 	if (dentry->d_name.len > ofs->namelen)
845 		return ERR_PTR(-ENAMETOOLONG);
846 
847 	old_cred = ovl_override_creds(dentry->d_sb);
848 	upperdir = ovl_dentry_upper(dentry->d_parent);
849 	if (upperdir) {
850 		err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
851 		if (err)
852 			goto out;
853 
854 		if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
855 			dput(upperdentry);
856 			err = -EREMOTE;
857 			goto out;
858 		}
859 		if (upperdentry && !d.is_dir) {
860 			/*
861 			 * Lookup copy up origin by decoding origin file handle.
862 			 * We may get a disconnected dentry, which is fine,
863 			 * because we only need to hold the origin inode in
864 			 * cache and use its inode number.  We may even get a
865 			 * connected dentry, that is not under any of the lower
866 			 * layers root.  That is also fine for using it's inode
867 			 * number - it's the same as if we held a reference
868 			 * to a dentry in lower layer that was moved under us.
869 			 */
870 			err = ovl_check_origin(ofs, upperdentry, &origin_path);
871 			if (err)
872 				goto out_put_upper;
873 
874 			if (d.metacopy)
875 				uppermetacopy = true;
876 		}
877 
878 		if (d.redirect) {
879 			err = -ENOMEM;
880 			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
881 			if (!upperredirect)
882 				goto out_put_upper;
883 			if (d.redirect[0] == '/')
884 				poe = roe;
885 		}
886 		upperopaque = d.opaque;
887 	}
888 
889 	if (!d.stop && poe->numlower) {
890 		err = -ENOMEM;
891 		stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path),
892 				GFP_KERNEL);
893 		if (!stack)
894 			goto out_put_upper;
895 	}
896 
897 	for (i = 0; !d.stop && i < poe->numlower; i++) {
898 		struct ovl_path lower = poe->lowerstack[i];
899 
900 		if (!ofs->config.redirect_follow)
901 			d.last = i == poe->numlower - 1;
902 		else
903 			d.last = lower.layer->idx == roe->numlower;
904 
905 		err = ovl_lookup_layer(lower.dentry, &d, &this, false);
906 		if (err)
907 			goto out_put;
908 
909 		if (!this)
910 			continue;
911 
912 		if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
913 			err = -EPERM;
914 			pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
915 			goto out_put;
916 		}
917 
918 		/*
919 		 * If no origin fh is stored in upper of a merge dir, store fh
920 		 * of lower dir and set upper parent "impure".
921 		 */
922 		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
923 			err = ovl_fix_origin(dentry, this, upperdentry);
924 			if (err) {
925 				dput(this);
926 				goto out_put;
927 			}
928 		}
929 
930 		/*
931 		 * When "verify_lower" feature is enabled, do not merge with a
932 		 * lower dir that does not match a stored origin xattr. In any
933 		 * case, only verified origin is used for index lookup.
934 		 *
935 		 * For non-dir dentry, if index=on, then ensure origin
936 		 * matches the dentry found using path based lookup,
937 		 * otherwise error out.
938 		 */
939 		if (upperdentry && !ctr &&
940 		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
941 		     (!d.is_dir && ofs->config.index && origin_path))) {
942 			err = ovl_verify_origin(upperdentry, this, false);
943 			if (err) {
944 				dput(this);
945 				if (d.is_dir)
946 					break;
947 				goto out_put;
948 			}
949 			origin = this;
950 		}
951 
952 		if (d.metacopy && ctr) {
953 			/*
954 			 * Do not store intermediate metacopy dentries in
955 			 * lower chain, except top most lower metacopy dentry.
956 			 * Continue the loop so that if there is an absolute
957 			 * redirect on this dentry, poe can be reset to roe.
958 			 */
959 			dput(this);
960 			this = NULL;
961 		} else {
962 			stack[ctr].dentry = this;
963 			stack[ctr].layer = lower.layer;
964 			ctr++;
965 		}
966 
967 		/*
968 		 * Following redirects can have security consequences: it's like
969 		 * a symlink into the lower layer without the permission checks.
970 		 * This is only a problem if the upper layer is untrusted (e.g
971 		 * comes from an USB drive).  This can allow a non-readable file
972 		 * or directory to become readable.
973 		 *
974 		 * Only following redirects when redirects are enabled disables
975 		 * this attack vector when not necessary.
976 		 */
977 		err = -EPERM;
978 		if (d.redirect && !ofs->config.redirect_follow) {
979 			pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
980 					    dentry);
981 			goto out_put;
982 		}
983 
984 		if (d.stop)
985 			break;
986 
987 		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
988 			poe = roe;
989 			/* Find the current layer on the root dentry */
990 			i = lower.layer->idx - 1;
991 		}
992 	}
993 
994 	/*
995 	 * For regular non-metacopy upper dentries, there is no lower
996 	 * path based lookup, hence ctr will be zero. If a dentry is found
997 	 * using ORIGIN xattr on upper, install it in stack.
998 	 *
999 	 * For metacopy dentry, path based lookup will find lower dentries.
1000 	 * Just make sure a corresponding data dentry has been found.
1001 	 */
1002 	if (d.metacopy || (uppermetacopy && !ctr)) {
1003 		err = -EIO;
1004 		goto out_put;
1005 	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
1006 		if (WARN_ON(stack != NULL)) {
1007 			err = -EIO;
1008 			goto out_put;
1009 		}
1010 		stack = origin_path;
1011 		ctr = 1;
1012 		origin = origin_path->dentry;
1013 		origin_path = NULL;
1014 	}
1015 
1016 	/*
1017 	 * Always lookup index if there is no-upperdentry.
1018 	 *
1019 	 * For the case of upperdentry, we have set origin by now if it
1020 	 * needed to be set. There are basically three cases.
1021 	 *
1022 	 * For directories, lookup index by lower inode and verify it matches
1023 	 * upper inode. We only trust dir index if we verified that lower dir
1024 	 * matches origin, otherwise dir index entries may be inconsistent
1025 	 * and we ignore them.
1026 	 *
1027 	 * For regular upper, we already set origin if upper had ORIGIN
1028 	 * xattr. There is no verification though as there is no path
1029 	 * based dentry lookup in lower in this case.
1030 	 *
1031 	 * For metacopy upper, we set a verified origin already if index
1032 	 * is enabled and if upper had an ORIGIN xattr.
1033 	 *
1034 	 */
1035 	if (!upperdentry && ctr)
1036 		origin = stack[0].dentry;
1037 
1038 	if (origin && ovl_indexdir(dentry->d_sb) &&
1039 	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
1040 		index = ovl_lookup_index(ofs, upperdentry, origin, true);
1041 		if (IS_ERR(index)) {
1042 			err = PTR_ERR(index);
1043 			index = NULL;
1044 			goto out_put;
1045 		}
1046 	}
1047 
1048 	oe = ovl_alloc_entry(ctr);
1049 	err = -ENOMEM;
1050 	if (!oe)
1051 		goto out_put;
1052 
1053 	memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
1054 	dentry->d_fsdata = oe;
1055 
1056 	if (upperopaque)
1057 		ovl_dentry_set_opaque(dentry);
1058 
1059 	if (upperdentry)
1060 		ovl_dentry_set_upper_alias(dentry);
1061 	else if (index) {
1062 		upperdentry = dget(index);
1063 		upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
1064 		if (IS_ERR(upperredirect)) {
1065 			err = PTR_ERR(upperredirect);
1066 			upperredirect = NULL;
1067 			goto out_free_oe;
1068 		}
1069 		err = ovl_check_metacopy_xattr(upperdentry);
1070 		if (err < 0)
1071 			goto out_free_oe;
1072 		uppermetacopy = err;
1073 	}
1074 
1075 	if (upperdentry || ctr) {
1076 		struct ovl_inode_params oip = {
1077 			.upperdentry = upperdentry,
1078 			.lowerpath = stack,
1079 			.index = index,
1080 			.numlower = ctr,
1081 			.redirect = upperredirect,
1082 			.lowerdata = (ctr > 1 && !d.is_dir) ?
1083 				      stack[ctr - 1].dentry : NULL,
1084 		};
1085 
1086 		inode = ovl_get_inode(dentry->d_sb, &oip);
1087 		err = PTR_ERR(inode);
1088 		if (IS_ERR(inode))
1089 			goto out_free_oe;
1090 		if (upperdentry && !uppermetacopy)
1091 			ovl_set_flag(OVL_UPPERDATA, inode);
1092 	}
1093 
1094 	ovl_dentry_update_reval(dentry, upperdentry,
1095 			DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
1096 
1097 	revert_creds(old_cred);
1098 	if (origin_path) {
1099 		dput(origin_path->dentry);
1100 		kfree(origin_path);
1101 	}
1102 	dput(index);
1103 	kfree(stack);
1104 	kfree(d.redirect);
1105 	return d_splice_alias(inode, dentry);
1106 
1107 out_free_oe:
1108 	dentry->d_fsdata = NULL;
1109 	kfree(oe);
1110 out_put:
1111 	dput(index);
1112 	for (i = 0; i < ctr; i++)
1113 		dput(stack[i].dentry);
1114 	kfree(stack);
1115 out_put_upper:
1116 	if (origin_path) {
1117 		dput(origin_path->dentry);
1118 		kfree(origin_path);
1119 	}
1120 	dput(upperdentry);
1121 	kfree(upperredirect);
1122 out:
1123 	kfree(d.redirect);
1124 	revert_creds(old_cred);
1125 	return ERR_PTR(err);
1126 }
1127 
1128 bool ovl_lower_positive(struct dentry *dentry)
1129 {
1130 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
1131 	const struct qstr *name = &dentry->d_name;
1132 	const struct cred *old_cred;
1133 	unsigned int i;
1134 	bool positive = false;
1135 	bool done = false;
1136 
1137 	/*
1138 	 * If dentry is negative, then lower is positive iff this is a
1139 	 * whiteout.
1140 	 */
1141 	if (!dentry->d_inode)
1142 		return ovl_dentry_is_opaque(dentry);
1143 
1144 	/* Negative upper -> positive lower */
1145 	if (!ovl_dentry_upper(dentry))
1146 		return true;
1147 
1148 	old_cred = ovl_override_creds(dentry->d_sb);
1149 	/* Positive upper -> have to look up lower to see whether it exists */
1150 	for (i = 0; !done && !positive && i < poe->numlower; i++) {
1151 		struct dentry *this;
1152 		struct dentry *lowerdir = poe->lowerstack[i].dentry;
1153 
1154 		this = lookup_positive_unlocked(name->name, lowerdir,
1155 					       name->len);
1156 		if (IS_ERR(this)) {
1157 			switch (PTR_ERR(this)) {
1158 			case -ENOENT:
1159 			case -ENAMETOOLONG:
1160 				break;
1161 
1162 			default:
1163 				/*
1164 				 * Assume something is there, we just couldn't
1165 				 * access it.
1166 				 */
1167 				positive = true;
1168 				break;
1169 			}
1170 		} else {
1171 			positive = !ovl_is_whiteout(this);
1172 			done = true;
1173 			dput(this);
1174 		}
1175 	}
1176 	revert_creds(old_cred);
1177 
1178 	return positive;
1179 }
1180