xref: /openbmc/linux/fs/overlayfs/namei.c (revision 965f22bc)
1 /*
2  * Copyright (C) 2011 Novell Inc.
3  * Copyright (C) 2016 Red Hat, Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/cred.h>
12 #include <linux/ctype.h>
13 #include <linux/namei.h>
14 #include <linux/xattr.h>
15 #include <linux/ratelimit.h>
16 #include <linux/mount.h>
17 #include <linux/exportfs.h>
18 #include "overlayfs.h"
19 
/*
 * State carried through a lookup while it moves from one layer to the next.
 */
struct ovl_lookup_data {
	/* Name to look up in the next layer; replaced by the redirect path */
	struct qstr name;
	/* Set once the last path element was found to be a directory */
	bool is_dir;
	/* Set when a whiteout or an opaque last path element was found */
	bool opaque;
	/* Set to stop the caller from looking in any further layer */
	bool stop;
	/* Set by the caller when the layer being searched is the last one */
	bool last;
	/* Current redirect path, released with kfree(); name.name may point to it */
	char *redirect;
	/* Result of the most recent metacopy xattr check on a non-directory */
	bool metacopy;
};
29 
30 static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
31 			      size_t prelen, const char *post)
32 {
33 	int res;
34 	char *buf;
35 
36 	buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
37 	if (IS_ERR_OR_NULL(buf))
38 		return PTR_ERR(buf);
39 
40 	if (buf[0] == '/') {
41 		/*
42 		 * One of the ancestor path elements in an absolute path
43 		 * lookup in ovl_lookup_layer() could have been opaque and
44 		 * that will stop further lookup in lower layers (d->stop=true)
45 		 * But we have found an absolute redirect in decendant path
46 		 * element and that should force continue lookup in lower
47 		 * layers (reset d->stop).
48 		 */
49 		d->stop = false;
50 	} else {
51 		res = strlen(buf) + 1;
52 		memmove(buf + prelen, buf, res);
53 		memcpy(buf, d->name.name, prelen);
54 	}
55 
56 	strcat(buf, post);
57 	kfree(d->redirect);
58 	d->redirect = buf;
59 	d->name.name = d->redirect;
60 	d->name.len = strlen(d->redirect);
61 
62 	return 0;
63 }
64 
65 static int ovl_acceptable(void *ctx, struct dentry *dentry)
66 {
67 	/*
68 	 * A non-dir origin may be disconnected, which is fine, because
69 	 * we only need it for its unique inode number.
70 	 */
71 	if (!d_is_dir(dentry))
72 		return 1;
73 
74 	/* Don't decode a deleted empty directory */
75 	if (d_unhashed(dentry))
76 		return 0;
77 
78 	/* Check if directory belongs to the layer we are decoding from */
79 	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
80 }
81 
82 /*
83  * Check validity of an overlay file handle buffer.
84  *
85  * Return 0 for a valid file handle.
86  * Return -ENODATA for "origin unknown".
87  * Return <0 for an invalid file handle.
88  */
89 int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
90 {
91 	if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
92 		return -EINVAL;
93 
94 	if (fh->magic != OVL_FH_MAGIC)
95 		return -EINVAL;
96 
97 	/* Treat larger version and unknown flags as "origin unknown" */
98 	if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
99 		return -ENODATA;
100 
101 	/* Treat endianness mismatch as "origin unknown" */
102 	if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
103 	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
104 		return -ENODATA;
105 
106 	return 0;
107 }
108 
109 static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
110 {
111 	int res, err;
112 	struct ovl_fh *fh = NULL;
113 
114 	res = vfs_getxattr(dentry, name, NULL, 0);
115 	if (res < 0) {
116 		if (res == -ENODATA || res == -EOPNOTSUPP)
117 			return NULL;
118 		goto fail;
119 	}
120 	/* Zero size value means "copied up but origin unknown" */
121 	if (res == 0)
122 		return NULL;
123 
124 	fh = kzalloc(res, GFP_KERNEL);
125 	if (!fh)
126 		return ERR_PTR(-ENOMEM);
127 
128 	res = vfs_getxattr(dentry, name, fh, res);
129 	if (res < 0)
130 		goto fail;
131 
132 	err = ovl_check_fh_len(fh, res);
133 	if (err < 0) {
134 		if (err == -ENODATA)
135 			goto out;
136 		goto invalid;
137 	}
138 
139 	return fh;
140 
141 out:
142 	kfree(fh);
143 	return NULL;
144 
145 fail:
146 	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
147 	goto out;
148 invalid:
149 	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
150 	goto out;
151 }
152 
153 struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
154 				  bool connected)
155 {
156 	struct dentry *real;
157 	int bytes;
158 
159 	/*
160 	 * Make sure that the stored uuid matches the uuid of the lower
161 	 * layer where file handle will be decoded.
162 	 */
163 	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
164 		return NULL;
165 
166 	bytes = (fh->len - offsetof(struct ovl_fh, fid));
167 	real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
168 				  bytes >> 2, (int)fh->type,
169 				  connected ? ovl_acceptable : NULL, mnt);
170 	if (IS_ERR(real)) {
171 		/*
172 		 * Treat stale file handle to lower file as "origin unknown".
173 		 * upper file handle could become stale when upper file is
174 		 * unlinked and this information is needed to handle stale
175 		 * index entries correctly.
176 		 */
177 		if (real == ERR_PTR(-ESTALE) &&
178 		    !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
179 			real = NULL;
180 		return real;
181 	}
182 
183 	if (ovl_dentry_weird(real)) {
184 		dput(real);
185 		return NULL;
186 	}
187 
188 	return real;
189 }
190 
/* Report whether @dentry carries the OVL_XATTR_OPAQUE directory xattr */
static bool ovl_is_opaquedir(struct dentry *dentry)
{
	return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
}
195 
196 static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
197 			     const char *name, unsigned int namelen,
198 			     size_t prelen, const char *post,
199 			     struct dentry **ret)
200 {
201 	struct dentry *this;
202 	int err;
203 	bool last_element = !post[0];
204 
205 	this = lookup_one_len_unlocked(name, base, namelen);
206 	if (IS_ERR(this)) {
207 		err = PTR_ERR(this);
208 		this = NULL;
209 		if (err == -ENOENT || err == -ENAMETOOLONG)
210 			goto out;
211 		goto out_err;
212 	}
213 	if (!this->d_inode)
214 		goto put_and_out;
215 
216 	if (ovl_dentry_weird(this)) {
217 		/* Don't support traversing automounts and other weirdness */
218 		err = -EREMOTE;
219 		goto out_err;
220 	}
221 	if (ovl_is_whiteout(this)) {
222 		d->stop = d->opaque = true;
223 		goto put_and_out;
224 	}
225 	/*
226 	 * This dentry should be a regular file if previous layer lookup
227 	 * found a metacopy dentry.
228 	 */
229 	if (last_element && d->metacopy && !d_is_reg(this)) {
230 		d->stop = true;
231 		goto put_and_out;
232 	}
233 	if (!d_can_lookup(this)) {
234 		if (d->is_dir || !last_element) {
235 			d->stop = true;
236 			goto put_and_out;
237 		}
238 		err = ovl_check_metacopy_xattr(this);
239 		if (err < 0)
240 			goto out_err;
241 
242 		d->metacopy = err;
243 		d->stop = !d->metacopy;
244 		if (!d->metacopy || d->last)
245 			goto out;
246 	} else {
247 		if (last_element)
248 			d->is_dir = true;
249 		if (d->last)
250 			goto out;
251 
252 		if (ovl_is_opaquedir(this)) {
253 			d->stop = true;
254 			if (last_element)
255 				d->opaque = true;
256 			goto out;
257 		}
258 	}
259 	err = ovl_check_redirect(this, d, prelen, post);
260 	if (err)
261 		goto out_err;
262 out:
263 	*ret = this;
264 	return 0;
265 
266 put_and_out:
267 	dput(this);
268 	this = NULL;
269 	goto out;
270 
271 out_err:
272 	dput(this);
273 	return err;
274 }
275 
276 static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
277 			    struct dentry **ret)
278 {
279 	/* Counting down from the end, since the prefix can change */
280 	size_t rem = d->name.len - 1;
281 	struct dentry *dentry = NULL;
282 	int err;
283 
284 	if (d->name.name[0] != '/')
285 		return ovl_lookup_single(base, d, d->name.name, d->name.len,
286 					 0, "", ret);
287 
288 	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
289 		const char *s = d->name.name + d->name.len - rem;
290 		const char *next = strchrnul(s, '/');
291 		size_t thislen = next - s;
292 		bool end = !next[0];
293 
294 		/* Verify we did not go off the rails */
295 		if (WARN_ON(s[-1] != '/'))
296 			return -EIO;
297 
298 		err = ovl_lookup_single(base, d, s, thislen,
299 					d->name.len - rem, next, &base);
300 		dput(dentry);
301 		if (err)
302 			return err;
303 		dentry = base;
304 		if (end)
305 			break;
306 
307 		rem -= thislen + 1;
308 
309 		if (WARN_ON(rem >= d->name.len))
310 			return -EIO;
311 	}
312 	*ret = dentry;
313 	return 0;
314 }
315 
316 
317 int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
318 			struct dentry *upperdentry, struct ovl_path **stackp)
319 {
320 	struct dentry *origin = NULL;
321 	int i;
322 
323 	for (i = 0; i < ofs->numlower; i++) {
324 		origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
325 					    connected);
326 		if (origin)
327 			break;
328 	}
329 
330 	if (!origin)
331 		return -ESTALE;
332 	else if (IS_ERR(origin))
333 		return PTR_ERR(origin);
334 
335 	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
336 	    ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
337 		goto invalid;
338 
339 	if (!*stackp)
340 		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
341 	if (!*stackp) {
342 		dput(origin);
343 		return -ENOMEM;
344 	}
345 	**stackp = (struct ovl_path){
346 		.dentry = origin,
347 		.layer = &ofs->lower_layers[i]
348 	};
349 
350 	return 0;
351 
352 invalid:
353 	pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
354 			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
355 			    d_inode(origin)->i_mode & S_IFMT);
356 	dput(origin);
357 	return -EIO;
358 }
359 
360 static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
361 			    struct ovl_path **stackp, unsigned int *ctrp)
362 {
363 	struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
364 	int err;
365 
366 	if (IS_ERR_OR_NULL(fh))
367 		return PTR_ERR(fh);
368 
369 	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
370 	kfree(fh);
371 
372 	if (err) {
373 		if (err == -ESTALE)
374 			return 0;
375 		return err;
376 	}
377 
378 	if (WARN_ON(*ctrp))
379 		return -EIO;
380 
381 	*ctrp = 1;
382 	return 0;
383 }
384 
385 /*
386  * Verify that @fh matches the file handle stored in xattr @name.
387  * Return 0 on match, -ESTALE on mismatch, < 0 on error.
388  */
389 static int ovl_verify_fh(struct dentry *dentry, const char *name,
390 			 const struct ovl_fh *fh)
391 {
392 	struct ovl_fh *ofh = ovl_get_fh(dentry, name);
393 	int err = 0;
394 
395 	if (!ofh)
396 		return -ENODATA;
397 
398 	if (IS_ERR(ofh))
399 		return PTR_ERR(ofh);
400 
401 	if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
402 		err = -ESTALE;
403 
404 	kfree(ofh);
405 	return err;
406 }
407 
408 /*
409  * Verify that @real dentry matches the file handle stored in xattr @name.
410  *
411  * If @set is true and there is no stored file handle, encode @real and store
412  * file handle in xattr @name.
413  *
414  * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
415  */
416 int ovl_verify_set_fh(struct dentry *dentry, const char *name,
417 		      struct dentry *real, bool is_upper, bool set)
418 {
419 	struct inode *inode;
420 	struct ovl_fh *fh;
421 	int err;
422 
423 	fh = ovl_encode_real_fh(real, is_upper);
424 	err = PTR_ERR(fh);
425 	if (IS_ERR(fh))
426 		goto fail;
427 
428 	err = ovl_verify_fh(dentry, name, fh);
429 	if (set && err == -ENODATA)
430 		err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
431 	if (err)
432 		goto fail;
433 
434 out:
435 	kfree(fh);
436 	return err;
437 
438 fail:
439 	inode = d_inode(real);
440 	pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
441 			    is_upper ? "upper" : "origin", real,
442 			    inode ? inode->i_ino : 0, err);
443 	goto out;
444 }
445 
446 /* Get upper dentry from index */
447 struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
448 {
449 	struct ovl_fh *fh;
450 	struct dentry *upper;
451 
452 	if (!d_is_dir(index))
453 		return dget(index);
454 
455 	fh = ovl_get_fh(index, OVL_XATTR_UPPER);
456 	if (IS_ERR_OR_NULL(fh))
457 		return ERR_CAST(fh);
458 
459 	upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
460 	kfree(fh);
461 
462 	if (IS_ERR_OR_NULL(upper))
463 		return upper ?: ERR_PTR(-ESTALE);
464 
465 	if (!d_is_dir(upper)) {
466 		pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
467 				    index, upper);
468 		dput(upper);
469 		return ERR_PTR(-EIO);
470 	}
471 
472 	return upper;
473 }
474 
/*
 * Is this a leftover from create/whiteout of directory index entry?
 * Such entries are recognized by a name that starts with '#'.
 */
static bool ovl_is_temp_index(struct dentry *index)
{
	return index->d_name.name[0] == '#';
}
480 
481 /*
482  * Verify that an index entry name matches the origin file handle stored in
483  * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
484  * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
485  */
486 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
487 {
488 	struct ovl_fh *fh = NULL;
489 	size_t len;
490 	struct ovl_path origin = { };
491 	struct ovl_path *stack = &origin;
492 	struct dentry *upper = NULL;
493 	int err;
494 
495 	if (!d_inode(index))
496 		return 0;
497 
498 	/* Cleanup leftover from index create/cleanup attempt */
499 	err = -ESTALE;
500 	if (ovl_is_temp_index(index))
501 		goto fail;
502 
503 	err = -EINVAL;
504 	if (index->d_name.len < sizeof(struct ovl_fh)*2)
505 		goto fail;
506 
507 	err = -ENOMEM;
508 	len = index->d_name.len / 2;
509 	fh = kzalloc(len, GFP_KERNEL);
510 	if (!fh)
511 		goto fail;
512 
513 	err = -EINVAL;
514 	if (hex2bin((u8 *)fh, index->d_name.name, len))
515 		goto fail;
516 
517 	err = ovl_check_fh_len(fh, len);
518 	if (err)
519 		goto fail;
520 
521 	/*
522 	 * Whiteout index entries are used as an indication that an exported
523 	 * overlay file handle should be treated as stale (i.e. after unlink
524 	 * of the overlay inode). These entries contain no origin xattr.
525 	 */
526 	if (ovl_is_whiteout(index))
527 		goto out;
528 
529 	/*
530 	 * Verifying directory index entries are not stale is expensive, so
531 	 * only verify stale dir index if NFS export is enabled.
532 	 */
533 	if (d_is_dir(index) && !ofs->config.nfs_export)
534 		goto out;
535 
536 	/*
537 	 * Directory index entries should have 'upper' xattr pointing to the
538 	 * real upper dir. Non-dir index entries are hardlinks to the upper
539 	 * real inode. For non-dir index, we can read the copy up origin xattr
540 	 * directly from the index dentry, but for dir index we first need to
541 	 * decode the upper directory.
542 	 */
543 	upper = ovl_index_upper(ofs, index);
544 	if (IS_ERR_OR_NULL(upper)) {
545 		err = PTR_ERR(upper);
546 		/*
547 		 * Directory index entries with no 'upper' xattr need to be
548 		 * removed. When dir index entry has a stale 'upper' xattr,
549 		 * we assume that upper dir was removed and we treat the dir
550 		 * index as orphan entry that needs to be whited out.
551 		 */
552 		if (err == -ESTALE)
553 			goto orphan;
554 		else if (!err)
555 			err = -ESTALE;
556 		goto fail;
557 	}
558 
559 	err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
560 	dput(upper);
561 	if (err)
562 		goto fail;
563 
564 	/* Check if non-dir index is orphan and don't warn before cleaning it */
565 	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
566 		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
567 		if (err)
568 			goto fail;
569 
570 		if (ovl_get_nlink(origin.dentry, index, 0) == 0)
571 			goto orphan;
572 	}
573 
574 out:
575 	dput(origin.dentry);
576 	kfree(fh);
577 	return err;
578 
579 fail:
580 	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
581 			    index, d_inode(index)->i_mode & S_IFMT, err);
582 	goto out;
583 
584 orphan:
585 	pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
586 			    index, d_inode(index)->i_mode & S_IFMT,
587 			    d_inode(index)->i_nlink);
588 	err = -ENOENT;
589 	goto out;
590 }
591 
592 static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
593 {
594 	char *n, *s;
595 
596 	n = kcalloc(fh->len, 2, GFP_KERNEL);
597 	if (!n)
598 		return -ENOMEM;
599 
600 	s  = bin2hex(n, fh, fh->len);
601 	*name = (struct qstr) QSTR_INIT(n, s - n);
602 
603 	return 0;
604 
605 }
606 
607 /*
608  * Lookup in indexdir for the index entry of a lower real inode or a copy up
609  * origin inode. The index entry name is the hex representation of the lower
610  * inode file handle.
611  *
612  * If the index dentry in negative, then either no lower aliases have been
613  * copied up yet, or aliases have been copied up in older kernels and are
614  * not indexed.
615  *
616  * If the index dentry for a copy up origin inode is positive, but points
617  * to an inode different than the upper inode, then either the upper inode
618  * has been copied up and not indexed or it was indexed, but since then
619  * index dir was cleared. Either way, that index cannot be used to indentify
620  * the overlay inode.
621  */
622 int ovl_get_index_name(struct dentry *origin, struct qstr *name)
623 {
624 	struct ovl_fh *fh;
625 	int err;
626 
627 	fh = ovl_encode_real_fh(origin, false);
628 	if (IS_ERR(fh))
629 		return PTR_ERR(fh);
630 
631 	err = ovl_get_index_name_fh(fh, name);
632 
633 	kfree(fh);
634 	return err;
635 }
636 
637 /* Lookup index by file handle for NFS export */
638 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
639 {
640 	struct dentry *index;
641 	struct qstr name;
642 	int err;
643 
644 	err = ovl_get_index_name_fh(fh, &name);
645 	if (err)
646 		return ERR_PTR(err);
647 
648 	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
649 	kfree(name.name);
650 	if (IS_ERR(index)) {
651 		if (PTR_ERR(index) == -ENOENT)
652 			index = NULL;
653 		return index;
654 	}
655 
656 	if (d_is_negative(index))
657 		err = 0;
658 	else if (ovl_is_whiteout(index))
659 		err = -ESTALE;
660 	else if (ovl_dentry_weird(index))
661 		err = -EIO;
662 	else
663 		return index;
664 
665 	dput(index);
666 	return ERR_PTR(err);
667 }
668 
669 struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
670 				struct dentry *origin, bool verify)
671 {
672 	struct dentry *index;
673 	struct inode *inode;
674 	struct qstr name;
675 	bool is_dir = d_is_dir(origin);
676 	int err;
677 
678 	err = ovl_get_index_name(origin, &name);
679 	if (err)
680 		return ERR_PTR(err);
681 
682 	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
683 	if (IS_ERR(index)) {
684 		err = PTR_ERR(index);
685 		if (err == -ENOENT) {
686 			index = NULL;
687 			goto out;
688 		}
689 		pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
690 				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
691 				    d_inode(origin)->i_ino, name.len, name.name,
692 				    err);
693 		goto out;
694 	}
695 
696 	inode = d_inode(index);
697 	if (d_is_negative(index)) {
698 		goto out_dput;
699 	} else if (ovl_is_whiteout(index) && !verify) {
700 		/*
701 		 * When index lookup is called with !verify for decoding an
702 		 * overlay file handle, a whiteout index implies that decode
703 		 * should treat file handle as stale and no need to print a
704 		 * warning about it.
705 		 */
706 		dput(index);
707 		index = ERR_PTR(-ESTALE);
708 		goto out;
709 	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
710 		   ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
711 		/*
712 		 * Index should always be of the same file type as origin
713 		 * except for the case of a whiteout index. A whiteout
714 		 * index should only exist if all lower aliases have been
715 		 * unlinked, which means that finding a lower origin on lookup
716 		 * whose index is a whiteout should be treated as an error.
717 		 */
718 		pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
719 				    index, d_inode(index)->i_mode & S_IFMT,
720 				    d_inode(origin)->i_mode & S_IFMT);
721 		goto fail;
722 	} else if (is_dir && verify) {
723 		if (!upper) {
724 			pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
725 					    origin, index);
726 			goto fail;
727 		}
728 
729 		/* Verify that dir index 'upper' xattr points to upper dir */
730 		err = ovl_verify_upper(index, upper, false);
731 		if (err) {
732 			if (err == -ESTALE) {
733 				pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
734 						    upper, origin, index);
735 			}
736 			goto fail;
737 		}
738 	} else if (upper && d_inode(upper) != inode) {
739 		goto out_dput;
740 	}
741 out:
742 	kfree(name.name);
743 	return index;
744 
745 out_dput:
746 	dput(index);
747 	index = NULL;
748 	goto out;
749 
750 fail:
751 	dput(index);
752 	index = ERR_PTR(-EIO);
753 	goto out;
754 }
755 
756 /*
757  * Returns next layer in stack starting from top.
758  * Returns -1 if this is the last layer.
759  */
760 int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
761 {
762 	struct ovl_entry *oe = dentry->d_fsdata;
763 
764 	BUG_ON(idx < 0);
765 	if (idx == 0) {
766 		ovl_path_upper(dentry, path);
767 		if (path->dentry)
768 			return oe->numlower ? 1 : -1;
769 		idx++;
770 	}
771 	BUG_ON(idx > oe->numlower);
772 	path->dentry = oe->lowerstack[idx - 1].dentry;
773 	path->mnt = oe->lowerstack[idx - 1].layer->mnt;
774 
775 	return (idx < oe->numlower) ? idx + 1 : -1;
776 }
777 
778 /* Fix missing 'origin' xattr */
779 static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
780 			  struct dentry *upper)
781 {
782 	int err;
783 
784 	if (ovl_check_origin_xattr(upper))
785 		return 0;
786 
787 	err = ovl_want_write(dentry);
788 	if (err)
789 		return err;
790 
791 	err = ovl_set_origin(dentry, lower, upper);
792 	if (!err)
793 		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
794 
795 	ovl_drop_write(dentry);
796 	return err;
797 }
798 
799 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
800 			  unsigned int flags)
801 {
802 	struct ovl_entry *oe;
803 	const struct cred *old_cred;
804 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
805 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
806 	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
807 	struct ovl_path *stack = NULL, *origin_path = NULL;
808 	struct dentry *upperdir, *upperdentry = NULL;
809 	struct dentry *origin = NULL;
810 	struct dentry *index = NULL;
811 	unsigned int ctr = 0;
812 	struct inode *inode = NULL;
813 	bool upperopaque = false;
814 	char *upperredirect = NULL;
815 	struct dentry *this;
816 	unsigned int i;
817 	int err;
818 	bool metacopy = false;
819 	struct ovl_lookup_data d = {
820 		.name = dentry->d_name,
821 		.is_dir = false,
822 		.opaque = false,
823 		.stop = false,
824 		.last = ofs->config.redirect_follow ? false : !poe->numlower,
825 		.redirect = NULL,
826 		.metacopy = false,
827 	};
828 
829 	if (dentry->d_name.len > ofs->namelen)
830 		return ERR_PTR(-ENAMETOOLONG);
831 
832 	old_cred = ovl_override_creds(dentry->d_sb);
833 	upperdir = ovl_dentry_upper(dentry->d_parent);
834 	if (upperdir) {
835 		err = ovl_lookup_layer(upperdir, &d, &upperdentry);
836 		if (err)
837 			goto out;
838 
839 		if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) {
840 			dput(upperdentry);
841 			err = -EREMOTE;
842 			goto out;
843 		}
844 		if (upperdentry && !d.is_dir) {
845 			unsigned int origin_ctr = 0;
846 
847 			/*
848 			 * Lookup copy up origin by decoding origin file handle.
849 			 * We may get a disconnected dentry, which is fine,
850 			 * because we only need to hold the origin inode in
851 			 * cache and use its inode number.  We may even get a
852 			 * connected dentry, that is not under any of the lower
853 			 * layers root.  That is also fine for using it's inode
854 			 * number - it's the same as if we held a reference
855 			 * to a dentry in lower layer that was moved under us.
856 			 */
857 			err = ovl_check_origin(ofs, upperdentry, &origin_path,
858 					       &origin_ctr);
859 			if (err)
860 				goto out_put_upper;
861 
862 			if (d.metacopy)
863 				metacopy = true;
864 		}
865 
866 		if (d.redirect) {
867 			err = -ENOMEM;
868 			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
869 			if (!upperredirect)
870 				goto out_put_upper;
871 			if (d.redirect[0] == '/')
872 				poe = roe;
873 		}
874 		upperopaque = d.opaque;
875 	}
876 
877 	if (!d.stop && poe->numlower) {
878 		err = -ENOMEM;
879 		stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
880 				GFP_KERNEL);
881 		if (!stack)
882 			goto out_put_upper;
883 	}
884 
885 	for (i = 0; !d.stop && i < poe->numlower; i++) {
886 		struct ovl_path lower = poe->lowerstack[i];
887 
888 		if (!ofs->config.redirect_follow)
889 			d.last = i == poe->numlower - 1;
890 		else
891 			d.last = lower.layer->idx == roe->numlower;
892 
893 		err = ovl_lookup_layer(lower.dentry, &d, &this);
894 		if (err)
895 			goto out_put;
896 
897 		if (!this)
898 			continue;
899 
900 		/*
901 		 * If no origin fh is stored in upper of a merge dir, store fh
902 		 * of lower dir and set upper parent "impure".
903 		 */
904 		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
905 			err = ovl_fix_origin(dentry, this, upperdentry);
906 			if (err) {
907 				dput(this);
908 				goto out_put;
909 			}
910 		}
911 
912 		/*
913 		 * When "verify_lower" feature is enabled, do not merge with a
914 		 * lower dir that does not match a stored origin xattr. In any
915 		 * case, only verified origin is used for index lookup.
916 		 *
917 		 * For non-dir dentry, if index=on, then ensure origin
918 		 * matches the dentry found using path based lookup,
919 		 * otherwise error out.
920 		 */
921 		if (upperdentry && !ctr &&
922 		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
923 		     (!d.is_dir && ofs->config.index && origin_path))) {
924 			err = ovl_verify_origin(upperdentry, this, false);
925 			if (err) {
926 				dput(this);
927 				if (d.is_dir)
928 					break;
929 				goto out_put;
930 			}
931 			origin = this;
932 		}
933 
934 		if (d.metacopy)
935 			metacopy = true;
936 		/*
937 		 * Do not store intermediate metacopy dentries in chain,
938 		 * except top most lower metacopy dentry
939 		 */
940 		if (d.metacopy && ctr) {
941 			dput(this);
942 			continue;
943 		}
944 
945 		stack[ctr].dentry = this;
946 		stack[ctr].layer = lower.layer;
947 		ctr++;
948 
949 		/*
950 		 * Following redirects can have security consequences: it's like
951 		 * a symlink into the lower layer without the permission checks.
952 		 * This is only a problem if the upper layer is untrusted (e.g
953 		 * comes from an USB drive).  This can allow a non-readable file
954 		 * or directory to become readable.
955 		 *
956 		 * Only following redirects when redirects are enabled disables
957 		 * this attack vector when not necessary.
958 		 */
959 		err = -EPERM;
960 		if (d.redirect && !ofs->config.redirect_follow) {
961 			pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
962 					    dentry);
963 			goto out_put;
964 		}
965 
966 		if (d.stop)
967 			break;
968 
969 		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
970 			poe = roe;
971 			/* Find the current layer on the root dentry */
972 			i = lower.layer->idx - 1;
973 		}
974 	}
975 
976 	if (metacopy) {
977 		/*
978 		 * Found a metacopy dentry but did not find corresponding
979 		 * data dentry
980 		 */
981 		if (d.metacopy) {
982 			err = -EIO;
983 			goto out_put;
984 		}
985 
986 		err = -EPERM;
987 		if (!ofs->config.metacopy) {
988 			pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
989 					    dentry);
990 			goto out_put;
991 		}
992 	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
993 		if (WARN_ON(stack != NULL)) {
994 			err = -EIO;
995 			goto out_put;
996 		}
997 		stack = origin_path;
998 		ctr = 1;
999 		origin_path = NULL;
1000 	}
1001 
1002 	/*
1003 	 * Lookup index by lower inode and verify it matches upper inode.
1004 	 * We only trust dir index if we verified that lower dir matches
1005 	 * origin, otherwise dir index entries may be inconsistent and we
1006 	 * ignore them.
1007 	 *
1008 	 * For non-dir upper metacopy dentry, we already set "origin" if we
1009 	 * verified that lower matched upper origin. If upper origin was
1010 	 * not present (because lower layer did not support fh encode/decode),
1011 	 * or indexing is not enabled, do not set "origin" and skip looking up
1012 	 * index. This case should be handled in same way as a non-dir upper
1013 	 * without ORIGIN is handled.
1014 	 *
1015 	 * Always lookup index of non-dir non-metacopy and non-upper.
1016 	 */
1017 	if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
1018 		origin = stack[0].dentry;
1019 
1020 	if (origin && ovl_indexdir(dentry->d_sb) &&
1021 	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
1022 		index = ovl_lookup_index(ofs, upperdentry, origin, true);
1023 		if (IS_ERR(index)) {
1024 			err = PTR_ERR(index);
1025 			index = NULL;
1026 			goto out_put;
1027 		}
1028 	}
1029 
1030 	oe = ovl_alloc_entry(ctr);
1031 	err = -ENOMEM;
1032 	if (!oe)
1033 		goto out_put;
1034 
1035 	memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
1036 	dentry->d_fsdata = oe;
1037 
1038 	if (upperopaque)
1039 		ovl_dentry_set_opaque(dentry);
1040 
1041 	if (upperdentry)
1042 		ovl_dentry_set_upper_alias(dentry);
1043 	else if (index) {
1044 		upperdentry = dget(index);
1045 		upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
1046 		if (IS_ERR(upperredirect)) {
1047 			err = PTR_ERR(upperredirect);
1048 			upperredirect = NULL;
1049 			goto out_free_oe;
1050 		}
1051 	}
1052 
1053 	if (upperdentry || ctr) {
1054 		struct ovl_inode_params oip = {
1055 			.upperdentry = upperdentry,
1056 			.lowerpath = stack,
1057 			.index = index,
1058 			.numlower = ctr,
1059 			.redirect = upperredirect,
1060 			.lowerdata = (ctr > 1 && !d.is_dir) ?
1061 				      stack[ctr - 1].dentry : NULL,
1062 		};
1063 
1064 		inode = ovl_get_inode(dentry->d_sb, &oip);
1065 		err = PTR_ERR(inode);
1066 		if (IS_ERR(inode))
1067 			goto out_free_oe;
1068 	}
1069 
1070 	revert_creds(old_cred);
1071 	if (origin_path) {
1072 		dput(origin_path->dentry);
1073 		kfree(origin_path);
1074 	}
1075 	dput(index);
1076 	kfree(stack);
1077 	kfree(d.redirect);
1078 	return d_splice_alias(inode, dentry);
1079 
1080 out_free_oe:
1081 	dentry->d_fsdata = NULL;
1082 	kfree(oe);
1083 out_put:
1084 	dput(index);
1085 	for (i = 0; i < ctr; i++)
1086 		dput(stack[i].dentry);
1087 	kfree(stack);
1088 out_put_upper:
1089 	if (origin_path) {
1090 		dput(origin_path->dentry);
1091 		kfree(origin_path);
1092 	}
1093 	dput(upperdentry);
1094 	kfree(upperredirect);
1095 out:
1096 	kfree(d.redirect);
1097 	revert_creds(old_cred);
1098 	return ERR_PTR(err);
1099 }
1100 
1101 bool ovl_lower_positive(struct dentry *dentry)
1102 {
1103 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
1104 	const struct qstr *name = &dentry->d_name;
1105 	const struct cred *old_cred;
1106 	unsigned int i;
1107 	bool positive = false;
1108 	bool done = false;
1109 
1110 	/*
1111 	 * If dentry is negative, then lower is positive iff this is a
1112 	 * whiteout.
1113 	 */
1114 	if (!dentry->d_inode)
1115 		return ovl_dentry_is_opaque(dentry);
1116 
1117 	/* Negative upper -> positive lower */
1118 	if (!ovl_dentry_upper(dentry))
1119 		return true;
1120 
1121 	old_cred = ovl_override_creds(dentry->d_sb);
1122 	/* Positive upper -> have to look up lower to see whether it exists */
1123 	for (i = 0; !done && !positive && i < poe->numlower; i++) {
1124 		struct dentry *this;
1125 		struct dentry *lowerdir = poe->lowerstack[i].dentry;
1126 
1127 		this = lookup_one_len_unlocked(name->name, lowerdir,
1128 					       name->len);
1129 		if (IS_ERR(this)) {
1130 			switch (PTR_ERR(this)) {
1131 			case -ENOENT:
1132 			case -ENAMETOOLONG:
1133 				break;
1134 
1135 			default:
1136 				/*
1137 				 * Assume something is there, we just couldn't
1138 				 * access it.
1139 				 */
1140 				positive = true;
1141 				break;
1142 			}
1143 		} else {
1144 			if (this->d_inode) {
1145 				positive = !ovl_is_whiteout(this);
1146 				done = true;
1147 			}
1148 			dput(this);
1149 		}
1150 	}
1151 	revert_creds(old_cred);
1152 
1153 	return positive;
1154 }
1155