xref: /openbmc/linux/fs/overlayfs/namei.c (revision 09c434b8)
1 /*
2  * Copyright (C) 2011 Novell Inc.
3  * Copyright (C) 2016 Red Hat, Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/cred.h>
12 #include <linux/ctype.h>
13 #include <linux/namei.h>
14 #include <linux/xattr.h>
15 #include <linux/ratelimit.h>
16 #include <linux/mount.h>
17 #include <linux/exportfs.h>
18 #include "overlayfs.h"
19 
20 struct ovl_lookup_data {
21 	struct qstr name;
22 	bool is_dir;
23 	bool opaque;
24 	bool stop;
25 	bool last;
26 	char *redirect;
27 	bool metacopy;
28 };
29 
30 static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
31 			      size_t prelen, const char *post)
32 {
33 	int res;
34 	char *buf;
35 
36 	buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
37 	if (IS_ERR_OR_NULL(buf))
38 		return PTR_ERR(buf);
39 
40 	if (buf[0] == '/') {
41 		/*
42 		 * One of the ancestor path elements in an absolute path
43 		 * lookup in ovl_lookup_layer() could have been opaque and
44 		 * that will stop further lookup in lower layers (d->stop=true)
45 		 * But we have found an absolute redirect in decendant path
46 		 * element and that should force continue lookup in lower
47 		 * layers (reset d->stop).
48 		 */
49 		d->stop = false;
50 	} else {
51 		res = strlen(buf) + 1;
52 		memmove(buf + prelen, buf, res);
53 		memcpy(buf, d->name.name, prelen);
54 	}
55 
56 	strcat(buf, post);
57 	kfree(d->redirect);
58 	d->redirect = buf;
59 	d->name.name = d->redirect;
60 	d->name.len = strlen(d->redirect);
61 
62 	return 0;
63 }
64 
65 static int ovl_acceptable(void *ctx, struct dentry *dentry)
66 {
67 	/*
68 	 * A non-dir origin may be disconnected, which is fine, because
69 	 * we only need it for its unique inode number.
70 	 */
71 	if (!d_is_dir(dentry))
72 		return 1;
73 
74 	/* Don't decode a deleted empty directory */
75 	if (d_unhashed(dentry))
76 		return 0;
77 
78 	/* Check if directory belongs to the layer we are decoding from */
79 	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
80 }
81 
82 /*
83  * Check validity of an overlay file handle buffer.
84  *
85  * Return 0 for a valid file handle.
86  * Return -ENODATA for "origin unknown".
87  * Return <0 for an invalid file handle.
88  */
89 int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
90 {
91 	if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
92 		return -EINVAL;
93 
94 	if (fh->magic != OVL_FH_MAGIC)
95 		return -EINVAL;
96 
97 	/* Treat larger version and unknown flags as "origin unknown" */
98 	if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
99 		return -ENODATA;
100 
101 	/* Treat endianness mismatch as "origin unknown" */
102 	if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
103 	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
104 		return -ENODATA;
105 
106 	return 0;
107 }
108 
109 static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
110 {
111 	int res, err;
112 	struct ovl_fh *fh = NULL;
113 
114 	res = vfs_getxattr(dentry, name, NULL, 0);
115 	if (res < 0) {
116 		if (res == -ENODATA || res == -EOPNOTSUPP)
117 			return NULL;
118 		goto fail;
119 	}
120 	/* Zero size value means "copied up but origin unknown" */
121 	if (res == 0)
122 		return NULL;
123 
124 	fh = kzalloc(res, GFP_KERNEL);
125 	if (!fh)
126 		return ERR_PTR(-ENOMEM);
127 
128 	res = vfs_getxattr(dentry, name, fh, res);
129 	if (res < 0)
130 		goto fail;
131 
132 	err = ovl_check_fh_len(fh, res);
133 	if (err < 0) {
134 		if (err == -ENODATA)
135 			goto out;
136 		goto invalid;
137 	}
138 
139 	return fh;
140 
141 out:
142 	kfree(fh);
143 	return NULL;
144 
145 fail:
146 	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
147 	goto out;
148 invalid:
149 	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
150 	goto out;
151 }
152 
153 struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
154 				  bool connected)
155 {
156 	struct dentry *real;
157 	int bytes;
158 
159 	/*
160 	 * Make sure that the stored uuid matches the uuid of the lower
161 	 * layer where file handle will be decoded.
162 	 */
163 	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
164 		return NULL;
165 
166 	bytes = (fh->len - offsetof(struct ovl_fh, fid));
167 	real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
168 				  bytes >> 2, (int)fh->type,
169 				  connected ? ovl_acceptable : NULL, mnt);
170 	if (IS_ERR(real)) {
171 		/*
172 		 * Treat stale file handle to lower file as "origin unknown".
173 		 * upper file handle could become stale when upper file is
174 		 * unlinked and this information is needed to handle stale
175 		 * index entries correctly.
176 		 */
177 		if (real == ERR_PTR(-ESTALE) &&
178 		    !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
179 			real = NULL;
180 		return real;
181 	}
182 
183 	if (ovl_dentry_weird(real)) {
184 		dput(real);
185 		return NULL;
186 	}
187 
188 	return real;
189 }
190 
191 static bool ovl_is_opaquedir(struct dentry *dentry)
192 {
193 	return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
194 }
195 
196 static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
197 			     const char *name, unsigned int namelen,
198 			     size_t prelen, const char *post,
199 			     struct dentry **ret)
200 {
201 	struct dentry *this;
202 	int err;
203 	bool last_element = !post[0];
204 
205 	this = lookup_one_len_unlocked(name, base, namelen);
206 	if (IS_ERR(this)) {
207 		err = PTR_ERR(this);
208 		this = NULL;
209 		if (err == -ENOENT || err == -ENAMETOOLONG)
210 			goto out;
211 		goto out_err;
212 	}
213 	if (!this->d_inode)
214 		goto put_and_out;
215 
216 	if (ovl_dentry_weird(this)) {
217 		/* Don't support traversing automounts and other weirdness */
218 		err = -EREMOTE;
219 		goto out_err;
220 	}
221 	if (ovl_is_whiteout(this)) {
222 		d->stop = d->opaque = true;
223 		goto put_and_out;
224 	}
225 	/*
226 	 * This dentry should be a regular file if previous layer lookup
227 	 * found a metacopy dentry.
228 	 */
229 	if (last_element && d->metacopy && !d_is_reg(this)) {
230 		d->stop = true;
231 		goto put_and_out;
232 	}
233 	if (!d_can_lookup(this)) {
234 		if (d->is_dir || !last_element) {
235 			d->stop = true;
236 			goto put_and_out;
237 		}
238 		err = ovl_check_metacopy_xattr(this);
239 		if (err < 0)
240 			goto out_err;
241 
242 		d->metacopy = err;
243 		d->stop = !d->metacopy;
244 		if (!d->metacopy || d->last)
245 			goto out;
246 	} else {
247 		if (last_element)
248 			d->is_dir = true;
249 		if (d->last)
250 			goto out;
251 
252 		if (ovl_is_opaquedir(this)) {
253 			d->stop = true;
254 			if (last_element)
255 				d->opaque = true;
256 			goto out;
257 		}
258 	}
259 	err = ovl_check_redirect(this, d, prelen, post);
260 	if (err)
261 		goto out_err;
262 out:
263 	*ret = this;
264 	return 0;
265 
266 put_and_out:
267 	dput(this);
268 	this = NULL;
269 	goto out;
270 
271 out_err:
272 	dput(this);
273 	return err;
274 }
275 
276 static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
277 			    struct dentry **ret)
278 {
279 	/* Counting down from the end, since the prefix can change */
280 	size_t rem = d->name.len - 1;
281 	struct dentry *dentry = NULL;
282 	int err;
283 
284 	if (d->name.name[0] != '/')
285 		return ovl_lookup_single(base, d, d->name.name, d->name.len,
286 					 0, "", ret);
287 
288 	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
289 		const char *s = d->name.name + d->name.len - rem;
290 		const char *next = strchrnul(s, '/');
291 		size_t thislen = next - s;
292 		bool end = !next[0];
293 
294 		/* Verify we did not go off the rails */
295 		if (WARN_ON(s[-1] != '/'))
296 			return -EIO;
297 
298 		err = ovl_lookup_single(base, d, s, thislen,
299 					d->name.len - rem, next, &base);
300 		dput(dentry);
301 		if (err)
302 			return err;
303 		dentry = base;
304 		if (end)
305 			break;
306 
307 		rem -= thislen + 1;
308 
309 		if (WARN_ON(rem >= d->name.len))
310 			return -EIO;
311 	}
312 	*ret = dentry;
313 	return 0;
314 }
315 
316 
317 int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
318 			struct dentry *upperdentry, struct ovl_path **stackp)
319 {
320 	struct dentry *origin = NULL;
321 	int i;
322 
323 	for (i = 0; i < ofs->numlower; i++) {
324 		origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
325 					    connected);
326 		if (origin)
327 			break;
328 	}
329 
330 	if (!origin)
331 		return -ESTALE;
332 	else if (IS_ERR(origin))
333 		return PTR_ERR(origin);
334 
335 	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
336 	    ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
337 		goto invalid;
338 
339 	if (!*stackp)
340 		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
341 	if (!*stackp) {
342 		dput(origin);
343 		return -ENOMEM;
344 	}
345 	**stackp = (struct ovl_path){
346 		.dentry = origin,
347 		.layer = &ofs->lower_layers[i]
348 	};
349 
350 	return 0;
351 
352 invalid:
353 	pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
354 			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
355 			    d_inode(origin)->i_mode & S_IFMT);
356 	dput(origin);
357 	return -EIO;
358 }
359 
360 static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
361 			    struct ovl_path **stackp, unsigned int *ctrp)
362 {
363 	struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
364 	int err;
365 
366 	if (IS_ERR_OR_NULL(fh))
367 		return PTR_ERR(fh);
368 
369 	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
370 	kfree(fh);
371 
372 	if (err) {
373 		if (err == -ESTALE)
374 			return 0;
375 		return err;
376 	}
377 
378 	if (WARN_ON(*ctrp))
379 		return -EIO;
380 
381 	*ctrp = 1;
382 	return 0;
383 }
384 
385 /*
386  * Verify that @fh matches the file handle stored in xattr @name.
387  * Return 0 on match, -ESTALE on mismatch, < 0 on error.
388  */
389 static int ovl_verify_fh(struct dentry *dentry, const char *name,
390 			 const struct ovl_fh *fh)
391 {
392 	struct ovl_fh *ofh = ovl_get_fh(dentry, name);
393 	int err = 0;
394 
395 	if (!ofh)
396 		return -ENODATA;
397 
398 	if (IS_ERR(ofh))
399 		return PTR_ERR(ofh);
400 
401 	if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
402 		err = -ESTALE;
403 
404 	kfree(ofh);
405 	return err;
406 }
407 
408 /*
409  * Verify that @real dentry matches the file handle stored in xattr @name.
410  *
411  * If @set is true and there is no stored file handle, encode @real and store
412  * file handle in xattr @name.
413  *
414  * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
415  */
416 int ovl_verify_set_fh(struct dentry *dentry, const char *name,
417 		      struct dentry *real, bool is_upper, bool set)
418 {
419 	struct inode *inode;
420 	struct ovl_fh *fh;
421 	int err;
422 
423 	fh = ovl_encode_real_fh(real, is_upper);
424 	err = PTR_ERR(fh);
425 	if (IS_ERR(fh)) {
426 		fh = NULL;
427 		goto fail;
428 	}
429 
430 	err = ovl_verify_fh(dentry, name, fh);
431 	if (set && err == -ENODATA)
432 		err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
433 	if (err)
434 		goto fail;
435 
436 out:
437 	kfree(fh);
438 	return err;
439 
440 fail:
441 	inode = d_inode(real);
442 	pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
443 			    is_upper ? "upper" : "origin", real,
444 			    inode ? inode->i_ino : 0, err);
445 	goto out;
446 }
447 
448 /* Get upper dentry from index */
449 struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
450 {
451 	struct ovl_fh *fh;
452 	struct dentry *upper;
453 
454 	if (!d_is_dir(index))
455 		return dget(index);
456 
457 	fh = ovl_get_fh(index, OVL_XATTR_UPPER);
458 	if (IS_ERR_OR_NULL(fh))
459 		return ERR_CAST(fh);
460 
461 	upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
462 	kfree(fh);
463 
464 	if (IS_ERR_OR_NULL(upper))
465 		return upper ?: ERR_PTR(-ESTALE);
466 
467 	if (!d_is_dir(upper)) {
468 		pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
469 				    index, upper);
470 		dput(upper);
471 		return ERR_PTR(-EIO);
472 	}
473 
474 	return upper;
475 }
476 
477 /* Is this a leftover from create/whiteout of directory index entry? */
478 static bool ovl_is_temp_index(struct dentry *index)
479 {
480 	return index->d_name.name[0] == '#';
481 }
482 
483 /*
484  * Verify that an index entry name matches the origin file handle stored in
485  * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
486  * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
487  */
488 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
489 {
490 	struct ovl_fh *fh = NULL;
491 	size_t len;
492 	struct ovl_path origin = { };
493 	struct ovl_path *stack = &origin;
494 	struct dentry *upper = NULL;
495 	int err;
496 
497 	if (!d_inode(index))
498 		return 0;
499 
500 	/* Cleanup leftover from index create/cleanup attempt */
501 	err = -ESTALE;
502 	if (ovl_is_temp_index(index))
503 		goto fail;
504 
505 	err = -EINVAL;
506 	if (index->d_name.len < sizeof(struct ovl_fh)*2)
507 		goto fail;
508 
509 	err = -ENOMEM;
510 	len = index->d_name.len / 2;
511 	fh = kzalloc(len, GFP_KERNEL);
512 	if (!fh)
513 		goto fail;
514 
515 	err = -EINVAL;
516 	if (hex2bin((u8 *)fh, index->d_name.name, len))
517 		goto fail;
518 
519 	err = ovl_check_fh_len(fh, len);
520 	if (err)
521 		goto fail;
522 
523 	/*
524 	 * Whiteout index entries are used as an indication that an exported
525 	 * overlay file handle should be treated as stale (i.e. after unlink
526 	 * of the overlay inode). These entries contain no origin xattr.
527 	 */
528 	if (ovl_is_whiteout(index))
529 		goto out;
530 
531 	/*
532 	 * Verifying directory index entries are not stale is expensive, so
533 	 * only verify stale dir index if NFS export is enabled.
534 	 */
535 	if (d_is_dir(index) && !ofs->config.nfs_export)
536 		goto out;
537 
538 	/*
539 	 * Directory index entries should have 'upper' xattr pointing to the
540 	 * real upper dir. Non-dir index entries are hardlinks to the upper
541 	 * real inode. For non-dir index, we can read the copy up origin xattr
542 	 * directly from the index dentry, but for dir index we first need to
543 	 * decode the upper directory.
544 	 */
545 	upper = ovl_index_upper(ofs, index);
546 	if (IS_ERR_OR_NULL(upper)) {
547 		err = PTR_ERR(upper);
548 		/*
549 		 * Directory index entries with no 'upper' xattr need to be
550 		 * removed. When dir index entry has a stale 'upper' xattr,
551 		 * we assume that upper dir was removed and we treat the dir
552 		 * index as orphan entry that needs to be whited out.
553 		 */
554 		if (err == -ESTALE)
555 			goto orphan;
556 		else if (!err)
557 			err = -ESTALE;
558 		goto fail;
559 	}
560 
561 	err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
562 	dput(upper);
563 	if (err)
564 		goto fail;
565 
566 	/* Check if non-dir index is orphan and don't warn before cleaning it */
567 	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
568 		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
569 		if (err)
570 			goto fail;
571 
572 		if (ovl_get_nlink(origin.dentry, index, 0) == 0)
573 			goto orphan;
574 	}
575 
576 out:
577 	dput(origin.dentry);
578 	kfree(fh);
579 	return err;
580 
581 fail:
582 	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
583 			    index, d_inode(index)->i_mode & S_IFMT, err);
584 	goto out;
585 
586 orphan:
587 	pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
588 			    index, d_inode(index)->i_mode & S_IFMT,
589 			    d_inode(index)->i_nlink);
590 	err = -ENOENT;
591 	goto out;
592 }
593 
594 static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
595 {
596 	char *n, *s;
597 
598 	n = kcalloc(fh->len, 2, GFP_KERNEL);
599 	if (!n)
600 		return -ENOMEM;
601 
602 	s  = bin2hex(n, fh, fh->len);
603 	*name = (struct qstr) QSTR_INIT(n, s - n);
604 
605 	return 0;
606 
607 }
608 
609 /*
610  * Lookup in indexdir for the index entry of a lower real inode or a copy up
611  * origin inode. The index entry name is the hex representation of the lower
612  * inode file handle.
613  *
614  * If the index dentry in negative, then either no lower aliases have been
615  * copied up yet, or aliases have been copied up in older kernels and are
616  * not indexed.
617  *
618  * If the index dentry for a copy up origin inode is positive, but points
619  * to an inode different than the upper inode, then either the upper inode
620  * has been copied up and not indexed or it was indexed, but since then
621  * index dir was cleared. Either way, that index cannot be used to indentify
622  * the overlay inode.
623  */
624 int ovl_get_index_name(struct dentry *origin, struct qstr *name)
625 {
626 	struct ovl_fh *fh;
627 	int err;
628 
629 	fh = ovl_encode_real_fh(origin, false);
630 	if (IS_ERR(fh))
631 		return PTR_ERR(fh);
632 
633 	err = ovl_get_index_name_fh(fh, name);
634 
635 	kfree(fh);
636 	return err;
637 }
638 
639 /* Lookup index by file handle for NFS export */
640 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
641 {
642 	struct dentry *index;
643 	struct qstr name;
644 	int err;
645 
646 	err = ovl_get_index_name_fh(fh, &name);
647 	if (err)
648 		return ERR_PTR(err);
649 
650 	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
651 	kfree(name.name);
652 	if (IS_ERR(index)) {
653 		if (PTR_ERR(index) == -ENOENT)
654 			index = NULL;
655 		return index;
656 	}
657 
658 	if (d_is_negative(index))
659 		err = 0;
660 	else if (ovl_is_whiteout(index))
661 		err = -ESTALE;
662 	else if (ovl_dentry_weird(index))
663 		err = -EIO;
664 	else
665 		return index;
666 
667 	dput(index);
668 	return ERR_PTR(err);
669 }
670 
671 struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
672 				struct dentry *origin, bool verify)
673 {
674 	struct dentry *index;
675 	struct inode *inode;
676 	struct qstr name;
677 	bool is_dir = d_is_dir(origin);
678 	int err;
679 
680 	err = ovl_get_index_name(origin, &name);
681 	if (err)
682 		return ERR_PTR(err);
683 
684 	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
685 	if (IS_ERR(index)) {
686 		err = PTR_ERR(index);
687 		if (err == -ENOENT) {
688 			index = NULL;
689 			goto out;
690 		}
691 		pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
692 				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
693 				    d_inode(origin)->i_ino, name.len, name.name,
694 				    err);
695 		goto out;
696 	}
697 
698 	inode = d_inode(index);
699 	if (d_is_negative(index)) {
700 		goto out_dput;
701 	} else if (ovl_is_whiteout(index) && !verify) {
702 		/*
703 		 * When index lookup is called with !verify for decoding an
704 		 * overlay file handle, a whiteout index implies that decode
705 		 * should treat file handle as stale and no need to print a
706 		 * warning about it.
707 		 */
708 		dput(index);
709 		index = ERR_PTR(-ESTALE);
710 		goto out;
711 	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
712 		   ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
713 		/*
714 		 * Index should always be of the same file type as origin
715 		 * except for the case of a whiteout index. A whiteout
716 		 * index should only exist if all lower aliases have been
717 		 * unlinked, which means that finding a lower origin on lookup
718 		 * whose index is a whiteout should be treated as an error.
719 		 */
720 		pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
721 				    index, d_inode(index)->i_mode & S_IFMT,
722 				    d_inode(origin)->i_mode & S_IFMT);
723 		goto fail;
724 	} else if (is_dir && verify) {
725 		if (!upper) {
726 			pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
727 					    origin, index);
728 			goto fail;
729 		}
730 
731 		/* Verify that dir index 'upper' xattr points to upper dir */
732 		err = ovl_verify_upper(index, upper, false);
733 		if (err) {
734 			if (err == -ESTALE) {
735 				pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
736 						    upper, origin, index);
737 			}
738 			goto fail;
739 		}
740 	} else if (upper && d_inode(upper) != inode) {
741 		goto out_dput;
742 	}
743 out:
744 	kfree(name.name);
745 	return index;
746 
747 out_dput:
748 	dput(index);
749 	index = NULL;
750 	goto out;
751 
752 fail:
753 	dput(index);
754 	index = ERR_PTR(-EIO);
755 	goto out;
756 }
757 
758 /*
759  * Returns next layer in stack starting from top.
760  * Returns -1 if this is the last layer.
761  */
762 int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
763 {
764 	struct ovl_entry *oe = dentry->d_fsdata;
765 
766 	BUG_ON(idx < 0);
767 	if (idx == 0) {
768 		ovl_path_upper(dentry, path);
769 		if (path->dentry)
770 			return oe->numlower ? 1 : -1;
771 		idx++;
772 	}
773 	BUG_ON(idx > oe->numlower);
774 	path->dentry = oe->lowerstack[idx - 1].dentry;
775 	path->mnt = oe->lowerstack[idx - 1].layer->mnt;
776 
777 	return (idx < oe->numlower) ? idx + 1 : -1;
778 }
779 
780 /* Fix missing 'origin' xattr */
781 static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
782 			  struct dentry *upper)
783 {
784 	int err;
785 
786 	if (ovl_check_origin_xattr(upper))
787 		return 0;
788 
789 	err = ovl_want_write(dentry);
790 	if (err)
791 		return err;
792 
793 	err = ovl_set_origin(dentry, lower, upper);
794 	if (!err)
795 		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
796 
797 	ovl_drop_write(dentry);
798 	return err;
799 }
800 
801 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
802 			  unsigned int flags)
803 {
804 	struct ovl_entry *oe;
805 	const struct cred *old_cred;
806 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
807 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
808 	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
809 	struct ovl_path *stack = NULL, *origin_path = NULL;
810 	struct dentry *upperdir, *upperdentry = NULL;
811 	struct dentry *origin = NULL;
812 	struct dentry *index = NULL;
813 	unsigned int ctr = 0;
814 	struct inode *inode = NULL;
815 	bool upperopaque = false;
816 	char *upperredirect = NULL;
817 	struct dentry *this;
818 	unsigned int i;
819 	int err;
820 	bool metacopy = false;
821 	struct ovl_lookup_data d = {
822 		.name = dentry->d_name,
823 		.is_dir = false,
824 		.opaque = false,
825 		.stop = false,
826 		.last = ofs->config.redirect_follow ? false : !poe->numlower,
827 		.redirect = NULL,
828 		.metacopy = false,
829 	};
830 
831 	if (dentry->d_name.len > ofs->namelen)
832 		return ERR_PTR(-ENAMETOOLONG);
833 
834 	old_cred = ovl_override_creds(dentry->d_sb);
835 	upperdir = ovl_dentry_upper(dentry->d_parent);
836 	if (upperdir) {
837 		err = ovl_lookup_layer(upperdir, &d, &upperdentry);
838 		if (err)
839 			goto out;
840 
841 		if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) {
842 			dput(upperdentry);
843 			err = -EREMOTE;
844 			goto out;
845 		}
846 		if (upperdentry && !d.is_dir) {
847 			unsigned int origin_ctr = 0;
848 
849 			/*
850 			 * Lookup copy up origin by decoding origin file handle.
851 			 * We may get a disconnected dentry, which is fine,
852 			 * because we only need to hold the origin inode in
853 			 * cache and use its inode number.  We may even get a
854 			 * connected dentry, that is not under any of the lower
855 			 * layers root.  That is also fine for using it's inode
856 			 * number - it's the same as if we held a reference
857 			 * to a dentry in lower layer that was moved under us.
858 			 */
859 			err = ovl_check_origin(ofs, upperdentry, &origin_path,
860 					       &origin_ctr);
861 			if (err)
862 				goto out_put_upper;
863 
864 			if (d.metacopy)
865 				metacopy = true;
866 		}
867 
868 		if (d.redirect) {
869 			err = -ENOMEM;
870 			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
871 			if (!upperredirect)
872 				goto out_put_upper;
873 			if (d.redirect[0] == '/')
874 				poe = roe;
875 		}
876 		upperopaque = d.opaque;
877 	}
878 
879 	if (!d.stop && poe->numlower) {
880 		err = -ENOMEM;
881 		stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
882 				GFP_KERNEL);
883 		if (!stack)
884 			goto out_put_upper;
885 	}
886 
887 	for (i = 0; !d.stop && i < poe->numlower; i++) {
888 		struct ovl_path lower = poe->lowerstack[i];
889 
890 		if (!ofs->config.redirect_follow)
891 			d.last = i == poe->numlower - 1;
892 		else
893 			d.last = lower.layer->idx == roe->numlower;
894 
895 		err = ovl_lookup_layer(lower.dentry, &d, &this);
896 		if (err)
897 			goto out_put;
898 
899 		if (!this)
900 			continue;
901 
902 		/*
903 		 * If no origin fh is stored in upper of a merge dir, store fh
904 		 * of lower dir and set upper parent "impure".
905 		 */
906 		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
907 			err = ovl_fix_origin(dentry, this, upperdentry);
908 			if (err) {
909 				dput(this);
910 				goto out_put;
911 			}
912 		}
913 
914 		/*
915 		 * When "verify_lower" feature is enabled, do not merge with a
916 		 * lower dir that does not match a stored origin xattr. In any
917 		 * case, only verified origin is used for index lookup.
918 		 *
919 		 * For non-dir dentry, if index=on, then ensure origin
920 		 * matches the dentry found using path based lookup,
921 		 * otherwise error out.
922 		 */
923 		if (upperdentry && !ctr &&
924 		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
925 		     (!d.is_dir && ofs->config.index && origin_path))) {
926 			err = ovl_verify_origin(upperdentry, this, false);
927 			if (err) {
928 				dput(this);
929 				if (d.is_dir)
930 					break;
931 				goto out_put;
932 			}
933 			origin = this;
934 		}
935 
936 		if (d.metacopy)
937 			metacopy = true;
938 		/*
939 		 * Do not store intermediate metacopy dentries in chain,
940 		 * except top most lower metacopy dentry
941 		 */
942 		if (d.metacopy && ctr) {
943 			dput(this);
944 			continue;
945 		}
946 
947 		stack[ctr].dentry = this;
948 		stack[ctr].layer = lower.layer;
949 		ctr++;
950 
951 		/*
952 		 * Following redirects can have security consequences: it's like
953 		 * a symlink into the lower layer without the permission checks.
954 		 * This is only a problem if the upper layer is untrusted (e.g
955 		 * comes from an USB drive).  This can allow a non-readable file
956 		 * or directory to become readable.
957 		 *
958 		 * Only following redirects when redirects are enabled disables
959 		 * this attack vector when not necessary.
960 		 */
961 		err = -EPERM;
962 		if (d.redirect && !ofs->config.redirect_follow) {
963 			pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
964 					    dentry);
965 			goto out_put;
966 		}
967 
968 		if (d.stop)
969 			break;
970 
971 		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
972 			poe = roe;
973 			/* Find the current layer on the root dentry */
974 			i = lower.layer->idx - 1;
975 		}
976 	}
977 
978 	if (metacopy) {
979 		/*
980 		 * Found a metacopy dentry but did not find corresponding
981 		 * data dentry
982 		 */
983 		if (d.metacopy) {
984 			err = -EIO;
985 			goto out_put;
986 		}
987 
988 		err = -EPERM;
989 		if (!ofs->config.metacopy) {
990 			pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
991 					    dentry);
992 			goto out_put;
993 		}
994 	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
995 		if (WARN_ON(stack != NULL)) {
996 			err = -EIO;
997 			goto out_put;
998 		}
999 		stack = origin_path;
1000 		ctr = 1;
1001 		origin_path = NULL;
1002 	}
1003 
1004 	/*
1005 	 * Lookup index by lower inode and verify it matches upper inode.
1006 	 * We only trust dir index if we verified that lower dir matches
1007 	 * origin, otherwise dir index entries may be inconsistent and we
1008 	 * ignore them.
1009 	 *
1010 	 * For non-dir upper metacopy dentry, we already set "origin" if we
1011 	 * verified that lower matched upper origin. If upper origin was
1012 	 * not present (because lower layer did not support fh encode/decode),
1013 	 * or indexing is not enabled, do not set "origin" and skip looking up
1014 	 * index. This case should be handled in same way as a non-dir upper
1015 	 * without ORIGIN is handled.
1016 	 *
1017 	 * Always lookup index of non-dir non-metacopy and non-upper.
1018 	 */
1019 	if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
1020 		origin = stack[0].dentry;
1021 
1022 	if (origin && ovl_indexdir(dentry->d_sb) &&
1023 	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
1024 		index = ovl_lookup_index(ofs, upperdentry, origin, true);
1025 		if (IS_ERR(index)) {
1026 			err = PTR_ERR(index);
1027 			index = NULL;
1028 			goto out_put;
1029 		}
1030 	}
1031 
1032 	oe = ovl_alloc_entry(ctr);
1033 	err = -ENOMEM;
1034 	if (!oe)
1035 		goto out_put;
1036 
1037 	memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
1038 	dentry->d_fsdata = oe;
1039 
1040 	if (upperopaque)
1041 		ovl_dentry_set_opaque(dentry);
1042 
1043 	if (upperdentry)
1044 		ovl_dentry_set_upper_alias(dentry);
1045 	else if (index) {
1046 		upperdentry = dget(index);
1047 		upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
1048 		if (IS_ERR(upperredirect)) {
1049 			err = PTR_ERR(upperredirect);
1050 			upperredirect = NULL;
1051 			goto out_free_oe;
1052 		}
1053 	}
1054 
1055 	if (upperdentry || ctr) {
1056 		struct ovl_inode_params oip = {
1057 			.upperdentry = upperdentry,
1058 			.lowerpath = stack,
1059 			.index = index,
1060 			.numlower = ctr,
1061 			.redirect = upperredirect,
1062 			.lowerdata = (ctr > 1 && !d.is_dir) ?
1063 				      stack[ctr - 1].dentry : NULL,
1064 		};
1065 
1066 		inode = ovl_get_inode(dentry->d_sb, &oip);
1067 		err = PTR_ERR(inode);
1068 		if (IS_ERR(inode))
1069 			goto out_free_oe;
1070 	}
1071 
1072 	revert_creds(old_cred);
1073 	if (origin_path) {
1074 		dput(origin_path->dentry);
1075 		kfree(origin_path);
1076 	}
1077 	dput(index);
1078 	kfree(stack);
1079 	kfree(d.redirect);
1080 	return d_splice_alias(inode, dentry);
1081 
1082 out_free_oe:
1083 	dentry->d_fsdata = NULL;
1084 	kfree(oe);
1085 out_put:
1086 	dput(index);
1087 	for (i = 0; i < ctr; i++)
1088 		dput(stack[i].dentry);
1089 	kfree(stack);
1090 out_put_upper:
1091 	if (origin_path) {
1092 		dput(origin_path->dentry);
1093 		kfree(origin_path);
1094 	}
1095 	dput(upperdentry);
1096 	kfree(upperredirect);
1097 out:
1098 	kfree(d.redirect);
1099 	revert_creds(old_cred);
1100 	return ERR_PTR(err);
1101 }
1102 
1103 bool ovl_lower_positive(struct dentry *dentry)
1104 {
1105 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
1106 	const struct qstr *name = &dentry->d_name;
1107 	const struct cred *old_cred;
1108 	unsigned int i;
1109 	bool positive = false;
1110 	bool done = false;
1111 
1112 	/*
1113 	 * If dentry is negative, then lower is positive iff this is a
1114 	 * whiteout.
1115 	 */
1116 	if (!dentry->d_inode)
1117 		return ovl_dentry_is_opaque(dentry);
1118 
1119 	/* Negative upper -> positive lower */
1120 	if (!ovl_dentry_upper(dentry))
1121 		return true;
1122 
1123 	old_cred = ovl_override_creds(dentry->d_sb);
1124 	/* Positive upper -> have to look up lower to see whether it exists */
1125 	for (i = 0; !done && !positive && i < poe->numlower; i++) {
1126 		struct dentry *this;
1127 		struct dentry *lowerdir = poe->lowerstack[i].dentry;
1128 
1129 		this = lookup_one_len_unlocked(name->name, lowerdir,
1130 					       name->len);
1131 		if (IS_ERR(this)) {
1132 			switch (PTR_ERR(this)) {
1133 			case -ENOENT:
1134 			case -ENAMETOOLONG:
1135 				break;
1136 
1137 			default:
1138 				/*
1139 				 * Assume something is there, we just couldn't
1140 				 * access it.
1141 				 */
1142 				positive = true;
1143 				break;
1144 			}
1145 		} else {
1146 			if (this->d_inode) {
1147 				positive = !ovl_is_whiteout(this);
1148 				done = true;
1149 			}
1150 			dput(this);
1151 		}
1152 	}
1153 	revert_creds(old_cred);
1154 
1155 	return positive;
1156 }
1157