xref: /openbmc/linux/fs/overlayfs/namei.c (revision b9b77222)
1 /*
2  * Copyright (C) 2011 Novell Inc.
3  * Copyright (C) 2016 Red Hat, Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/cred.h>
12 #include <linux/ctype.h>
13 #include <linux/namei.h>
14 #include <linux/xattr.h>
15 #include <linux/ratelimit.h>
16 #include <linux/mount.h>
17 #include <linux/exportfs.h>
18 #include "overlayfs.h"
19 
/*
 * State carried through a lookup as it proceeds from layer to layer
 * (filled in by ovl_lookup() and updated by ovl_lookup_single() and
 * ovl_check_redirect()).
 */
struct ovl_lookup_data {
	/* Name to look up; switched to the redirect path once one is found */
	struct qstr name;
	/* Set when the last path element was found and can be looked up in */
	bool is_dir;
	/* Set on a whiteout, or on an opaque dir that is the last element */
	bool opaque;
	/* Set when the lookup must not descend into further layers */
	bool stop;
	/* Set by the caller for the final layer; skips opaque/redirect checks */
	bool last;
	/* Heap buffer holding the current redirect path, or NULL; kfree()d */
	char *redirect;
};
28 
29 static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
30 			      size_t prelen, const char *post)
31 {
32 	int res;
33 	char *s, *next, *buf = NULL;
34 
35 	res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
36 	if (res < 0) {
37 		if (res == -ENODATA || res == -EOPNOTSUPP)
38 			return 0;
39 		goto fail;
40 	}
41 	buf = kzalloc(prelen + res + strlen(post) + 1, GFP_KERNEL);
42 	if (!buf)
43 		return -ENOMEM;
44 
45 	if (res == 0)
46 		goto invalid;
47 
48 	res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
49 	if (res < 0)
50 		goto fail;
51 	if (res == 0)
52 		goto invalid;
53 	if (buf[0] == '/') {
54 		for (s = buf; *s++ == '/'; s = next) {
55 			next = strchrnul(s, '/');
56 			if (s == next)
57 				goto invalid;
58 		}
59 		/*
60 		 * One of the ancestor path elements in an absolute path
61 		 * lookup in ovl_lookup_layer() could have been opaque and
62 		 * that will stop further lookup in lower layers (d->stop=true)
63 		 * But we have found an absolute redirect in decendant path
64 		 * element and that should force continue lookup in lower
65 		 * layers (reset d->stop).
66 		 */
67 		d->stop = false;
68 	} else {
69 		if (strchr(buf, '/') != NULL)
70 			goto invalid;
71 
72 		memmove(buf + prelen, buf, res);
73 		memcpy(buf, d->name.name, prelen);
74 	}
75 
76 	strcat(buf, post);
77 	kfree(d->redirect);
78 	d->redirect = buf;
79 	d->name.name = d->redirect;
80 	d->name.len = strlen(d->redirect);
81 
82 	return 0;
83 
84 err_free:
85 	kfree(buf);
86 	return 0;
87 fail:
88 	pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
89 	goto err_free;
90 invalid:
91 	pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
92 	goto err_free;
93 }
94 
95 static int ovl_acceptable(void *ctx, struct dentry *dentry)
96 {
97 	/*
98 	 * A non-dir origin may be disconnected, which is fine, because
99 	 * we only need it for its unique inode number.
100 	 */
101 	if (!d_is_dir(dentry))
102 		return 1;
103 
104 	/* Don't decode a deleted empty directory */
105 	if (d_unhashed(dentry))
106 		return 0;
107 
108 	/* Check if directory belongs to the layer we are decoding from */
109 	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
110 }
111 
112 /*
113  * Check validity of an overlay file handle buffer.
114  *
115  * Return 0 for a valid file handle.
116  * Return -ENODATA for "origin unknown".
117  * Return <0 for an invalid file handle.
118  */
119 int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
120 {
121 	if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
122 		return -EINVAL;
123 
124 	if (fh->magic != OVL_FH_MAGIC)
125 		return -EINVAL;
126 
127 	/* Treat larger version and unknown flags as "origin unknown" */
128 	if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
129 		return -ENODATA;
130 
131 	/* Treat endianness mismatch as "origin unknown" */
132 	if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
133 	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
134 		return -ENODATA;
135 
136 	return 0;
137 }
138 
139 static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
140 {
141 	int res, err;
142 	struct ovl_fh *fh = NULL;
143 
144 	res = vfs_getxattr(dentry, name, NULL, 0);
145 	if (res < 0) {
146 		if (res == -ENODATA || res == -EOPNOTSUPP)
147 			return NULL;
148 		goto fail;
149 	}
150 	/* Zero size value means "copied up but origin unknown" */
151 	if (res == 0)
152 		return NULL;
153 
154 	fh = kzalloc(res, GFP_KERNEL);
155 	if (!fh)
156 		return ERR_PTR(-ENOMEM);
157 
158 	res = vfs_getxattr(dentry, name, fh, res);
159 	if (res < 0)
160 		goto fail;
161 
162 	err = ovl_check_fh_len(fh, res);
163 	if (err < 0) {
164 		if (err == -ENODATA)
165 			goto out;
166 		goto invalid;
167 	}
168 
169 	return fh;
170 
171 out:
172 	kfree(fh);
173 	return NULL;
174 
175 fail:
176 	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
177 	goto out;
178 invalid:
179 	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
180 	goto out;
181 }
182 
183 struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
184 				  bool connected)
185 {
186 	struct dentry *real;
187 	int bytes;
188 
189 	/*
190 	 * Make sure that the stored uuid matches the uuid of the lower
191 	 * layer where file handle will be decoded.
192 	 */
193 	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
194 		return NULL;
195 
196 	bytes = (fh->len - offsetof(struct ovl_fh, fid));
197 	real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
198 				  bytes >> 2, (int)fh->type,
199 				  connected ? ovl_acceptable : NULL, mnt);
200 	if (IS_ERR(real)) {
201 		/*
202 		 * Treat stale file handle to lower file as "origin unknown".
203 		 * upper file handle could become stale when upper file is
204 		 * unlinked and this information is needed to handle stale
205 		 * index entries correctly.
206 		 */
207 		if (real == ERR_PTR(-ESTALE) &&
208 		    !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
209 			real = NULL;
210 		return real;
211 	}
212 
213 	if (ovl_dentry_weird(real)) {
214 		dput(real);
215 		return NULL;
216 	}
217 
218 	return real;
219 }
220 
221 static bool ovl_is_opaquedir(struct dentry *dentry)
222 {
223 	return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
224 }
225 
226 static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
227 			     const char *name, unsigned int namelen,
228 			     size_t prelen, const char *post,
229 			     struct dentry **ret)
230 {
231 	struct dentry *this;
232 	int err;
233 	bool last_element = !post[0];
234 
235 	this = lookup_one_len_unlocked(name, base, namelen);
236 	if (IS_ERR(this)) {
237 		err = PTR_ERR(this);
238 		this = NULL;
239 		if (err == -ENOENT || err == -ENAMETOOLONG)
240 			goto out;
241 		goto out_err;
242 	}
243 	if (!this->d_inode)
244 		goto put_and_out;
245 
246 	if (ovl_dentry_weird(this)) {
247 		/* Don't support traversing automounts and other weirdness */
248 		err = -EREMOTE;
249 		goto out_err;
250 	}
251 	if (ovl_is_whiteout(this)) {
252 		d->stop = d->opaque = true;
253 		goto put_and_out;
254 	}
255 	if (!d_can_lookup(this)) {
256 		d->stop = true;
257 		if (d->is_dir)
258 			goto put_and_out;
259 
260 		/*
261 		 * NB: handle failure to lookup non-last element when non-dir
262 		 * redirects become possible
263 		 */
264 		WARN_ON(!last_element);
265 		goto out;
266 	}
267 	if (last_element)
268 		d->is_dir = true;
269 	if (d->last)
270 		goto out;
271 
272 	if (ovl_is_opaquedir(this)) {
273 		d->stop = true;
274 		if (last_element)
275 			d->opaque = true;
276 		goto out;
277 	}
278 	err = ovl_check_redirect(this, d, prelen, post);
279 	if (err)
280 		goto out_err;
281 out:
282 	*ret = this;
283 	return 0;
284 
285 put_and_out:
286 	dput(this);
287 	this = NULL;
288 	goto out;
289 
290 out_err:
291 	dput(this);
292 	return err;
293 }
294 
295 static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
296 			    struct dentry **ret)
297 {
298 	/* Counting down from the end, since the prefix can change */
299 	size_t rem = d->name.len - 1;
300 	struct dentry *dentry = NULL;
301 	int err;
302 
303 	if (d->name.name[0] != '/')
304 		return ovl_lookup_single(base, d, d->name.name, d->name.len,
305 					 0, "", ret);
306 
307 	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
308 		const char *s = d->name.name + d->name.len - rem;
309 		const char *next = strchrnul(s, '/');
310 		size_t thislen = next - s;
311 		bool end = !next[0];
312 
313 		/* Verify we did not go off the rails */
314 		if (WARN_ON(s[-1] != '/'))
315 			return -EIO;
316 
317 		err = ovl_lookup_single(base, d, s, thislen,
318 					d->name.len - rem, next, &base);
319 		dput(dentry);
320 		if (err)
321 			return err;
322 		dentry = base;
323 		if (end)
324 			break;
325 
326 		rem -= thislen + 1;
327 
328 		if (WARN_ON(rem >= d->name.len))
329 			return -EIO;
330 	}
331 	*ret = dentry;
332 	return 0;
333 }
334 
335 
336 int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
337 			struct dentry *upperdentry, struct ovl_path **stackp)
338 {
339 	struct dentry *origin = NULL;
340 	int i;
341 
342 	for (i = 0; i < ofs->numlower; i++) {
343 		origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
344 					    connected);
345 		if (origin)
346 			break;
347 	}
348 
349 	if (!origin)
350 		return -ESTALE;
351 	else if (IS_ERR(origin))
352 		return PTR_ERR(origin);
353 
354 	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
355 	    ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
356 		goto invalid;
357 
358 	if (!*stackp)
359 		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
360 	if (!*stackp) {
361 		dput(origin);
362 		return -ENOMEM;
363 	}
364 	**stackp = (struct ovl_path){
365 		.dentry = origin,
366 		.layer = &ofs->lower_layers[i]
367 	};
368 
369 	return 0;
370 
371 invalid:
372 	pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
373 			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
374 			    d_inode(origin)->i_mode & S_IFMT);
375 	dput(origin);
376 	return -EIO;
377 }
378 
379 static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
380 			    struct ovl_path **stackp, unsigned int *ctrp)
381 {
382 	struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
383 	int err;
384 
385 	if (IS_ERR_OR_NULL(fh))
386 		return PTR_ERR(fh);
387 
388 	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
389 	kfree(fh);
390 
391 	if (err) {
392 		if (err == -ESTALE)
393 			return 0;
394 		return err;
395 	}
396 
397 	if (WARN_ON(*ctrp))
398 		return -EIO;
399 
400 	*ctrp = 1;
401 	return 0;
402 }
403 
404 /*
405  * Verify that @fh matches the file handle stored in xattr @name.
406  * Return 0 on match, -ESTALE on mismatch, < 0 on error.
407  */
408 static int ovl_verify_fh(struct dentry *dentry, const char *name,
409 			 const struct ovl_fh *fh)
410 {
411 	struct ovl_fh *ofh = ovl_get_fh(dentry, name);
412 	int err = 0;
413 
414 	if (!ofh)
415 		return -ENODATA;
416 
417 	if (IS_ERR(ofh))
418 		return PTR_ERR(ofh);
419 
420 	if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
421 		err = -ESTALE;
422 
423 	kfree(ofh);
424 	return err;
425 }
426 
427 /*
428  * Verify that @real dentry matches the file handle stored in xattr @name.
429  *
430  * If @set is true and there is no stored file handle, encode @real and store
431  * file handle in xattr @name.
432  *
433  * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
434  */
435 int ovl_verify_set_fh(struct dentry *dentry, const char *name,
436 		      struct dentry *real, bool is_upper, bool set)
437 {
438 	struct inode *inode;
439 	struct ovl_fh *fh;
440 	int err;
441 
442 	fh = ovl_encode_real_fh(real, is_upper);
443 	err = PTR_ERR(fh);
444 	if (IS_ERR(fh))
445 		goto fail;
446 
447 	err = ovl_verify_fh(dentry, name, fh);
448 	if (set && err == -ENODATA)
449 		err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
450 	if (err)
451 		goto fail;
452 
453 out:
454 	kfree(fh);
455 	return err;
456 
457 fail:
458 	inode = d_inode(real);
459 	pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
460 			    is_upper ? "upper" : "origin", real,
461 			    inode ? inode->i_ino : 0, err);
462 	goto out;
463 }
464 
465 /* Get upper dentry from index */
466 struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
467 {
468 	struct ovl_fh *fh;
469 	struct dentry *upper;
470 
471 	if (!d_is_dir(index))
472 		return dget(index);
473 
474 	fh = ovl_get_fh(index, OVL_XATTR_UPPER);
475 	if (IS_ERR_OR_NULL(fh))
476 		return ERR_CAST(fh);
477 
478 	upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
479 	kfree(fh);
480 
481 	if (IS_ERR_OR_NULL(upper))
482 		return upper ?: ERR_PTR(-ESTALE);
483 
484 	if (!d_is_dir(upper)) {
485 		pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
486 				    index, upper);
487 		dput(upper);
488 		return ERR_PTR(-EIO);
489 	}
490 
491 	return upper;
492 }
493 
494 /* Is this a leftover from create/whiteout of directory index entry? */
495 static bool ovl_is_temp_index(struct dentry *index)
496 {
497 	return index->d_name.name[0] == '#';
498 }
499 
500 /*
501  * Verify that an index entry name matches the origin file handle stored in
502  * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
503  * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
504  */
505 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
506 {
507 	struct ovl_fh *fh = NULL;
508 	size_t len;
509 	struct ovl_path origin = { };
510 	struct ovl_path *stack = &origin;
511 	struct dentry *upper = NULL;
512 	int err;
513 
514 	if (!d_inode(index))
515 		return 0;
516 
517 	/* Cleanup leftover from index create/cleanup attempt */
518 	err = -ESTALE;
519 	if (ovl_is_temp_index(index))
520 		goto fail;
521 
522 	err = -EINVAL;
523 	if (index->d_name.len < sizeof(struct ovl_fh)*2)
524 		goto fail;
525 
526 	err = -ENOMEM;
527 	len = index->d_name.len / 2;
528 	fh = kzalloc(len, GFP_KERNEL);
529 	if (!fh)
530 		goto fail;
531 
532 	err = -EINVAL;
533 	if (hex2bin((u8 *)fh, index->d_name.name, len))
534 		goto fail;
535 
536 	err = ovl_check_fh_len(fh, len);
537 	if (err)
538 		goto fail;
539 
540 	/*
541 	 * Whiteout index entries are used as an indication that an exported
542 	 * overlay file handle should be treated as stale (i.e. after unlink
543 	 * of the overlay inode). These entries contain no origin xattr.
544 	 */
545 	if (ovl_is_whiteout(index))
546 		goto out;
547 
548 	/*
549 	 * Verifying directory index entries are not stale is expensive, so
550 	 * only verify stale dir index if NFS export is enabled.
551 	 */
552 	if (d_is_dir(index) && !ofs->config.nfs_export)
553 		goto out;
554 
555 	/*
556 	 * Directory index entries should have 'upper' xattr pointing to the
557 	 * real upper dir. Non-dir index entries are hardlinks to the upper
558 	 * real inode. For non-dir index, we can read the copy up origin xattr
559 	 * directly from the index dentry, but for dir index we first need to
560 	 * decode the upper directory.
561 	 */
562 	upper = ovl_index_upper(ofs, index);
563 	if (IS_ERR_OR_NULL(upper)) {
564 		err = PTR_ERR(upper);
565 		/*
566 		 * Directory index entries with no 'upper' xattr need to be
567 		 * removed. When dir index entry has a stale 'upper' xattr,
568 		 * we assume that upper dir was removed and we treat the dir
569 		 * index as orphan entry that needs to be whited out.
570 		 */
571 		if (err == -ESTALE)
572 			goto orphan;
573 		else if (!err)
574 			err = -ESTALE;
575 		goto fail;
576 	}
577 
578 	err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
579 	dput(upper);
580 	if (err)
581 		goto fail;
582 
583 	/* Check if non-dir index is orphan and don't warn before cleaning it */
584 	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
585 		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
586 		if (err)
587 			goto fail;
588 
589 		if (ovl_get_nlink(origin.dentry, index, 0) == 0)
590 			goto orphan;
591 	}
592 
593 out:
594 	dput(origin.dentry);
595 	kfree(fh);
596 	return err;
597 
598 fail:
599 	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
600 			    index, d_inode(index)->i_mode & S_IFMT, err);
601 	goto out;
602 
603 orphan:
604 	pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
605 			    index, d_inode(index)->i_mode & S_IFMT,
606 			    d_inode(index)->i_nlink);
607 	err = -ENOENT;
608 	goto out;
609 }
610 
611 static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
612 {
613 	char *n, *s;
614 
615 	n = kcalloc(fh->len, 2, GFP_KERNEL);
616 	if (!n)
617 		return -ENOMEM;
618 
619 	s  = bin2hex(n, fh, fh->len);
620 	*name = (struct qstr) QSTR_INIT(n, s - n);
621 
622 	return 0;
623 
624 }
625 
626 /*
627  * Lookup in indexdir for the index entry of a lower real inode or a copy up
628  * origin inode. The index entry name is the hex representation of the lower
629  * inode file handle.
630  *
631  * If the index dentry in negative, then either no lower aliases have been
632  * copied up yet, or aliases have been copied up in older kernels and are
633  * not indexed.
634  *
635  * If the index dentry for a copy up origin inode is positive, but points
636  * to an inode different than the upper inode, then either the upper inode
637  * has been copied up and not indexed or it was indexed, but since then
638  * index dir was cleared. Either way, that index cannot be used to indentify
639  * the overlay inode.
640  */
641 int ovl_get_index_name(struct dentry *origin, struct qstr *name)
642 {
643 	struct ovl_fh *fh;
644 	int err;
645 
646 	fh = ovl_encode_real_fh(origin, false);
647 	if (IS_ERR(fh))
648 		return PTR_ERR(fh);
649 
650 	err = ovl_get_index_name_fh(fh, name);
651 
652 	kfree(fh);
653 	return err;
654 }
655 
656 /* Lookup index by file handle for NFS export */
657 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
658 {
659 	struct dentry *index;
660 	struct qstr name;
661 	int err;
662 
663 	err = ovl_get_index_name_fh(fh, &name);
664 	if (err)
665 		return ERR_PTR(err);
666 
667 	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
668 	kfree(name.name);
669 	if (IS_ERR(index)) {
670 		if (PTR_ERR(index) == -ENOENT)
671 			index = NULL;
672 		return index;
673 	}
674 
675 	if (d_is_negative(index))
676 		err = 0;
677 	else if (ovl_is_whiteout(index))
678 		err = -ESTALE;
679 	else if (ovl_dentry_weird(index))
680 		err = -EIO;
681 	else
682 		return index;
683 
684 	dput(index);
685 	return ERR_PTR(err);
686 }
687 
688 struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
689 				struct dentry *origin, bool verify)
690 {
691 	struct dentry *index;
692 	struct inode *inode;
693 	struct qstr name;
694 	bool is_dir = d_is_dir(origin);
695 	int err;
696 
697 	err = ovl_get_index_name(origin, &name);
698 	if (err)
699 		return ERR_PTR(err);
700 
701 	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
702 	if (IS_ERR(index)) {
703 		err = PTR_ERR(index);
704 		if (err == -ENOENT) {
705 			index = NULL;
706 			goto out;
707 		}
708 		pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
709 				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
710 				    d_inode(origin)->i_ino, name.len, name.name,
711 				    err);
712 		goto out;
713 	}
714 
715 	inode = d_inode(index);
716 	if (d_is_negative(index)) {
717 		goto out_dput;
718 	} else if (ovl_is_whiteout(index) && !verify) {
719 		/*
720 		 * When index lookup is called with !verify for decoding an
721 		 * overlay file handle, a whiteout index implies that decode
722 		 * should treat file handle as stale and no need to print a
723 		 * warning about it.
724 		 */
725 		dput(index);
726 		index = ERR_PTR(-ESTALE);
727 		goto out;
728 	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
729 		   ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
730 		/*
731 		 * Index should always be of the same file type as origin
732 		 * except for the case of a whiteout index. A whiteout
733 		 * index should only exist if all lower aliases have been
734 		 * unlinked, which means that finding a lower origin on lookup
735 		 * whose index is a whiteout should be treated as an error.
736 		 */
737 		pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
738 				    index, d_inode(index)->i_mode & S_IFMT,
739 				    d_inode(origin)->i_mode & S_IFMT);
740 		goto fail;
741 	} else if (is_dir && verify) {
742 		if (!upper) {
743 			pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
744 					    origin, index);
745 			goto fail;
746 		}
747 
748 		/* Verify that dir index 'upper' xattr points to upper dir */
749 		err = ovl_verify_upper(index, upper, false);
750 		if (err) {
751 			if (err == -ESTALE) {
752 				pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
753 						    upper, origin, index);
754 			}
755 			goto fail;
756 		}
757 	} else if (upper && d_inode(upper) != inode) {
758 		goto out_dput;
759 	}
760 out:
761 	kfree(name.name);
762 	return index;
763 
764 out_dput:
765 	dput(index);
766 	index = NULL;
767 	goto out;
768 
769 fail:
770 	dput(index);
771 	index = ERR_PTR(-EIO);
772 	goto out;
773 }
774 
775 /*
776  * Returns next layer in stack starting from top.
777  * Returns -1 if this is the last layer.
778  */
779 int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
780 {
781 	struct ovl_entry *oe = dentry->d_fsdata;
782 
783 	BUG_ON(idx < 0);
784 	if (idx == 0) {
785 		ovl_path_upper(dentry, path);
786 		if (path->dentry)
787 			return oe->numlower ? 1 : -1;
788 		idx++;
789 	}
790 	BUG_ON(idx > oe->numlower);
791 	path->dentry = oe->lowerstack[idx - 1].dentry;
792 	path->mnt = oe->lowerstack[idx - 1].layer->mnt;
793 
794 	return (idx < oe->numlower) ? idx + 1 : -1;
795 }
796 
797 /* Fix missing 'origin' xattr */
798 static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
799 			  struct dentry *upper)
800 {
801 	int err;
802 
803 	if (ovl_check_origin_xattr(upper))
804 		return 0;
805 
806 	err = ovl_want_write(dentry);
807 	if (err)
808 		return err;
809 
810 	err = ovl_set_origin(dentry, lower, upper);
811 	if (!err)
812 		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
813 
814 	ovl_drop_write(dentry);
815 	return err;
816 }
817 
/*
 * Look up @dentry: search the upper layer and then the lower layers of its
 * parent, collect the lower dentries found into a stack, optionally look up
 * the matching index entry, and attach the resulting overlay entry and
 * inode to @dentry.
 *
 * The lookup runs with the credentials installed by ovl_override_creds();
 * the previous credentials are restored before returning.
 *
 * Returns the result of d_splice_alias() on success, or an ERR_PTR() on
 * failure.
 */
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
			  unsigned int flags)
{
	struct ovl_entry *oe;
	const struct cred *old_cred;
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	/* poe: parent's overlay entry; roe: overlay root's entry */
	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
	struct ovl_path *stack = NULL;
	struct dentry *upperdir, *upperdentry = NULL;
	struct dentry *origin = NULL;
	struct dentry *index = NULL;
	/* Number of valid entries in stack */
	unsigned int ctr = 0;
	struct inode *inode = NULL;
	bool upperopaque = false;
	char *upperredirect = NULL;
	struct dentry *this;
	unsigned int i;
	int err;
	struct ovl_lookup_data d = {
		.name = dentry->d_name,
		.is_dir = false,
		.opaque = false,
		.stop = false,
		/*
		 * Without redirect_follow, upper is the last layer looked
		 * at when the parent has no lower layers.
		 */
		.last = ofs->config.redirect_follow ? false : !poe->numlower,
		.redirect = NULL,
	};

	if (dentry->d_name.len > ofs->namelen)
		return ERR_PTR(-ENAMETOOLONG);

	old_cred = ovl_override_creds(dentry->d_sb);
	upperdir = ovl_dentry_upper(dentry->d_parent);
	if (upperdir) {
		err = ovl_lookup_layer(upperdir, &d, &upperdentry);
		if (err)
			goto out;

		if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) {
			dput(upperdentry);
			err = -EREMOTE;
			goto out;
		}
		if (upperdentry && !d.is_dir) {
			BUG_ON(!d.stop || d.redirect);
			/*
			 * Lookup copy up origin by decoding origin file handle.
			 * We may get a disconnected dentry, which is fine,
			 * because we only need to hold the origin inode in
			 * cache and use its inode number.  We may even get a
			 * connected dentry, that is not under any of the lower
			 * layers root.  That is also fine for using its inode
			 * number - it's the same as if we held a reference
			 * to a dentry in lower layer that was moved under us.
			 */
			err = ovl_check_origin(ofs, upperdentry, &stack, &ctr);
			if (err)
				goto out_put_upper;
		}

		if (d.redirect) {
			err = -ENOMEM;
			/* Private copy; d.redirect may change in lower layers */
			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
			if (!upperredirect)
				goto out_put_upper;
			/* Absolute redirect: continue from the root's lowers */
			if (d.redirect[0] == '/')
				poe = roe;
		}
		upperopaque = d.opaque;
	}

	if (!d.stop && poe->numlower) {
		err = -ENOMEM;
		/* One slot for every lower layer of the overlay */
		stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
				GFP_KERNEL);
		if (!stack)
			goto out_put_upper;
	}

	for (i = 0; !d.stop && i < poe->numlower; i++) {
		struct ovl_path lower = poe->lowerstack[i];

		if (!ofs->config.redirect_follow)
			d.last = i == poe->numlower - 1;
		else
			d.last = lower.layer->idx == roe->numlower;

		err = ovl_lookup_layer(lower.dentry, &d, &this);
		if (err)
			goto out_put;

		if (!this)
			continue;

		/*
		 * If no origin fh is stored in upper of a merge dir, store fh
		 * of lower dir and set upper parent "impure".
		 */
		if (upperdentry && !ctr && !ofs->noxattr) {
			err = ovl_fix_origin(dentry, this, upperdentry);
			if (err) {
				dput(this);
				goto out_put;
			}
		}

		/*
		 * When "verify_lower" feature is enabled, do not merge with a
		 * lower dir that does not match a stored origin xattr. In any
		 * case, only verified origin is used for index lookup.
		 */
		if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) {
			err = ovl_verify_origin(upperdentry, this, false);
			if (err) {
				dput(this);
				break;
			}

			/* Bless lower dir as verified origin */
			origin = this;
		}

		/* The stack now owns the reference on this */
		stack[ctr].dentry = this;
		stack[ctr].layer = lower.layer;
		ctr++;

		/*
		 * Following redirects can have security consequences: it's like
		 * a symlink into the lower layer without the permission checks.
		 * This is only a problem if the upper layer is untrusted (e.g
		 * comes from an USB drive).  This can allow a non-readable file
		 * or directory to become readable.
		 *
		 * Only following redirects when redirects are enabled disables
		 * this attack vector when not necessary.
		 */
		err = -EPERM;
		if (d.redirect && !ofs->config.redirect_follow) {
			pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
					    dentry);
			goto out_put;
		}

		if (d.stop)
			break;

		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
			poe = roe;
			/* Find the current layer on the root dentry */
			i = lower.layer->idx - 1;
		}
	}

	/*
	 * Lookup index by lower inode and verify it matches upper inode.
	 * We only trust dir index if we verified that lower dir matches
	 * origin, otherwise dir index entries may be inconsistent and we
	 * ignore them. Always lookup index of non-dir and non-upper.
	 */
	if (ctr && (!upperdentry || !d.is_dir))
		origin = stack[0].dentry;

	if (origin && ovl_indexdir(dentry->d_sb) &&
	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
		index = ovl_lookup_index(ofs, upperdentry, origin, true);
		if (IS_ERR(index)) {
			err = PTR_ERR(index);
			/* The error paths dput(index); don't pass an ERR_PTR */
			index = NULL;
			goto out_put;
		}
	}

	oe = ovl_alloc_entry(ctr);
	err = -ENOMEM;
	if (!oe)
		goto out_put;

	/* Dentry references in stack[0..ctr) move to the overlay entry */
	memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
	dentry->d_fsdata = oe;

	if (upperopaque)
		ovl_dentry_set_opaque(dentry);

	if (upperdentry)
		ovl_dentry_set_upper_alias(dentry);
	else if (index)
		upperdentry = dget(index);

	if (upperdentry || ctr) {
		struct ovl_inode_params oip = {
			.upperdentry = upperdentry,
			.lowerpath = stack,
			.index = index,
			.numlower = ctr,
		};

		inode = ovl_get_inode(dentry->d_sb, &oip);
		err = PTR_ERR(inode);
		if (IS_ERR(inode))
			goto out_free_oe;

		/*
		 * NB: handle redirected hard links when non-dir redirects
		 * become possible
		 */
		WARN_ON(OVL_I(inode)->redirect);
		OVL_I(inode)->redirect = upperredirect;
	}

	/* Success: release only what was not handed over above */
	revert_creds(old_cred);
	dput(index);
	kfree(stack);
	kfree(d.redirect);
	return d_splice_alias(inode, dentry);

	/* Error unwinding: each label releases what was acquired before it */
out_free_oe:
	dentry->d_fsdata = NULL;
	kfree(oe);
out_put:
	dput(index);
	for (i = 0; i < ctr; i++)
		dput(stack[i].dentry);
	kfree(stack);
out_put_upper:
	dput(upperdentry);
	kfree(upperredirect);
out:
	kfree(d.redirect);
	revert_creds(old_cred);
	return ERR_PTR(err);
}
1049 
1050 bool ovl_lower_positive(struct dentry *dentry)
1051 {
1052 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
1053 	const struct qstr *name = &dentry->d_name;
1054 	const struct cred *old_cred;
1055 	unsigned int i;
1056 	bool positive = false;
1057 	bool done = false;
1058 
1059 	/*
1060 	 * If dentry is negative, then lower is positive iff this is a
1061 	 * whiteout.
1062 	 */
1063 	if (!dentry->d_inode)
1064 		return ovl_dentry_is_opaque(dentry);
1065 
1066 	/* Negative upper -> positive lower */
1067 	if (!ovl_dentry_upper(dentry))
1068 		return true;
1069 
1070 	old_cred = ovl_override_creds(dentry->d_sb);
1071 	/* Positive upper -> have to look up lower to see whether it exists */
1072 	for (i = 0; !done && !positive && i < poe->numlower; i++) {
1073 		struct dentry *this;
1074 		struct dentry *lowerdir = poe->lowerstack[i].dentry;
1075 
1076 		this = lookup_one_len_unlocked(name->name, lowerdir,
1077 					       name->len);
1078 		if (IS_ERR(this)) {
1079 			switch (PTR_ERR(this)) {
1080 			case -ENOENT:
1081 			case -ENAMETOOLONG:
1082 				break;
1083 
1084 			default:
1085 				/*
1086 				 * Assume something is there, we just couldn't
1087 				 * access it.
1088 				 */
1089 				positive = true;
1090 				break;
1091 			}
1092 		} else {
1093 			if (this->d_inode) {
1094 				positive = !ovl_is_whiteout(this);
1095 				done = true;
1096 			}
1097 			dput(this);
1098 		}
1099 	}
1100 	revert_creds(old_cred);
1101 
1102 	return positive;
1103 }
1104