xref: /openbmc/linux/fs/overlayfs/namei.c (revision 47aab53331effedd3f5a6136854bd1da011f94b6)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2011 Novell Inc.
4  * Copyright (C) 2016 Red Hat, Inc.
5  */
6 
7 #include <linux/fs.h>
8 #include <linux/cred.h>
9 #include <linux/ctype.h>
10 #include <linux/namei.h>
11 #include <linux/xattr.h>
12 #include <linux/ratelimit.h>
13 #include <linux/mount.h>
14 #include <linux/exportfs.h>
15 #include "overlayfs.h"
16 
17 #include "../internal.h"	/* for vfs_path_lookup */
18 
/*
 * State shared by the per-layer lookup helpers while one name is being
 * looked up.
 */
struct ovl_lookup_data {
	/* Overlay super block; used to get at the overlay fs (OVL_FS()) */
	struct super_block *sb;
	/* Mount of the layer that is currently being looked up */
	struct vfsmount *mnt;
	/* Name to look up; points at ->redirect after a redirect was read */
	struct qstr name;
	/* Set when the last path element was found to be a directory */
	bool is_dir;
	/* Set on a whiteout, or on an opaque dir as the last path element */
	bool opaque;
	/* Set when lookup should not continue into further layers */
	bool stop;
	/* When set, a found entry is returned without checking for redirect */
	bool last;
	/* Name built from the last redirect xattr; kfree()d when replaced */
	char *redirect;
	/* Result of the metacopy xattr check on the last found non-dir */
	bool metacopy;
	/* Referring to last redirect xattr */
	bool absolute_redirect;
};
32 
33 static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d,
34 			      size_t prelen, const char *post)
35 {
36 	int res;
37 	char *buf;
38 	struct ovl_fs *ofs = OVL_FS(d->sb);
39 
40 	d->absolute_redirect = false;
41 	buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post));
42 	if (IS_ERR_OR_NULL(buf))
43 		return PTR_ERR(buf);
44 
45 	if (buf[0] == '/') {
46 		d->absolute_redirect = true;
47 		/*
48 		 * One of the ancestor path elements in an absolute path
49 		 * lookup in ovl_lookup_layer() could have been opaque and
50 		 * that will stop further lookup in lower layers (d->stop=true)
51 		 * But we have found an absolute redirect in descendant path
52 		 * element and that should force continue lookup in lower
53 		 * layers (reset d->stop).
54 		 */
55 		d->stop = false;
56 	} else {
57 		res = strlen(buf) + 1;
58 		memmove(buf + prelen, buf, res);
59 		memcpy(buf, d->name.name, prelen);
60 	}
61 
62 	strcat(buf, post);
63 	kfree(d->redirect);
64 	d->redirect = buf;
65 	d->name.name = d->redirect;
66 	d->name.len = strlen(d->redirect);
67 
68 	return 0;
69 }
70 
71 static int ovl_acceptable(void *ctx, struct dentry *dentry)
72 {
73 	/*
74 	 * A non-dir origin may be disconnected, which is fine, because
75 	 * we only need it for its unique inode number.
76 	 */
77 	if (!d_is_dir(dentry))
78 		return 1;
79 
80 	/* Don't decode a deleted empty directory */
81 	if (d_unhashed(dentry))
82 		return 0;
83 
84 	/* Check if directory belongs to the layer we are decoding from */
85 	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
86 }
87 
88 /*
89  * Check validity of an overlay file handle buffer.
90  *
91  * Return 0 for a valid file handle.
92  * Return -ENODATA for "origin unknown".
93  * Return <0 for an invalid file handle.
94  */
95 int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
96 {
97 	if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
98 		return -EINVAL;
99 
100 	if (fb->magic != OVL_FH_MAGIC)
101 		return -EINVAL;
102 
103 	/* Treat larger version and unknown flags as "origin unknown" */
104 	if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
105 		return -ENODATA;
106 
107 	/* Treat endianness mismatch as "origin unknown" */
108 	if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
109 	    (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
110 		return -ENODATA;
111 
112 	return 0;
113 }
114 
115 static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry,
116 				 enum ovl_xattr ox)
117 {
118 	int res, err;
119 	struct ovl_fh *fh = NULL;
120 
121 	res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0);
122 	if (res < 0) {
123 		if (res == -ENODATA || res == -EOPNOTSUPP)
124 			return NULL;
125 		goto fail;
126 	}
127 	/* Zero size value means "copied up but origin unknown" */
128 	if (res == 0)
129 		return NULL;
130 
131 	fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
132 	if (!fh)
133 		return ERR_PTR(-ENOMEM);
134 
135 	res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res);
136 	if (res < 0)
137 		goto fail;
138 
139 	err = ovl_check_fb_len(&fh->fb, res);
140 	if (err < 0) {
141 		if (err == -ENODATA)
142 			goto out;
143 		goto invalid;
144 	}
145 
146 	return fh;
147 
148 out:
149 	kfree(fh);
150 	return NULL;
151 
152 fail:
153 	pr_warn_ratelimited("failed to get origin (%i)\n", res);
154 	goto out;
155 invalid:
156 	pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
157 	goto out;
158 }
159 
160 struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
161 				  struct vfsmount *mnt, bool connected)
162 {
163 	struct dentry *real;
164 	int bytes;
165 
166 	if (!capable(CAP_DAC_READ_SEARCH))
167 		return NULL;
168 
169 	/*
170 	 * Make sure that the stored uuid matches the uuid of the lower
171 	 * layer where file handle will be decoded.
172 	 * In case of uuid=off option just make sure that stored uuid is null.
173 	 */
174 	if (ofs->config.uuid ? !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
175 			      !uuid_is_null(&fh->fb.uuid))
176 		return NULL;
177 
178 	bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
179 	real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
180 				  bytes >> 2, (int)fh->fb.type,
181 				  connected ? ovl_acceptable : NULL, mnt);
182 	if (IS_ERR(real)) {
183 		/*
184 		 * Treat stale file handle to lower file as "origin unknown".
185 		 * upper file handle could become stale when upper file is
186 		 * unlinked and this information is needed to handle stale
187 		 * index entries correctly.
188 		 */
189 		if (real == ERR_PTR(-ESTALE) &&
190 		    !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
191 			real = NULL;
192 		return real;
193 	}
194 
195 	if (ovl_dentry_weird(real)) {
196 		dput(real);
197 		return NULL;
198 	}
199 
200 	return real;
201 }
202 
203 static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path)
204 {
205 	return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE);
206 }
207 
208 static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
209 						   const char *name,
210 						   struct dentry *base, int len,
211 						   bool drop_negative)
212 {
213 	struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len);
214 
215 	if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
216 		if (drop_negative && ret->d_lockref.count == 1) {
217 			spin_lock(&ret->d_lock);
218 			/* Recheck condition under lock */
219 			if (d_is_negative(ret) && ret->d_lockref.count == 1)
220 				__d_drop(ret);
221 			spin_unlock(&ret->d_lock);
222 		}
223 		dput(ret);
224 		ret = ERR_PTR(-ENOENT);
225 	}
226 	return ret;
227 }
228 
/*
 * Look up one path element @name (@namelen bytes) under @base in the layer
 * described by @d and update the lookup state in @d accordingly.
 *
 * @prelen and @post are passed on to ovl_check_redirect(); an empty @post
 * means that @name is the last element of the path.
 *
 * Returns 0 and sets *@ret to the found dentry, or to NULL if nothing usable
 * was found in this layer.  Returns < 0 on error, in which case *@ret is
 * left untouched.
 */
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
			     const char *name, unsigned int namelen,
			     size_t prelen, const char *post,
			     struct dentry **ret, bool drop_negative)
{
	struct dentry *this;
	struct path path;
	int err;
	bool last_element = !post[0];

	this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
	if (IS_ERR(this)) {
		err = PTR_ERR(this);
		this = NULL;
		/* These two are reported as "not found", not as an error */
		if (err == -ENOENT || err == -ENAMETOOLONG)
			goto out;
		goto out_err;
	}

	if (ovl_dentry_weird(this)) {
		/* Don't support traversing automounts and other weirdness */
		err = -EREMOTE;
		goto out_err;
	}
	/* A whiteout hides the name: report nothing found and stop lookup */
	if (ovl_is_whiteout(this)) {
		d->stop = d->opaque = true;
		goto put_and_out;
	}
	/*
	 * This dentry should be a regular file if previous layer lookup
	 * found a metacopy dentry.
	 */
	if (last_element && d->metacopy && !d_is_reg(this)) {
		d->stop = true;
		goto put_and_out;
	}

	path.dentry = this;
	path.mnt = d->mnt;
	if (!d_can_lookup(this)) {
		/*
		 * Found something that cannot be looked up in.  It is of no
		 * use if a directory was found before or if there are more
		 * path elements to walk.
		 */
		if (d->is_dir || !last_element) {
			d->stop = true;
			goto put_and_out;
		}
		err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path);
		if (err < 0)
			goto out_err;

		/* Lookup only continues past this entry if it is a metacopy */
		d->metacopy = err;
		d->stop = !d->metacopy;
		if (!d->metacopy || d->last)
			goto out;
	} else {
		if (ovl_lookup_trap_inode(d->sb, this)) {
			/* Caught in a trap of overlapping layers */
			err = -ELOOP;
			goto out_err;
		}

		if (last_element)
			d->is_dir = true;
		/* With d->last set, opaque and redirect xattrs are not checked */
		if (d->last)
			goto out;

		if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) {
			d->stop = true;
			if (last_element)
				d->opaque = true;
			goto out;
		}
	}
	err = ovl_check_redirect(&path, d, prelen, post);
	if (err)
		goto out_err;
out:
	*ret = this;
	return 0;

put_and_out:
	/* Success, but report that nothing was found */
	dput(this);
	this = NULL;
	goto out;

out_err:
	dput(this);
	return err;
}
316 
317 static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
318 			    struct dentry **ret, bool drop_negative)
319 {
320 	/* Counting down from the end, since the prefix can change */
321 	size_t rem = d->name.len - 1;
322 	struct dentry *dentry = NULL;
323 	int err;
324 
325 	if (d->name.name[0] != '/')
326 		return ovl_lookup_single(base, d, d->name.name, d->name.len,
327 					 0, "", ret, drop_negative);
328 
329 	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
330 		const char *s = d->name.name + d->name.len - rem;
331 		const char *next = strchrnul(s, '/');
332 		size_t thislen = next - s;
333 		bool end = !next[0];
334 
335 		/* Verify we did not go off the rails */
336 		if (WARN_ON(s[-1] != '/'))
337 			return -EIO;
338 
339 		err = ovl_lookup_single(base, d, s, thislen,
340 					d->name.len - rem, next, &base,
341 					drop_negative);
342 		dput(dentry);
343 		if (err)
344 			return err;
345 		dentry = base;
346 		if (end)
347 			break;
348 
349 		rem -= thislen + 1;
350 
351 		if (WARN_ON(rem >= d->name.len))
352 			return -EIO;
353 	}
354 	*ret = dentry;
355 	return 0;
356 }
357 
358 static int ovl_lookup_data_layer(struct dentry *dentry, const char *redirect,
359 				 const struct ovl_layer *layer,
360 				 struct path *datapath)
361 {
362 	int err;
363 
364 	err = vfs_path_lookup(layer->mnt->mnt_root, layer->mnt, redirect,
365 			LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS | LOOKUP_NO_XDEV,
366 			datapath);
367 	pr_debug("lookup lowerdata (%pd2, redirect=\"%s\", layer=%d, err=%i)\n",
368 		 dentry, redirect, layer->idx, err);
369 
370 	if (err)
371 		return err;
372 
373 	err = -EREMOTE;
374 	if (ovl_dentry_weird(datapath->dentry))
375 		goto out_path_put;
376 
377 	err = -ENOENT;
378 	/* Only regular file is acceptable as lower data */
379 	if (!d_is_reg(datapath->dentry))
380 		goto out_path_put;
381 
382 	return 0;
383 
384 out_path_put:
385 	path_put(datapath);
386 
387 	return err;
388 }
389 
390 /* Lookup in data-only layers by absolute redirect to layer root */
391 static int ovl_lookup_data_layers(struct dentry *dentry, const char *redirect,
392 				  struct ovl_path *lowerdata)
393 {
394 	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
395 	const struct ovl_layer *layer;
396 	struct path datapath;
397 	int err = -ENOENT;
398 	int i;
399 
400 	layer = &ofs->layers[ofs->numlayer - ofs->numdatalayer];
401 	for (i = 0; i < ofs->numdatalayer; i++, layer++) {
402 		err = ovl_lookup_data_layer(dentry, redirect, layer, &datapath);
403 		if (!err) {
404 			mntput(datapath.mnt);
405 			lowerdata->dentry = datapath.dentry;
406 			lowerdata->layer = layer;
407 			return 0;
408 		}
409 	}
410 
411 	return err;
412 }
413 
414 int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
415 			struct dentry *upperdentry, struct ovl_path **stackp)
416 {
417 	struct dentry *origin = NULL;
418 	int i;
419 
420 	for (i = 1; i <= ovl_numlowerlayer(ofs); i++) {
421 		/*
422 		 * If lower fs uuid is not unique among lower fs we cannot match
423 		 * fh->uuid to layer.
424 		 */
425 		if (ofs->layers[i].fsid &&
426 		    ofs->layers[i].fs->bad_uuid)
427 			continue;
428 
429 		origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
430 					    connected);
431 		if (origin)
432 			break;
433 	}
434 
435 	if (!origin)
436 		return -ESTALE;
437 	else if (IS_ERR(origin))
438 		return PTR_ERR(origin);
439 
440 	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
441 	    inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
442 		goto invalid;
443 
444 	if (!*stackp)
445 		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
446 	if (!*stackp) {
447 		dput(origin);
448 		return -ENOMEM;
449 	}
450 	**stackp = (struct ovl_path){
451 		.dentry = origin,
452 		.layer = &ofs->layers[i]
453 	};
454 
455 	return 0;
456 
457 invalid:
458 	pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
459 			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
460 			    d_inode(origin)->i_mode & S_IFMT);
461 	dput(origin);
462 	return -ESTALE;
463 }
464 
465 static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
466 			    struct ovl_path **stackp)
467 {
468 	struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
469 	int err;
470 
471 	if (IS_ERR_OR_NULL(fh))
472 		return PTR_ERR(fh);
473 
474 	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
475 	kfree(fh);
476 
477 	if (err) {
478 		if (err == -ESTALE)
479 			return 0;
480 		return err;
481 	}
482 
483 	return 0;
484 }
485 
486 /*
487  * Verify that @fh matches the file handle stored in xattr @name.
488  * Return 0 on match, -ESTALE on mismatch, < 0 on error.
489  */
490 static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
491 			 enum ovl_xattr ox, const struct ovl_fh *fh)
492 {
493 	struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
494 	int err = 0;
495 
496 	if (!ofh)
497 		return -ENODATA;
498 
499 	if (IS_ERR(ofh))
500 		return PTR_ERR(ofh);
501 
502 	if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
503 		err = -ESTALE;
504 
505 	kfree(ofh);
506 	return err;
507 }
508 
509 /*
510  * Verify that @real dentry matches the file handle stored in xattr @name.
511  *
512  * If @set is true and there is no stored file handle, encode @real and store
513  * file handle in xattr @name.
514  *
515  * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
516  */
517 int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
518 		      enum ovl_xattr ox, struct dentry *real, bool is_upper,
519 		      bool set)
520 {
521 	struct inode *inode;
522 	struct ovl_fh *fh;
523 	int err;
524 
525 	fh = ovl_encode_real_fh(ofs, real, is_upper);
526 	err = PTR_ERR(fh);
527 	if (IS_ERR(fh)) {
528 		fh = NULL;
529 		goto fail;
530 	}
531 
532 	err = ovl_verify_fh(ofs, dentry, ox, fh);
533 	if (set && err == -ENODATA)
534 		err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
535 	if (err)
536 		goto fail;
537 
538 out:
539 	kfree(fh);
540 	return err;
541 
542 fail:
543 	inode = d_inode(real);
544 	pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
545 			    is_upper ? "upper" : "origin", real,
546 			    inode ? inode->i_ino : 0, err);
547 	goto out;
548 }
549 
550 /* Get upper dentry from index */
551 struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
552 			       bool connected)
553 {
554 	struct ovl_fh *fh;
555 	struct dentry *upper;
556 
557 	if (!d_is_dir(index))
558 		return dget(index);
559 
560 	fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
561 	if (IS_ERR_OR_NULL(fh))
562 		return ERR_CAST(fh);
563 
564 	upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), connected);
565 	kfree(fh);
566 
567 	if (IS_ERR_OR_NULL(upper))
568 		return upper ?: ERR_PTR(-ESTALE);
569 
570 	if (!d_is_dir(upper)) {
571 		pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
572 				    index, upper);
573 		dput(upper);
574 		return ERR_PTR(-EIO);
575 	}
576 
577 	return upper;
578 }
579 
/*
 * Verify that an index entry name matches the origin file handle stored in
 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
 * Return 0 on match, -ESTALE on mismatch or stale origin, -ENOENT for an
 * orphan index entry, < 0 on error.
 */
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
{
	struct ovl_fh *fh = NULL;
	size_t len;
	struct ovl_path origin = { };
	/* Preset, so that ovl_check_origin_fh() stores into 'origin' */
	struct ovl_path *stack = &origin;
	struct dentry *upper = NULL;
	int err;

	/* Nothing to verify on a negative index dentry */
	if (!d_inode(index))
		return 0;

	/* The name holds two hex digits for every byte of the file handle */
	err = -EINVAL;
	if (index->d_name.len < sizeof(struct ovl_fb)*2)
		goto fail;

	err = -ENOMEM;
	len = index->d_name.len / 2;
	fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
	if (!fh)
		goto fail;

	err = -EINVAL;
	if (hex2bin(fh->buf, index->d_name.name, len))
		goto fail;

	err = ovl_check_fb_len(&fh->fb, len);
	if (err)
		goto fail;

	/*
	 * Whiteout index entries are used as an indication that an exported
	 * overlay file handle should be treated as stale (i.e. after unlink
	 * of the overlay inode). These entries contain no origin xattr.
	 */
	if (ovl_is_whiteout(index))
		goto out;

	/*
	 * Verifying directory index entries are not stale is expensive, so
	 * only verify stale dir index if NFS export is enabled.
	 */
	if (d_is_dir(index) && !ofs->config.nfs_export)
		goto out;

	/*
	 * Directory index entries should have 'upper' xattr pointing to the
	 * real upper dir. Non-dir index entries are hardlinks to the upper
	 * real inode. For non-dir index, we can read the copy up origin xattr
	 * directly from the index dentry, but for dir index we first need to
	 * decode the upper directory.
	 */
	upper = ovl_index_upper(ofs, index, false);
	if (IS_ERR_OR_NULL(upper)) {
		err = PTR_ERR(upper);
		/*
		 * Directory index entries with no 'upper' xattr need to be
		 * removed. When dir index entry has a stale 'upper' xattr,
		 * we assume that upper dir was removed and we treat the dir
		 * index as orphan entry that needs to be whited out.
		 */
		if (err == -ESTALE)
			goto orphan;
		else if (!err)
			err = -ESTALE;
		goto fail;
	}

	/* The file handle from the index name must be the origin of upper */
	err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
	dput(upper);
	if (err)
		goto fail;

	/* Check if non-dir index is orphan and don't warn before cleaning it */
	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
		if (err)
			goto fail;

		if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
			goto orphan;
	}

out:
	/* origin.dentry is only set if ovl_check_origin_fh() succeeded */
	dput(origin.dentry);
	kfree(fh);
	return err;

fail:
	pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
			    index, d_inode(index)->i_mode & S_IFMT, err);
	goto out;

orphan:
	pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
			    index, d_inode(index)->i_mode & S_IFMT,
			    d_inode(index)->i_nlink);
	err = -ENOENT;
	goto out;
}
685 
686 static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
687 {
688 	char *n, *s;
689 
690 	n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
691 	if (!n)
692 		return -ENOMEM;
693 
694 	s  = bin2hex(n, fh->buf, fh->fb.len);
695 	*name = (struct qstr) QSTR_INIT(n, s - n);
696 
697 	return 0;
698 
699 }
700 
701 /*
702  * Lookup in indexdir for the index entry of a lower real inode or a copy up
703  * origin inode. The index entry name is the hex representation of the lower
704  * inode file handle.
705  *
706  * If the index dentry in negative, then either no lower aliases have been
707  * copied up yet, or aliases have been copied up in older kernels and are
708  * not indexed.
709  *
710  * If the index dentry for a copy up origin inode is positive, but points
711  * to an inode different than the upper inode, then either the upper inode
712  * has been copied up and not indexed or it was indexed, but since then
713  * index dir was cleared. Either way, that index cannot be used to identify
714  * the overlay inode.
715  */
716 int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
717 		       struct qstr *name)
718 {
719 	struct ovl_fh *fh;
720 	int err;
721 
722 	fh = ovl_encode_real_fh(ofs, origin, false);
723 	if (IS_ERR(fh))
724 		return PTR_ERR(fh);
725 
726 	err = ovl_get_index_name_fh(fh, name);
727 
728 	kfree(fh);
729 	return err;
730 }
731 
732 /* Lookup index by file handle for NFS export */
733 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
734 {
735 	struct dentry *index;
736 	struct qstr name;
737 	int err;
738 
739 	err = ovl_get_index_name_fh(fh, &name);
740 	if (err)
741 		return ERR_PTR(err);
742 
743 	index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
744 	kfree(name.name);
745 	if (IS_ERR(index)) {
746 		if (PTR_ERR(index) == -ENOENT)
747 			index = NULL;
748 		return index;
749 	}
750 
751 	if (ovl_is_whiteout(index))
752 		err = -ESTALE;
753 	else if (ovl_dentry_weird(index))
754 		err = -EIO;
755 	else
756 		return index;
757 
758 	dput(index);
759 	return ERR_PTR(err);
760 }
761 
/*
 * Look up the index entry of @origin in the index dir.
 *
 * Returns the index dentry, or NULL if there is no index entry or if the
 * index entry does not point to the inode of @upper.  Returns
 * ERR_PTR(-ESTALE) for a whiteout index when called with !@verify,
 * ERR_PTR(-EIO) for a bad index, or the ERR_PTR() of a failed name
 * encoding or lookup.
 */
struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
				struct dentry *origin, bool verify)
{
	struct dentry *index;
	struct inode *inode;
	struct qstr name;
	bool is_dir = d_is_dir(origin);
	int err;

	err = ovl_get_index_name(ofs, origin, &name);
	if (err)
		return ERR_PTR(err);

	index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), name.name,
					     ofs->indexdir, name.len);
	if (IS_ERR(index)) {
		err = PTR_ERR(index);
		/* No index entry is not an error */
		if (err == -ENOENT) {
			index = NULL;
			goto out;
		}
		pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
				    d_inode(origin)->i_ino, name.len, name.name,
				    err);
		/* index still holds the lookup error, which is returned */
		goto out;
	}

	inode = d_inode(index);
	if (ovl_is_whiteout(index) && !verify) {
		/*
		 * When index lookup is called with !verify for decoding an
		 * overlay file handle, a whiteout index implies that decode
		 * should treat file handle as stale and no need to print a
		 * warning about it.
		 */
		dput(index);
		index = ERR_PTR(-ESTALE);
		goto out;
	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
		   inode_wrong_type(inode, d_inode(origin)->i_mode)) {
		/*
		 * Index should always be of the same file type as origin
		 * except for the case of a whiteout index. A whiteout
		 * index should only exist if all lower aliases have been
		 * unlinked, which means that finding a lower origin on lookup
		 * whose index is a whiteout should be treated as an error.
		 */
		pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
				    index, d_inode(index)->i_mode & S_IFMT,
				    d_inode(origin)->i_mode & S_IFMT);
		goto fail;
	} else if (is_dir && verify) {
		if (!upper) {
			pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
					    origin, index);
			goto fail;
		}

		/* Verify that dir index 'upper' xattr points to upper dir */
		err = ovl_verify_upper(ofs, index, upper, false);
		if (err) {
			if (err == -ESTALE) {
				pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
						    upper, origin, index);
			}
			goto fail;
		}
	} else if (upper && d_inode(upper) != inode) {
		/* Index points to another inode: act as if there is no index */
		goto out_dput;
	}
out:
	kfree(name.name);
	return index;

out_dput:
	dput(index);
	index = NULL;
	goto out;

fail:
	dput(index);
	index = ERR_PTR(-EIO);
	goto out;
}
847 
848 /*
849  * Returns next layer in stack starting from top.
850  * Returns -1 if this is the last layer.
851  */
852 int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
853 {
854 	struct ovl_entry *oe = OVL_E(dentry);
855 	struct ovl_path *lowerstack = ovl_lowerstack(oe);
856 
857 	BUG_ON(idx < 0);
858 	if (idx == 0) {
859 		ovl_path_upper(dentry, path);
860 		if (path->dentry)
861 			return ovl_numlower(oe) ? 1 : -1;
862 		idx++;
863 	}
864 	BUG_ON(idx > ovl_numlower(oe));
865 	path->dentry = lowerstack[idx - 1].dentry;
866 	path->mnt = lowerstack[idx - 1].layer->mnt;
867 
868 	return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
869 }
870 
871 /* Fix missing 'origin' xattr */
872 static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
873 			  struct dentry *lower, struct dentry *upper)
874 {
875 	int err;
876 
877 	if (ovl_check_origin_xattr(ofs, upper))
878 		return 0;
879 
880 	err = ovl_want_write(dentry);
881 	if (err)
882 		return err;
883 
884 	err = ovl_set_origin(ofs, lower, upper);
885 	if (!err)
886 		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
887 
888 	ovl_drop_write(dentry);
889 	return err;
890 }
891 
892 /* Lazy lookup of lowerdata */
893 int ovl_maybe_lookup_lowerdata(struct dentry *dentry)
894 {
895 	struct inode *inode = d_inode(dentry);
896 	const char *redirect = ovl_lowerdata_redirect(inode);
897 	struct ovl_path datapath = {};
898 	const struct cred *old_cred;
899 	int err;
900 
901 	if (!redirect || ovl_dentry_lowerdata(dentry))
902 		return 0;
903 
904 	if (redirect[0] != '/')
905 		return -EIO;
906 
907 	err = ovl_inode_lock_interruptible(inode);
908 	if (err)
909 		return err;
910 
911 	err = 0;
912 	/* Someone got here before us? */
913 	if (ovl_dentry_lowerdata(dentry))
914 		goto out;
915 
916 	old_cred = ovl_override_creds(dentry->d_sb);
917 	err = ovl_lookup_data_layers(dentry, redirect, &datapath);
918 	revert_creds(old_cred);
919 	if (err)
920 		goto out_err;
921 
922 	err = ovl_dentry_set_lowerdata(dentry, &datapath);
923 	if (err)
924 		goto out_err;
925 
926 out:
927 	ovl_inode_unlock(inode);
928 	dput(datapath.dentry);
929 
930 	return err;
931 
932 out_err:
933 	pr_warn_ratelimited("lazy lowerdata lookup failed (%pd2, err=%i)\n",
934 			    dentry, err);
935 	goto out;
936 }
937 
938 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
939 			  unsigned int flags)
940 {
941 	struct ovl_entry *oe = NULL;
942 	const struct cred *old_cred;
943 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
944 	struct ovl_entry *poe = OVL_E(dentry->d_parent);
945 	struct ovl_entry *roe = OVL_E(dentry->d_sb->s_root);
946 	struct ovl_path *stack = NULL, *origin_path = NULL;
947 	struct dentry *upperdir, *upperdentry = NULL;
948 	struct dentry *origin = NULL;
949 	struct dentry *index = NULL;
950 	unsigned int ctr = 0;
951 	struct inode *inode = NULL;
952 	bool upperopaque = false;
953 	char *upperredirect = NULL;
954 	struct dentry *this;
955 	unsigned int i;
956 	int err;
957 	bool uppermetacopy = false;
958 	struct ovl_lookup_data d = {
959 		.sb = dentry->d_sb,
960 		.name = dentry->d_name,
961 		.is_dir = false,
962 		.opaque = false,
963 		.stop = false,
964 		.last = ovl_redirect_follow(ofs) ? false : !ovl_numlower(poe),
965 		.redirect = NULL,
966 		.metacopy = false,
967 	};
968 
969 	if (dentry->d_name.len > ofs->namelen)
970 		return ERR_PTR(-ENAMETOOLONG);
971 
972 	old_cred = ovl_override_creds(dentry->d_sb);
973 	upperdir = ovl_dentry_upper(dentry->d_parent);
974 	if (upperdir) {
975 		d.mnt = ovl_upper_mnt(ofs);
976 		err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
977 		if (err)
978 			goto out;
979 
980 		if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
981 			dput(upperdentry);
982 			err = -EREMOTE;
983 			goto out;
984 		}
985 		if (upperdentry && !d.is_dir) {
986 			/*
987 			 * Lookup copy up origin by decoding origin file handle.
988 			 * We may get a disconnected dentry, which is fine,
989 			 * because we only need to hold the origin inode in
990 			 * cache and use its inode number.  We may even get a
991 			 * connected dentry, that is not under any of the lower
992 			 * layers root.  That is also fine for using it's inode
993 			 * number - it's the same as if we held a reference
994 			 * to a dentry in lower layer that was moved under us.
995 			 */
996 			err = ovl_check_origin(ofs, upperdentry, &origin_path);
997 			if (err)
998 				goto out_put_upper;
999 
1000 			if (d.metacopy)
1001 				uppermetacopy = true;
1002 		}
1003 
1004 		if (d.redirect) {
1005 			err = -ENOMEM;
1006 			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
1007 			if (!upperredirect)
1008 				goto out_put_upper;
1009 			if (d.redirect[0] == '/')
1010 				poe = roe;
1011 		}
1012 		upperopaque = d.opaque;
1013 	}
1014 
1015 	if (!d.stop && ovl_numlower(poe)) {
1016 		err = -ENOMEM;
1017 		stack = ovl_stack_alloc(ofs->numlayer - 1);
1018 		if (!stack)
1019 			goto out_put_upper;
1020 	}
1021 
1022 	for (i = 0; !d.stop && i < ovl_numlower(poe); i++) {
1023 		struct ovl_path lower = ovl_lowerstack(poe)[i];
1024 
1025 		if (!ovl_redirect_follow(ofs))
1026 			d.last = i == ovl_numlower(poe) - 1;
1027 		else if (d.is_dir || !ofs->numdatalayer)
1028 			d.last = lower.layer->idx == ovl_numlower(roe);
1029 
1030 		d.mnt = lower.layer->mnt;
1031 		err = ovl_lookup_layer(lower.dentry, &d, &this, false);
1032 		if (err)
1033 			goto out_put;
1034 
1035 		if (!this)
1036 			continue;
1037 
1038 		if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
1039 			dput(this);
1040 			err = -EPERM;
1041 			pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
1042 			goto out_put;
1043 		}
1044 
1045 		/*
1046 		 * If no origin fh is stored in upper of a merge dir, store fh
1047 		 * of lower dir and set upper parent "impure".
1048 		 */
1049 		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
1050 			err = ovl_fix_origin(ofs, dentry, this, upperdentry);
1051 			if (err) {
1052 				dput(this);
1053 				goto out_put;
1054 			}
1055 		}
1056 
1057 		/*
1058 		 * When "verify_lower" feature is enabled, do not merge with a
1059 		 * lower dir that does not match a stored origin xattr. In any
1060 		 * case, only verified origin is used for index lookup.
1061 		 *
1062 		 * For non-dir dentry, if index=on, then ensure origin
1063 		 * matches the dentry found using path based lookup,
1064 		 * otherwise error out.
1065 		 */
1066 		if (upperdentry && !ctr &&
1067 		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
1068 		     (!d.is_dir && ofs->config.index && origin_path))) {
1069 			err = ovl_verify_origin(ofs, upperdentry, this, false);
1070 			if (err) {
1071 				dput(this);
1072 				if (d.is_dir)
1073 					break;
1074 				goto out_put;
1075 			}
1076 			origin = this;
1077 		}
1078 
1079 		if (d.metacopy && ctr) {
1080 			/*
1081 			 * Do not store intermediate metacopy dentries in
1082 			 * lower chain, except top most lower metacopy dentry.
1083 			 * Continue the loop so that if there is an absolute
1084 			 * redirect on this dentry, poe can be reset to roe.
1085 			 */
1086 			dput(this);
1087 			this = NULL;
1088 		} else {
1089 			stack[ctr].dentry = this;
1090 			stack[ctr].layer = lower.layer;
1091 			ctr++;
1092 		}
1093 
1094 		/*
1095 		 * Following redirects can have security consequences: it's like
1096 		 * a symlink into the lower layer without the permission checks.
1097 		 * This is only a problem if the upper layer is untrusted (e.g
1098 		 * comes from an USB drive).  This can allow a non-readable file
1099 		 * or directory to become readable.
1100 		 *
1101 		 * Only following redirects when redirects are enabled disables
1102 		 * this attack vector when not necessary.
1103 		 */
1104 		err = -EPERM;
1105 		if (d.redirect && !ovl_redirect_follow(ofs)) {
1106 			pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
1107 					    dentry);
1108 			goto out_put;
1109 		}
1110 
1111 		if (d.stop)
1112 			break;
1113 
1114 		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
1115 			poe = roe;
1116 			/* Find the current layer on the root dentry */
1117 			i = lower.layer->idx - 1;
1118 		}
1119 	}
1120 
1121 	/* Defer lookup of lowerdata in data-only layers to first access */
1122 	if (d.metacopy && ctr && ofs->numdatalayer && d.absolute_redirect) {
1123 		d.metacopy = false;
1124 		ctr++;
1125 	}
1126 
1127 	/*
1128 	 * For regular non-metacopy upper dentries, there is no lower
1129 	 * path based lookup, hence ctr will be zero. If a dentry is found
1130 	 * using ORIGIN xattr on upper, install it in stack.
1131 	 *
1132 	 * For metacopy dentry, path based lookup will find lower dentries.
1133 	 * Just make sure a corresponding data dentry has been found.
1134 	 */
1135 	if (d.metacopy || (uppermetacopy && !ctr)) {
1136 		pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
1137 				    dentry);
1138 		err = -EIO;
1139 		goto out_put;
1140 	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
1141 		if (WARN_ON(stack != NULL)) {
1142 			err = -EIO;
1143 			goto out_put;
1144 		}
1145 		stack = origin_path;
1146 		ctr = 1;
1147 		origin = origin_path->dentry;
1148 		origin_path = NULL;
1149 	}
1150 
1151 	/*
1152 	 * Always lookup index if there is no-upperdentry.
1153 	 *
1154 	 * For the case of upperdentry, we have set origin by now if it
1155 	 * needed to be set. There are basically three cases.
1156 	 *
1157 	 * For directories, lookup index by lower inode and verify it matches
1158 	 * upper inode. We only trust dir index if we verified that lower dir
1159 	 * matches origin, otherwise dir index entries may be inconsistent
1160 	 * and we ignore them.
1161 	 *
1162 	 * For regular upper, we already set origin if upper had ORIGIN
1163 	 * xattr. There is no verification though as there is no path
1164 	 * based dentry lookup in lower in this case.
1165 	 *
1166 	 * For metacopy upper, we set a verified origin already if index
1167 	 * is enabled and if upper had an ORIGIN xattr.
1168 	 *
1169 	 */
1170 	if (!upperdentry && ctr)
1171 		origin = stack[0].dentry;
1172 
1173 	if (origin && ovl_indexdir(dentry->d_sb) &&
1174 	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
1175 		index = ovl_lookup_index(ofs, upperdentry, origin, true);
1176 		if (IS_ERR(index)) {
1177 			err = PTR_ERR(index);
1178 			index = NULL;
1179 			goto out_put;
1180 		}
1181 	}
1182 
1183 	if (ctr) {
1184 		oe = ovl_alloc_entry(ctr);
1185 		err = -ENOMEM;
1186 		if (!oe)
1187 			goto out_put;
1188 
1189 		ovl_stack_cpy(ovl_lowerstack(oe), stack, ctr);
1190 	}
1191 
1192 	if (upperopaque)
1193 		ovl_dentry_set_opaque(dentry);
1194 
1195 	if (upperdentry)
1196 		ovl_dentry_set_upper_alias(dentry);
1197 	else if (index) {
1198 		struct path upperpath = {
1199 			.dentry = upperdentry = dget(index),
1200 			.mnt = ovl_upper_mnt(ofs),
1201 		};
1202 
1203 		/*
1204 		 * It's safe to assign upperredirect here: the previous
1205 		 * assignment happens only if upperdentry is non-NULL, and
1206 		 * this one only if upperdentry is NULL.
1207 		 */
1208 		upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
1209 		if (IS_ERR(upperredirect)) {
1210 			err = PTR_ERR(upperredirect);
1211 			upperredirect = NULL;
1212 			goto out_free_oe;
1213 		}
1214 		err = ovl_check_metacopy_xattr(ofs, &upperpath);
1215 		if (err < 0)
1216 			goto out_free_oe;
1217 		uppermetacopy = err;
1218 	}
1219 
1220 	if (upperdentry || ctr) {
1221 		struct ovl_inode_params oip = {
1222 			.upperdentry = upperdentry,
1223 			.oe = oe,
1224 			.index = index,
1225 			.redirect = upperredirect,
1226 		};
1227 
1228 		/* Store lowerdata redirect for lazy lookup */
1229 		if (ctr > 1 && !d.is_dir && !stack[ctr - 1].dentry) {
1230 			oip.lowerdata_redirect = d.redirect;
1231 			d.redirect = NULL;
1232 		}
1233 		inode = ovl_get_inode(dentry->d_sb, &oip);
1234 		err = PTR_ERR(inode);
1235 		if (IS_ERR(inode))
1236 			goto out_free_oe;
1237 		if (upperdentry && !uppermetacopy)
1238 			ovl_set_flag(OVL_UPPERDATA, inode);
1239 	}
1240 
1241 	ovl_dentry_init_reval(dentry, upperdentry, OVL_I_E(inode));
1242 
1243 	revert_creds(old_cred);
1244 	if (origin_path) {
1245 		dput(origin_path->dentry);
1246 		kfree(origin_path);
1247 	}
1248 	dput(index);
1249 	ovl_stack_free(stack, ctr);
1250 	kfree(d.redirect);
1251 	return d_splice_alias(inode, dentry);
1252 
1253 out_free_oe:
1254 	ovl_free_entry(oe);
1255 out_put:
1256 	dput(index);
1257 	ovl_stack_free(stack, ctr);
1258 out_put_upper:
1259 	if (origin_path) {
1260 		dput(origin_path->dentry);
1261 		kfree(origin_path);
1262 	}
1263 	dput(upperdentry);
1264 	kfree(upperredirect);
1265 out:
1266 	kfree(d.redirect);
1267 	revert_creds(old_cred);
1268 	return ERR_PTR(err);
1269 }
1270 
1271 bool ovl_lower_positive(struct dentry *dentry)
1272 {
1273 	struct ovl_entry *poe = OVL_E(dentry->d_parent);
1274 	const struct qstr *name = &dentry->d_name;
1275 	const struct cred *old_cred;
1276 	unsigned int i;
1277 	bool positive = false;
1278 	bool done = false;
1279 
1280 	/*
1281 	 * If dentry is negative, then lower is positive iff this is a
1282 	 * whiteout.
1283 	 */
1284 	if (!dentry->d_inode)
1285 		return ovl_dentry_is_opaque(dentry);
1286 
1287 	/* Negative upper -> positive lower */
1288 	if (!ovl_dentry_upper(dentry))
1289 		return true;
1290 
1291 	old_cred = ovl_override_creds(dentry->d_sb);
1292 	/* Positive upper -> have to look up lower to see whether it exists */
1293 	for (i = 0; !done && !positive && i < ovl_numlower(poe); i++) {
1294 		struct dentry *this;
1295 		struct ovl_path *parentpath = &ovl_lowerstack(poe)[i];
1296 
1297 		this = lookup_one_positive_unlocked(
1298 				mnt_idmap(parentpath->layer->mnt),
1299 				name->name, parentpath->dentry, name->len);
1300 		if (IS_ERR(this)) {
1301 			switch (PTR_ERR(this)) {
1302 			case -ENOENT:
1303 			case -ENAMETOOLONG:
1304 				break;
1305 
1306 			default:
1307 				/*
1308 				 * Assume something is there, we just couldn't
1309 				 * access it.
1310 				 */
1311 				positive = true;
1312 				break;
1313 			}
1314 		} else {
1315 			positive = !ovl_is_whiteout(this);
1316 			done = true;
1317 			dput(this);
1318 		}
1319 	}
1320 	revert_creds(old_cred);
1321 
1322 	return positive;
1323 }
1324