xref: /openbmc/linux/fs/overlayfs/file.c (revision 4e95bc26)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 Red Hat, Inc.
4  */
5 
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/mount.h>
9 #include <linux/xattr.h>
10 #include <linux/uio.h>
11 #include <linux/uaccess.h>
12 #include "overlayfs.h"
13 
/*
 * Classify @realinode relative to the overlay @inode, for debug output.
 *
 * Returns 'l' when @realinode is not the overlay inode's upper inode,
 * 'u' when it is the upper inode and ovl_has_upperdata() is true, and
 * 'm' when it is the upper inode but ovl_has_upperdata() is false.
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
23 
24 static struct file *ovl_open_realfile(const struct file *file,
25 				      struct inode *realinode)
26 {
27 	struct inode *inode = file_inode(file);
28 	struct file *realfile;
29 	const struct cred *old_cred;
30 	int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY;
31 
32 	old_cred = ovl_override_creds(inode->i_sb);
33 	realfile = open_with_fake_path(&file->f_path, flags, realinode,
34 				       current_cred());
35 	revert_creds(old_cred);
36 
37 	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
38 		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
39 		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
40 
41 	return realfile;
42 }
43 
44 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
45 
46 static int ovl_change_flags(struct file *file, unsigned int flags)
47 {
48 	struct inode *inode = file_inode(file);
49 	int err;
50 
51 	/* No atime modificaton on underlying */
52 	flags |= O_NOATIME | FMODE_NONOTIFY;
53 
54 	/* If some flag changed that cannot be changed then something's amiss */
55 	if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
56 		return -EIO;
57 
58 	flags &= OVL_SETFL_MASK;
59 
60 	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
61 		return -EPERM;
62 
63 	if (flags & O_DIRECT) {
64 		if (!file->f_mapping->a_ops ||
65 		    !file->f_mapping->a_ops->direct_IO)
66 			return -EINVAL;
67 	}
68 
69 	if (file->f_op->check_flags) {
70 		err = file->f_op->check_flags(flags);
71 		if (err)
72 			return err;
73 	}
74 
75 	spin_lock(&file->f_lock);
76 	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
77 	spin_unlock(&file->f_lock);
78 
79 	return 0;
80 }
81 
82 static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
83 			       bool allow_meta)
84 {
85 	struct inode *inode = file_inode(file);
86 	struct inode *realinode;
87 
88 	real->flags = 0;
89 	real->file = file->private_data;
90 
91 	if (allow_meta)
92 		realinode = ovl_inode_real(inode);
93 	else
94 		realinode = ovl_inode_realdata(inode);
95 
96 	/* Has it been copied up since we'd opened it? */
97 	if (unlikely(file_inode(real->file) != realinode)) {
98 		real->flags = FDPUT_FPUT;
99 		real->file = ovl_open_realfile(file, realinode);
100 
101 		return PTR_ERR_OR_ZERO(real->file);
102 	}
103 
104 	/* Did the flags change since open? */
105 	if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
106 		return ovl_change_flags(real->file, file->f_flags);
107 
108 	return 0;
109 }
110 
111 static int ovl_real_fdget(const struct file *file, struct fd *real)
112 {
113 	return ovl_real_fdget_meta(file, real, false);
114 }
115 
116 static int ovl_open(struct inode *inode, struct file *file)
117 {
118 	struct file *realfile;
119 	int err;
120 
121 	err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
122 	if (err)
123 		return err;
124 
125 	/* No longer need these flags, so don't pass them on to underlying fs */
126 	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
127 
128 	realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
129 	if (IS_ERR(realfile))
130 		return PTR_ERR(realfile);
131 
132 	file->private_data = realfile;
133 
134 	return 0;
135 }
136 
137 static int ovl_release(struct inode *inode, struct file *file)
138 {
139 	fput(file->private_data);
140 
141 	return 0;
142 }
143 
144 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
145 {
146 	struct inode *inode = file_inode(file);
147 	struct fd real;
148 	const struct cred *old_cred;
149 	ssize_t ret;
150 
151 	/*
152 	 * The two special cases below do not need to involve real fs,
153 	 * so we can optimizing concurrent callers.
154 	 */
155 	if (offset == 0) {
156 		if (whence == SEEK_CUR)
157 			return file->f_pos;
158 
159 		if (whence == SEEK_SET)
160 			return vfs_setpos(file, 0, 0);
161 	}
162 
163 	ret = ovl_real_fdget(file, &real);
164 	if (ret)
165 		return ret;
166 
167 	/*
168 	 * Overlay file f_pos is the master copy that is preserved
169 	 * through copy up and modified on read/write, but only real
170 	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
171 	 * limitations that are more strict than ->s_maxbytes for specific
172 	 * files, so we use the real file to perform seeks.
173 	 */
174 	inode_lock(inode);
175 	real.file->f_pos = file->f_pos;
176 
177 	old_cred = ovl_override_creds(inode->i_sb);
178 	ret = vfs_llseek(real.file, offset, whence);
179 	revert_creds(old_cred);
180 
181 	file->f_pos = real.file->f_pos;
182 	inode_unlock(inode);
183 
184 	fdput(real);
185 
186 	return ret;
187 }
188 
189 static void ovl_file_accessed(struct file *file)
190 {
191 	struct inode *inode, *upperinode;
192 
193 	if (file->f_flags & O_NOATIME)
194 		return;
195 
196 	inode = file_inode(file);
197 	upperinode = ovl_inode_upper(inode);
198 
199 	if (!upperinode)
200 		return;
201 
202 	if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
203 	     !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
204 		inode->i_mtime = upperinode->i_mtime;
205 		inode->i_ctime = upperinode->i_ctime;
206 	}
207 
208 	touch_atime(&file->f_path);
209 }
210 
211 static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
212 {
213 	int ifl = iocb->ki_flags;
214 	rwf_t flags = 0;
215 
216 	if (ifl & IOCB_NOWAIT)
217 		flags |= RWF_NOWAIT;
218 	if (ifl & IOCB_HIPRI)
219 		flags |= RWF_HIPRI;
220 	if (ifl & IOCB_DSYNC)
221 		flags |= RWF_DSYNC;
222 	if (ifl & IOCB_SYNC)
223 		flags |= RWF_SYNC;
224 
225 	return flags;
226 }
227 
228 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
229 {
230 	struct file *file = iocb->ki_filp;
231 	struct fd real;
232 	const struct cred *old_cred;
233 	ssize_t ret;
234 
235 	if (!iov_iter_count(iter))
236 		return 0;
237 
238 	ret = ovl_real_fdget(file, &real);
239 	if (ret)
240 		return ret;
241 
242 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
243 	ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
244 			    ovl_iocb_to_rwf(iocb));
245 	revert_creds(old_cred);
246 
247 	ovl_file_accessed(file);
248 
249 	fdput(real);
250 
251 	return ret;
252 }
253 
254 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
255 {
256 	struct file *file = iocb->ki_filp;
257 	struct inode *inode = file_inode(file);
258 	struct fd real;
259 	const struct cred *old_cred;
260 	ssize_t ret;
261 
262 	if (!iov_iter_count(iter))
263 		return 0;
264 
265 	inode_lock(inode);
266 	/* Update mode */
267 	ovl_copyattr(ovl_inode_real(inode), inode);
268 	ret = file_remove_privs(file);
269 	if (ret)
270 		goto out_unlock;
271 
272 	ret = ovl_real_fdget(file, &real);
273 	if (ret)
274 		goto out_unlock;
275 
276 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
277 	file_start_write(real.file);
278 	ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
279 			     ovl_iocb_to_rwf(iocb));
280 	file_end_write(real.file);
281 	revert_creds(old_cred);
282 
283 	/* Update size */
284 	ovl_copyattr(ovl_inode_real(inode), inode);
285 
286 	fdput(real);
287 
288 out_unlock:
289 	inode_unlock(inode);
290 
291 	return ret;
292 }
293 
294 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
295 {
296 	struct fd real;
297 	const struct cred *old_cred;
298 	int ret;
299 
300 	ret = ovl_real_fdget_meta(file, &real, !datasync);
301 	if (ret)
302 		return ret;
303 
304 	/* Don't sync lower file for fear of receiving EROFS error */
305 	if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
306 		old_cred = ovl_override_creds(file_inode(file)->i_sb);
307 		ret = vfs_fsync_range(real.file, start, end, datasync);
308 		revert_creds(old_cred);
309 	}
310 
311 	fdput(real);
312 
313 	return ret;
314 }
315 
316 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
317 {
318 	struct file *realfile = file->private_data;
319 	const struct cred *old_cred;
320 	int ret;
321 
322 	if (!realfile->f_op->mmap)
323 		return -ENODEV;
324 
325 	if (WARN_ON(file != vma->vm_file))
326 		return -EIO;
327 
328 	vma->vm_file = get_file(realfile);
329 
330 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
331 	ret = call_mmap(vma->vm_file, vma);
332 	revert_creds(old_cred);
333 
334 	if (ret) {
335 		/* Drop reference count from new vm_file value */
336 		fput(realfile);
337 	} else {
338 		/* Drop reference count from previous vm_file value */
339 		fput(file);
340 	}
341 
342 	ovl_file_accessed(file);
343 
344 	return ret;
345 }
346 
347 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
348 {
349 	struct inode *inode = file_inode(file);
350 	struct fd real;
351 	const struct cred *old_cred;
352 	int ret;
353 
354 	ret = ovl_real_fdget(file, &real);
355 	if (ret)
356 		return ret;
357 
358 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
359 	ret = vfs_fallocate(real.file, mode, offset, len);
360 	revert_creds(old_cred);
361 
362 	/* Update size */
363 	ovl_copyattr(ovl_inode_real(inode), inode);
364 
365 	fdput(real);
366 
367 	return ret;
368 }
369 
370 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
371 {
372 	struct fd real;
373 	const struct cred *old_cred;
374 	int ret;
375 
376 	ret = ovl_real_fdget(file, &real);
377 	if (ret)
378 		return ret;
379 
380 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
381 	ret = vfs_fadvise(real.file, offset, len, advice);
382 	revert_creds(old_cred);
383 
384 	fdput(real);
385 
386 	return ret;
387 }
388 
389 static long ovl_real_ioctl(struct file *file, unsigned int cmd,
390 			   unsigned long arg)
391 {
392 	struct fd real;
393 	const struct cred *old_cred;
394 	long ret;
395 
396 	ret = ovl_real_fdget(file, &real);
397 	if (ret)
398 		return ret;
399 
400 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
401 	ret = vfs_ioctl(real.file, cmd, arg);
402 	revert_creds(old_cred);
403 
404 	fdput(real);
405 
406 	return ret;
407 }
408 
409 static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
410 				unsigned long arg, unsigned int iflags)
411 {
412 	long ret;
413 	struct inode *inode = file_inode(file);
414 	unsigned int old_iflags;
415 
416 	if (!inode_owner_or_capable(inode))
417 		return -EACCES;
418 
419 	ret = mnt_want_write_file(file);
420 	if (ret)
421 		return ret;
422 
423 	inode_lock(inode);
424 
425 	/* Check the capability before cred override */
426 	ret = -EPERM;
427 	old_iflags = READ_ONCE(inode->i_flags);
428 	if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) &&
429 	    !capable(CAP_LINUX_IMMUTABLE))
430 		goto unlock;
431 
432 	ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
433 	if (ret)
434 		goto unlock;
435 
436 	ret = ovl_real_ioctl(file, cmd, arg);
437 
438 	ovl_copyflags(ovl_inode_real(inode), inode);
439 unlock:
440 	inode_unlock(inode);
441 
442 	mnt_drop_write_file(file);
443 
444 	return ret;
445 
446 }
447 
448 static unsigned int ovl_fsflags_to_iflags(unsigned int flags)
449 {
450 	unsigned int iflags = 0;
451 
452 	if (flags & FS_SYNC_FL)
453 		iflags |= S_SYNC;
454 	if (flags & FS_APPEND_FL)
455 		iflags |= S_APPEND;
456 	if (flags & FS_IMMUTABLE_FL)
457 		iflags |= S_IMMUTABLE;
458 	if (flags & FS_NOATIME_FL)
459 		iflags |= S_NOATIME;
460 
461 	return iflags;
462 }
463 
464 static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
465 				  unsigned long arg)
466 {
467 	unsigned int flags;
468 
469 	if (get_user(flags, (int __user *) arg))
470 		return -EFAULT;
471 
472 	return ovl_ioctl_set_flags(file, cmd, arg,
473 				   ovl_fsflags_to_iflags(flags));
474 }
475 
476 static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags)
477 {
478 	unsigned int iflags = 0;
479 
480 	if (xflags & FS_XFLAG_SYNC)
481 		iflags |= S_SYNC;
482 	if (xflags & FS_XFLAG_APPEND)
483 		iflags |= S_APPEND;
484 	if (xflags & FS_XFLAG_IMMUTABLE)
485 		iflags |= S_IMMUTABLE;
486 	if (xflags & FS_XFLAG_NOATIME)
487 		iflags |= S_NOATIME;
488 
489 	return iflags;
490 }
491 
492 static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
493 				   unsigned long arg)
494 {
495 	struct fsxattr fa;
496 
497 	memset(&fa, 0, sizeof(fa));
498 	if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
499 		return -EFAULT;
500 
501 	return ovl_ioctl_set_flags(file, cmd, arg,
502 				   ovl_fsxflags_to_iflags(fa.fsx_xflags));
503 }
504 
505 static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
506 {
507 	long ret;
508 
509 	switch (cmd) {
510 	case FS_IOC_GETFLAGS:
511 	case FS_IOC_FSGETXATTR:
512 		ret = ovl_real_ioctl(file, cmd, arg);
513 		break;
514 
515 	case FS_IOC_SETFLAGS:
516 		ret = ovl_ioctl_set_fsflags(file, cmd, arg);
517 		break;
518 
519 	case FS_IOC_FSSETXATTR:
520 		ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
521 		break;
522 
523 	default:
524 		ret = -ENOTTY;
525 	}
526 
527 	return ret;
528 }
529 
530 static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
531 			     unsigned long arg)
532 {
533 	switch (cmd) {
534 	case FS_IOC32_GETFLAGS:
535 		cmd = FS_IOC_GETFLAGS;
536 		break;
537 
538 	case FS_IOC32_SETFLAGS:
539 		cmd = FS_IOC_SETFLAGS;
540 		break;
541 
542 	default:
543 		return -ENOIOCTLCMD;
544 	}
545 
546 	return ovl_ioctl(file, cmd, arg);
547 }
548 
/* Selects which VFS primitive ovl_copyfile() runs on the real files. */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
554 
555 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
556 			    struct file *file_out, loff_t pos_out,
557 			    loff_t len, unsigned int flags, enum ovl_copyop op)
558 {
559 	struct inode *inode_out = file_inode(file_out);
560 	struct fd real_in, real_out;
561 	const struct cred *old_cred;
562 	loff_t ret;
563 
564 	ret = ovl_real_fdget(file_out, &real_out);
565 	if (ret)
566 		return ret;
567 
568 	ret = ovl_real_fdget(file_in, &real_in);
569 	if (ret) {
570 		fdput(real_out);
571 		return ret;
572 	}
573 
574 	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
575 	switch (op) {
576 	case OVL_COPY:
577 		ret = vfs_copy_file_range(real_in.file, pos_in,
578 					  real_out.file, pos_out, len, flags);
579 		break;
580 
581 	case OVL_CLONE:
582 		ret = vfs_clone_file_range(real_in.file, pos_in,
583 					   real_out.file, pos_out, len, flags);
584 		break;
585 
586 	case OVL_DEDUPE:
587 		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
588 						real_out.file, pos_out, len,
589 						flags);
590 		break;
591 	}
592 	revert_creds(old_cred);
593 
594 	/* Update size */
595 	ovl_copyattr(ovl_inode_real(inode_out), inode_out);
596 
597 	fdput(real_in);
598 	fdput(real_out);
599 
600 	return ret;
601 }
602 
603 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
604 				   struct file *file_out, loff_t pos_out,
605 				   size_t len, unsigned int flags)
606 {
607 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
608 			    OVL_COPY);
609 }
610 
611 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
612 				   struct file *file_out, loff_t pos_out,
613 				   loff_t len, unsigned int remap_flags)
614 {
615 	enum ovl_copyop op;
616 
617 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
618 		return -EINVAL;
619 
620 	if (remap_flags & REMAP_FILE_DEDUP)
621 		op = OVL_DEDUPE;
622 	else
623 		op = OVL_CLONE;
624 
625 	/*
626 	 * Don't copy up because of a dedupe request, this wouldn't make sense
627 	 * most of the time (data would be duplicated instead of deduplicated).
628 	 */
629 	if (op == OVL_DEDUPE &&
630 	    (!ovl_inode_upper(file_inode(file_in)) ||
631 	     !ovl_inode_upper(file_inode(file_out))))
632 		return -EPERM;
633 
634 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
635 			    remap_flags, op);
636 }
637 
638 const struct file_operations ovl_file_operations = {
639 	.open		= ovl_open,
640 	.release	= ovl_release,
641 	.llseek		= ovl_llseek,
642 	.read_iter	= ovl_read_iter,
643 	.write_iter	= ovl_write_iter,
644 	.fsync		= ovl_fsync,
645 	.mmap		= ovl_mmap,
646 	.fallocate	= ovl_fallocate,
647 	.fadvise	= ovl_fadvise,
648 	.unlocked_ioctl	= ovl_ioctl,
649 	.compat_ioctl	= ovl_compat_ioctl,
650 
651 	.copy_file_range	= ovl_copy_file_range,
652 	.remap_file_range	= ovl_remap_file_range,
653 };
654