xref: /openbmc/linux/fs/fuse/dir.c (revision 901181b7)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/sched.h>
15 #include <linux/namei.h>
16 #include <linux/slab.h>
17 #include <linux/xattr.h>
18 #include <linux/iversion.h>
19 #include <linux/posix_acl.h>
20 
21 static void fuse_advise_use_readdirplus(struct inode *dir)
22 {
23 	struct fuse_inode *fi = get_fuse_inode(dir);
24 
25 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
26 }
27 
28 #if BITS_PER_LONG >= 64
29 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
30 {
31 	entry->d_fsdata = (void *) time;
32 }
33 
34 static inline u64 fuse_dentry_time(const struct dentry *entry)
35 {
36 	return (u64)entry->d_fsdata;
37 }
38 
39 #else
40 union fuse_dentry {
41 	u64 time;
42 	struct rcu_head rcu;
43 };
44 
45 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
46 {
47 	((union fuse_dentry *) dentry->d_fsdata)->time = time;
48 }
49 
50 static inline u64 fuse_dentry_time(const struct dentry *entry)
51 {
52 	return ((union fuse_dentry *) entry->d_fsdata)->time;
53 }
54 #endif
55 
56 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
57 {
58 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
59 	bool delete = !time && fc->delete_stale;
60 	/*
61 	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
62 	 * Don't care about races, either way it's just an optimization
63 	 */
64 	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
65 	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
66 		spin_lock(&dentry->d_lock);
67 		if (!delete)
68 			dentry->d_flags &= ~DCACHE_OP_DELETE;
69 		else
70 			dentry->d_flags |= DCACHE_OP_DELETE;
71 		spin_unlock(&dentry->d_lock);
72 	}
73 
74 	__fuse_dentry_settime(dentry, time);
75 }
76 
77 /*
78  * FUSE caches dentries and attributes with separate timeout.  The
79  * time in jiffies until the dentry/attributes are valid is stored in
80  * dentry->d_fsdata and fuse_inode->i_time respectively.
81  */
82 
83 /*
84  * Calculate the time in jiffies until a dentry/attributes are valid
85  */
86 static u64 time_to_jiffies(u64 sec, u32 nsec)
87 {
88 	if (sec || nsec) {
89 		struct timespec64 ts = {
90 			sec,
91 			min_t(u32, nsec, NSEC_PER_SEC - 1)
92 		};
93 
94 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
95 	} else
96 		return 0;
97 }
98 
99 /*
100  * Set dentry and possibly attribute timeouts from the lookup/mk*
101  * replies
102  */
103 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
104 {
105 	fuse_dentry_settime(entry,
106 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
107 }
108 
109 static u64 attr_timeout(struct fuse_attr_out *o)
110 {
111 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
112 }
113 
114 u64 entry_attr_timeout(struct fuse_entry_out *o)
115 {
116 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
117 }
118 
119 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
120 {
121 	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
122 }
123 
124 /*
125  * Mark the attributes as stale, so that at the next call to
126  * ->getattr() they will be fetched from userspace
127  */
128 void fuse_invalidate_attr(struct inode *inode)
129 {
130 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
131 }
132 
133 static void fuse_dir_changed(struct inode *dir)
134 {
135 	fuse_invalidate_attr(dir);
136 	inode_maybe_inc_iversion(dir, false);
137 }
138 
139 /**
140  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
141  * atime is not used.
142  */
143 void fuse_invalidate_atime(struct inode *inode)
144 {
145 	if (!IS_RDONLY(inode))
146 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
147 }
148 
149 /*
150  * Just mark the entry as stale, so that a next attempt to look it up
151  * will result in a new lookup call to userspace
152  *
153  * This is called when a dentry is about to become negative and the
154  * timeout is unknown (unlink, rmdir, rename and in some cases
155  * lookup)
156  */
157 void fuse_invalidate_entry_cache(struct dentry *entry)
158 {
159 	fuse_dentry_settime(entry, 0);
160 }
161 
/*
 * Like fuse_invalidate_entry_cache(), but d_invalidate() is called on the
 * dentry first in an attempt to get it out of the hash as well
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_dentry_settime(entry, 0);
}
171 
172 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
173 			     u64 nodeid, const struct qstr *name,
174 			     struct fuse_entry_out *outarg)
175 {
176 	memset(outarg, 0, sizeof(struct fuse_entry_out));
177 	args->opcode = FUSE_LOOKUP;
178 	args->nodeid = nodeid;
179 	args->in_numargs = 1;
180 	args->in_args[0].size = name->len + 1;
181 	args->in_args[0].value = name->name;
182 	args->out_numargs = 1;
183 	args->out_args[0].size = sizeof(struct fuse_entry_out);
184 	args->out_args[0].value = outarg;
185 }
186 
187 /*
188  * Check whether the dentry is still valid
189  *
190  * If the entry validity timeout has expired and the dentry is
191  * positive, try to redo the lookup.  If the lookup results in a
192  * different inode, then let the VFS invalidate the dentry and redo
193  * the lookup once more.  If the lookup results in the same inode,
194  * then refresh the attributes, timeouts and mark the dentry valid.
195  */
196 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
197 {
198 	struct inode *inode;
199 	struct dentry *parent;
200 	struct fuse_mount *fm;
201 	struct fuse_inode *fi;
202 	int ret;
203 
204 	inode = d_inode_rcu(entry);
205 	if (inode && fuse_is_bad(inode))
206 		goto invalid;
207 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
208 		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) {
209 		struct fuse_entry_out outarg;
210 		FUSE_ARGS(args);
211 		struct fuse_forget_link *forget;
212 		u64 attr_version;
213 
214 		/* For negative dentries, always do a fresh lookup */
215 		if (!inode)
216 			goto invalid;
217 
218 		ret = -ECHILD;
219 		if (flags & LOOKUP_RCU)
220 			goto out;
221 
222 		fm = get_fuse_mount(inode);
223 
224 		forget = fuse_alloc_forget();
225 		ret = -ENOMEM;
226 		if (!forget)
227 			goto out;
228 
229 		attr_version = fuse_get_attr_version(fm->fc);
230 
231 		parent = dget_parent(entry);
232 		fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
233 				 &entry->d_name, &outarg);
234 		ret = fuse_simple_request(fm, &args);
235 		dput(parent);
236 		/* Zero nodeid is same as -ENOENT */
237 		if (!ret && !outarg.nodeid)
238 			ret = -ENOENT;
239 		if (!ret) {
240 			fi = get_fuse_inode(inode);
241 			if (outarg.nodeid != get_node_id(inode) ||
242 			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
243 				fuse_queue_forget(fm->fc, forget,
244 						  outarg.nodeid, 1);
245 				goto invalid;
246 			}
247 			spin_lock(&fi->lock);
248 			fi->nlookup++;
249 			spin_unlock(&fi->lock);
250 		}
251 		kfree(forget);
252 		if (ret == -ENOMEM)
253 			goto out;
254 		if (ret || fuse_invalid_attr(&outarg.attr) ||
255 		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
256 			goto invalid;
257 
258 		forget_all_cached_acls(inode);
259 		fuse_change_attributes(inode, &outarg.attr,
260 				       entry_attr_timeout(&outarg),
261 				       attr_version);
262 		fuse_change_entry_timeout(entry, &outarg);
263 	} else if (inode) {
264 		fi = get_fuse_inode(inode);
265 		if (flags & LOOKUP_RCU) {
266 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
267 				return -ECHILD;
268 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
269 			parent = dget_parent(entry);
270 			fuse_advise_use_readdirplus(d_inode(parent));
271 			dput(parent);
272 		}
273 	}
274 	ret = 1;
275 out:
276 	return ret;
277 
278 invalid:
279 	ret = 0;
280 	goto out;
281 }
282 
283 #if BITS_PER_LONG < 64
284 static int fuse_dentry_init(struct dentry *dentry)
285 {
286 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
287 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
288 
289 	return dentry->d_fsdata ? 0 : -ENOMEM;
290 }
291 static void fuse_dentry_release(struct dentry *dentry)
292 {
293 	union fuse_dentry *fd = dentry->d_fsdata;
294 
295 	kfree_rcu(fd, rcu);
296 }
297 #endif
298 
299 static int fuse_dentry_delete(const struct dentry *dentry)
300 {
301 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
302 }
303 
304 /*
305  * Create a fuse_mount object with a new superblock (with path->dentry
306  * as the root), and return that mount so it can be auto-mounted on
307  * @path.
308  */
309 static struct vfsmount *fuse_dentry_automount(struct path *path)
310 {
311 	struct fs_context *fsc;
312 	struct vfsmount *mnt;
313 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
314 
315 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
316 	if (IS_ERR(fsc))
317 		return ERR_CAST(fsc);
318 
319 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
320 	fsc->fs_private = mp_fi;
321 
322 	/* Create the submount */
323 	mnt = fc_mount(fsc);
324 	if (!IS_ERR(mnt))
325 		mntget(mnt);
326 
327 	put_fs_context(fsc);
328 	return mnt;
329 }
330 
331 const struct dentry_operations fuse_dentry_operations = {
332 	.d_revalidate	= fuse_dentry_revalidate,
333 	.d_delete	= fuse_dentry_delete,
334 #if BITS_PER_LONG < 64
335 	.d_init		= fuse_dentry_init,
336 	.d_release	= fuse_dentry_release,
337 #endif
338 	.d_automount	= fuse_dentry_automount,
339 };
340 
341 const struct dentry_operations fuse_root_dentry_operations = {
342 #if BITS_PER_LONG < 64
343 	.d_init		= fuse_dentry_init,
344 	.d_release	= fuse_dentry_release,
345 #endif
346 };
347 
/*
 * Returns 1 if the file type bits of mode @m describe a regular file,
 * directory, symlink, character or block device, FIFO or socket, and 0
 * otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
353 
354 bool fuse_invalid_attr(struct fuse_attr *attr)
355 {
356 	return !fuse_valid_type(attr->mode) ||
357 		attr->size > LLONG_MAX;
358 }
359 
360 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
361 		     struct fuse_entry_out *outarg, struct inode **inode)
362 {
363 	struct fuse_mount *fm = get_fuse_mount_super(sb);
364 	FUSE_ARGS(args);
365 	struct fuse_forget_link *forget;
366 	u64 attr_version;
367 	int err;
368 
369 	*inode = NULL;
370 	err = -ENAMETOOLONG;
371 	if (name->len > FUSE_NAME_MAX)
372 		goto out;
373 
374 
375 	forget = fuse_alloc_forget();
376 	err = -ENOMEM;
377 	if (!forget)
378 		goto out;
379 
380 	attr_version = fuse_get_attr_version(fm->fc);
381 
382 	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
383 	err = fuse_simple_request(fm, &args);
384 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
385 	if (err || !outarg->nodeid)
386 		goto out_put_forget;
387 
388 	err = -EIO;
389 	if (!outarg->nodeid)
390 		goto out_put_forget;
391 	if (fuse_invalid_attr(&outarg->attr))
392 		goto out_put_forget;
393 
394 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
395 			   &outarg->attr, entry_attr_timeout(outarg),
396 			   attr_version);
397 	err = -ENOMEM;
398 	if (!*inode) {
399 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
400 		goto out;
401 	}
402 	err = 0;
403 
404  out_put_forget:
405 	kfree(forget);
406  out:
407 	return err;
408 }
409 
410 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
411 				  unsigned int flags)
412 {
413 	int err;
414 	struct fuse_entry_out outarg;
415 	struct inode *inode;
416 	struct dentry *newent;
417 	bool outarg_valid = true;
418 	bool locked;
419 
420 	if (fuse_is_bad(dir))
421 		return ERR_PTR(-EIO);
422 
423 	locked = fuse_lock_inode(dir);
424 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
425 			       &outarg, &inode);
426 	fuse_unlock_inode(dir, locked);
427 	if (err == -ENOENT) {
428 		outarg_valid = false;
429 		err = 0;
430 	}
431 	if (err)
432 		goto out_err;
433 
434 	err = -EIO;
435 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
436 		goto out_iput;
437 
438 	newent = d_splice_alias(inode, entry);
439 	err = PTR_ERR(newent);
440 	if (IS_ERR(newent))
441 		goto out_err;
442 
443 	entry = newent ? newent : entry;
444 	if (outarg_valid)
445 		fuse_change_entry_timeout(entry, &outarg);
446 	else
447 		fuse_invalidate_entry_cache(entry);
448 
449 	if (inode)
450 		fuse_advise_use_readdirplus(dir);
451 	return newent;
452 
453  out_iput:
454 	iput(inode);
455  out_err:
456 	return ERR_PTR(err);
457 }
458 
459 /*
460  * Atomic create+open operation
461  *
462  * If the filesystem doesn't support this, then fall back to separate
463  * 'mknod' + 'open' requests.
464  */
465 static int fuse_create_open(struct inode *dir, struct dentry *entry,
466 			    struct file *file, unsigned int flags,
467 			    umode_t mode)
468 {
469 	int err;
470 	struct inode *inode;
471 	struct fuse_mount *fm = get_fuse_mount(dir);
472 	FUSE_ARGS(args);
473 	struct fuse_forget_link *forget;
474 	struct fuse_create_in inarg;
475 	struct fuse_open_out outopen;
476 	struct fuse_entry_out outentry;
477 	struct fuse_inode *fi;
478 	struct fuse_file *ff;
479 
480 	/* Userspace expects S_IFREG in create mode */
481 	BUG_ON((mode & S_IFMT) != S_IFREG);
482 
483 	forget = fuse_alloc_forget();
484 	err = -ENOMEM;
485 	if (!forget)
486 		goto out_err;
487 
488 	err = -ENOMEM;
489 	ff = fuse_file_alloc(fm);
490 	if (!ff)
491 		goto out_put_forget_req;
492 
493 	if (!fm->fc->dont_mask)
494 		mode &= ~current_umask();
495 
496 	flags &= ~O_NOCTTY;
497 	memset(&inarg, 0, sizeof(inarg));
498 	memset(&outentry, 0, sizeof(outentry));
499 	inarg.flags = flags;
500 	inarg.mode = mode;
501 	inarg.umask = current_umask();
502 
503 	if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
504 	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
505 		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
506 	}
507 
508 	args.opcode = FUSE_CREATE;
509 	args.nodeid = get_node_id(dir);
510 	args.in_numargs = 2;
511 	args.in_args[0].size = sizeof(inarg);
512 	args.in_args[0].value = &inarg;
513 	args.in_args[1].size = entry->d_name.len + 1;
514 	args.in_args[1].value = entry->d_name.name;
515 	args.out_numargs = 2;
516 	args.out_args[0].size = sizeof(outentry);
517 	args.out_args[0].value = &outentry;
518 	args.out_args[1].size = sizeof(outopen);
519 	args.out_args[1].value = &outopen;
520 	err = fuse_simple_request(fm, &args);
521 	if (err)
522 		goto out_free_ff;
523 
524 	err = -EIO;
525 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
526 	    fuse_invalid_attr(&outentry.attr))
527 		goto out_free_ff;
528 
529 	ff->fh = outopen.fh;
530 	ff->nodeid = outentry.nodeid;
531 	ff->open_flags = outopen.open_flags;
532 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
533 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
534 	if (!inode) {
535 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
536 		fuse_sync_release(NULL, ff, flags);
537 		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
538 		err = -ENOMEM;
539 		goto out_err;
540 	}
541 	kfree(forget);
542 	d_instantiate(entry, inode);
543 	fuse_change_entry_timeout(entry, &outentry);
544 	fuse_dir_changed(dir);
545 	err = finish_open(file, entry, generic_file_open);
546 	if (err) {
547 		fi = get_fuse_inode(inode);
548 		fuse_sync_release(fi, ff, flags);
549 	} else {
550 		file->private_data = ff;
551 		fuse_finish_open(inode, file);
552 	}
553 	return err;
554 
555 out_free_ff:
556 	fuse_file_free(ff);
557 out_put_forget_req:
558 	kfree(forget);
559 out_err:
560 	return err;
561 }
562 
563 static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *,
564 		      umode_t, dev_t);
565 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
566 			    struct file *file, unsigned flags,
567 			    umode_t mode)
568 {
569 	int err;
570 	struct fuse_conn *fc = get_fuse_conn(dir);
571 	struct dentry *res = NULL;
572 
573 	if (fuse_is_bad(dir))
574 		return -EIO;
575 
576 	if (d_in_lookup(entry)) {
577 		res = fuse_lookup(dir, entry, 0);
578 		if (IS_ERR(res))
579 			return PTR_ERR(res);
580 
581 		if (res)
582 			entry = res;
583 	}
584 
585 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
586 		goto no_open;
587 
588 	/* Only creates */
589 	file->f_mode |= FMODE_CREATED;
590 
591 	if (fc->no_create)
592 		goto mknod;
593 
594 	err = fuse_create_open(dir, entry, file, flags, mode);
595 	if (err == -ENOSYS) {
596 		fc->no_create = 1;
597 		goto mknod;
598 	}
599 out_dput:
600 	dput(res);
601 	return err;
602 
603 mknod:
604 	err = fuse_mknod(&init_user_ns, dir, entry, mode, 0);
605 	if (err)
606 		goto out_dput;
607 no_open:
608 	return finish_no_open(file, res);
609 }
610 
611 /*
612  * Code shared between mknod, mkdir, symlink and link
613  */
614 static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
615 			    struct inode *dir, struct dentry *entry,
616 			    umode_t mode)
617 {
618 	struct fuse_entry_out outarg;
619 	struct inode *inode;
620 	struct dentry *d;
621 	int err;
622 	struct fuse_forget_link *forget;
623 
624 	if (fuse_is_bad(dir))
625 		return -EIO;
626 
627 	forget = fuse_alloc_forget();
628 	if (!forget)
629 		return -ENOMEM;
630 
631 	memset(&outarg, 0, sizeof(outarg));
632 	args->nodeid = get_node_id(dir);
633 	args->out_numargs = 1;
634 	args->out_args[0].size = sizeof(outarg);
635 	args->out_args[0].value = &outarg;
636 	err = fuse_simple_request(fm, args);
637 	if (err)
638 		goto out_put_forget_req;
639 
640 	err = -EIO;
641 	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
642 		goto out_put_forget_req;
643 
644 	if ((outarg.attr.mode ^ mode) & S_IFMT)
645 		goto out_put_forget_req;
646 
647 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
648 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
649 	if (!inode) {
650 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
651 		return -ENOMEM;
652 	}
653 	kfree(forget);
654 
655 	d_drop(entry);
656 	d = d_splice_alias(inode, entry);
657 	if (IS_ERR(d))
658 		return PTR_ERR(d);
659 
660 	if (d) {
661 		fuse_change_entry_timeout(d, &outarg);
662 		dput(d);
663 	} else {
664 		fuse_change_entry_timeout(entry, &outarg);
665 	}
666 	fuse_dir_changed(dir);
667 	return 0;
668 
669  out_put_forget_req:
670 	kfree(forget);
671 	return err;
672 }
673 
674 static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir,
675 		      struct dentry *entry, umode_t mode, dev_t rdev)
676 {
677 	struct fuse_mknod_in inarg;
678 	struct fuse_mount *fm = get_fuse_mount(dir);
679 	FUSE_ARGS(args);
680 
681 	if (!fm->fc->dont_mask)
682 		mode &= ~current_umask();
683 
684 	memset(&inarg, 0, sizeof(inarg));
685 	inarg.mode = mode;
686 	inarg.rdev = new_encode_dev(rdev);
687 	inarg.umask = current_umask();
688 	args.opcode = FUSE_MKNOD;
689 	args.in_numargs = 2;
690 	args.in_args[0].size = sizeof(inarg);
691 	args.in_args[0].value = &inarg;
692 	args.in_args[1].size = entry->d_name.len + 1;
693 	args.in_args[1].value = entry->d_name.name;
694 	return create_new_entry(fm, &args, dir, entry, mode);
695 }
696 
697 static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir,
698 		       struct dentry *entry, umode_t mode, bool excl)
699 {
700 	return fuse_mknod(&init_user_ns, dir, entry, mode, 0);
701 }
702 
703 static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
704 		      struct dentry *entry, umode_t mode)
705 {
706 	struct fuse_mkdir_in inarg;
707 	struct fuse_mount *fm = get_fuse_mount(dir);
708 	FUSE_ARGS(args);
709 
710 	if (!fm->fc->dont_mask)
711 		mode &= ~current_umask();
712 
713 	memset(&inarg, 0, sizeof(inarg));
714 	inarg.mode = mode;
715 	inarg.umask = current_umask();
716 	args.opcode = FUSE_MKDIR;
717 	args.in_numargs = 2;
718 	args.in_args[0].size = sizeof(inarg);
719 	args.in_args[0].value = &inarg;
720 	args.in_args[1].size = entry->d_name.len + 1;
721 	args.in_args[1].value = entry->d_name.name;
722 	return create_new_entry(fm, &args, dir, entry, S_IFDIR);
723 }
724 
725 static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir,
726 			struct dentry *entry, const char *link)
727 {
728 	struct fuse_mount *fm = get_fuse_mount(dir);
729 	unsigned len = strlen(link) + 1;
730 	FUSE_ARGS(args);
731 
732 	args.opcode = FUSE_SYMLINK;
733 	args.in_numargs = 2;
734 	args.in_args[0].size = entry->d_name.len + 1;
735 	args.in_args[0].value = entry->d_name.name;
736 	args.in_args[1].size = len;
737 	args.in_args[1].value = link;
738 	return create_new_entry(fm, &args, dir, entry, S_IFLNK);
739 }
740 
741 void fuse_flush_time_update(struct inode *inode)
742 {
743 	int err = sync_inode_metadata(inode, 1);
744 
745 	mapping_set_error(inode->i_mapping, err);
746 }
747 
748 static void fuse_update_ctime_in_cache(struct inode *inode)
749 {
750 	if (!IS_NOCMTIME(inode)) {
751 		inode->i_ctime = current_time(inode);
752 		mark_inode_dirty_sync(inode);
753 		fuse_flush_time_update(inode);
754 	}
755 }
756 
757 void fuse_update_ctime(struct inode *inode)
758 {
759 	fuse_invalidate_attr_mask(inode, STATX_CTIME);
760 	fuse_update_ctime_in_cache(inode);
761 }
762 
763 static void fuse_entry_unlinked(struct dentry *entry)
764 {
765 	struct inode *inode = d_inode(entry);
766 	struct fuse_conn *fc = get_fuse_conn(inode);
767 	struct fuse_inode *fi = get_fuse_inode(inode);
768 
769 	spin_lock(&fi->lock);
770 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
771 	/*
772 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
773 	 * happen if userspace filesystem is careless.  It would be
774 	 * difficult to enforce correct nlink usage so just ignore this
775 	 * condition here
776 	 */
777 	if (S_ISDIR(inode->i_mode))
778 		clear_nlink(inode);
779 	else if (inode->i_nlink > 0)
780 		drop_nlink(inode);
781 	spin_unlock(&fi->lock);
782 	fuse_invalidate_entry_cache(entry);
783 	fuse_update_ctime(inode);
784 }
785 
786 static int fuse_unlink(struct inode *dir, struct dentry *entry)
787 {
788 	int err;
789 	struct fuse_mount *fm = get_fuse_mount(dir);
790 	FUSE_ARGS(args);
791 
792 	if (fuse_is_bad(dir))
793 		return -EIO;
794 
795 	args.opcode = FUSE_UNLINK;
796 	args.nodeid = get_node_id(dir);
797 	args.in_numargs = 1;
798 	args.in_args[0].size = entry->d_name.len + 1;
799 	args.in_args[0].value = entry->d_name.name;
800 	err = fuse_simple_request(fm, &args);
801 	if (!err) {
802 		fuse_dir_changed(dir);
803 		fuse_entry_unlinked(entry);
804 	} else if (err == -EINTR)
805 		fuse_invalidate_entry(entry);
806 	return err;
807 }
808 
809 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
810 {
811 	int err;
812 	struct fuse_mount *fm = get_fuse_mount(dir);
813 	FUSE_ARGS(args);
814 
815 	if (fuse_is_bad(dir))
816 		return -EIO;
817 
818 	args.opcode = FUSE_RMDIR;
819 	args.nodeid = get_node_id(dir);
820 	args.in_numargs = 1;
821 	args.in_args[0].size = entry->d_name.len + 1;
822 	args.in_args[0].value = entry->d_name.name;
823 	err = fuse_simple_request(fm, &args);
824 	if (!err) {
825 		fuse_dir_changed(dir);
826 		fuse_entry_unlinked(entry);
827 	} else if (err == -EINTR)
828 		fuse_invalidate_entry(entry);
829 	return err;
830 }
831 
832 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
833 			      struct inode *newdir, struct dentry *newent,
834 			      unsigned int flags, int opcode, size_t argsize)
835 {
836 	int err;
837 	struct fuse_rename2_in inarg;
838 	struct fuse_mount *fm = get_fuse_mount(olddir);
839 	FUSE_ARGS(args);
840 
841 	memset(&inarg, 0, argsize);
842 	inarg.newdir = get_node_id(newdir);
843 	inarg.flags = flags;
844 	args.opcode = opcode;
845 	args.nodeid = get_node_id(olddir);
846 	args.in_numargs = 3;
847 	args.in_args[0].size = argsize;
848 	args.in_args[0].value = &inarg;
849 	args.in_args[1].size = oldent->d_name.len + 1;
850 	args.in_args[1].value = oldent->d_name.name;
851 	args.in_args[2].size = newent->d_name.len + 1;
852 	args.in_args[2].value = newent->d_name.name;
853 	err = fuse_simple_request(fm, &args);
854 	if (!err) {
855 		/* ctime changes */
856 		fuse_update_ctime(d_inode(oldent));
857 
858 		if (flags & RENAME_EXCHANGE)
859 			fuse_update_ctime(d_inode(newent));
860 
861 		fuse_dir_changed(olddir);
862 		if (olddir != newdir)
863 			fuse_dir_changed(newdir);
864 
865 		/* newent will end up negative */
866 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
867 			fuse_entry_unlinked(newent);
868 	} else if (err == -EINTR) {
869 		/* If request was interrupted, DEITY only knows if the
870 		   rename actually took place.  If the invalidation
871 		   fails (e.g. some process has CWD under the renamed
872 		   directory), then there can be inconsistency between
873 		   the dcache and the real filesystem.  Tough luck. */
874 		fuse_invalidate_entry(oldent);
875 		if (d_really_is_positive(newent))
876 			fuse_invalidate_entry(newent);
877 	}
878 
879 	return err;
880 }
881 
882 static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir,
883 			struct dentry *oldent, struct inode *newdir,
884 			struct dentry *newent, unsigned int flags)
885 {
886 	struct fuse_conn *fc = get_fuse_conn(olddir);
887 	int err;
888 
889 	if (fuse_is_bad(olddir))
890 		return -EIO;
891 
892 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
893 		return -EINVAL;
894 
895 	if (flags) {
896 		if (fc->no_rename2 || fc->minor < 23)
897 			return -EINVAL;
898 
899 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
900 					 FUSE_RENAME2,
901 					 sizeof(struct fuse_rename2_in));
902 		if (err == -ENOSYS) {
903 			fc->no_rename2 = 1;
904 			err = -EINVAL;
905 		}
906 	} else {
907 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
908 					 FUSE_RENAME,
909 					 sizeof(struct fuse_rename_in));
910 	}
911 
912 	return err;
913 }
914 
915 static int fuse_link(struct dentry *entry, struct inode *newdir,
916 		     struct dentry *newent)
917 {
918 	int err;
919 	struct fuse_link_in inarg;
920 	struct inode *inode = d_inode(entry);
921 	struct fuse_mount *fm = get_fuse_mount(inode);
922 	FUSE_ARGS(args);
923 
924 	memset(&inarg, 0, sizeof(inarg));
925 	inarg.oldnodeid = get_node_id(inode);
926 	args.opcode = FUSE_LINK;
927 	args.in_numargs = 2;
928 	args.in_args[0].size = sizeof(inarg);
929 	args.in_args[0].value = &inarg;
930 	args.in_args[1].size = newent->d_name.len + 1;
931 	args.in_args[1].value = newent->d_name.name;
932 	err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
933 	if (!err)
934 		fuse_update_ctime_in_cache(inode);
935 	else if (err == -EINTR)
936 		fuse_invalidate_attr(inode);
937 
938 	return err;
939 }
940 
941 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
942 			  struct kstat *stat)
943 {
944 	unsigned int blkbits;
945 	struct fuse_conn *fc = get_fuse_conn(inode);
946 
947 	stat->dev = inode->i_sb->s_dev;
948 	stat->ino = attr->ino;
949 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
950 	stat->nlink = attr->nlink;
951 	stat->uid = make_kuid(fc->user_ns, attr->uid);
952 	stat->gid = make_kgid(fc->user_ns, attr->gid);
953 	stat->rdev = inode->i_rdev;
954 	stat->atime.tv_sec = attr->atime;
955 	stat->atime.tv_nsec = attr->atimensec;
956 	stat->mtime.tv_sec = attr->mtime;
957 	stat->mtime.tv_nsec = attr->mtimensec;
958 	stat->ctime.tv_sec = attr->ctime;
959 	stat->ctime.tv_nsec = attr->ctimensec;
960 	stat->size = attr->size;
961 	stat->blocks = attr->blocks;
962 
963 	if (attr->blksize != 0)
964 		blkbits = ilog2(attr->blksize);
965 	else
966 		blkbits = inode->i_sb->s_blocksize_bits;
967 
968 	stat->blksize = 1 << blkbits;
969 }
970 
971 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
972 			   struct file *file)
973 {
974 	int err;
975 	struct fuse_getattr_in inarg;
976 	struct fuse_attr_out outarg;
977 	struct fuse_mount *fm = get_fuse_mount(inode);
978 	FUSE_ARGS(args);
979 	u64 attr_version;
980 
981 	attr_version = fuse_get_attr_version(fm->fc);
982 
983 	memset(&inarg, 0, sizeof(inarg));
984 	memset(&outarg, 0, sizeof(outarg));
985 	/* Directories have separate file-handle space */
986 	if (file && S_ISREG(inode->i_mode)) {
987 		struct fuse_file *ff = file->private_data;
988 
989 		inarg.getattr_flags |= FUSE_GETATTR_FH;
990 		inarg.fh = ff->fh;
991 	}
992 	args.opcode = FUSE_GETATTR;
993 	args.nodeid = get_node_id(inode);
994 	args.in_numargs = 1;
995 	args.in_args[0].size = sizeof(inarg);
996 	args.in_args[0].value = &inarg;
997 	args.out_numargs = 1;
998 	args.out_args[0].size = sizeof(outarg);
999 	args.out_args[0].value = &outarg;
1000 	err = fuse_simple_request(fm, &args);
1001 	if (!err) {
1002 		if (fuse_invalid_attr(&outarg.attr) ||
1003 		    inode_wrong_type(inode, outarg.attr.mode)) {
1004 			fuse_make_bad(inode);
1005 			err = -EIO;
1006 		} else {
1007 			fuse_change_attributes(inode, &outarg.attr,
1008 					       attr_timeout(&outarg),
1009 					       attr_version);
1010 			if (stat)
1011 				fuse_fillattr(inode, &outarg.attr, stat);
1012 		}
1013 	}
1014 	return err;
1015 }
1016 
1017 static int fuse_update_get_attr(struct inode *inode, struct file *file,
1018 				struct kstat *stat, u32 request_mask,
1019 				unsigned int flags)
1020 {
1021 	struct fuse_inode *fi = get_fuse_inode(inode);
1022 	int err = 0;
1023 	bool sync;
1024 	u32 inval_mask = READ_ONCE(fi->inval_mask);
1025 	u32 cache_mask = fuse_get_cache_mask(inode);
1026 
1027 	if (flags & AT_STATX_FORCE_SYNC)
1028 		sync = true;
1029 	else if (flags & AT_STATX_DONT_SYNC)
1030 		sync = false;
1031 	else if (request_mask & inval_mask & ~cache_mask)
1032 		sync = true;
1033 	else
1034 		sync = time_before64(fi->i_time, get_jiffies_64());
1035 
1036 	if (sync) {
1037 		forget_all_cached_acls(inode);
1038 		err = fuse_do_getattr(inode, stat, file);
1039 	} else if (stat) {
1040 		generic_fillattr(&init_user_ns, inode, stat);
1041 		stat->mode = fi->orig_i_mode;
1042 		stat->ino = fi->orig_ino;
1043 	}
1044 
1045 	return err;
1046 }
1047 
1048 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
1049 {
1050 	return fuse_update_get_attr(inode, file, NULL, mask, 0);
1051 }
1052 
1053 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1054 			     u64 child_nodeid, struct qstr *name)
1055 {
1056 	int err = -ENOTDIR;
1057 	struct inode *parent;
1058 	struct dentry *dir;
1059 	struct dentry *entry;
1060 
1061 	parent = fuse_ilookup(fc, parent_nodeid, NULL);
1062 	if (!parent)
1063 		return -ENOENT;
1064 
1065 	inode_lock_nested(parent, I_MUTEX_PARENT);
1066 	if (!S_ISDIR(parent->i_mode))
1067 		goto unlock;
1068 
1069 	err = -ENOENT;
1070 	dir = d_find_alias(parent);
1071 	if (!dir)
1072 		goto unlock;
1073 
1074 	name->hash = full_name_hash(dir, name->name, name->len);
1075 	entry = d_lookup(dir, name);
1076 	dput(dir);
1077 	if (!entry)
1078 		goto unlock;
1079 
1080 	fuse_dir_changed(parent);
1081 	fuse_invalidate_entry(entry);
1082 
1083 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
1084 		inode_lock(d_inode(entry));
1085 		if (get_node_id(d_inode(entry)) != child_nodeid) {
1086 			err = -ENOENT;
1087 			goto badentry;
1088 		}
1089 		if (d_mountpoint(entry)) {
1090 			err = -EBUSY;
1091 			goto badentry;
1092 		}
1093 		if (d_is_dir(entry)) {
1094 			shrink_dcache_parent(entry);
1095 			if (!simple_empty(entry)) {
1096 				err = -ENOTEMPTY;
1097 				goto badentry;
1098 			}
1099 			d_inode(entry)->i_flags |= S_DEAD;
1100 		}
1101 		dont_mount(entry);
1102 		clear_nlink(d_inode(entry));
1103 		err = 0;
1104  badentry:
1105 		inode_unlock(d_inode(entry));
1106 		if (!err)
1107 			d_delete(entry);
1108 	} else {
1109 		err = 0;
1110 	}
1111 	dput(entry);
1112 
1113  unlock:
1114 	inode_unlock(parent);
1115 	iput(parent);
1116 	return err;
1117 }
1118 
1119 /*
1120  * Calling into a user-controlled filesystem gives the filesystem
1121  * daemon ptrace-like capabilities over the current process.  This
1122  * means, that the filesystem daemon is able to record the exact
1123  * filesystem operations performed, and can also control the behavior
1124  * of the requester process in otherwise impossible ways.  For example
1125  * it can delay the operation for arbitrary length of time allowing
1126  * DoS against the requester.
1127  *
1128  * For this reason only those processes can call into the filesystem,
1129  * for which the owner of the mount has ptrace privilege.  This
1130  * excludes processes started by other users, suid or sgid processes.
1131  */
1132 int fuse_allow_current_process(struct fuse_conn *fc)
1133 {
1134 	const struct cred *cred;
1135 
1136 	if (fc->allow_other)
1137 		return current_in_userns(fc->user_ns);
1138 
1139 	cred = current_cred();
1140 	if (uid_eq(cred->euid, fc->user_id) &&
1141 	    uid_eq(cred->suid, fc->user_id) &&
1142 	    uid_eq(cred->uid,  fc->user_id) &&
1143 	    gid_eq(cred->egid, fc->group_id) &&
1144 	    gid_eq(cred->sgid, fc->group_id) &&
1145 	    gid_eq(cred->gid,  fc->group_id))
1146 		return 1;
1147 
1148 	return 0;
1149 }
1150 
1151 static int fuse_access(struct inode *inode, int mask)
1152 {
1153 	struct fuse_mount *fm = get_fuse_mount(inode);
1154 	FUSE_ARGS(args);
1155 	struct fuse_access_in inarg;
1156 	int err;
1157 
1158 	BUG_ON(mask & MAY_NOT_BLOCK);
1159 
1160 	if (fm->fc->no_access)
1161 		return 0;
1162 
1163 	memset(&inarg, 0, sizeof(inarg));
1164 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1165 	args.opcode = FUSE_ACCESS;
1166 	args.nodeid = get_node_id(inode);
1167 	args.in_numargs = 1;
1168 	args.in_args[0].size = sizeof(inarg);
1169 	args.in_args[0].value = &inarg;
1170 	err = fuse_simple_request(fm, &args);
1171 	if (err == -ENOSYS) {
1172 		fm->fc->no_access = 1;
1173 		err = 0;
1174 	}
1175 	return err;
1176 }
1177 
1178 static int fuse_perm_getattr(struct inode *inode, int mask)
1179 {
1180 	if (mask & MAY_NOT_BLOCK)
1181 		return -ECHILD;
1182 
1183 	forget_all_cached_acls(inode);
1184 	return fuse_do_getattr(inode, NULL, NULL);
1185 }
1186 
1187 /*
1188  * Check permission.  The two basic access models of FUSE are:
1189  *
1190  * 1) Local access checking ('default_permissions' mount option) based
1191  * on file mode.  This is the plain old disk filesystem permission
1192  * modell.
1193  *
1194  * 2) "Remote" access checking, where server is responsible for
1195  * checking permission in each inode operation.  An exception to this
1196  * is if ->permission() was invoked from sys_access() in which case an
1197  * access request is sent.  Execute permission is still checked
1198  * locally based on file mode.
1199  */
1200 static int fuse_permission(struct user_namespace *mnt_userns,
1201 			   struct inode *inode, int mask)
1202 {
1203 	struct fuse_conn *fc = get_fuse_conn(inode);
1204 	bool refreshed = false;
1205 	int err = 0;
1206 
1207 	if (fuse_is_bad(inode))
1208 		return -EIO;
1209 
1210 	if (!fuse_allow_current_process(fc))
1211 		return -EACCES;
1212 
1213 	/*
1214 	 * If attributes are needed, refresh them before proceeding
1215 	 */
1216 	if (fc->default_permissions ||
1217 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1218 		struct fuse_inode *fi = get_fuse_inode(inode);
1219 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1220 
1221 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1222 		    time_before64(fi->i_time, get_jiffies_64())) {
1223 			refreshed = true;
1224 
1225 			err = fuse_perm_getattr(inode, mask);
1226 			if (err)
1227 				return err;
1228 		}
1229 	}
1230 
1231 	if (fc->default_permissions) {
1232 		err = generic_permission(&init_user_ns, inode, mask);
1233 
1234 		/* If permission is denied, try to refresh file
1235 		   attributes.  This is also needed, because the root
1236 		   node will at first have no permissions */
1237 		if (err == -EACCES && !refreshed) {
1238 			err = fuse_perm_getattr(inode, mask);
1239 			if (!err)
1240 				err = generic_permission(&init_user_ns,
1241 							 inode, mask);
1242 		}
1243 
1244 		/* Note: the opposite of the above test does not
1245 		   exist.  So if permissions are revoked this won't be
1246 		   noticed immediately, only after the attribute
1247 		   timeout has expired */
1248 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1249 		err = fuse_access(inode, mask);
1250 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1251 		if (!(inode->i_mode & S_IXUGO)) {
1252 			if (refreshed)
1253 				return -EACCES;
1254 
1255 			err = fuse_perm_getattr(inode, mask);
1256 			if (!err && !(inode->i_mode & S_IXUGO))
1257 				return -EACCES;
1258 		}
1259 	}
1260 	return err;
1261 }
1262 
1263 static int fuse_readlink_page(struct inode *inode, struct page *page)
1264 {
1265 	struct fuse_mount *fm = get_fuse_mount(inode);
1266 	struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1267 	struct fuse_args_pages ap = {
1268 		.num_pages = 1,
1269 		.pages = &page,
1270 		.descs = &desc,
1271 	};
1272 	char *link;
1273 	ssize_t res;
1274 
1275 	ap.args.opcode = FUSE_READLINK;
1276 	ap.args.nodeid = get_node_id(inode);
1277 	ap.args.out_pages = true;
1278 	ap.args.out_argvar = true;
1279 	ap.args.page_zeroing = true;
1280 	ap.args.out_numargs = 1;
1281 	ap.args.out_args[0].size = desc.length;
1282 	res = fuse_simple_request(fm, &ap.args);
1283 
1284 	fuse_invalidate_atime(inode);
1285 
1286 	if (res < 0)
1287 		return res;
1288 
1289 	if (WARN_ON(res >= PAGE_SIZE))
1290 		return -EIO;
1291 
1292 	link = page_address(page);
1293 	link[res] = '\0';
1294 
1295 	return 0;
1296 }
1297 
1298 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1299 				 struct delayed_call *callback)
1300 {
1301 	struct fuse_conn *fc = get_fuse_conn(inode);
1302 	struct page *page;
1303 	int err;
1304 
1305 	err = -EIO;
1306 	if (fuse_is_bad(inode))
1307 		goto out_err;
1308 
1309 	if (fc->cache_symlinks)
1310 		return page_get_link(dentry, inode, callback);
1311 
1312 	err = -ECHILD;
1313 	if (!dentry)
1314 		goto out_err;
1315 
1316 	page = alloc_page(GFP_KERNEL);
1317 	err = -ENOMEM;
1318 	if (!page)
1319 		goto out_err;
1320 
1321 	err = fuse_readlink_page(inode, page);
1322 	if (err) {
1323 		__free_page(page);
1324 		goto out_err;
1325 	}
1326 
1327 	set_delayed_call(callback, page_put_link, page);
1328 
1329 	return page_address(page);
1330 
1331 out_err:
1332 	return ERR_PTR(err);
1333 }
1334 
1335 static int fuse_dir_open(struct inode *inode, struct file *file)
1336 {
1337 	return fuse_open_common(inode, file, true);
1338 }
1339 
1340 static int fuse_dir_release(struct inode *inode, struct file *file)
1341 {
1342 	fuse_release_common(file, true);
1343 
1344 	return 0;
1345 }
1346 
1347 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1348 			  int datasync)
1349 {
1350 	struct inode *inode = file->f_mapping->host;
1351 	struct fuse_conn *fc = get_fuse_conn(inode);
1352 	int err;
1353 
1354 	if (fuse_is_bad(inode))
1355 		return -EIO;
1356 
1357 	if (fc->no_fsyncdir)
1358 		return 0;
1359 
1360 	inode_lock(inode);
1361 	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1362 	if (err == -ENOSYS) {
1363 		fc->no_fsyncdir = 1;
1364 		err = 0;
1365 	}
1366 	inode_unlock(inode);
1367 
1368 	return err;
1369 }
1370 
1371 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1372 			    unsigned long arg)
1373 {
1374 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1375 
1376 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1377 	if (fc->minor < 18)
1378 		return -ENOTTY;
1379 
1380 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1381 }
1382 
1383 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1384 				   unsigned long arg)
1385 {
1386 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1387 
1388 	if (fc->minor < 18)
1389 		return -ENOTTY;
1390 
1391 	return fuse_ioctl_common(file, cmd, arg,
1392 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1393 }
1394 
1395 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1396 {
1397 	/* Always update if mtime is explicitly set  */
1398 	if (ivalid & ATTR_MTIME_SET)
1399 		return true;
1400 
1401 	/* Or if kernel i_mtime is the official one */
1402 	if (trust_local_mtime)
1403 		return true;
1404 
1405 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1406 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1407 		return false;
1408 
1409 	/* In all other cases update */
1410 	return true;
1411 }
1412 
1413 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1414 			   struct fuse_setattr_in *arg, bool trust_local_cmtime)
1415 {
1416 	unsigned ivalid = iattr->ia_valid;
1417 
1418 	if (ivalid & ATTR_MODE)
1419 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1420 	if (ivalid & ATTR_UID)
1421 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1422 	if (ivalid & ATTR_GID)
1423 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1424 	if (ivalid & ATTR_SIZE)
1425 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1426 	if (ivalid & ATTR_ATIME) {
1427 		arg->valid |= FATTR_ATIME;
1428 		arg->atime = iattr->ia_atime.tv_sec;
1429 		arg->atimensec = iattr->ia_atime.tv_nsec;
1430 		if (!(ivalid & ATTR_ATIME_SET))
1431 			arg->valid |= FATTR_ATIME_NOW;
1432 	}
1433 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1434 		arg->valid |= FATTR_MTIME;
1435 		arg->mtime = iattr->ia_mtime.tv_sec;
1436 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1437 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1438 			arg->valid |= FATTR_MTIME_NOW;
1439 	}
1440 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1441 		arg->valid |= FATTR_CTIME;
1442 		arg->ctime = iattr->ia_ctime.tv_sec;
1443 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1444 	}
1445 }
1446 
1447 /*
1448  * Prevent concurrent writepages on inode
1449  *
1450  * This is done by adding a negative bias to the inode write counter
1451  * and waiting for all pending writes to finish.
1452  */
1453 void fuse_set_nowrite(struct inode *inode)
1454 {
1455 	struct fuse_inode *fi = get_fuse_inode(inode);
1456 
1457 	BUG_ON(!inode_is_locked(inode));
1458 
1459 	spin_lock(&fi->lock);
1460 	BUG_ON(fi->writectr < 0);
1461 	fi->writectr += FUSE_NOWRITE;
1462 	spin_unlock(&fi->lock);
1463 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1464 }
1465 
1466 /*
1467  * Allow writepages on inode
1468  *
1469  * Remove the bias from the writecounter and send any queued
1470  * writepages.
1471  */
1472 static void __fuse_release_nowrite(struct inode *inode)
1473 {
1474 	struct fuse_inode *fi = get_fuse_inode(inode);
1475 
1476 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1477 	fi->writectr = 0;
1478 	fuse_flush_writepages(inode);
1479 }
1480 
1481 void fuse_release_nowrite(struct inode *inode)
1482 {
1483 	struct fuse_inode *fi = get_fuse_inode(inode);
1484 
1485 	spin_lock(&fi->lock);
1486 	__fuse_release_nowrite(inode);
1487 	spin_unlock(&fi->lock);
1488 }
1489 
1490 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1491 			      struct inode *inode,
1492 			      struct fuse_setattr_in *inarg_p,
1493 			      struct fuse_attr_out *outarg_p)
1494 {
1495 	args->opcode = FUSE_SETATTR;
1496 	args->nodeid = get_node_id(inode);
1497 	args->in_numargs = 1;
1498 	args->in_args[0].size = sizeof(*inarg_p);
1499 	args->in_args[0].value = inarg_p;
1500 	args->out_numargs = 1;
1501 	args->out_args[0].size = sizeof(*outarg_p);
1502 	args->out_args[0].value = outarg_p;
1503 }
1504 
1505 /*
1506  * Flush inode->i_mtime to the server
1507  */
1508 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1509 {
1510 	struct fuse_mount *fm = get_fuse_mount(inode);
1511 	FUSE_ARGS(args);
1512 	struct fuse_setattr_in inarg;
1513 	struct fuse_attr_out outarg;
1514 
1515 	memset(&inarg, 0, sizeof(inarg));
1516 	memset(&outarg, 0, sizeof(outarg));
1517 
1518 	inarg.valid = FATTR_MTIME;
1519 	inarg.mtime = inode->i_mtime.tv_sec;
1520 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1521 	if (fm->fc->minor >= 23) {
1522 		inarg.valid |= FATTR_CTIME;
1523 		inarg.ctime = inode->i_ctime.tv_sec;
1524 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1525 	}
1526 	if (ff) {
1527 		inarg.valid |= FATTR_FH;
1528 		inarg.fh = ff->fh;
1529 	}
1530 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1531 
1532 	return fuse_simple_request(fm, &args);
1533 }
1534 
1535 /*
1536  * Set attributes, and at the same time refresh them.
1537  *
1538  * Truncation is slightly complicated, because the 'truncate' request
1539  * may fail, in which case we don't want to touch the mapping.
1540  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1541  * and the actual truncation by hand.
1542  */
1543 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1544 		    struct file *file)
1545 {
1546 	struct inode *inode = d_inode(dentry);
1547 	struct fuse_mount *fm = get_fuse_mount(inode);
1548 	struct fuse_conn *fc = fm->fc;
1549 	struct fuse_inode *fi = get_fuse_inode(inode);
1550 	struct address_space *mapping = inode->i_mapping;
1551 	FUSE_ARGS(args);
1552 	struct fuse_setattr_in inarg;
1553 	struct fuse_attr_out outarg;
1554 	bool is_truncate = false;
1555 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1556 	loff_t oldsize;
1557 	int err;
1558 	bool trust_local_cmtime = is_wb;
1559 	bool fault_blocked = false;
1560 
1561 	if (!fc->default_permissions)
1562 		attr->ia_valid |= ATTR_FORCE;
1563 
1564 	err = setattr_prepare(&init_user_ns, dentry, attr);
1565 	if (err)
1566 		return err;
1567 
1568 	if (attr->ia_valid & ATTR_SIZE) {
1569 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1570 			return -EIO;
1571 		is_truncate = true;
1572 	}
1573 
1574 	if (FUSE_IS_DAX(inode) && is_truncate) {
1575 		filemap_invalidate_lock(mapping);
1576 		fault_blocked = true;
1577 		err = fuse_dax_break_layouts(inode, 0, 0);
1578 		if (err) {
1579 			filemap_invalidate_unlock(mapping);
1580 			return err;
1581 		}
1582 	}
1583 
1584 	if (attr->ia_valid & ATTR_OPEN) {
1585 		/* This is coming from open(..., ... | O_TRUNC); */
1586 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1587 		WARN_ON(attr->ia_size != 0);
1588 		if (fc->atomic_o_trunc) {
1589 			/*
1590 			 * No need to send request to userspace, since actual
1591 			 * truncation has already been done by OPEN.  But still
1592 			 * need to truncate page cache.
1593 			 */
1594 			i_size_write(inode, 0);
1595 			truncate_pagecache(inode, 0);
1596 			goto out;
1597 		}
1598 		file = NULL;
1599 	}
1600 
1601 	/* Flush dirty data/metadata before non-truncate SETATTR */
1602 	if (is_wb &&
1603 	    attr->ia_valid &
1604 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1605 			 ATTR_TIMES_SET)) {
1606 		err = write_inode_now(inode, true);
1607 		if (err)
1608 			return err;
1609 
1610 		fuse_set_nowrite(inode);
1611 		fuse_release_nowrite(inode);
1612 	}
1613 
1614 	if (is_truncate) {
1615 		fuse_set_nowrite(inode);
1616 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1617 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1618 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1619 	}
1620 
1621 	memset(&inarg, 0, sizeof(inarg));
1622 	memset(&outarg, 0, sizeof(outarg));
1623 	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1624 	if (file) {
1625 		struct fuse_file *ff = file->private_data;
1626 		inarg.valid |= FATTR_FH;
1627 		inarg.fh = ff->fh;
1628 	}
1629 
1630 	/* Kill suid/sgid for non-directory chown unconditionally */
1631 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
1632 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
1633 		inarg.valid |= FATTR_KILL_SUIDGID;
1634 
1635 	if (attr->ia_valid & ATTR_SIZE) {
1636 		/* For mandatory locking in truncate */
1637 		inarg.valid |= FATTR_LOCKOWNER;
1638 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1639 
1640 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
1641 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
1642 			inarg.valid |= FATTR_KILL_SUIDGID;
1643 	}
1644 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1645 	err = fuse_simple_request(fm, &args);
1646 	if (err) {
1647 		if (err == -EINTR)
1648 			fuse_invalidate_attr(inode);
1649 		goto error;
1650 	}
1651 
1652 	if (fuse_invalid_attr(&outarg.attr) ||
1653 	    inode_wrong_type(inode, outarg.attr.mode)) {
1654 		fuse_make_bad(inode);
1655 		err = -EIO;
1656 		goto error;
1657 	}
1658 
1659 	spin_lock(&fi->lock);
1660 	/* the kernel maintains i_mtime locally */
1661 	if (trust_local_cmtime) {
1662 		if (attr->ia_valid & ATTR_MTIME)
1663 			inode->i_mtime = attr->ia_mtime;
1664 		if (attr->ia_valid & ATTR_CTIME)
1665 			inode->i_ctime = attr->ia_ctime;
1666 		/* FIXME: clear I_DIRTY_SYNC? */
1667 	}
1668 
1669 	fuse_change_attributes_common(inode, &outarg.attr,
1670 				      attr_timeout(&outarg),
1671 				      fuse_get_cache_mask(inode));
1672 	oldsize = inode->i_size;
1673 	/* see the comment in fuse_change_attributes() */
1674 	if (!is_wb || is_truncate)
1675 		i_size_write(inode, outarg.attr.size);
1676 
1677 	if (is_truncate) {
1678 		/* NOTE: this may release/reacquire fi->lock */
1679 		__fuse_release_nowrite(inode);
1680 	}
1681 	spin_unlock(&fi->lock);
1682 
1683 	/*
1684 	 * Only call invalidate_inode_pages2() after removing
1685 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1686 	 */
1687 	if ((is_truncate || !is_wb) &&
1688 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1689 		truncate_pagecache(inode, outarg.attr.size);
1690 		invalidate_inode_pages2(mapping);
1691 	}
1692 
1693 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1694 out:
1695 	if (fault_blocked)
1696 		filemap_invalidate_unlock(mapping);
1697 
1698 	return 0;
1699 
1700 error:
1701 	if (is_truncate)
1702 		fuse_release_nowrite(inode);
1703 
1704 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1705 
1706 	if (fault_blocked)
1707 		filemap_invalidate_unlock(mapping);
1708 	return err;
1709 }
1710 
1711 static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry,
1712 			struct iattr *attr)
1713 {
1714 	struct inode *inode = d_inode(entry);
1715 	struct fuse_conn *fc = get_fuse_conn(inode);
1716 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1717 	int ret;
1718 
1719 	if (fuse_is_bad(inode))
1720 		return -EIO;
1721 
1722 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1723 		return -EACCES;
1724 
1725 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1726 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1727 				    ATTR_MODE);
1728 
1729 		/*
1730 		 * The only sane way to reliably kill suid/sgid is to do it in
1731 		 * the userspace filesystem
1732 		 *
1733 		 * This should be done on write(), truncate() and chown().
1734 		 */
1735 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
1736 			/*
1737 			 * ia_mode calculation may have used stale i_mode.
1738 			 * Refresh and recalculate.
1739 			 */
1740 			ret = fuse_do_getattr(inode, NULL, file);
1741 			if (ret)
1742 				return ret;
1743 
1744 			attr->ia_mode = inode->i_mode;
1745 			if (inode->i_mode & S_ISUID) {
1746 				attr->ia_valid |= ATTR_MODE;
1747 				attr->ia_mode &= ~S_ISUID;
1748 			}
1749 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1750 				attr->ia_valid |= ATTR_MODE;
1751 				attr->ia_mode &= ~S_ISGID;
1752 			}
1753 		}
1754 	}
1755 	if (!attr->ia_valid)
1756 		return 0;
1757 
1758 	ret = fuse_do_setattr(entry, attr, file);
1759 	if (!ret) {
1760 		/*
1761 		 * If filesystem supports acls it may have updated acl xattrs in
1762 		 * the filesystem, so forget cached acls for the inode.
1763 		 */
1764 		if (fc->posix_acl)
1765 			forget_all_cached_acls(inode);
1766 
1767 		/* Directory mode changed, may need to revalidate access */
1768 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1769 			fuse_invalidate_entry_cache(entry);
1770 	}
1771 	return ret;
1772 }
1773 
1774 static int fuse_getattr(struct user_namespace *mnt_userns,
1775 			const struct path *path, struct kstat *stat,
1776 			u32 request_mask, unsigned int flags)
1777 {
1778 	struct inode *inode = d_inode(path->dentry);
1779 	struct fuse_conn *fc = get_fuse_conn(inode);
1780 
1781 	if (fuse_is_bad(inode))
1782 		return -EIO;
1783 
1784 	if (!fuse_allow_current_process(fc)) {
1785 		if (!request_mask) {
1786 			/*
1787 			 * If user explicitly requested *nothing* then don't
1788 			 * error out, but return st_dev only.
1789 			 */
1790 			stat->result_mask = 0;
1791 			stat->dev = inode->i_sb->s_dev;
1792 			return 0;
1793 		}
1794 		return -EACCES;
1795 	}
1796 
1797 	return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
1798 }
1799 
1800 static const struct inode_operations fuse_dir_inode_operations = {
1801 	.lookup		= fuse_lookup,
1802 	.mkdir		= fuse_mkdir,
1803 	.symlink	= fuse_symlink,
1804 	.unlink		= fuse_unlink,
1805 	.rmdir		= fuse_rmdir,
1806 	.rename		= fuse_rename2,
1807 	.link		= fuse_link,
1808 	.setattr	= fuse_setattr,
1809 	.create		= fuse_create,
1810 	.atomic_open	= fuse_atomic_open,
1811 	.mknod		= fuse_mknod,
1812 	.permission	= fuse_permission,
1813 	.getattr	= fuse_getattr,
1814 	.listxattr	= fuse_listxattr,
1815 	.get_acl	= fuse_get_acl,
1816 	.set_acl	= fuse_set_acl,
1817 	.fileattr_get	= fuse_fileattr_get,
1818 	.fileattr_set	= fuse_fileattr_set,
1819 };
1820 
1821 static const struct file_operations fuse_dir_operations = {
1822 	.llseek		= generic_file_llseek,
1823 	.read		= generic_read_dir,
1824 	.iterate_shared	= fuse_readdir,
1825 	.open		= fuse_dir_open,
1826 	.release	= fuse_dir_release,
1827 	.fsync		= fuse_dir_fsync,
1828 	.unlocked_ioctl	= fuse_dir_ioctl,
1829 	.compat_ioctl	= fuse_dir_compat_ioctl,
1830 };
1831 
1832 static const struct inode_operations fuse_common_inode_operations = {
1833 	.setattr	= fuse_setattr,
1834 	.permission	= fuse_permission,
1835 	.getattr	= fuse_getattr,
1836 	.listxattr	= fuse_listxattr,
1837 	.get_acl	= fuse_get_acl,
1838 	.set_acl	= fuse_set_acl,
1839 	.fileattr_get	= fuse_fileattr_get,
1840 	.fileattr_set	= fuse_fileattr_set,
1841 };
1842 
1843 static const struct inode_operations fuse_symlink_inode_operations = {
1844 	.setattr	= fuse_setattr,
1845 	.get_link	= fuse_get_link,
1846 	.getattr	= fuse_getattr,
1847 	.listxattr	= fuse_listxattr,
1848 };
1849 
1850 void fuse_init_common(struct inode *inode)
1851 {
1852 	inode->i_op = &fuse_common_inode_operations;
1853 }
1854 
1855 void fuse_init_dir(struct inode *inode)
1856 {
1857 	struct fuse_inode *fi = get_fuse_inode(inode);
1858 
1859 	inode->i_op = &fuse_dir_inode_operations;
1860 	inode->i_fop = &fuse_dir_operations;
1861 
1862 	spin_lock_init(&fi->rdc.lock);
1863 	fi->rdc.cached = false;
1864 	fi->rdc.size = 0;
1865 	fi->rdc.pos = 0;
1866 	fi->rdc.version = 0;
1867 }
1868 
1869 static int fuse_symlink_readpage(struct file *null, struct page *page)
1870 {
1871 	int err = fuse_readlink_page(page->mapping->host, page);
1872 
1873 	if (!err)
1874 		SetPageUptodate(page);
1875 
1876 	unlock_page(page);
1877 
1878 	return err;
1879 }
1880 
1881 static const struct address_space_operations fuse_symlink_aops = {
1882 	.readpage	= fuse_symlink_readpage,
1883 };
1884 
1885 void fuse_init_symlink(struct inode *inode)
1886 {
1887 	inode->i_op = &fuse_symlink_inode_operations;
1888 	inode->i_data.a_ops = &fuse_symlink_aops;
1889 	inode_nohighmem(inode);
1890 }
1891