xref: /openbmc/linux/fs/fuse/dir.c (revision 5497b23e)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/sched.h>
15 #include <linux/namei.h>
16 #include <linux/slab.h>
17 #include <linux/xattr.h>
18 #include <linux/iversion.h>
19 #include <linux/posix_acl.h>
20 
21 static void fuse_advise_use_readdirplus(struct inode *dir)
22 {
23 	struct fuse_inode *fi = get_fuse_inode(dir);
24 
25 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
26 }
27 
28 #if BITS_PER_LONG >= 64
29 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
30 {
31 	entry->d_fsdata = (void *) time;
32 }
33 
34 static inline u64 fuse_dentry_time(const struct dentry *entry)
35 {
36 	return (u64)entry->d_fsdata;
37 }
38 
39 #else
40 union fuse_dentry {
41 	u64 time;
42 	struct rcu_head rcu;
43 };
44 
45 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
46 {
47 	((union fuse_dentry *) dentry->d_fsdata)->time = time;
48 }
49 
50 static inline u64 fuse_dentry_time(const struct dentry *entry)
51 {
52 	return ((union fuse_dentry *) entry->d_fsdata)->time;
53 }
54 #endif
55 
56 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
57 {
58 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
59 	bool delete = !time && fc->delete_stale;
60 	/*
61 	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
62 	 * Don't care about races, either way it's just an optimization
63 	 */
64 	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
65 	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
66 		spin_lock(&dentry->d_lock);
67 		if (!delete)
68 			dentry->d_flags &= ~DCACHE_OP_DELETE;
69 		else
70 			dentry->d_flags |= DCACHE_OP_DELETE;
71 		spin_unlock(&dentry->d_lock);
72 	}
73 
74 	__fuse_dentry_settime(dentry, time);
75 }
76 
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until which the dentry/attributes remain valid is
 * stored in dentry->d_fsdata and fuse_inode->i_time respectively.
 */
82 
83 /*
84  * Calculate the time in jiffies until a dentry/attributes are valid
85  */
86 static u64 time_to_jiffies(u64 sec, u32 nsec)
87 {
88 	if (sec || nsec) {
89 		struct timespec64 ts = {
90 			sec,
91 			min_t(u32, nsec, NSEC_PER_SEC - 1)
92 		};
93 
94 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
95 	} else
96 		return 0;
97 }
98 
99 /*
100  * Set dentry and possibly attribute timeouts from the lookup/mk*
101  * replies
102  */
103 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
104 {
105 	fuse_dentry_settime(entry,
106 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
107 }
108 
109 static u64 attr_timeout(struct fuse_attr_out *o)
110 {
111 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
112 }
113 
114 u64 entry_attr_timeout(struct fuse_entry_out *o)
115 {
116 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
117 }
118 
119 static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
120 {
121 	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
122 }
123 
124 /*
125  * Mark the attributes as stale, so that at the next call to
126  * ->getattr() they will be fetched from userspace
127  */
128 void fuse_invalidate_attr(struct inode *inode)
129 {
130 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
131 }
132 
133 static void fuse_dir_changed(struct inode *dir)
134 {
135 	fuse_invalidate_attr(dir);
136 	inode_maybe_inc_iversion(dir, false);
137 }
138 
139 /**
140  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
141  * atime is not used.
142  */
143 void fuse_invalidate_atime(struct inode *inode)
144 {
145 	if (!IS_RDONLY(inode))
146 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
147 }
148 
149 /*
150  * Just mark the entry as stale, so that a next attempt to look it up
151  * will result in a new lookup call to userspace
152  *
153  * This is called when a dentry is about to become negative and the
154  * timeout is unknown (unlink, rmdir, rename and in some cases
155  * lookup)
156  */
157 void fuse_invalidate_entry_cache(struct dentry *entry)
158 {
159 	fuse_dentry_settime(entry, 0);
160 }
161 
/*
 * Like fuse_invalidate_entry_cache(), but first asks the dcache to
 * invalidate the dentry via d_invalidate().
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_dentry_settime(entry, 0);
}
171 
172 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
173 			     u64 nodeid, const struct qstr *name,
174 			     struct fuse_entry_out *outarg)
175 {
176 	memset(outarg, 0, sizeof(struct fuse_entry_out));
177 	args->opcode = FUSE_LOOKUP;
178 	args->nodeid = nodeid;
179 	args->in_numargs = 1;
180 	args->in_args[0].size = name->len + 1;
181 	args->in_args[0].value = name->name;
182 	args->out_numargs = 1;
183 	args->out_args[0].size = sizeof(struct fuse_entry_out);
184 	args->out_args[0].value = outarg;
185 }
186 
187 /*
188  * Check whether the dentry is still valid
189  *
190  * If the entry validity timeout has expired and the dentry is
191  * positive, try to redo the lookup.  If the lookup results in a
192  * different inode, then let the VFS invalidate the dentry and redo
193  * the lookup once more.  If the lookup results in the same inode,
194  * then refresh the attributes, timeouts and mark the dentry valid.
195  */
196 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
197 {
198 	struct inode *inode;
199 	struct dentry *parent;
200 	struct fuse_mount *fm;
201 	struct fuse_inode *fi;
202 	int ret;
203 
204 	inode = d_inode_rcu(entry);
205 	if (inode && fuse_is_bad(inode))
206 		goto invalid;
207 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
208 		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) {
209 		struct fuse_entry_out outarg;
210 		FUSE_ARGS(args);
211 		struct fuse_forget_link *forget;
212 		u64 attr_version;
213 
214 		/* For negative dentries, always do a fresh lookup */
215 		if (!inode)
216 			goto invalid;
217 
218 		ret = -ECHILD;
219 		if (flags & LOOKUP_RCU)
220 			goto out;
221 
222 		fm = get_fuse_mount(inode);
223 
224 		forget = fuse_alloc_forget();
225 		ret = -ENOMEM;
226 		if (!forget)
227 			goto out;
228 
229 		attr_version = fuse_get_attr_version(fm->fc);
230 
231 		parent = dget_parent(entry);
232 		fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
233 				 &entry->d_name, &outarg);
234 		ret = fuse_simple_request(fm, &args);
235 		dput(parent);
236 		/* Zero nodeid is same as -ENOENT */
237 		if (!ret && !outarg.nodeid)
238 			ret = -ENOENT;
239 		if (!ret) {
240 			fi = get_fuse_inode(inode);
241 			if (outarg.nodeid != get_node_id(inode) ||
242 			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
243 				fuse_queue_forget(fm->fc, forget,
244 						  outarg.nodeid, 1);
245 				goto invalid;
246 			}
247 			spin_lock(&fi->lock);
248 			fi->nlookup++;
249 			spin_unlock(&fi->lock);
250 		}
251 		kfree(forget);
252 		if (ret == -ENOMEM)
253 			goto out;
254 		if (ret || fuse_invalid_attr(&outarg.attr) ||
255 		    (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
256 			goto invalid;
257 
258 		forget_all_cached_acls(inode);
259 		fuse_change_attributes(inode, &outarg.attr,
260 				       entry_attr_timeout(&outarg),
261 				       attr_version);
262 		fuse_change_entry_timeout(entry, &outarg);
263 	} else if (inode) {
264 		fi = get_fuse_inode(inode);
265 		if (flags & LOOKUP_RCU) {
266 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
267 				return -ECHILD;
268 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
269 			parent = dget_parent(entry);
270 			fuse_advise_use_readdirplus(d_inode(parent));
271 			dput(parent);
272 		}
273 	}
274 	ret = 1;
275 out:
276 	return ret;
277 
278 invalid:
279 	ret = 0;
280 	goto out;
281 }
282 
283 #if BITS_PER_LONG < 64
284 static int fuse_dentry_init(struct dentry *dentry)
285 {
286 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
287 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
288 
289 	return dentry->d_fsdata ? 0 : -ENOMEM;
290 }
291 static void fuse_dentry_release(struct dentry *dentry)
292 {
293 	union fuse_dentry *fd = dentry->d_fsdata;
294 
295 	kfree_rcu(fd, rcu);
296 }
297 #endif
298 
299 static int fuse_dentry_delete(const struct dentry *dentry)
300 {
301 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
302 }
303 
/*
 * Create a fuse_mount object with a new superblock (with path->dentry
 * as the root), and return that mount so it can be auto-mounted on
 * @path.
 *
 * The new fuse_mount shares the fuse_conn of the mount @path lives on.
 * Returns the new vfsmount, or an ERR_PTR() on failure.
 */
static struct vfsmount *fuse_dentry_automount(struct path *path)
{
	struct fs_context *fsc;
	struct fuse_mount *parent_fm = get_fuse_mount_super(path->mnt->mnt_sb);
	struct fuse_conn *fc = parent_fm->fc;
	struct fuse_mount *fm;
	struct vfsmount *mnt;
	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
	struct super_block *sb;
	int err;

	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
	if (IS_ERR(fsc)) {
		err = PTR_ERR(fsc);
		goto out;
	}

	err = -ENOMEM;
	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		goto out_put_fsc;

	/* Hand the new fuse_mount to sget_fc() through the fs_context */
	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, NULL, set_anon_super_fc);
	if (IS_ERR(sb)) {
		err = PTR_ERR(sb);
		/* No superblock was obtained, so fm is freed here */
		kfree(fm);
		goto out_put_fsc;
	}
	/* The submount takes its own reference on the shared connection */
	fm->fc = fuse_conn_get(fc);

	/* Initialize superblock, making @mp_fi its root */
	err = fuse_fill_super_submount(sb, mp_fi);
	if (err)
		goto out_put_sb;

	sb->s_flags |= SB_ACTIVE;
	fsc->root = dget(sb->s_root);
	/* We are done configuring the superblock, so unlock it */
	up_write(&sb->s_umount);

	/* Add the new mount to the connection's list of mounts */
	down_write(&fc->killsb);
	list_add_tail(&fm->fc_entry, &fc->mounts);
	up_write(&fc->killsb);

	/* Create the submount */
	mnt = vfs_create_mount(fsc);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		/* fsc->root is set by now, so only the context is put here */
		goto out_put_fsc;
	}
	/* NOTE(review): extra mount reference, presumably expected by the
	   ->d_automount caller - confirm */
	mntget(mnt);
	put_fs_context(fsc);
	return mnt;

out_put_sb:
	/*
	 * Only jump here when fsc->root is NULL and sb is still locked
	 * (otherwise put_fs_context() will put the superblock)
	 */
	deactivate_locked_super(sb);
out_put_fsc:
	put_fs_context(fsc);
out:
	return ERR_PTR(err);
}
375 
/*
 * Dentry operations table with revalidate, delete and automount hooks.
 * When BITS_PER_LONG < 64 it also carries the init/release hooks that
 * allocate and free the per-dentry union fuse_dentry.
 */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
	.d_delete	= fuse_dentry_delete,
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
	.d_automount	= fuse_dentry_automount,
};
385 
/*
 * Dentry operations table without revalidate/delete/automount hooks; it
 * only carries the init/release hooks, and only when BITS_PER_LONG < 64.
 * NOTE(review): judging by the name this is used for the root dentry -
 * confirm at the place where it is installed.
 */
const struct dentry_operations fuse_root_dentry_operations = {
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
};
392 
/*
 * Return 1 if the file type bits of mode @m denote one of the seven known
 * file types (regular, directory, symlink, character device, block
 * device, FIFO, socket), 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;
	if (S_ISFIFO(m) || S_ISSOCK(m))
		return 1;

	return 0;
}
398 
399 bool fuse_invalid_attr(struct fuse_attr *attr)
400 {
401 	return !fuse_valid_type(attr->mode) ||
402 		attr->size > LLONG_MAX;
403 }
404 
405 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
406 		     struct fuse_entry_out *outarg, struct inode **inode)
407 {
408 	struct fuse_mount *fm = get_fuse_mount_super(sb);
409 	FUSE_ARGS(args);
410 	struct fuse_forget_link *forget;
411 	u64 attr_version;
412 	int err;
413 
414 	*inode = NULL;
415 	err = -ENAMETOOLONG;
416 	if (name->len > FUSE_NAME_MAX)
417 		goto out;
418 
419 
420 	forget = fuse_alloc_forget();
421 	err = -ENOMEM;
422 	if (!forget)
423 		goto out;
424 
425 	attr_version = fuse_get_attr_version(fm->fc);
426 
427 	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
428 	err = fuse_simple_request(fm, &args);
429 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
430 	if (err || !outarg->nodeid)
431 		goto out_put_forget;
432 
433 	err = -EIO;
434 	if (!outarg->nodeid)
435 		goto out_put_forget;
436 	if (fuse_invalid_attr(&outarg->attr))
437 		goto out_put_forget;
438 
439 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
440 			   &outarg->attr, entry_attr_timeout(outarg),
441 			   attr_version);
442 	err = -ENOMEM;
443 	if (!*inode) {
444 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
445 		goto out;
446 	}
447 	err = 0;
448 
449  out_put_forget:
450 	kfree(forget);
451  out:
452 	return err;
453 }
454 
455 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
456 				  unsigned int flags)
457 {
458 	int err;
459 	struct fuse_entry_out outarg;
460 	struct inode *inode;
461 	struct dentry *newent;
462 	bool outarg_valid = true;
463 	bool locked;
464 
465 	if (fuse_is_bad(dir))
466 		return ERR_PTR(-EIO);
467 
468 	locked = fuse_lock_inode(dir);
469 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
470 			       &outarg, &inode);
471 	fuse_unlock_inode(dir, locked);
472 	if (err == -ENOENT) {
473 		outarg_valid = false;
474 		err = 0;
475 	}
476 	if (err)
477 		goto out_err;
478 
479 	err = -EIO;
480 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
481 		goto out_iput;
482 
483 	newent = d_splice_alias(inode, entry);
484 	err = PTR_ERR(newent);
485 	if (IS_ERR(newent))
486 		goto out_err;
487 
488 	entry = newent ? newent : entry;
489 	if (outarg_valid)
490 		fuse_change_entry_timeout(entry, &outarg);
491 	else
492 		fuse_invalidate_entry_cache(entry);
493 
494 	if (inode)
495 		fuse_advise_use_readdirplus(dir);
496 	return newent;
497 
498  out_iput:
499 	iput(inode);
500  out_err:
501 	return ERR_PTR(err);
502 }
503 
504 /*
505  * Atomic create+open operation
506  *
507  * If the filesystem doesn't support this, then fall back to separate
508  * 'mknod' + 'open' requests.
509  */
510 static int fuse_create_open(struct inode *dir, struct dentry *entry,
511 			    struct file *file, unsigned flags,
512 			    umode_t mode)
513 {
514 	int err;
515 	struct inode *inode;
516 	struct fuse_mount *fm = get_fuse_mount(dir);
517 	FUSE_ARGS(args);
518 	struct fuse_forget_link *forget;
519 	struct fuse_create_in inarg;
520 	struct fuse_open_out outopen;
521 	struct fuse_entry_out outentry;
522 	struct fuse_inode *fi;
523 	struct fuse_file *ff;
524 
525 	/* Userspace expects S_IFREG in create mode */
526 	BUG_ON((mode & S_IFMT) != S_IFREG);
527 
528 	forget = fuse_alloc_forget();
529 	err = -ENOMEM;
530 	if (!forget)
531 		goto out_err;
532 
533 	err = -ENOMEM;
534 	ff = fuse_file_alloc(fm);
535 	if (!ff)
536 		goto out_put_forget_req;
537 
538 	if (!fm->fc->dont_mask)
539 		mode &= ~current_umask();
540 
541 	flags &= ~O_NOCTTY;
542 	memset(&inarg, 0, sizeof(inarg));
543 	memset(&outentry, 0, sizeof(outentry));
544 	inarg.flags = flags;
545 	inarg.mode = mode;
546 	inarg.umask = current_umask();
547 
548 	if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
549 	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
550 		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
551 	}
552 
553 	args.opcode = FUSE_CREATE;
554 	args.nodeid = get_node_id(dir);
555 	args.in_numargs = 2;
556 	args.in_args[0].size = sizeof(inarg);
557 	args.in_args[0].value = &inarg;
558 	args.in_args[1].size = entry->d_name.len + 1;
559 	args.in_args[1].value = entry->d_name.name;
560 	args.out_numargs = 2;
561 	args.out_args[0].size = sizeof(outentry);
562 	args.out_args[0].value = &outentry;
563 	args.out_args[1].size = sizeof(outopen);
564 	args.out_args[1].value = &outopen;
565 	err = fuse_simple_request(fm, &args);
566 	if (err)
567 		goto out_free_ff;
568 
569 	err = -EIO;
570 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
571 	    fuse_invalid_attr(&outentry.attr))
572 		goto out_free_ff;
573 
574 	ff->fh = outopen.fh;
575 	ff->nodeid = outentry.nodeid;
576 	ff->open_flags = outopen.open_flags;
577 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
578 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
579 	if (!inode) {
580 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
581 		fuse_sync_release(NULL, ff, flags);
582 		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
583 		err = -ENOMEM;
584 		goto out_err;
585 	}
586 	kfree(forget);
587 	d_instantiate(entry, inode);
588 	fuse_change_entry_timeout(entry, &outentry);
589 	fuse_dir_changed(dir);
590 	err = finish_open(file, entry, generic_file_open);
591 	if (err) {
592 		fi = get_fuse_inode(inode);
593 		fuse_sync_release(fi, ff, flags);
594 	} else {
595 		file->private_data = ff;
596 		fuse_finish_open(inode, file);
597 	}
598 	return err;
599 
600 out_free_ff:
601 	fuse_file_free(ff);
602 out_put_forget_req:
603 	kfree(forget);
604 out_err:
605 	return err;
606 }
607 
608 static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *,
609 		      umode_t, dev_t);
610 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
611 			    struct file *file, unsigned flags,
612 			    umode_t mode)
613 {
614 	int err;
615 	struct fuse_conn *fc = get_fuse_conn(dir);
616 	struct dentry *res = NULL;
617 
618 	if (fuse_is_bad(dir))
619 		return -EIO;
620 
621 	if (d_in_lookup(entry)) {
622 		res = fuse_lookup(dir, entry, 0);
623 		if (IS_ERR(res))
624 			return PTR_ERR(res);
625 
626 		if (res)
627 			entry = res;
628 	}
629 
630 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
631 		goto no_open;
632 
633 	/* Only creates */
634 	file->f_mode |= FMODE_CREATED;
635 
636 	if (fc->no_create)
637 		goto mknod;
638 
639 	err = fuse_create_open(dir, entry, file, flags, mode);
640 	if (err == -ENOSYS) {
641 		fc->no_create = 1;
642 		goto mknod;
643 	}
644 out_dput:
645 	dput(res);
646 	return err;
647 
648 mknod:
649 	err = fuse_mknod(&init_user_ns, dir, entry, mode, 0);
650 	if (err)
651 		goto out_dput;
652 no_open:
653 	return finish_no_open(file, res);
654 }
655 
656 /*
657  * Code shared between mknod, mkdir, symlink and link
658  */
659 static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
660 			    struct inode *dir, struct dentry *entry,
661 			    umode_t mode)
662 {
663 	struct fuse_entry_out outarg;
664 	struct inode *inode;
665 	struct dentry *d;
666 	int err;
667 	struct fuse_forget_link *forget;
668 
669 	if (fuse_is_bad(dir))
670 		return -EIO;
671 
672 	forget = fuse_alloc_forget();
673 	if (!forget)
674 		return -ENOMEM;
675 
676 	memset(&outarg, 0, sizeof(outarg));
677 	args->nodeid = get_node_id(dir);
678 	args->out_numargs = 1;
679 	args->out_args[0].size = sizeof(outarg);
680 	args->out_args[0].value = &outarg;
681 	err = fuse_simple_request(fm, args);
682 	if (err)
683 		goto out_put_forget_req;
684 
685 	err = -EIO;
686 	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
687 		goto out_put_forget_req;
688 
689 	if ((outarg.attr.mode ^ mode) & S_IFMT)
690 		goto out_put_forget_req;
691 
692 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
693 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
694 	if (!inode) {
695 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
696 		return -ENOMEM;
697 	}
698 	kfree(forget);
699 
700 	d_drop(entry);
701 	d = d_splice_alias(inode, entry);
702 	if (IS_ERR(d))
703 		return PTR_ERR(d);
704 
705 	if (d) {
706 		fuse_change_entry_timeout(d, &outarg);
707 		dput(d);
708 	} else {
709 		fuse_change_entry_timeout(entry, &outarg);
710 	}
711 	fuse_dir_changed(dir);
712 	return 0;
713 
714  out_put_forget_req:
715 	kfree(forget);
716 	return err;
717 }
718 
719 static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir,
720 		      struct dentry *entry, umode_t mode, dev_t rdev)
721 {
722 	struct fuse_mknod_in inarg;
723 	struct fuse_mount *fm = get_fuse_mount(dir);
724 	FUSE_ARGS(args);
725 
726 	if (!fm->fc->dont_mask)
727 		mode &= ~current_umask();
728 
729 	memset(&inarg, 0, sizeof(inarg));
730 	inarg.mode = mode;
731 	inarg.rdev = new_encode_dev(rdev);
732 	inarg.umask = current_umask();
733 	args.opcode = FUSE_MKNOD;
734 	args.in_numargs = 2;
735 	args.in_args[0].size = sizeof(inarg);
736 	args.in_args[0].value = &inarg;
737 	args.in_args[1].size = entry->d_name.len + 1;
738 	args.in_args[1].value = entry->d_name.name;
739 	return create_new_entry(fm, &args, dir, entry, mode);
740 }
741 
742 static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir,
743 		       struct dentry *entry, umode_t mode, bool excl)
744 {
745 	return fuse_mknod(&init_user_ns, dir, entry, mode, 0);
746 }
747 
748 static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
749 		      struct dentry *entry, umode_t mode)
750 {
751 	struct fuse_mkdir_in inarg;
752 	struct fuse_mount *fm = get_fuse_mount(dir);
753 	FUSE_ARGS(args);
754 
755 	if (!fm->fc->dont_mask)
756 		mode &= ~current_umask();
757 
758 	memset(&inarg, 0, sizeof(inarg));
759 	inarg.mode = mode;
760 	inarg.umask = current_umask();
761 	args.opcode = FUSE_MKDIR;
762 	args.in_numargs = 2;
763 	args.in_args[0].size = sizeof(inarg);
764 	args.in_args[0].value = &inarg;
765 	args.in_args[1].size = entry->d_name.len + 1;
766 	args.in_args[1].value = entry->d_name.name;
767 	return create_new_entry(fm, &args, dir, entry, S_IFDIR);
768 }
769 
770 static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir,
771 			struct dentry *entry, const char *link)
772 {
773 	struct fuse_mount *fm = get_fuse_mount(dir);
774 	unsigned len = strlen(link) + 1;
775 	FUSE_ARGS(args);
776 
777 	args.opcode = FUSE_SYMLINK;
778 	args.in_numargs = 2;
779 	args.in_args[0].size = entry->d_name.len + 1;
780 	args.in_args[0].value = entry->d_name.name;
781 	args.in_args[1].size = len;
782 	args.in_args[1].value = link;
783 	return create_new_entry(fm, &args, dir, entry, S_IFLNK);
784 }
785 
786 void fuse_update_ctime(struct inode *inode)
787 {
788 	if (!IS_NOCMTIME(inode)) {
789 		inode->i_ctime = current_time(inode);
790 		mark_inode_dirty_sync(inode);
791 	}
792 }
793 
794 static int fuse_unlink(struct inode *dir, struct dentry *entry)
795 {
796 	int err;
797 	struct fuse_mount *fm = get_fuse_mount(dir);
798 	FUSE_ARGS(args);
799 
800 	if (fuse_is_bad(dir))
801 		return -EIO;
802 
803 	args.opcode = FUSE_UNLINK;
804 	args.nodeid = get_node_id(dir);
805 	args.in_numargs = 1;
806 	args.in_args[0].size = entry->d_name.len + 1;
807 	args.in_args[0].value = entry->d_name.name;
808 	err = fuse_simple_request(fm, &args);
809 	if (!err) {
810 		struct inode *inode = d_inode(entry);
811 		struct fuse_inode *fi = get_fuse_inode(inode);
812 
813 		spin_lock(&fi->lock);
814 		fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
815 		/*
816 		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
817 		 * happen if userspace filesystem is careless.  It would be
818 		 * difficult to enforce correct nlink usage so just ignore this
819 		 * condition here
820 		 */
821 		if (inode->i_nlink > 0)
822 			drop_nlink(inode);
823 		spin_unlock(&fi->lock);
824 		fuse_invalidate_attr(inode);
825 		fuse_dir_changed(dir);
826 		fuse_invalidate_entry_cache(entry);
827 		fuse_update_ctime(inode);
828 	} else if (err == -EINTR)
829 		fuse_invalidate_entry(entry);
830 	return err;
831 }
832 
833 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
834 {
835 	int err;
836 	struct fuse_mount *fm = get_fuse_mount(dir);
837 	FUSE_ARGS(args);
838 
839 	if (fuse_is_bad(dir))
840 		return -EIO;
841 
842 	args.opcode = FUSE_RMDIR;
843 	args.nodeid = get_node_id(dir);
844 	args.in_numargs = 1;
845 	args.in_args[0].size = entry->d_name.len + 1;
846 	args.in_args[0].value = entry->d_name.name;
847 	err = fuse_simple_request(fm, &args);
848 	if (!err) {
849 		clear_nlink(d_inode(entry));
850 		fuse_dir_changed(dir);
851 		fuse_invalidate_entry_cache(entry);
852 	} else if (err == -EINTR)
853 		fuse_invalidate_entry(entry);
854 	return err;
855 }
856 
857 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
858 			      struct inode *newdir, struct dentry *newent,
859 			      unsigned int flags, int opcode, size_t argsize)
860 {
861 	int err;
862 	struct fuse_rename2_in inarg;
863 	struct fuse_mount *fm = get_fuse_mount(olddir);
864 	FUSE_ARGS(args);
865 
866 	memset(&inarg, 0, argsize);
867 	inarg.newdir = get_node_id(newdir);
868 	inarg.flags = flags;
869 	args.opcode = opcode;
870 	args.nodeid = get_node_id(olddir);
871 	args.in_numargs = 3;
872 	args.in_args[0].size = argsize;
873 	args.in_args[0].value = &inarg;
874 	args.in_args[1].size = oldent->d_name.len + 1;
875 	args.in_args[1].value = oldent->d_name.name;
876 	args.in_args[2].size = newent->d_name.len + 1;
877 	args.in_args[2].value = newent->d_name.name;
878 	err = fuse_simple_request(fm, &args);
879 	if (!err) {
880 		/* ctime changes */
881 		fuse_invalidate_attr(d_inode(oldent));
882 		fuse_update_ctime(d_inode(oldent));
883 
884 		if (flags & RENAME_EXCHANGE) {
885 			fuse_invalidate_attr(d_inode(newent));
886 			fuse_update_ctime(d_inode(newent));
887 		}
888 
889 		fuse_dir_changed(olddir);
890 		if (olddir != newdir)
891 			fuse_dir_changed(newdir);
892 
893 		/* newent will end up negative */
894 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
895 			fuse_invalidate_attr(d_inode(newent));
896 			fuse_invalidate_entry_cache(newent);
897 			fuse_update_ctime(d_inode(newent));
898 		}
899 	} else if (err == -EINTR) {
900 		/* If request was interrupted, DEITY only knows if the
901 		   rename actually took place.  If the invalidation
902 		   fails (e.g. some process has CWD under the renamed
903 		   directory), then there can be inconsistency between
904 		   the dcache and the real filesystem.  Tough luck. */
905 		fuse_invalidate_entry(oldent);
906 		if (d_really_is_positive(newent))
907 			fuse_invalidate_entry(newent);
908 	}
909 
910 	return err;
911 }
912 
913 static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir,
914 			struct dentry *oldent, struct inode *newdir,
915 			struct dentry *newent, unsigned int flags)
916 {
917 	struct fuse_conn *fc = get_fuse_conn(olddir);
918 	int err;
919 
920 	if (fuse_is_bad(olddir))
921 		return -EIO;
922 
923 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
924 		return -EINVAL;
925 
926 	if (flags) {
927 		if (fc->no_rename2 || fc->minor < 23)
928 			return -EINVAL;
929 
930 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
931 					 FUSE_RENAME2,
932 					 sizeof(struct fuse_rename2_in));
933 		if (err == -ENOSYS) {
934 			fc->no_rename2 = 1;
935 			err = -EINVAL;
936 		}
937 	} else {
938 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
939 					 FUSE_RENAME,
940 					 sizeof(struct fuse_rename_in));
941 	}
942 
943 	return err;
944 }
945 
946 static int fuse_link(struct dentry *entry, struct inode *newdir,
947 		     struct dentry *newent)
948 {
949 	int err;
950 	struct fuse_link_in inarg;
951 	struct inode *inode = d_inode(entry);
952 	struct fuse_mount *fm = get_fuse_mount(inode);
953 	FUSE_ARGS(args);
954 
955 	memset(&inarg, 0, sizeof(inarg));
956 	inarg.oldnodeid = get_node_id(inode);
957 	args.opcode = FUSE_LINK;
958 	args.in_numargs = 2;
959 	args.in_args[0].size = sizeof(inarg);
960 	args.in_args[0].value = &inarg;
961 	args.in_args[1].size = newent->d_name.len + 1;
962 	args.in_args[1].value = newent->d_name.name;
963 	err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
964 	/* Contrary to "normal" filesystems it can happen that link
965 	   makes two "logical" inodes point to the same "physical"
966 	   inode.  We invalidate the attributes of the old one, so it
967 	   will reflect changes in the backing inode (link count,
968 	   etc.)
969 	*/
970 	if (!err) {
971 		struct fuse_inode *fi = get_fuse_inode(inode);
972 
973 		spin_lock(&fi->lock);
974 		fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
975 		if (likely(inode->i_nlink < UINT_MAX))
976 			inc_nlink(inode);
977 		spin_unlock(&fi->lock);
978 		fuse_invalidate_attr(inode);
979 		fuse_update_ctime(inode);
980 	} else if (err == -EINTR) {
981 		fuse_invalidate_attr(inode);
982 	}
983 	return err;
984 }
985 
986 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
987 			  struct kstat *stat)
988 {
989 	unsigned int blkbits;
990 	struct fuse_conn *fc = get_fuse_conn(inode);
991 
992 	/* see the comment in fuse_change_attributes() */
993 	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
994 		attr->size = i_size_read(inode);
995 		attr->mtime = inode->i_mtime.tv_sec;
996 		attr->mtimensec = inode->i_mtime.tv_nsec;
997 		attr->ctime = inode->i_ctime.tv_sec;
998 		attr->ctimensec = inode->i_ctime.tv_nsec;
999 	}
1000 
1001 	stat->dev = inode->i_sb->s_dev;
1002 	stat->ino = attr->ino;
1003 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1004 	stat->nlink = attr->nlink;
1005 	stat->uid = make_kuid(fc->user_ns, attr->uid);
1006 	stat->gid = make_kgid(fc->user_ns, attr->gid);
1007 	stat->rdev = inode->i_rdev;
1008 	stat->atime.tv_sec = attr->atime;
1009 	stat->atime.tv_nsec = attr->atimensec;
1010 	stat->mtime.tv_sec = attr->mtime;
1011 	stat->mtime.tv_nsec = attr->mtimensec;
1012 	stat->ctime.tv_sec = attr->ctime;
1013 	stat->ctime.tv_nsec = attr->ctimensec;
1014 	stat->size = attr->size;
1015 	stat->blocks = attr->blocks;
1016 
1017 	if (attr->blksize != 0)
1018 		blkbits = ilog2(attr->blksize);
1019 	else
1020 		blkbits = inode->i_sb->s_blocksize_bits;
1021 
1022 	stat->blksize = 1 << blkbits;
1023 }
1024 
1025 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
1026 			   struct file *file)
1027 {
1028 	int err;
1029 	struct fuse_getattr_in inarg;
1030 	struct fuse_attr_out outarg;
1031 	struct fuse_mount *fm = get_fuse_mount(inode);
1032 	FUSE_ARGS(args);
1033 	u64 attr_version;
1034 
1035 	attr_version = fuse_get_attr_version(fm->fc);
1036 
1037 	memset(&inarg, 0, sizeof(inarg));
1038 	memset(&outarg, 0, sizeof(outarg));
1039 	/* Directories have separate file-handle space */
1040 	if (file && S_ISREG(inode->i_mode)) {
1041 		struct fuse_file *ff = file->private_data;
1042 
1043 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1044 		inarg.fh = ff->fh;
1045 	}
1046 	args.opcode = FUSE_GETATTR;
1047 	args.nodeid = get_node_id(inode);
1048 	args.in_numargs = 1;
1049 	args.in_args[0].size = sizeof(inarg);
1050 	args.in_args[0].value = &inarg;
1051 	args.out_numargs = 1;
1052 	args.out_args[0].size = sizeof(outarg);
1053 	args.out_args[0].value = &outarg;
1054 	err = fuse_simple_request(fm, &args);
1055 	if (!err) {
1056 		if (fuse_invalid_attr(&outarg.attr) ||
1057 		    (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1058 			fuse_make_bad(inode);
1059 			err = -EIO;
1060 		} else {
1061 			fuse_change_attributes(inode, &outarg.attr,
1062 					       attr_timeout(&outarg),
1063 					       attr_version);
1064 			if (stat)
1065 				fuse_fillattr(inode, &outarg.attr, stat);
1066 		}
1067 	}
1068 	return err;
1069 }
1070 
1071 static int fuse_update_get_attr(struct inode *inode, struct file *file,
1072 				struct kstat *stat, u32 request_mask,
1073 				unsigned int flags)
1074 {
1075 	struct fuse_inode *fi = get_fuse_inode(inode);
1076 	int err = 0;
1077 	bool sync;
1078 
1079 	if (flags & AT_STATX_FORCE_SYNC)
1080 		sync = true;
1081 	else if (flags & AT_STATX_DONT_SYNC)
1082 		sync = false;
1083 	else if (request_mask & READ_ONCE(fi->inval_mask))
1084 		sync = true;
1085 	else
1086 		sync = time_before64(fi->i_time, get_jiffies_64());
1087 
1088 	if (sync) {
1089 		forget_all_cached_acls(inode);
1090 		err = fuse_do_getattr(inode, stat, file);
1091 	} else if (stat) {
1092 		generic_fillattr(&init_user_ns, inode, stat);
1093 		stat->mode = fi->orig_i_mode;
1094 		stat->ino = fi->orig_ino;
1095 	}
1096 
1097 	return err;
1098 }
1099 
1100 int fuse_update_attributes(struct inode *inode, struct file *file)
1101 {
1102 	/* Do *not* need to get atime for internal purposes */
1103 	return fuse_update_get_attr(inode, file, NULL,
1104 				    STATX_BASIC_STATS & ~STATX_ATIME, 0);
1105 }
1106 
1107 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1108 			     u64 child_nodeid, struct qstr *name)
1109 {
1110 	int err = -ENOTDIR;
1111 	struct inode *parent;
1112 	struct dentry *dir;
1113 	struct dentry *entry;
1114 
1115 	parent = fuse_ilookup(fc, parent_nodeid, NULL);
1116 	if (!parent)
1117 		return -ENOENT;
1118 
1119 	inode_lock(parent);
1120 	if (!S_ISDIR(parent->i_mode))
1121 		goto unlock;
1122 
1123 	err = -ENOENT;
1124 	dir = d_find_alias(parent);
1125 	if (!dir)
1126 		goto unlock;
1127 
1128 	name->hash = full_name_hash(dir, name->name, name->len);
1129 	entry = d_lookup(dir, name);
1130 	dput(dir);
1131 	if (!entry)
1132 		goto unlock;
1133 
1134 	fuse_dir_changed(parent);
1135 	fuse_invalidate_entry(entry);
1136 
1137 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
1138 		inode_lock(d_inode(entry));
1139 		if (get_node_id(d_inode(entry)) != child_nodeid) {
1140 			err = -ENOENT;
1141 			goto badentry;
1142 		}
1143 		if (d_mountpoint(entry)) {
1144 			err = -EBUSY;
1145 			goto badentry;
1146 		}
1147 		if (d_is_dir(entry)) {
1148 			shrink_dcache_parent(entry);
1149 			if (!simple_empty(entry)) {
1150 				err = -ENOTEMPTY;
1151 				goto badentry;
1152 			}
1153 			d_inode(entry)->i_flags |= S_DEAD;
1154 		}
1155 		dont_mount(entry);
1156 		clear_nlink(d_inode(entry));
1157 		err = 0;
1158  badentry:
1159 		inode_unlock(d_inode(entry));
1160 		if (!err)
1161 			d_delete(entry);
1162 	} else {
1163 		err = 0;
1164 	}
1165 	dput(entry);
1166 
1167  unlock:
1168 	inode_unlock(parent);
1169 	iput(parent);
1170 	return err;
1171 }
1172 
1173 /*
1174  * Calling into a user-controlled filesystem gives the filesystem
1175  * daemon ptrace-like capabilities over the current process.  This
1176  * means, that the filesystem daemon is able to record the exact
1177  * filesystem operations performed, and can also control the behavior
1178  * of the requester process in otherwise impossible ways.  For example
1179  * it can delay the operation for arbitrary length of time allowing
1180  * DoS against the requester.
1181  *
1182  * For this reason only those processes can call into the filesystem,
1183  * for which the owner of the mount has ptrace privilege.  This
1184  * excludes processes started by other users, suid or sgid processes.
1185  */
1186 int fuse_allow_current_process(struct fuse_conn *fc)
1187 {
1188 	const struct cred *cred;
1189 
1190 	if (fc->allow_other)
1191 		return current_in_userns(fc->user_ns);
1192 
1193 	cred = current_cred();
1194 	if (uid_eq(cred->euid, fc->user_id) &&
1195 	    uid_eq(cred->suid, fc->user_id) &&
1196 	    uid_eq(cred->uid,  fc->user_id) &&
1197 	    gid_eq(cred->egid, fc->group_id) &&
1198 	    gid_eq(cred->sgid, fc->group_id) &&
1199 	    gid_eq(cred->gid,  fc->group_id))
1200 		return 1;
1201 
1202 	return 0;
1203 }
1204 
1205 static int fuse_access(struct inode *inode, int mask)
1206 {
1207 	struct fuse_mount *fm = get_fuse_mount(inode);
1208 	FUSE_ARGS(args);
1209 	struct fuse_access_in inarg;
1210 	int err;
1211 
1212 	BUG_ON(mask & MAY_NOT_BLOCK);
1213 
1214 	if (fm->fc->no_access)
1215 		return 0;
1216 
1217 	memset(&inarg, 0, sizeof(inarg));
1218 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1219 	args.opcode = FUSE_ACCESS;
1220 	args.nodeid = get_node_id(inode);
1221 	args.in_numargs = 1;
1222 	args.in_args[0].size = sizeof(inarg);
1223 	args.in_args[0].value = &inarg;
1224 	err = fuse_simple_request(fm, &args);
1225 	if (err == -ENOSYS) {
1226 		fm->fc->no_access = 1;
1227 		err = 0;
1228 	}
1229 	return err;
1230 }
1231 
1232 static int fuse_perm_getattr(struct inode *inode, int mask)
1233 {
1234 	if (mask & MAY_NOT_BLOCK)
1235 		return -ECHILD;
1236 
1237 	forget_all_cached_acls(inode);
1238 	return fuse_do_getattr(inode, NULL, NULL);
1239 }
1240 
1241 /*
1242  * Check permission.  The two basic access models of FUSE are:
1243  *
1244  * 1) Local access checking ('default_permissions' mount option) based
1245  * on file mode.  This is the plain old disk filesystem permission
1246  * modell.
1247  *
1248  * 2) "Remote" access checking, where server is responsible for
1249  * checking permission in each inode operation.  An exception to this
1250  * is if ->permission() was invoked from sys_access() in which case an
1251  * access request is sent.  Execute permission is still checked
1252  * locally based on file mode.
1253  */
1254 static int fuse_permission(struct user_namespace *mnt_userns,
1255 			   struct inode *inode, int mask)
1256 {
1257 	struct fuse_conn *fc = get_fuse_conn(inode);
1258 	bool refreshed = false;
1259 	int err = 0;
1260 
1261 	if (fuse_is_bad(inode))
1262 		return -EIO;
1263 
1264 	if (!fuse_allow_current_process(fc))
1265 		return -EACCES;
1266 
1267 	/*
1268 	 * If attributes are needed, refresh them before proceeding
1269 	 */
1270 	if (fc->default_permissions ||
1271 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1272 		struct fuse_inode *fi = get_fuse_inode(inode);
1273 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1274 
1275 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1276 		    time_before64(fi->i_time, get_jiffies_64())) {
1277 			refreshed = true;
1278 
1279 			err = fuse_perm_getattr(inode, mask);
1280 			if (err)
1281 				return err;
1282 		}
1283 	}
1284 
1285 	if (fc->default_permissions) {
1286 		err = generic_permission(&init_user_ns, inode, mask);
1287 
1288 		/* If permission is denied, try to refresh file
1289 		   attributes.  This is also needed, because the root
1290 		   node will at first have no permissions */
1291 		if (err == -EACCES && !refreshed) {
1292 			err = fuse_perm_getattr(inode, mask);
1293 			if (!err)
1294 				err = generic_permission(&init_user_ns,
1295 							 inode, mask);
1296 		}
1297 
1298 		/* Note: the opposite of the above test does not
1299 		   exist.  So if permissions are revoked this won't be
1300 		   noticed immediately, only after the attribute
1301 		   timeout has expired */
1302 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1303 		err = fuse_access(inode, mask);
1304 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1305 		if (!(inode->i_mode & S_IXUGO)) {
1306 			if (refreshed)
1307 				return -EACCES;
1308 
1309 			err = fuse_perm_getattr(inode, mask);
1310 			if (!err && !(inode->i_mode & S_IXUGO))
1311 				return -EACCES;
1312 		}
1313 	}
1314 	return err;
1315 }
1316 
1317 static int fuse_readlink_page(struct inode *inode, struct page *page)
1318 {
1319 	struct fuse_mount *fm = get_fuse_mount(inode);
1320 	struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1321 	struct fuse_args_pages ap = {
1322 		.num_pages = 1,
1323 		.pages = &page,
1324 		.descs = &desc,
1325 	};
1326 	char *link;
1327 	ssize_t res;
1328 
1329 	ap.args.opcode = FUSE_READLINK;
1330 	ap.args.nodeid = get_node_id(inode);
1331 	ap.args.out_pages = true;
1332 	ap.args.out_argvar = true;
1333 	ap.args.page_zeroing = true;
1334 	ap.args.out_numargs = 1;
1335 	ap.args.out_args[0].size = desc.length;
1336 	res = fuse_simple_request(fm, &ap.args);
1337 
1338 	fuse_invalidate_atime(inode);
1339 
1340 	if (res < 0)
1341 		return res;
1342 
1343 	if (WARN_ON(res >= PAGE_SIZE))
1344 		return -EIO;
1345 
1346 	link = page_address(page);
1347 	link[res] = '\0';
1348 
1349 	return 0;
1350 }
1351 
1352 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1353 				 struct delayed_call *callback)
1354 {
1355 	struct fuse_conn *fc = get_fuse_conn(inode);
1356 	struct page *page;
1357 	int err;
1358 
1359 	err = -EIO;
1360 	if (fuse_is_bad(inode))
1361 		goto out_err;
1362 
1363 	if (fc->cache_symlinks)
1364 		return page_get_link(dentry, inode, callback);
1365 
1366 	err = -ECHILD;
1367 	if (!dentry)
1368 		goto out_err;
1369 
1370 	page = alloc_page(GFP_KERNEL);
1371 	err = -ENOMEM;
1372 	if (!page)
1373 		goto out_err;
1374 
1375 	err = fuse_readlink_page(inode, page);
1376 	if (err) {
1377 		__free_page(page);
1378 		goto out_err;
1379 	}
1380 
1381 	set_delayed_call(callback, page_put_link, page);
1382 
1383 	return page_address(page);
1384 
1385 out_err:
1386 	return ERR_PTR(err);
1387 }
1388 
1389 static int fuse_dir_open(struct inode *inode, struct file *file)
1390 {
1391 	return fuse_open_common(inode, file, true);
1392 }
1393 
1394 static int fuse_dir_release(struct inode *inode, struct file *file)
1395 {
1396 	fuse_release_common(file, true);
1397 
1398 	return 0;
1399 }
1400 
1401 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1402 			  int datasync)
1403 {
1404 	struct inode *inode = file->f_mapping->host;
1405 	struct fuse_conn *fc = get_fuse_conn(inode);
1406 	int err;
1407 
1408 	if (fuse_is_bad(inode))
1409 		return -EIO;
1410 
1411 	if (fc->no_fsyncdir)
1412 		return 0;
1413 
1414 	inode_lock(inode);
1415 	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1416 	if (err == -ENOSYS) {
1417 		fc->no_fsyncdir = 1;
1418 		err = 0;
1419 	}
1420 	inode_unlock(inode);
1421 
1422 	return err;
1423 }
1424 
1425 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1426 			    unsigned long arg)
1427 {
1428 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1429 
1430 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1431 	if (fc->minor < 18)
1432 		return -ENOTTY;
1433 
1434 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1435 }
1436 
1437 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1438 				   unsigned long arg)
1439 {
1440 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1441 
1442 	if (fc->minor < 18)
1443 		return -ENOTTY;
1444 
1445 	return fuse_ioctl_common(file, cmd, arg,
1446 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1447 }
1448 
1449 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1450 {
1451 	/* Always update if mtime is explicitly set  */
1452 	if (ivalid & ATTR_MTIME_SET)
1453 		return true;
1454 
1455 	/* Or if kernel i_mtime is the official one */
1456 	if (trust_local_mtime)
1457 		return true;
1458 
1459 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1460 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1461 		return false;
1462 
1463 	/* In all other cases update */
1464 	return true;
1465 }
1466 
1467 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1468 			   struct fuse_setattr_in *arg, bool trust_local_cmtime)
1469 {
1470 	unsigned ivalid = iattr->ia_valid;
1471 
1472 	if (ivalid & ATTR_MODE)
1473 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1474 	if (ivalid & ATTR_UID)
1475 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1476 	if (ivalid & ATTR_GID)
1477 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1478 	if (ivalid & ATTR_SIZE)
1479 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1480 	if (ivalid & ATTR_ATIME) {
1481 		arg->valid |= FATTR_ATIME;
1482 		arg->atime = iattr->ia_atime.tv_sec;
1483 		arg->atimensec = iattr->ia_atime.tv_nsec;
1484 		if (!(ivalid & ATTR_ATIME_SET))
1485 			arg->valid |= FATTR_ATIME_NOW;
1486 	}
1487 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1488 		arg->valid |= FATTR_MTIME;
1489 		arg->mtime = iattr->ia_mtime.tv_sec;
1490 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1491 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1492 			arg->valid |= FATTR_MTIME_NOW;
1493 	}
1494 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1495 		arg->valid |= FATTR_CTIME;
1496 		arg->ctime = iattr->ia_ctime.tv_sec;
1497 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1498 	}
1499 }
1500 
1501 /*
1502  * Prevent concurrent writepages on inode
1503  *
1504  * This is done by adding a negative bias to the inode write counter
1505  * and waiting for all pending writes to finish.
1506  */
1507 void fuse_set_nowrite(struct inode *inode)
1508 {
1509 	struct fuse_inode *fi = get_fuse_inode(inode);
1510 
1511 	BUG_ON(!inode_is_locked(inode));
1512 
1513 	spin_lock(&fi->lock);
1514 	BUG_ON(fi->writectr < 0);
1515 	fi->writectr += FUSE_NOWRITE;
1516 	spin_unlock(&fi->lock);
1517 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1518 }
1519 
1520 /*
1521  * Allow writepages on inode
1522  *
1523  * Remove the bias from the writecounter and send any queued
1524  * writepages.
1525  */
1526 static void __fuse_release_nowrite(struct inode *inode)
1527 {
1528 	struct fuse_inode *fi = get_fuse_inode(inode);
1529 
1530 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1531 	fi->writectr = 0;
1532 	fuse_flush_writepages(inode);
1533 }
1534 
1535 void fuse_release_nowrite(struct inode *inode)
1536 {
1537 	struct fuse_inode *fi = get_fuse_inode(inode);
1538 
1539 	spin_lock(&fi->lock);
1540 	__fuse_release_nowrite(inode);
1541 	spin_unlock(&fi->lock);
1542 }
1543 
1544 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1545 			      struct inode *inode,
1546 			      struct fuse_setattr_in *inarg_p,
1547 			      struct fuse_attr_out *outarg_p)
1548 {
1549 	args->opcode = FUSE_SETATTR;
1550 	args->nodeid = get_node_id(inode);
1551 	args->in_numargs = 1;
1552 	args->in_args[0].size = sizeof(*inarg_p);
1553 	args->in_args[0].value = inarg_p;
1554 	args->out_numargs = 1;
1555 	args->out_args[0].size = sizeof(*outarg_p);
1556 	args->out_args[0].value = outarg_p;
1557 }
1558 
1559 /*
1560  * Flush inode->i_mtime to the server
1561  */
1562 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1563 {
1564 	struct fuse_mount *fm = get_fuse_mount(inode);
1565 	FUSE_ARGS(args);
1566 	struct fuse_setattr_in inarg;
1567 	struct fuse_attr_out outarg;
1568 
1569 	memset(&inarg, 0, sizeof(inarg));
1570 	memset(&outarg, 0, sizeof(outarg));
1571 
1572 	inarg.valid = FATTR_MTIME;
1573 	inarg.mtime = inode->i_mtime.tv_sec;
1574 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1575 	if (fm->fc->minor >= 23) {
1576 		inarg.valid |= FATTR_CTIME;
1577 		inarg.ctime = inode->i_ctime.tv_sec;
1578 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1579 	}
1580 	if (ff) {
1581 		inarg.valid |= FATTR_FH;
1582 		inarg.fh = ff->fh;
1583 	}
1584 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1585 
1586 	return fuse_simple_request(fm, &args);
1587 }
1588 
1589 /*
1590  * Set attributes, and at the same time refresh them.
1591  *
1592  * Truncation is slightly complicated, because the 'truncate' request
1593  * may fail, in which case we don't want to touch the mapping.
1594  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1595  * and the actual truncation by hand.
1596  */
1597 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1598 		    struct file *file)
1599 {
1600 	struct inode *inode = d_inode(dentry);
1601 	struct fuse_mount *fm = get_fuse_mount(inode);
1602 	struct fuse_conn *fc = fm->fc;
1603 	struct fuse_inode *fi = get_fuse_inode(inode);
1604 	FUSE_ARGS(args);
1605 	struct fuse_setattr_in inarg;
1606 	struct fuse_attr_out outarg;
1607 	bool is_truncate = false;
1608 	bool is_wb = fc->writeback_cache;
1609 	loff_t oldsize;
1610 	int err;
1611 	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1612 	bool fault_blocked = false;
1613 
1614 	if (!fc->default_permissions)
1615 		attr->ia_valid |= ATTR_FORCE;
1616 
1617 	err = setattr_prepare(&init_user_ns, dentry, attr);
1618 	if (err)
1619 		return err;
1620 
1621 	if (attr->ia_valid & ATTR_SIZE) {
1622 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1623 			return -EIO;
1624 		is_truncate = true;
1625 	}
1626 
1627 	if (FUSE_IS_DAX(inode) && is_truncate) {
1628 		down_write(&fi->i_mmap_sem);
1629 		fault_blocked = true;
1630 		err = fuse_dax_break_layouts(inode, 0, 0);
1631 		if (err) {
1632 			up_write(&fi->i_mmap_sem);
1633 			return err;
1634 		}
1635 	}
1636 
1637 	if (attr->ia_valid & ATTR_OPEN) {
1638 		/* This is coming from open(..., ... | O_TRUNC); */
1639 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1640 		WARN_ON(attr->ia_size != 0);
1641 		if (fc->atomic_o_trunc) {
1642 			/*
1643 			 * No need to send request to userspace, since actual
1644 			 * truncation has already been done by OPEN.  But still
1645 			 * need to truncate page cache.
1646 			 */
1647 			i_size_write(inode, 0);
1648 			truncate_pagecache(inode, 0);
1649 			goto out;
1650 		}
1651 		file = NULL;
1652 	}
1653 
1654 	/* Flush dirty data/metadata before non-truncate SETATTR */
1655 	if (is_wb && S_ISREG(inode->i_mode) &&
1656 	    attr->ia_valid &
1657 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1658 			 ATTR_TIMES_SET)) {
1659 		err = write_inode_now(inode, true);
1660 		if (err)
1661 			return err;
1662 
1663 		fuse_set_nowrite(inode);
1664 		fuse_release_nowrite(inode);
1665 	}
1666 
1667 	if (is_truncate) {
1668 		fuse_set_nowrite(inode);
1669 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1670 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1671 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1672 	}
1673 
1674 	memset(&inarg, 0, sizeof(inarg));
1675 	memset(&outarg, 0, sizeof(outarg));
1676 	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1677 	if (file) {
1678 		struct fuse_file *ff = file->private_data;
1679 		inarg.valid |= FATTR_FH;
1680 		inarg.fh = ff->fh;
1681 	}
1682 
1683 	/* Kill suid/sgid for non-directory chown unconditionally */
1684 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
1685 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
1686 		inarg.valid |= FATTR_KILL_SUIDGID;
1687 
1688 	if (attr->ia_valid & ATTR_SIZE) {
1689 		/* For mandatory locking in truncate */
1690 		inarg.valid |= FATTR_LOCKOWNER;
1691 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1692 
1693 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
1694 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
1695 			inarg.valid |= FATTR_KILL_SUIDGID;
1696 	}
1697 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1698 	err = fuse_simple_request(fm, &args);
1699 	if (err) {
1700 		if (err == -EINTR)
1701 			fuse_invalidate_attr(inode);
1702 		goto error;
1703 	}
1704 
1705 	if (fuse_invalid_attr(&outarg.attr) ||
1706 	    (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1707 		fuse_make_bad(inode);
1708 		err = -EIO;
1709 		goto error;
1710 	}
1711 
1712 	spin_lock(&fi->lock);
1713 	/* the kernel maintains i_mtime locally */
1714 	if (trust_local_cmtime) {
1715 		if (attr->ia_valid & ATTR_MTIME)
1716 			inode->i_mtime = attr->ia_mtime;
1717 		if (attr->ia_valid & ATTR_CTIME)
1718 			inode->i_ctime = attr->ia_ctime;
1719 		/* FIXME: clear I_DIRTY_SYNC? */
1720 	}
1721 
1722 	fuse_change_attributes_common(inode, &outarg.attr,
1723 				      attr_timeout(&outarg));
1724 	oldsize = inode->i_size;
1725 	/* see the comment in fuse_change_attributes() */
1726 	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1727 		i_size_write(inode, outarg.attr.size);
1728 
1729 	if (is_truncate) {
1730 		/* NOTE: this may release/reacquire fi->lock */
1731 		__fuse_release_nowrite(inode);
1732 	}
1733 	spin_unlock(&fi->lock);
1734 
1735 	/*
1736 	 * Only call invalidate_inode_pages2() after removing
1737 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1738 	 */
1739 	if ((is_truncate || !is_wb) &&
1740 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1741 		truncate_pagecache(inode, outarg.attr.size);
1742 		invalidate_inode_pages2(inode->i_mapping);
1743 	}
1744 
1745 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1746 out:
1747 	if (fault_blocked)
1748 		up_write(&fi->i_mmap_sem);
1749 
1750 	return 0;
1751 
1752 error:
1753 	if (is_truncate)
1754 		fuse_release_nowrite(inode);
1755 
1756 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1757 
1758 	if (fault_blocked)
1759 		up_write(&fi->i_mmap_sem);
1760 	return err;
1761 }
1762 
1763 static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry,
1764 			struct iattr *attr)
1765 {
1766 	struct inode *inode = d_inode(entry);
1767 	struct fuse_conn *fc = get_fuse_conn(inode);
1768 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1769 	int ret;
1770 
1771 	if (fuse_is_bad(inode))
1772 		return -EIO;
1773 
1774 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1775 		return -EACCES;
1776 
1777 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1778 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1779 				    ATTR_MODE);
1780 
1781 		/*
1782 		 * The only sane way to reliably kill suid/sgid is to do it in
1783 		 * the userspace filesystem
1784 		 *
1785 		 * This should be done on write(), truncate() and chown().
1786 		 */
1787 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
1788 			/*
1789 			 * ia_mode calculation may have used stale i_mode.
1790 			 * Refresh and recalculate.
1791 			 */
1792 			ret = fuse_do_getattr(inode, NULL, file);
1793 			if (ret)
1794 				return ret;
1795 
1796 			attr->ia_mode = inode->i_mode;
1797 			if (inode->i_mode & S_ISUID) {
1798 				attr->ia_valid |= ATTR_MODE;
1799 				attr->ia_mode &= ~S_ISUID;
1800 			}
1801 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1802 				attr->ia_valid |= ATTR_MODE;
1803 				attr->ia_mode &= ~S_ISGID;
1804 			}
1805 		}
1806 	}
1807 	if (!attr->ia_valid)
1808 		return 0;
1809 
1810 	ret = fuse_do_setattr(entry, attr, file);
1811 	if (!ret) {
1812 		/*
1813 		 * If filesystem supports acls it may have updated acl xattrs in
1814 		 * the filesystem, so forget cached acls for the inode.
1815 		 */
1816 		if (fc->posix_acl)
1817 			forget_all_cached_acls(inode);
1818 
1819 		/* Directory mode changed, may need to revalidate access */
1820 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1821 			fuse_invalidate_entry_cache(entry);
1822 	}
1823 	return ret;
1824 }
1825 
1826 static int fuse_getattr(struct user_namespace *mnt_userns,
1827 			const struct path *path, struct kstat *stat,
1828 			u32 request_mask, unsigned int flags)
1829 {
1830 	struct inode *inode = d_inode(path->dentry);
1831 	struct fuse_conn *fc = get_fuse_conn(inode);
1832 
1833 	if (fuse_is_bad(inode))
1834 		return -EIO;
1835 
1836 	if (!fuse_allow_current_process(fc)) {
1837 		if (!request_mask) {
1838 			/*
1839 			 * If user explicitly requested *nothing* then don't
1840 			 * error out, but return st_dev only.
1841 			 */
1842 			stat->result_mask = 0;
1843 			stat->dev = inode->i_sb->s_dev;
1844 			return 0;
1845 		}
1846 		return -EACCES;
1847 	}
1848 
1849 	return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
1850 }
1851 
1852 static const struct inode_operations fuse_dir_inode_operations = {
1853 	.lookup		= fuse_lookup,
1854 	.mkdir		= fuse_mkdir,
1855 	.symlink	= fuse_symlink,
1856 	.unlink		= fuse_unlink,
1857 	.rmdir		= fuse_rmdir,
1858 	.rename		= fuse_rename2,
1859 	.link		= fuse_link,
1860 	.setattr	= fuse_setattr,
1861 	.create		= fuse_create,
1862 	.atomic_open	= fuse_atomic_open,
1863 	.mknod		= fuse_mknod,
1864 	.permission	= fuse_permission,
1865 	.getattr	= fuse_getattr,
1866 	.listxattr	= fuse_listxattr,
1867 	.get_acl	= fuse_get_acl,
1868 	.set_acl	= fuse_set_acl,
1869 };
1870 
1871 static const struct file_operations fuse_dir_operations = {
1872 	.llseek		= generic_file_llseek,
1873 	.read		= generic_read_dir,
1874 	.iterate_shared	= fuse_readdir,
1875 	.open		= fuse_dir_open,
1876 	.release	= fuse_dir_release,
1877 	.fsync		= fuse_dir_fsync,
1878 	.unlocked_ioctl	= fuse_dir_ioctl,
1879 	.compat_ioctl	= fuse_dir_compat_ioctl,
1880 };
1881 
1882 static const struct inode_operations fuse_common_inode_operations = {
1883 	.setattr	= fuse_setattr,
1884 	.permission	= fuse_permission,
1885 	.getattr	= fuse_getattr,
1886 	.listxattr	= fuse_listxattr,
1887 	.get_acl	= fuse_get_acl,
1888 	.set_acl	= fuse_set_acl,
1889 };
1890 
1891 static const struct inode_operations fuse_symlink_inode_operations = {
1892 	.setattr	= fuse_setattr,
1893 	.get_link	= fuse_get_link,
1894 	.getattr	= fuse_getattr,
1895 	.listxattr	= fuse_listxattr,
1896 };
1897 
1898 void fuse_init_common(struct inode *inode)
1899 {
1900 	inode->i_op = &fuse_common_inode_operations;
1901 }
1902 
1903 void fuse_init_dir(struct inode *inode)
1904 {
1905 	struct fuse_inode *fi = get_fuse_inode(inode);
1906 
1907 	inode->i_op = &fuse_dir_inode_operations;
1908 	inode->i_fop = &fuse_dir_operations;
1909 
1910 	spin_lock_init(&fi->rdc.lock);
1911 	fi->rdc.cached = false;
1912 	fi->rdc.size = 0;
1913 	fi->rdc.pos = 0;
1914 	fi->rdc.version = 0;
1915 }
1916 
1917 static int fuse_symlink_readpage(struct file *null, struct page *page)
1918 {
1919 	int err = fuse_readlink_page(page->mapping->host, page);
1920 
1921 	if (!err)
1922 		SetPageUptodate(page);
1923 
1924 	unlock_page(page);
1925 
1926 	return err;
1927 }
1928 
1929 static const struct address_space_operations fuse_symlink_aops = {
1930 	.readpage	= fuse_symlink_readpage,
1931 };
1932 
1933 void fuse_init_symlink(struct inode *inode)
1934 {
1935 	inode->i_op = &fuse_symlink_inode_operations;
1936 	inode->i_data.a_ops = &fuse_symlink_aops;
1937 	inode_nohighmem(inode);
1938 }
1939