xref: /openbmc/linux/fs/fuse/dir.c (revision c5c87812)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/sched.h>
15 #include <linux/namei.h>
16 #include <linux/slab.h>
17 #include <linux/xattr.h>
18 #include <linux/iversion.h>
19 #include <linux/posix_acl.h>
20 
21 static void fuse_advise_use_readdirplus(struct inode *dir)
22 {
23 	struct fuse_inode *fi = get_fuse_inode(dir);
24 
25 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
26 }
27 
28 #if BITS_PER_LONG >= 64
29 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
30 {
31 	entry->d_fsdata = (void *) time;
32 }
33 
34 static inline u64 fuse_dentry_time(const struct dentry *entry)
35 {
36 	return (u64)entry->d_fsdata;
37 }
38 
39 #else
/*
 * Per-dentry data used when BITS_PER_LONG < 64: a u64 does not fit in the
 * d_fsdata pointer there, so d_fsdata points to one of these instead
 * (allocated in fuse_dentry_init(), freed in fuse_dentry_release()).
 */
union fuse_dentry {
	u64 time;		/* value set/read by the dentry time helpers */
	struct rcu_head rcu;	/* passed to kfree_rcu() on release */
};
44 
45 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
46 {
47 	((union fuse_dentry *) dentry->d_fsdata)->time = time;
48 }
49 
50 static inline u64 fuse_dentry_time(const struct dentry *entry)
51 {
52 	return ((union fuse_dentry *) entry->d_fsdata)->time;
53 }
54 #endif
55 
56 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
57 {
58 	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
59 	bool delete = !time && fc->delete_stale;
60 	/*
61 	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
62 	 * Don't care about races, either way it's just an optimization
63 	 */
64 	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
65 	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
66 		spin_lock(&dentry->d_lock);
67 		if (!delete)
68 			dentry->d_flags &= ~DCACHE_OP_DELETE;
69 		else
70 			dentry->d_flags |= DCACHE_OP_DELETE;
71 		spin_unlock(&dentry->d_lock);
72 	}
73 
74 	__fuse_dentry_settime(dentry, time);
75 }
76 
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until which the dentry/attributes remain valid is
 * stored in dentry->d_fsdata and fuse_inode->i_time respectively.
 */
82 
83 /*
84  * Calculate the time in jiffies until a dentry/attributes are valid
85  */
86 static u64 time_to_jiffies(u64 sec, u32 nsec)
87 {
88 	if (sec || nsec) {
89 		struct timespec64 ts = {
90 			sec,
91 			min_t(u32, nsec, NSEC_PER_SEC - 1)
92 		};
93 
94 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
95 	} else
96 		return 0;
97 }
98 
99 /*
100  * Set dentry and possibly attribute timeouts from the lookup/mk*
101  * replies
102  */
103 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
104 {
105 	fuse_dentry_settime(entry,
106 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
107 }
108 
109 static u64 attr_timeout(struct fuse_attr_out *o)
110 {
111 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
112 }
113 
114 u64 entry_attr_timeout(struct fuse_entry_out *o)
115 {
116 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
117 }
118 
119 static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
120 {
121 	set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
122 }
123 
124 /*
125  * Mark the attributes as stale, so that at the next call to
126  * ->getattr() they will be fetched from userspace
127  */
128 void fuse_invalidate_attr(struct inode *inode)
129 {
130 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
131 }
132 
133 static void fuse_dir_changed(struct inode *dir)
134 {
135 	fuse_invalidate_attr(dir);
136 	inode_maybe_inc_iversion(dir, false);
137 }
138 
139 /**
140  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
141  * atime is not used.
142  */
143 void fuse_invalidate_atime(struct inode *inode)
144 {
145 	if (!IS_RDONLY(inode))
146 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
147 }
148 
149 /*
150  * Just mark the entry as stale, so that a next attempt to look it up
151  * will result in a new lookup call to userspace
152  *
153  * This is called when a dentry is about to become negative and the
154  * timeout is unknown (unlink, rmdir, rename and in some cases
155  * lookup)
156  */
157 void fuse_invalidate_entry_cache(struct dentry *entry)
158 {
159 	fuse_dentry_settime(entry, 0);
160 }
161 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the dcache to
 * invalidate the dentry via d_invalidate().
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	/* Zero timeout: same effect as fuse_invalidate_entry_cache() */
	fuse_dentry_settime(entry, 0);
}
171 
172 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
173 			     u64 nodeid, const struct qstr *name,
174 			     struct fuse_entry_out *outarg)
175 {
176 	memset(outarg, 0, sizeof(struct fuse_entry_out));
177 	args->opcode = FUSE_LOOKUP;
178 	args->nodeid = nodeid;
179 	args->in_numargs = 1;
180 	args->in_args[0].size = name->len + 1;
181 	args->in_args[0].value = name->name;
182 	args->out_numargs = 1;
183 	args->out_args[0].size = sizeof(struct fuse_entry_out);
184 	args->out_args[0].value = outarg;
185 }
186 
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 *
 * Returns 1 if the dentry is valid, 0 if it is invalid, -ECHILD if the
 * check cannot be completed under LOOKUP_RCU, or -ENOMEM if allocating
 * the forget link fails or the lookup request itself returns -ENOMEM.
 */
static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct dentry *parent;
	struct fuse_mount *fm;
	struct fuse_inode *fi;
	int ret;

	inode = d_inode_rcu(entry);
	if (inode && is_bad_inode(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & LOOKUP_REVAL)) {
		/* Timeout expired or revalidation forced: redo the lookup */
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* Bail out with -ECHILD under LOOKUP_RCU before sending a request */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fm = get_fuse_mount(inode);

		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request; handed to fuse_change_attributes() below */
		attr_version = fuse_get_attr_version(fm->fc);

		parent = dget_parent(entry);
		fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
				 &entry->d_name, &outarg);
		ret = fuse_simple_request(fm, &args);
		dput(parent);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			/*
			 * The reply names a different node, or the submount
			 * flag no longer matches the inode: queue a forget for
			 * the nodeid just returned and report the dentry invalid.
			 */
			if (outarg.nodeid != get_node_id(inode) ||
			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
				fuse_queue_forget(fm->fc, forget,
						  outarg.nodeid, 1);
				goto invalid;
			}
			/* Same node: bump its lookup count */
			spin_lock(&fi->lock);
			fi->nlookup++;
			spin_unlock(&fi->lock);
		}
		/* The forget link was not queued on this path, so free it */
		kfree(forget);
		if (ret == -ENOMEM)
			goto out;
		/* Any other error, bad attributes or a changed file type: invalid */
		if (ret || fuse_invalid_attr(&outarg.attr) ||
		    (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
			goto invalid;

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &outarg.attr,
				       entry_attr_timeout(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* Still within the timeout: only handle FUSE_I_INIT_RDPLUS */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			/* Do not clear the bit here; return -ECHILD instead */
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			parent = dget_parent(entry);
			fuse_advise_use_readdirplus(d_inode(parent));
			dput(parent);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
282 
283 #if BITS_PER_LONG < 64
284 static int fuse_dentry_init(struct dentry *dentry)
285 {
286 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
287 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
288 
289 	return dentry->d_fsdata ? 0 : -ENOMEM;
290 }
291 static void fuse_dentry_release(struct dentry *dentry)
292 {
293 	union fuse_dentry *fd = dentry->d_fsdata;
294 
295 	kfree_rcu(fd, rcu);
296 }
297 #endif
298 
299 static int fuse_dentry_delete(const struct dentry *dentry)
300 {
301 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
302 }
303 
/*
 * Create a fuse_mount object with a new superblock (with path->dentry
 * as the root), and return that mount so it can be auto-mounted on
 * @path.
 *
 * Returns the new vfsmount on success, or an ERR_PTR() carrying the error
 * from whichever step failed (-ENOMEM if the fuse_mount allocation fails).
 */
static struct vfsmount *fuse_dentry_automount(struct path *path)
{
	struct fs_context *fsc;
	struct fuse_mount *parent_fm = get_fuse_mount_super(path->mnt->mnt_sb);
	struct fuse_conn *fc = parent_fm->fc;
	struct fuse_mount *fm;
	struct vfsmount *mnt;
	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
	struct super_block *sb;
	int err;

	/* Context for a submount of the same filesystem type as the parent */
	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
	if (IS_ERR(fsc)) {
		err = PTR_ERR(fsc);
		goto out;
	}

	err = -ENOMEM;
	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		goto out_put_fsc;

	refcount_set(&fm->count, 1);
	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, NULL, set_anon_super_fc);
	if (IS_ERR(sb)) {
		err = PTR_ERR(sb);
		/* No superblock was obtained: drop our reference on fm here */
		fuse_mount_put(fm);
		goto out_put_fsc;
	}
	/* The new mount uses the parent's connection */
	fm->fc = fuse_conn_get(fc);

	/* Initialize superblock, making @mp_fi its root */
	err = fuse_fill_super_submount(sb, mp_fi);
	if (err)
		goto out_put_sb;

	sb->s_flags |= SB_ACTIVE;
	fsc->root = dget(sb->s_root);
	/* We are done configuring the superblock, so unlock it */
	up_write(&sb->s_umount);

	/* Add the new mount to the connection's list, under fc->killsb */
	down_write(&fc->killsb);
	list_add_tail(&fm->fc_entry, &fc->mounts);
	up_write(&fc->killsb);

	/* Create the submount */
	mnt = vfs_create_mount(fsc);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		/* fsc->root is set here, so put_fs_context() puts the superblock */
		goto out_put_fsc;
	}
	mntget(mnt);
	put_fs_context(fsc);
	return mnt;

out_put_sb:
	/*
	 * Only jump here when fsc->root is NULL and sb is still locked
	 * (otherwise put_fs_context() will put the superblock)
	 */
	deactivate_locked_super(sb);
out_put_fsc:
	put_fs_context(fsc);
out:
	return ERR_PTR(err);
}
376 
377 const struct dentry_operations fuse_dentry_operations = {
378 	.d_revalidate	= fuse_dentry_revalidate,
379 	.d_delete	= fuse_dentry_delete,
380 #if BITS_PER_LONG < 64
381 	.d_init		= fuse_dentry_init,
382 	.d_release	= fuse_dentry_release,
383 #endif
384 	.d_automount	= fuse_dentry_automount,
385 };
386 
387 const struct dentry_operations fuse_root_dentry_operations = {
388 #if BITS_PER_LONG < 64
389 	.d_init		= fuse_dentry_init,
390 	.d_release	= fuse_dentry_release,
391 #endif
392 };
393 
/*
 * Return 1 if the file-type bits of mode @m denote a regular file,
 * directory, symlink, character device, block device, FIFO or socket;
 * return 0 for any other type.
 */
int fuse_valid_type(int m)
{
	switch (m & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFIFO:
	case S_IFSOCK:
		return 1;
	default:
		return 0;
	}
}
399 
400 bool fuse_invalid_attr(struct fuse_attr *attr)
401 {
402 	return !fuse_valid_type(attr->mode) ||
403 		attr->size > LLONG_MAX;
404 }
405 
406 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
407 		     struct fuse_entry_out *outarg, struct inode **inode)
408 {
409 	struct fuse_mount *fm = get_fuse_mount_super(sb);
410 	FUSE_ARGS(args);
411 	struct fuse_forget_link *forget;
412 	u64 attr_version;
413 	int err;
414 
415 	*inode = NULL;
416 	err = -ENAMETOOLONG;
417 	if (name->len > FUSE_NAME_MAX)
418 		goto out;
419 
420 
421 	forget = fuse_alloc_forget();
422 	err = -ENOMEM;
423 	if (!forget)
424 		goto out;
425 
426 	attr_version = fuse_get_attr_version(fm->fc);
427 
428 	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
429 	err = fuse_simple_request(fm, &args);
430 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
431 	if (err || !outarg->nodeid)
432 		goto out_put_forget;
433 
434 	err = -EIO;
435 	if (!outarg->nodeid)
436 		goto out_put_forget;
437 	if (fuse_invalid_attr(&outarg->attr))
438 		goto out_put_forget;
439 
440 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
441 			   &outarg->attr, entry_attr_timeout(outarg),
442 			   attr_version);
443 	err = -ENOMEM;
444 	if (!*inode) {
445 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
446 		goto out;
447 	}
448 	err = 0;
449 
450  out_put_forget:
451 	kfree(forget);
452  out:
453 	return err;
454 }
455 
456 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
457 				  unsigned int flags)
458 {
459 	int err;
460 	struct fuse_entry_out outarg;
461 	struct inode *inode;
462 	struct dentry *newent;
463 	bool outarg_valid = true;
464 	bool locked;
465 
466 	locked = fuse_lock_inode(dir);
467 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
468 			       &outarg, &inode);
469 	fuse_unlock_inode(dir, locked);
470 	if (err == -ENOENT) {
471 		outarg_valid = false;
472 		err = 0;
473 	}
474 	if (err)
475 		goto out_err;
476 
477 	err = -EIO;
478 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
479 		goto out_iput;
480 
481 	newent = d_splice_alias(inode, entry);
482 	err = PTR_ERR(newent);
483 	if (IS_ERR(newent))
484 		goto out_err;
485 
486 	entry = newent ? newent : entry;
487 	if (outarg_valid)
488 		fuse_change_entry_timeout(entry, &outarg);
489 	else
490 		fuse_invalidate_entry_cache(entry);
491 
492 	if (inode)
493 		fuse_advise_use_readdirplus(dir);
494 	return newent;
495 
496  out_iput:
497 	iput(inode);
498  out_err:
499 	return ERR_PTR(err);
500 }
501 
502 /*
503  * Atomic create+open operation
504  *
505  * If the filesystem doesn't support this, then fall back to separate
506  * 'mknod' + 'open' requests.
507  */
508 static int fuse_create_open(struct inode *dir, struct dentry *entry,
509 			    struct file *file, unsigned flags,
510 			    umode_t mode)
511 {
512 	int err;
513 	struct inode *inode;
514 	struct fuse_mount *fm = get_fuse_mount(dir);
515 	FUSE_ARGS(args);
516 	struct fuse_forget_link *forget;
517 	struct fuse_create_in inarg;
518 	struct fuse_open_out outopen;
519 	struct fuse_entry_out outentry;
520 	struct fuse_inode *fi;
521 	struct fuse_file *ff;
522 
523 	/* Userspace expects S_IFREG in create mode */
524 	BUG_ON((mode & S_IFMT) != S_IFREG);
525 
526 	forget = fuse_alloc_forget();
527 	err = -ENOMEM;
528 	if (!forget)
529 		goto out_err;
530 
531 	err = -ENOMEM;
532 	ff = fuse_file_alloc(fm);
533 	if (!ff)
534 		goto out_put_forget_req;
535 
536 	if (!fm->fc->dont_mask)
537 		mode &= ~current_umask();
538 
539 	flags &= ~O_NOCTTY;
540 	memset(&inarg, 0, sizeof(inarg));
541 	memset(&outentry, 0, sizeof(outentry));
542 	inarg.flags = flags;
543 	inarg.mode = mode;
544 	inarg.umask = current_umask();
545 	args.opcode = FUSE_CREATE;
546 	args.nodeid = get_node_id(dir);
547 	args.in_numargs = 2;
548 	args.in_args[0].size = sizeof(inarg);
549 	args.in_args[0].value = &inarg;
550 	args.in_args[1].size = entry->d_name.len + 1;
551 	args.in_args[1].value = entry->d_name.name;
552 	args.out_numargs = 2;
553 	args.out_args[0].size = sizeof(outentry);
554 	args.out_args[0].value = &outentry;
555 	args.out_args[1].size = sizeof(outopen);
556 	args.out_args[1].value = &outopen;
557 	err = fuse_simple_request(fm, &args);
558 	if (err)
559 		goto out_free_ff;
560 
561 	err = -EIO;
562 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
563 	    fuse_invalid_attr(&outentry.attr))
564 		goto out_free_ff;
565 
566 	ff->fh = outopen.fh;
567 	ff->nodeid = outentry.nodeid;
568 	ff->open_flags = outopen.open_flags;
569 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
570 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
571 	if (!inode) {
572 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
573 		fuse_sync_release(NULL, ff, flags);
574 		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
575 		err = -ENOMEM;
576 		goto out_err;
577 	}
578 	kfree(forget);
579 	d_instantiate(entry, inode);
580 	fuse_change_entry_timeout(entry, &outentry);
581 	fuse_dir_changed(dir);
582 	err = finish_open(file, entry, generic_file_open);
583 	if (err) {
584 		fi = get_fuse_inode(inode);
585 		fuse_sync_release(fi, ff, flags);
586 	} else {
587 		file->private_data = ff;
588 		fuse_finish_open(inode, file);
589 	}
590 	return err;
591 
592 out_free_ff:
593 	fuse_file_free(ff);
594 out_put_forget_req:
595 	kfree(forget);
596 out_err:
597 	return err;
598 }
599 
600 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
601 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
602 			    struct file *file, unsigned flags,
603 			    umode_t mode)
604 {
605 	int err;
606 	struct fuse_conn *fc = get_fuse_conn(dir);
607 	struct dentry *res = NULL;
608 
609 	if (d_in_lookup(entry)) {
610 		res = fuse_lookup(dir, entry, 0);
611 		if (IS_ERR(res))
612 			return PTR_ERR(res);
613 
614 		if (res)
615 			entry = res;
616 	}
617 
618 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
619 		goto no_open;
620 
621 	/* Only creates */
622 	file->f_mode |= FMODE_CREATED;
623 
624 	if (fc->no_create)
625 		goto mknod;
626 
627 	err = fuse_create_open(dir, entry, file, flags, mode);
628 	if (err == -ENOSYS) {
629 		fc->no_create = 1;
630 		goto mknod;
631 	}
632 out_dput:
633 	dput(res);
634 	return err;
635 
636 mknod:
637 	err = fuse_mknod(dir, entry, mode, 0);
638 	if (err)
639 		goto out_dput;
640 no_open:
641 	return finish_no_open(file, res);
642 }
643 
644 /*
645  * Code shared between mknod, mkdir, symlink and link
646  */
647 static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
648 			    struct inode *dir, struct dentry *entry,
649 			    umode_t mode)
650 {
651 	struct fuse_entry_out outarg;
652 	struct inode *inode;
653 	struct dentry *d;
654 	int err;
655 	struct fuse_forget_link *forget;
656 
657 	forget = fuse_alloc_forget();
658 	if (!forget)
659 		return -ENOMEM;
660 
661 	memset(&outarg, 0, sizeof(outarg));
662 	args->nodeid = get_node_id(dir);
663 	args->out_numargs = 1;
664 	args->out_args[0].size = sizeof(outarg);
665 	args->out_args[0].value = &outarg;
666 	err = fuse_simple_request(fm, args);
667 	if (err)
668 		goto out_put_forget_req;
669 
670 	err = -EIO;
671 	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
672 		goto out_put_forget_req;
673 
674 	if ((outarg.attr.mode ^ mode) & S_IFMT)
675 		goto out_put_forget_req;
676 
677 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
678 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
679 	if (!inode) {
680 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
681 		return -ENOMEM;
682 	}
683 	kfree(forget);
684 
685 	d_drop(entry);
686 	d = d_splice_alias(inode, entry);
687 	if (IS_ERR(d))
688 		return PTR_ERR(d);
689 
690 	if (d) {
691 		fuse_change_entry_timeout(d, &outarg);
692 		dput(d);
693 	} else {
694 		fuse_change_entry_timeout(entry, &outarg);
695 	}
696 	fuse_dir_changed(dir);
697 	return 0;
698 
699  out_put_forget_req:
700 	kfree(forget);
701 	return err;
702 }
703 
704 static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
705 		      dev_t rdev)
706 {
707 	struct fuse_mknod_in inarg;
708 	struct fuse_mount *fm = get_fuse_mount(dir);
709 	FUSE_ARGS(args);
710 
711 	if (!fm->fc->dont_mask)
712 		mode &= ~current_umask();
713 
714 	memset(&inarg, 0, sizeof(inarg));
715 	inarg.mode = mode;
716 	inarg.rdev = new_encode_dev(rdev);
717 	inarg.umask = current_umask();
718 	args.opcode = FUSE_MKNOD;
719 	args.in_numargs = 2;
720 	args.in_args[0].size = sizeof(inarg);
721 	args.in_args[0].value = &inarg;
722 	args.in_args[1].size = entry->d_name.len + 1;
723 	args.in_args[1].value = entry->d_name.name;
724 	return create_new_entry(fm, &args, dir, entry, mode);
725 }
726 
727 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
728 		       bool excl)
729 {
730 	return fuse_mknod(dir, entry, mode, 0);
731 }
732 
733 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
734 {
735 	struct fuse_mkdir_in inarg;
736 	struct fuse_mount *fm = get_fuse_mount(dir);
737 	FUSE_ARGS(args);
738 
739 	if (!fm->fc->dont_mask)
740 		mode &= ~current_umask();
741 
742 	memset(&inarg, 0, sizeof(inarg));
743 	inarg.mode = mode;
744 	inarg.umask = current_umask();
745 	args.opcode = FUSE_MKDIR;
746 	args.in_numargs = 2;
747 	args.in_args[0].size = sizeof(inarg);
748 	args.in_args[0].value = &inarg;
749 	args.in_args[1].size = entry->d_name.len + 1;
750 	args.in_args[1].value = entry->d_name.name;
751 	return create_new_entry(fm, &args, dir, entry, S_IFDIR);
752 }
753 
754 static int fuse_symlink(struct inode *dir, struct dentry *entry,
755 			const char *link)
756 {
757 	struct fuse_mount *fm = get_fuse_mount(dir);
758 	unsigned len = strlen(link) + 1;
759 	FUSE_ARGS(args);
760 
761 	args.opcode = FUSE_SYMLINK;
762 	args.in_numargs = 2;
763 	args.in_args[0].size = entry->d_name.len + 1;
764 	args.in_args[0].value = entry->d_name.name;
765 	args.in_args[1].size = len;
766 	args.in_args[1].value = link;
767 	return create_new_entry(fm, &args, dir, entry, S_IFLNK);
768 }
769 
770 void fuse_update_ctime(struct inode *inode)
771 {
772 	if (!IS_NOCMTIME(inode)) {
773 		inode->i_ctime = current_time(inode);
774 		mark_inode_dirty_sync(inode);
775 	}
776 }
777 
778 static int fuse_unlink(struct inode *dir, struct dentry *entry)
779 {
780 	int err;
781 	struct fuse_mount *fm = get_fuse_mount(dir);
782 	FUSE_ARGS(args);
783 
784 	args.opcode = FUSE_UNLINK;
785 	args.nodeid = get_node_id(dir);
786 	args.in_numargs = 1;
787 	args.in_args[0].size = entry->d_name.len + 1;
788 	args.in_args[0].value = entry->d_name.name;
789 	err = fuse_simple_request(fm, &args);
790 	if (!err) {
791 		struct inode *inode = d_inode(entry);
792 		struct fuse_inode *fi = get_fuse_inode(inode);
793 
794 		spin_lock(&fi->lock);
795 		fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
796 		/*
797 		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
798 		 * happen if userspace filesystem is careless.  It would be
799 		 * difficult to enforce correct nlink usage so just ignore this
800 		 * condition here
801 		 */
802 		if (inode->i_nlink > 0)
803 			drop_nlink(inode);
804 		spin_unlock(&fi->lock);
805 		fuse_invalidate_attr(inode);
806 		fuse_dir_changed(dir);
807 		fuse_invalidate_entry_cache(entry);
808 		fuse_update_ctime(inode);
809 	} else if (err == -EINTR)
810 		fuse_invalidate_entry(entry);
811 	return err;
812 }
813 
814 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
815 {
816 	int err;
817 	struct fuse_mount *fm = get_fuse_mount(dir);
818 	FUSE_ARGS(args);
819 
820 	args.opcode = FUSE_RMDIR;
821 	args.nodeid = get_node_id(dir);
822 	args.in_numargs = 1;
823 	args.in_args[0].size = entry->d_name.len + 1;
824 	args.in_args[0].value = entry->d_name.name;
825 	err = fuse_simple_request(fm, &args);
826 	if (!err) {
827 		clear_nlink(d_inode(entry));
828 		fuse_dir_changed(dir);
829 		fuse_invalidate_entry_cache(entry);
830 	} else if (err == -EINTR)
831 		fuse_invalidate_entry(entry);
832 	return err;
833 }
834 
835 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
836 			      struct inode *newdir, struct dentry *newent,
837 			      unsigned int flags, int opcode, size_t argsize)
838 {
839 	int err;
840 	struct fuse_rename2_in inarg;
841 	struct fuse_mount *fm = get_fuse_mount(olddir);
842 	FUSE_ARGS(args);
843 
844 	memset(&inarg, 0, argsize);
845 	inarg.newdir = get_node_id(newdir);
846 	inarg.flags = flags;
847 	args.opcode = opcode;
848 	args.nodeid = get_node_id(olddir);
849 	args.in_numargs = 3;
850 	args.in_args[0].size = argsize;
851 	args.in_args[0].value = &inarg;
852 	args.in_args[1].size = oldent->d_name.len + 1;
853 	args.in_args[1].value = oldent->d_name.name;
854 	args.in_args[2].size = newent->d_name.len + 1;
855 	args.in_args[2].value = newent->d_name.name;
856 	err = fuse_simple_request(fm, &args);
857 	if (!err) {
858 		/* ctime changes */
859 		fuse_invalidate_attr(d_inode(oldent));
860 		fuse_update_ctime(d_inode(oldent));
861 
862 		if (flags & RENAME_EXCHANGE) {
863 			fuse_invalidate_attr(d_inode(newent));
864 			fuse_update_ctime(d_inode(newent));
865 		}
866 
867 		fuse_dir_changed(olddir);
868 		if (olddir != newdir)
869 			fuse_dir_changed(newdir);
870 
871 		/* newent will end up negative */
872 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
873 			fuse_invalidate_attr(d_inode(newent));
874 			fuse_invalidate_entry_cache(newent);
875 			fuse_update_ctime(d_inode(newent));
876 		}
877 	} else if (err == -EINTR) {
878 		/* If request was interrupted, DEITY only knows if the
879 		   rename actually took place.  If the invalidation
880 		   fails (e.g. some process has CWD under the renamed
881 		   directory), then there can be inconsistency between
882 		   the dcache and the real filesystem.  Tough luck. */
883 		fuse_invalidate_entry(oldent);
884 		if (d_really_is_positive(newent))
885 			fuse_invalidate_entry(newent);
886 	}
887 
888 	return err;
889 }
890 
891 static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
892 			struct inode *newdir, struct dentry *newent,
893 			unsigned int flags)
894 {
895 	struct fuse_conn *fc = get_fuse_conn(olddir);
896 	int err;
897 
898 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
899 		return -EINVAL;
900 
901 	if (flags) {
902 		if (fc->no_rename2 || fc->minor < 23)
903 			return -EINVAL;
904 
905 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
906 					 FUSE_RENAME2,
907 					 sizeof(struct fuse_rename2_in));
908 		if (err == -ENOSYS) {
909 			fc->no_rename2 = 1;
910 			err = -EINVAL;
911 		}
912 	} else {
913 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
914 					 FUSE_RENAME,
915 					 sizeof(struct fuse_rename_in));
916 	}
917 
918 	return err;
919 }
920 
921 static int fuse_link(struct dentry *entry, struct inode *newdir,
922 		     struct dentry *newent)
923 {
924 	int err;
925 	struct fuse_link_in inarg;
926 	struct inode *inode = d_inode(entry);
927 	struct fuse_mount *fm = get_fuse_mount(inode);
928 	FUSE_ARGS(args);
929 
930 	memset(&inarg, 0, sizeof(inarg));
931 	inarg.oldnodeid = get_node_id(inode);
932 	args.opcode = FUSE_LINK;
933 	args.in_numargs = 2;
934 	args.in_args[0].size = sizeof(inarg);
935 	args.in_args[0].value = &inarg;
936 	args.in_args[1].size = newent->d_name.len + 1;
937 	args.in_args[1].value = newent->d_name.name;
938 	err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
939 	/* Contrary to "normal" filesystems it can happen that link
940 	   makes two "logical" inodes point to the same "physical"
941 	   inode.  We invalidate the attributes of the old one, so it
942 	   will reflect changes in the backing inode (link count,
943 	   etc.)
944 	*/
945 	if (!err) {
946 		struct fuse_inode *fi = get_fuse_inode(inode);
947 
948 		spin_lock(&fi->lock);
949 		fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
950 		if (likely(inode->i_nlink < UINT_MAX))
951 			inc_nlink(inode);
952 		spin_unlock(&fi->lock);
953 		fuse_invalidate_attr(inode);
954 		fuse_update_ctime(inode);
955 	} else if (err == -EINTR) {
956 		fuse_invalidate_attr(inode);
957 	}
958 	return err;
959 }
960 
961 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
962 			  struct kstat *stat)
963 {
964 	unsigned int blkbits;
965 	struct fuse_conn *fc = get_fuse_conn(inode);
966 
967 	/* see the comment in fuse_change_attributes() */
968 	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
969 		attr->size = i_size_read(inode);
970 		attr->mtime = inode->i_mtime.tv_sec;
971 		attr->mtimensec = inode->i_mtime.tv_nsec;
972 		attr->ctime = inode->i_ctime.tv_sec;
973 		attr->ctimensec = inode->i_ctime.tv_nsec;
974 	}
975 
976 	stat->dev = inode->i_sb->s_dev;
977 	stat->ino = attr->ino;
978 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
979 	stat->nlink = attr->nlink;
980 	stat->uid = make_kuid(fc->user_ns, attr->uid);
981 	stat->gid = make_kgid(fc->user_ns, attr->gid);
982 	stat->rdev = inode->i_rdev;
983 	stat->atime.tv_sec = attr->atime;
984 	stat->atime.tv_nsec = attr->atimensec;
985 	stat->mtime.tv_sec = attr->mtime;
986 	stat->mtime.tv_nsec = attr->mtimensec;
987 	stat->ctime.tv_sec = attr->ctime;
988 	stat->ctime.tv_nsec = attr->ctimensec;
989 	stat->size = attr->size;
990 	stat->blocks = attr->blocks;
991 
992 	if (attr->blksize != 0)
993 		blkbits = ilog2(attr->blksize);
994 	else
995 		blkbits = inode->i_sb->s_blocksize_bits;
996 
997 	stat->blksize = 1 << blkbits;
998 }
999 
1000 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
1001 			   struct file *file)
1002 {
1003 	int err;
1004 	struct fuse_getattr_in inarg;
1005 	struct fuse_attr_out outarg;
1006 	struct fuse_mount *fm = get_fuse_mount(inode);
1007 	FUSE_ARGS(args);
1008 	u64 attr_version;
1009 
1010 	attr_version = fuse_get_attr_version(fm->fc);
1011 
1012 	memset(&inarg, 0, sizeof(inarg));
1013 	memset(&outarg, 0, sizeof(outarg));
1014 	/* Directories have separate file-handle space */
1015 	if (file && S_ISREG(inode->i_mode)) {
1016 		struct fuse_file *ff = file->private_data;
1017 
1018 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1019 		inarg.fh = ff->fh;
1020 	}
1021 	args.opcode = FUSE_GETATTR;
1022 	args.nodeid = get_node_id(inode);
1023 	args.in_numargs = 1;
1024 	args.in_args[0].size = sizeof(inarg);
1025 	args.in_args[0].value = &inarg;
1026 	args.out_numargs = 1;
1027 	args.out_args[0].size = sizeof(outarg);
1028 	args.out_args[0].value = &outarg;
1029 	err = fuse_simple_request(fm, &args);
1030 	if (!err) {
1031 		if (fuse_invalid_attr(&outarg.attr) ||
1032 		    (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1033 			make_bad_inode(inode);
1034 			err = -EIO;
1035 		} else {
1036 			fuse_change_attributes(inode, &outarg.attr,
1037 					       attr_timeout(&outarg),
1038 					       attr_version);
1039 			if (stat)
1040 				fuse_fillattr(inode, &outarg.attr, stat);
1041 		}
1042 	}
1043 	return err;
1044 }
1045 
1046 static int fuse_update_get_attr(struct inode *inode, struct file *file,
1047 				struct kstat *stat, u32 request_mask,
1048 				unsigned int flags)
1049 {
1050 	struct fuse_inode *fi = get_fuse_inode(inode);
1051 	int err = 0;
1052 	bool sync;
1053 
1054 	if (flags & AT_STATX_FORCE_SYNC)
1055 		sync = true;
1056 	else if (flags & AT_STATX_DONT_SYNC)
1057 		sync = false;
1058 	else if (request_mask & READ_ONCE(fi->inval_mask))
1059 		sync = true;
1060 	else
1061 		sync = time_before64(fi->i_time, get_jiffies_64());
1062 
1063 	if (sync) {
1064 		forget_all_cached_acls(inode);
1065 		err = fuse_do_getattr(inode, stat, file);
1066 	} else if (stat) {
1067 		generic_fillattr(inode, stat);
1068 		stat->mode = fi->orig_i_mode;
1069 		stat->ino = fi->orig_ino;
1070 	}
1071 
1072 	return err;
1073 }
1074 
1075 int fuse_update_attributes(struct inode *inode, struct file *file)
1076 {
1077 	/* Do *not* need to get atime for internal purposes */
1078 	return fuse_update_get_attr(inode, file, NULL,
1079 				    STATX_BASIC_STATS & ~STATX_ATIME, 0);
1080 }
1081 
/*
 * Invalidate the cached dentry @name in the directory whose node ID is
 * @parent_nodeid.  If @child_nodeid is non-zero and the dentry is positive,
 * also verify the child's node ID and delete the dentry.
 *
 * @name->hash is computed here.
 *
 * Returns 0 on success, -ENOENT if the parent inode, a dentry alias for it
 * or the named entry cannot be found (or the child node ID does not match),
 * -ENOTDIR if the parent is not a directory, -EBUSY if the entry is a
 * mountpoint, and -ENOTEMPTY if it is a non-empty directory.
 */
int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = fuse_ilookup(fc, parent_nodeid, NULL);
	if (!parent)
		return -ENOENT;

	inode_lock(parent);
	if (!S_ISDIR(parent->i_mode))
		goto unlock;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto unlock;

	name->hash = full_name_hash(dir, name->name, name->len);
	entry = d_lookup(dir, name);
	dput(dir);
	if (!entry)
		goto unlock;

	fuse_dir_changed(parent);
	fuse_invalidate_entry(entry);

	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		/* Deletion was requested: check the child under its inode lock */
		inode_lock(d_inode(entry));
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		inode_unlock(d_inode(entry));
		/* d_delete() is called only after the child's inode lock is dropped */
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}
	dput(entry);

 unlock:
	inode_unlock(parent);
	iput(parent);
	return err;
}
1147 
1148 /*
1149  * Calling into a user-controlled filesystem gives the filesystem
1150  * daemon ptrace-like capabilities over the current process.  This
1151  * means, that the filesystem daemon is able to record the exact
1152  * filesystem operations performed, and can also control the behavior
1153  * of the requester process in otherwise impossible ways.  For example
1154  * it can delay the operation for arbitrary length of time allowing
1155  * DoS against the requester.
1156  *
1157  * For this reason only those processes can call into the filesystem,
1158  * for which the owner of the mount has ptrace privilege.  This
1159  * excludes processes started by other users, suid or sgid processes.
1160  */
1161 int fuse_allow_current_process(struct fuse_conn *fc)
1162 {
1163 	const struct cred *cred;
1164 
1165 	if (fc->allow_other)
1166 		return current_in_userns(fc->user_ns);
1167 
1168 	cred = current_cred();
1169 	if (uid_eq(cred->euid, fc->user_id) &&
1170 	    uid_eq(cred->suid, fc->user_id) &&
1171 	    uid_eq(cred->uid,  fc->user_id) &&
1172 	    gid_eq(cred->egid, fc->group_id) &&
1173 	    gid_eq(cred->sgid, fc->group_id) &&
1174 	    gid_eq(cred->gid,  fc->group_id))
1175 		return 1;
1176 
1177 	return 0;
1178 }
1179 
1180 static int fuse_access(struct inode *inode, int mask)
1181 {
1182 	struct fuse_mount *fm = get_fuse_mount(inode);
1183 	FUSE_ARGS(args);
1184 	struct fuse_access_in inarg;
1185 	int err;
1186 
1187 	BUG_ON(mask & MAY_NOT_BLOCK);
1188 
1189 	if (fm->fc->no_access)
1190 		return 0;
1191 
1192 	memset(&inarg, 0, sizeof(inarg));
1193 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1194 	args.opcode = FUSE_ACCESS;
1195 	args.nodeid = get_node_id(inode);
1196 	args.in_numargs = 1;
1197 	args.in_args[0].size = sizeof(inarg);
1198 	args.in_args[0].value = &inarg;
1199 	err = fuse_simple_request(fm, &args);
1200 	if (err == -ENOSYS) {
1201 		fm->fc->no_access = 1;
1202 		err = 0;
1203 	}
1204 	return err;
1205 }
1206 
1207 static int fuse_perm_getattr(struct inode *inode, int mask)
1208 {
1209 	if (mask & MAY_NOT_BLOCK)
1210 		return -ECHILD;
1211 
1212 	forget_all_cached_acls(inode);
1213 	return fuse_do_getattr(inode, NULL, NULL);
1214 }
1215 
1216 /*
1217  * Check permission.  The two basic access models of FUSE are:
1218  *
1219  * 1) Local access checking ('default_permissions' mount option) based
1220  * on file mode.  This is the plain old disk filesystem permission
1221  * modell.
1222  *
1223  * 2) "Remote" access checking, where server is responsible for
1224  * checking permission in each inode operation.  An exception to this
1225  * is if ->permission() was invoked from sys_access() in which case an
1226  * access request is sent.  Execute permission is still checked
1227  * locally based on file mode.
1228  */
1229 static int fuse_permission(struct inode *inode, int mask)
1230 {
1231 	struct fuse_conn *fc = get_fuse_conn(inode);
1232 	bool refreshed = false;
1233 	int err = 0;
1234 
1235 	if (!fuse_allow_current_process(fc))
1236 		return -EACCES;
1237 
1238 	/*
1239 	 * If attributes are needed, refresh them before proceeding
1240 	 */
1241 	if (fc->default_permissions ||
1242 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1243 		struct fuse_inode *fi = get_fuse_inode(inode);
1244 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1245 
1246 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1247 		    time_before64(fi->i_time, get_jiffies_64())) {
1248 			refreshed = true;
1249 
1250 			err = fuse_perm_getattr(inode, mask);
1251 			if (err)
1252 				return err;
1253 		}
1254 	}
1255 
1256 	if (fc->default_permissions) {
1257 		err = generic_permission(inode, mask);
1258 
1259 		/* If permission is denied, try to refresh file
1260 		   attributes.  This is also needed, because the root
1261 		   node will at first have no permissions */
1262 		if (err == -EACCES && !refreshed) {
1263 			err = fuse_perm_getattr(inode, mask);
1264 			if (!err)
1265 				err = generic_permission(inode, mask);
1266 		}
1267 
1268 		/* Note: the opposite of the above test does not
1269 		   exist.  So if permissions are revoked this won't be
1270 		   noticed immediately, only after the attribute
1271 		   timeout has expired */
1272 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1273 		err = fuse_access(inode, mask);
1274 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1275 		if (!(inode->i_mode & S_IXUGO)) {
1276 			if (refreshed)
1277 				return -EACCES;
1278 
1279 			err = fuse_perm_getattr(inode, mask);
1280 			if (!err && !(inode->i_mode & S_IXUGO))
1281 				return -EACCES;
1282 		}
1283 	}
1284 	return err;
1285 }
1286 
1287 static int fuse_readlink_page(struct inode *inode, struct page *page)
1288 {
1289 	struct fuse_mount *fm = get_fuse_mount(inode);
1290 	struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1291 	struct fuse_args_pages ap = {
1292 		.num_pages = 1,
1293 		.pages = &page,
1294 		.descs = &desc,
1295 	};
1296 	char *link;
1297 	ssize_t res;
1298 
1299 	ap.args.opcode = FUSE_READLINK;
1300 	ap.args.nodeid = get_node_id(inode);
1301 	ap.args.out_pages = true;
1302 	ap.args.out_argvar = true;
1303 	ap.args.page_zeroing = true;
1304 	ap.args.out_numargs = 1;
1305 	ap.args.out_args[0].size = desc.length;
1306 	res = fuse_simple_request(fm, &ap.args);
1307 
1308 	fuse_invalidate_atime(inode);
1309 
1310 	if (res < 0)
1311 		return res;
1312 
1313 	if (WARN_ON(res >= PAGE_SIZE))
1314 		return -EIO;
1315 
1316 	link = page_address(page);
1317 	link[res] = '\0';
1318 
1319 	return 0;
1320 }
1321 
1322 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1323 				 struct delayed_call *callback)
1324 {
1325 	struct fuse_conn *fc = get_fuse_conn(inode);
1326 	struct page *page;
1327 	int err;
1328 
1329 	err = -EIO;
1330 	if (is_bad_inode(inode))
1331 		goto out_err;
1332 
1333 	if (fc->cache_symlinks)
1334 		return page_get_link(dentry, inode, callback);
1335 
1336 	err = -ECHILD;
1337 	if (!dentry)
1338 		goto out_err;
1339 
1340 	page = alloc_page(GFP_KERNEL);
1341 	err = -ENOMEM;
1342 	if (!page)
1343 		goto out_err;
1344 
1345 	err = fuse_readlink_page(inode, page);
1346 	if (err) {
1347 		__free_page(page);
1348 		goto out_err;
1349 	}
1350 
1351 	set_delayed_call(callback, page_put_link, page);
1352 
1353 	return page_address(page);
1354 
1355 out_err:
1356 	return ERR_PTR(err);
1357 }
1358 
1359 static int fuse_dir_open(struct inode *inode, struct file *file)
1360 {
1361 	return fuse_open_common(inode, file, true);
1362 }
1363 
1364 static int fuse_dir_release(struct inode *inode, struct file *file)
1365 {
1366 	fuse_release_common(file, true);
1367 
1368 	return 0;
1369 }
1370 
1371 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1372 			  int datasync)
1373 {
1374 	struct inode *inode = file->f_mapping->host;
1375 	struct fuse_conn *fc = get_fuse_conn(inode);
1376 	int err;
1377 
1378 	if (is_bad_inode(inode))
1379 		return -EIO;
1380 
1381 	if (fc->no_fsyncdir)
1382 		return 0;
1383 
1384 	inode_lock(inode);
1385 	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1386 	if (err == -ENOSYS) {
1387 		fc->no_fsyncdir = 1;
1388 		err = 0;
1389 	}
1390 	inode_unlock(inode);
1391 
1392 	return err;
1393 }
1394 
1395 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1396 			    unsigned long arg)
1397 {
1398 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1399 
1400 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1401 	if (fc->minor < 18)
1402 		return -ENOTTY;
1403 
1404 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1405 }
1406 
1407 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1408 				   unsigned long arg)
1409 {
1410 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1411 
1412 	if (fc->minor < 18)
1413 		return -ENOTTY;
1414 
1415 	return fuse_ioctl_common(file, cmd, arg,
1416 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1417 }
1418 
1419 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1420 {
1421 	/* Always update if mtime is explicitly set  */
1422 	if (ivalid & ATTR_MTIME_SET)
1423 		return true;
1424 
1425 	/* Or if kernel i_mtime is the official one */
1426 	if (trust_local_mtime)
1427 		return true;
1428 
1429 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1430 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1431 		return false;
1432 
1433 	/* In all other cases update */
1434 	return true;
1435 }
1436 
1437 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1438 			   struct fuse_setattr_in *arg, bool trust_local_cmtime)
1439 {
1440 	unsigned ivalid = iattr->ia_valid;
1441 
1442 	if (ivalid & ATTR_MODE)
1443 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1444 	if (ivalid & ATTR_UID)
1445 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1446 	if (ivalid & ATTR_GID)
1447 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1448 	if (ivalid & ATTR_SIZE)
1449 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1450 	if (ivalid & ATTR_ATIME) {
1451 		arg->valid |= FATTR_ATIME;
1452 		arg->atime = iattr->ia_atime.tv_sec;
1453 		arg->atimensec = iattr->ia_atime.tv_nsec;
1454 		if (!(ivalid & ATTR_ATIME_SET))
1455 			arg->valid |= FATTR_ATIME_NOW;
1456 	}
1457 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1458 		arg->valid |= FATTR_MTIME;
1459 		arg->mtime = iattr->ia_mtime.tv_sec;
1460 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1461 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1462 			arg->valid |= FATTR_MTIME_NOW;
1463 	}
1464 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1465 		arg->valid |= FATTR_CTIME;
1466 		arg->ctime = iattr->ia_ctime.tv_sec;
1467 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1468 	}
1469 }
1470 
1471 /*
1472  * Prevent concurrent writepages on inode
1473  *
1474  * This is done by adding a negative bias to the inode write counter
1475  * and waiting for all pending writes to finish.
1476  */
1477 void fuse_set_nowrite(struct inode *inode)
1478 {
1479 	struct fuse_inode *fi = get_fuse_inode(inode);
1480 
1481 	BUG_ON(!inode_is_locked(inode));
1482 
1483 	spin_lock(&fi->lock);
1484 	BUG_ON(fi->writectr < 0);
1485 	fi->writectr += FUSE_NOWRITE;
1486 	spin_unlock(&fi->lock);
1487 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1488 }
1489 
1490 /*
1491  * Allow writepages on inode
1492  *
1493  * Remove the bias from the writecounter and send any queued
1494  * writepages.
1495  */
1496 static void __fuse_release_nowrite(struct inode *inode)
1497 {
1498 	struct fuse_inode *fi = get_fuse_inode(inode);
1499 
1500 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1501 	fi->writectr = 0;
1502 	fuse_flush_writepages(inode);
1503 }
1504 
1505 void fuse_release_nowrite(struct inode *inode)
1506 {
1507 	struct fuse_inode *fi = get_fuse_inode(inode);
1508 
1509 	spin_lock(&fi->lock);
1510 	__fuse_release_nowrite(inode);
1511 	spin_unlock(&fi->lock);
1512 }
1513 
1514 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1515 			      struct inode *inode,
1516 			      struct fuse_setattr_in *inarg_p,
1517 			      struct fuse_attr_out *outarg_p)
1518 {
1519 	args->opcode = FUSE_SETATTR;
1520 	args->nodeid = get_node_id(inode);
1521 	args->in_numargs = 1;
1522 	args->in_args[0].size = sizeof(*inarg_p);
1523 	args->in_args[0].value = inarg_p;
1524 	args->out_numargs = 1;
1525 	args->out_args[0].size = sizeof(*outarg_p);
1526 	args->out_args[0].value = outarg_p;
1527 }
1528 
1529 /*
1530  * Flush inode->i_mtime to the server
1531  */
1532 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1533 {
1534 	struct fuse_mount *fm = get_fuse_mount(inode);
1535 	FUSE_ARGS(args);
1536 	struct fuse_setattr_in inarg;
1537 	struct fuse_attr_out outarg;
1538 
1539 	memset(&inarg, 0, sizeof(inarg));
1540 	memset(&outarg, 0, sizeof(outarg));
1541 
1542 	inarg.valid = FATTR_MTIME;
1543 	inarg.mtime = inode->i_mtime.tv_sec;
1544 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1545 	if (fm->fc->minor >= 23) {
1546 		inarg.valid |= FATTR_CTIME;
1547 		inarg.ctime = inode->i_ctime.tv_sec;
1548 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1549 	}
1550 	if (ff) {
1551 		inarg.valid |= FATTR_FH;
1552 		inarg.fh = ff->fh;
1553 	}
1554 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1555 
1556 	return fuse_simple_request(fm, &args);
1557 }
1558 
1559 /*
1560  * Set attributes, and at the same time refresh them.
1561  *
1562  * Truncation is slightly complicated, because the 'truncate' request
1563  * may fail, in which case we don't want to touch the mapping.
1564  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1565  * and the actual truncation by hand.
1566  */
1567 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1568 		    struct file *file)
1569 {
1570 	struct inode *inode = d_inode(dentry);
1571 	struct fuse_mount *fm = get_fuse_mount(inode);
1572 	struct fuse_conn *fc = fm->fc;
1573 	struct fuse_inode *fi = get_fuse_inode(inode);
1574 	FUSE_ARGS(args);
1575 	struct fuse_setattr_in inarg;
1576 	struct fuse_attr_out outarg;
1577 	bool is_truncate = false;
1578 	bool is_wb = fc->writeback_cache;
1579 	loff_t oldsize;
1580 	int err;
1581 	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1582 	bool fault_blocked = false;
1583 
1584 	if (!fc->default_permissions)
1585 		attr->ia_valid |= ATTR_FORCE;
1586 
1587 	err = setattr_prepare(dentry, attr);
1588 	if (err)
1589 		return err;
1590 
1591 	if (attr->ia_valid & ATTR_SIZE) {
1592 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1593 			return -EIO;
1594 		is_truncate = true;
1595 	}
1596 
1597 	if (FUSE_IS_DAX(inode) && is_truncate) {
1598 		down_write(&fi->i_mmap_sem);
1599 		fault_blocked = true;
1600 		err = fuse_dax_break_layouts(inode, 0, 0);
1601 		if (err) {
1602 			up_write(&fi->i_mmap_sem);
1603 			return err;
1604 		}
1605 	}
1606 
1607 	if (attr->ia_valid & ATTR_OPEN) {
1608 		/* This is coming from open(..., ... | O_TRUNC); */
1609 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1610 		WARN_ON(attr->ia_size != 0);
1611 		if (fc->atomic_o_trunc) {
1612 			/*
1613 			 * No need to send request to userspace, since actual
1614 			 * truncation has already been done by OPEN.  But still
1615 			 * need to truncate page cache.
1616 			 */
1617 			i_size_write(inode, 0);
1618 			truncate_pagecache(inode, 0);
1619 			goto out;
1620 		}
1621 		file = NULL;
1622 	}
1623 
1624 	/* Flush dirty data/metadata before non-truncate SETATTR */
1625 	if (is_wb && S_ISREG(inode->i_mode) &&
1626 	    attr->ia_valid &
1627 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1628 			 ATTR_TIMES_SET)) {
1629 		err = write_inode_now(inode, true);
1630 		if (err)
1631 			return err;
1632 
1633 		fuse_set_nowrite(inode);
1634 		fuse_release_nowrite(inode);
1635 	}
1636 
1637 	if (is_truncate) {
1638 		fuse_set_nowrite(inode);
1639 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1640 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1641 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1642 	}
1643 
1644 	memset(&inarg, 0, sizeof(inarg));
1645 	memset(&outarg, 0, sizeof(outarg));
1646 	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1647 	if (file) {
1648 		struct fuse_file *ff = file->private_data;
1649 		inarg.valid |= FATTR_FH;
1650 		inarg.fh = ff->fh;
1651 	}
1652 	if (attr->ia_valid & ATTR_SIZE) {
1653 		/* For mandatory locking in truncate */
1654 		inarg.valid |= FATTR_LOCKOWNER;
1655 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1656 	}
1657 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1658 	err = fuse_simple_request(fm, &args);
1659 	if (err) {
1660 		if (err == -EINTR)
1661 			fuse_invalidate_attr(inode);
1662 		goto error;
1663 	}
1664 
1665 	if (fuse_invalid_attr(&outarg.attr) ||
1666 	    (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1667 		make_bad_inode(inode);
1668 		err = -EIO;
1669 		goto error;
1670 	}
1671 
1672 	spin_lock(&fi->lock);
1673 	/* the kernel maintains i_mtime locally */
1674 	if (trust_local_cmtime) {
1675 		if (attr->ia_valid & ATTR_MTIME)
1676 			inode->i_mtime = attr->ia_mtime;
1677 		if (attr->ia_valid & ATTR_CTIME)
1678 			inode->i_ctime = attr->ia_ctime;
1679 		/* FIXME: clear I_DIRTY_SYNC? */
1680 	}
1681 
1682 	fuse_change_attributes_common(inode, &outarg.attr,
1683 				      attr_timeout(&outarg));
1684 	oldsize = inode->i_size;
1685 	/* see the comment in fuse_change_attributes() */
1686 	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1687 		i_size_write(inode, outarg.attr.size);
1688 
1689 	if (is_truncate) {
1690 		/* NOTE: this may release/reacquire fi->lock */
1691 		__fuse_release_nowrite(inode);
1692 	}
1693 	spin_unlock(&fi->lock);
1694 
1695 	/*
1696 	 * Only call invalidate_inode_pages2() after removing
1697 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1698 	 */
1699 	if ((is_truncate || !is_wb) &&
1700 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1701 		truncate_pagecache(inode, outarg.attr.size);
1702 		invalidate_inode_pages2(inode->i_mapping);
1703 	}
1704 
1705 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1706 out:
1707 	if (fault_blocked)
1708 		up_write(&fi->i_mmap_sem);
1709 
1710 	return 0;
1711 
1712 error:
1713 	if (is_truncate)
1714 		fuse_release_nowrite(inode);
1715 
1716 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1717 
1718 	if (fault_blocked)
1719 		up_write(&fi->i_mmap_sem);
1720 	return err;
1721 }
1722 
1723 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1724 {
1725 	struct inode *inode = d_inode(entry);
1726 	struct fuse_conn *fc = get_fuse_conn(inode);
1727 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1728 	int ret;
1729 
1730 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1731 		return -EACCES;
1732 
1733 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1734 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1735 				    ATTR_MODE);
1736 
1737 		/*
1738 		 * The only sane way to reliably kill suid/sgid is to do it in
1739 		 * the userspace filesystem
1740 		 *
1741 		 * This should be done on write(), truncate() and chown().
1742 		 */
1743 		if (!fc->handle_killpriv) {
1744 			/*
1745 			 * ia_mode calculation may have used stale i_mode.
1746 			 * Refresh and recalculate.
1747 			 */
1748 			ret = fuse_do_getattr(inode, NULL, file);
1749 			if (ret)
1750 				return ret;
1751 
1752 			attr->ia_mode = inode->i_mode;
1753 			if (inode->i_mode & S_ISUID) {
1754 				attr->ia_valid |= ATTR_MODE;
1755 				attr->ia_mode &= ~S_ISUID;
1756 			}
1757 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1758 				attr->ia_valid |= ATTR_MODE;
1759 				attr->ia_mode &= ~S_ISGID;
1760 			}
1761 		}
1762 	}
1763 	if (!attr->ia_valid)
1764 		return 0;
1765 
1766 	ret = fuse_do_setattr(entry, attr, file);
1767 	if (!ret) {
1768 		/*
1769 		 * If filesystem supports acls it may have updated acl xattrs in
1770 		 * the filesystem, so forget cached acls for the inode.
1771 		 */
1772 		if (fc->posix_acl)
1773 			forget_all_cached_acls(inode);
1774 
1775 		/* Directory mode changed, may need to revalidate access */
1776 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1777 			fuse_invalidate_entry_cache(entry);
1778 	}
1779 	return ret;
1780 }
1781 
1782 static int fuse_getattr(const struct path *path, struct kstat *stat,
1783 			u32 request_mask, unsigned int flags)
1784 {
1785 	struct inode *inode = d_inode(path->dentry);
1786 	struct fuse_conn *fc = get_fuse_conn(inode);
1787 
1788 	if (!fuse_allow_current_process(fc)) {
1789 		if (!request_mask) {
1790 			/*
1791 			 * If user explicitly requested *nothing* then don't
1792 			 * error out, but return st_dev only.
1793 			 */
1794 			stat->result_mask = 0;
1795 			stat->dev = inode->i_sb->s_dev;
1796 			return 0;
1797 		}
1798 		return -EACCES;
1799 	}
1800 
1801 	return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
1802 }
1803 
1804 static const struct inode_operations fuse_dir_inode_operations = {
1805 	.lookup		= fuse_lookup,
1806 	.mkdir		= fuse_mkdir,
1807 	.symlink	= fuse_symlink,
1808 	.unlink		= fuse_unlink,
1809 	.rmdir		= fuse_rmdir,
1810 	.rename		= fuse_rename2,
1811 	.link		= fuse_link,
1812 	.setattr	= fuse_setattr,
1813 	.create		= fuse_create,
1814 	.atomic_open	= fuse_atomic_open,
1815 	.mknod		= fuse_mknod,
1816 	.permission	= fuse_permission,
1817 	.getattr	= fuse_getattr,
1818 	.listxattr	= fuse_listxattr,
1819 	.get_acl	= fuse_get_acl,
1820 	.set_acl	= fuse_set_acl,
1821 };
1822 
1823 static const struct file_operations fuse_dir_operations = {
1824 	.llseek		= generic_file_llseek,
1825 	.read		= generic_read_dir,
1826 	.iterate_shared	= fuse_readdir,
1827 	.open		= fuse_dir_open,
1828 	.release	= fuse_dir_release,
1829 	.fsync		= fuse_dir_fsync,
1830 	.unlocked_ioctl	= fuse_dir_ioctl,
1831 	.compat_ioctl	= fuse_dir_compat_ioctl,
1832 };
1833 
1834 static const struct inode_operations fuse_common_inode_operations = {
1835 	.setattr	= fuse_setattr,
1836 	.permission	= fuse_permission,
1837 	.getattr	= fuse_getattr,
1838 	.listxattr	= fuse_listxattr,
1839 	.get_acl	= fuse_get_acl,
1840 	.set_acl	= fuse_set_acl,
1841 };
1842 
1843 static const struct inode_operations fuse_symlink_inode_operations = {
1844 	.setattr	= fuse_setattr,
1845 	.get_link	= fuse_get_link,
1846 	.getattr	= fuse_getattr,
1847 	.listxattr	= fuse_listxattr,
1848 };
1849 
1850 void fuse_init_common(struct inode *inode)
1851 {
1852 	inode->i_op = &fuse_common_inode_operations;
1853 }
1854 
1855 void fuse_init_dir(struct inode *inode)
1856 {
1857 	struct fuse_inode *fi = get_fuse_inode(inode);
1858 
1859 	inode->i_op = &fuse_dir_inode_operations;
1860 	inode->i_fop = &fuse_dir_operations;
1861 
1862 	spin_lock_init(&fi->rdc.lock);
1863 	fi->rdc.cached = false;
1864 	fi->rdc.size = 0;
1865 	fi->rdc.pos = 0;
1866 	fi->rdc.version = 0;
1867 }
1868 
1869 static int fuse_symlink_readpage(struct file *null, struct page *page)
1870 {
1871 	int err = fuse_readlink_page(page->mapping->host, page);
1872 
1873 	if (!err)
1874 		SetPageUptodate(page);
1875 
1876 	unlock_page(page);
1877 
1878 	return err;
1879 }
1880 
1881 static const struct address_space_operations fuse_symlink_aops = {
1882 	.readpage	= fuse_symlink_readpage,
1883 };
1884 
1885 void fuse_init_symlink(struct inode *inode)
1886 {
1887 	inode->i_op = &fuse_symlink_inode_operations;
1888 	inode->i_data.a_ops = &fuse_symlink_aops;
1889 	inode_nohighmem(inode);
1890 }
1891