xref: /openbmc/linux/fs/fuse/dir.c (revision 2e6ae11dd0d1c37f44cec51a58fb2092e55ed0f5)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/sched.h>
14 #include <linux/namei.h>
15 #include <linux/slab.h>
16 #include <linux/xattr.h>
17 #include <linux/posix_acl.h>
18 
19 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
20 {
21 	struct fuse_conn *fc = get_fuse_conn(dir);
22 	struct fuse_inode *fi = get_fuse_inode(dir);
23 
24 	if (!fc->do_readdirplus)
25 		return false;
26 	if (!fc->readdirplus_auto)
27 		return true;
28 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
29 		return true;
30 	if (ctx->pos == 0)
31 		return true;
32 	return false;
33 }
34 
35 static void fuse_advise_use_readdirplus(struct inode *dir)
36 {
37 	struct fuse_inode *fi = get_fuse_inode(dir);
38 
39 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
40 }
41 
42 union fuse_dentry {
43 	u64 time;
44 	struct rcu_head rcu;
45 };
46 
47 static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
48 {
49 	((union fuse_dentry *) entry->d_fsdata)->time = time;
50 }
51 
52 static inline u64 fuse_dentry_time(struct dentry *entry)
53 {
54 	return ((union fuse_dentry *) entry->d_fsdata)->time;
55 }
56 
57 /*
58  * FUSE caches dentries and attributes with separate timeout.  The
59  * time in jiffies until the dentry/attributes are valid is stored in
60  * dentry->d_fsdata and fuse_inode->i_time respectively.
61  */
62 
63 /*
64  * Calculate the time in jiffies until a dentry/attributes are valid
65  */
66 static u64 time_to_jiffies(u64 sec, u32 nsec)
67 {
68 	if (sec || nsec) {
69 		struct timespec64 ts = {
70 			sec,
71 			min_t(u32, nsec, NSEC_PER_SEC - 1)
72 		};
73 
74 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
75 	} else
76 		return 0;
77 }
78 
79 /*
80  * Set dentry and possibly attribute timeouts from the lookup/mk*
81  * replies
82  */
83 static void fuse_change_entry_timeout(struct dentry *entry,
84 				      struct fuse_entry_out *o)
85 {
86 	fuse_dentry_settime(entry,
87 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
88 }
89 
90 static u64 attr_timeout(struct fuse_attr_out *o)
91 {
92 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
93 }
94 
95 static u64 entry_attr_timeout(struct fuse_entry_out *o)
96 {
97 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
98 }
99 
100 /*
101  * Mark the attributes as stale, so that at the next call to
102  * ->getattr() they will be fetched from userspace
103  */
104 void fuse_invalidate_attr(struct inode *inode)
105 {
106 	get_fuse_inode(inode)->i_time = 0;
107 }
108 
/*
 * Mark the attributes as stale due to an atime change.  Nothing is done
 * for an inode that is read-only (IS_RDONLY), where atime is not used.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
118 
119 /*
120  * Just mark the entry as stale, so that a next attempt to look it up
121  * will result in a new lookup call to userspace
122  *
123  * This is called when a dentry is about to become negative and the
124  * timeout is unknown (unlink, rmdir, rename and in some cases
125  * lookup)
126  */
127 void fuse_invalidate_entry_cache(struct dentry *entry)
128 {
129 	fuse_dentry_settime(entry, 0);
130 }
131 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the dcache to
 * invalidate the dentry as well (d_invalidate())
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	/* dcache invalidation comes first, then the FUSE timeout is reset */
	d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);
}
141 
142 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
143 			     u64 nodeid, const struct qstr *name,
144 			     struct fuse_entry_out *outarg)
145 {
146 	memset(outarg, 0, sizeof(struct fuse_entry_out));
147 	args->in.h.opcode = FUSE_LOOKUP;
148 	args->in.h.nodeid = nodeid;
149 	args->in.numargs = 1;
150 	args->in.args[0].size = name->len + 1;
151 	args->in.args[0].value = name->name;
152 	args->out.numargs = 1;
153 	args->out.args[0].size = sizeof(struct fuse_entry_out);
154 	args->out.args[0].value = outarg;
155 }
156 
157 u64 fuse_get_attr_version(struct fuse_conn *fc)
158 {
159 	u64 curr_version;
160 
161 	/*
162 	 * The spin lock isn't actually needed on 64bit archs, but we
163 	 * don't yet care too much about such optimizations.
164 	 */
165 	spin_lock(&fc->lock);
166 	curr_version = fc->attr_version;
167 	spin_unlock(&fc->lock);
168 
169 	return curr_version;
170 }
171 
172 /*
173  * Check whether the dentry is still valid
174  *
175  * If the entry validity timeout has expired and the dentry is
176  * positive, try to redo the lookup.  If the lookup results in a
177  * different inode, then let the VFS invalidate the dentry and redo
178  * the lookup once more.  If the lookup results in the same inode,
179  * then refresh the attributes, timeouts and mark the dentry valid.
180  */
181 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
182 {
183 	struct inode *inode;
184 	struct dentry *parent;
185 	struct fuse_conn *fc;
186 	struct fuse_inode *fi;
187 	int ret;
188 
189 	inode = d_inode_rcu(entry);
190 	if (inode && is_bad_inode(inode))
191 		goto invalid;
192 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
193 		 (flags & LOOKUP_REVAL)) {
194 		struct fuse_entry_out outarg;
195 		FUSE_ARGS(args);
196 		struct fuse_forget_link *forget;
197 		u64 attr_version;
198 
199 		/* For negative dentries, always do a fresh lookup */
200 		if (!inode)
201 			goto invalid;
202 
203 		ret = -ECHILD;
204 		if (flags & LOOKUP_RCU)
205 			goto out;
206 
207 		fc = get_fuse_conn(inode);
208 
209 		forget = fuse_alloc_forget();
210 		ret = -ENOMEM;
211 		if (!forget)
212 			goto out;
213 
214 		attr_version = fuse_get_attr_version(fc);
215 
216 		parent = dget_parent(entry);
217 		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
218 				 &entry->d_name, &outarg);
219 		ret = fuse_simple_request(fc, &args);
220 		dput(parent);
221 		/* Zero nodeid is same as -ENOENT */
222 		if (!ret && !outarg.nodeid)
223 			ret = -ENOENT;
224 		if (!ret) {
225 			fi = get_fuse_inode(inode);
226 			if (outarg.nodeid != get_node_id(inode)) {
227 				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
228 				goto invalid;
229 			}
230 			spin_lock(&fc->lock);
231 			fi->nlookup++;
232 			spin_unlock(&fc->lock);
233 		}
234 		kfree(forget);
235 		if (ret == -ENOMEM)
236 			goto out;
237 		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
238 			goto invalid;
239 
240 		forget_all_cached_acls(inode);
241 		fuse_change_attributes(inode, &outarg.attr,
242 				       entry_attr_timeout(&outarg),
243 				       attr_version);
244 		fuse_change_entry_timeout(entry, &outarg);
245 	} else if (inode) {
246 		fi = get_fuse_inode(inode);
247 		if (flags & LOOKUP_RCU) {
248 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
249 				return -ECHILD;
250 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
251 			parent = dget_parent(entry);
252 			fuse_advise_use_readdirplus(d_inode(parent));
253 			dput(parent);
254 		}
255 	}
256 	ret = 1;
257 out:
258 	return ret;
259 
260 invalid:
261 	ret = 0;
262 	goto out;
263 }
264 
265 static int invalid_nodeid(u64 nodeid)
266 {
267 	return !nodeid || nodeid == FUSE_ROOT_ID;
268 }
269 
270 static int fuse_dentry_init(struct dentry *dentry)
271 {
272 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
273 
274 	return dentry->d_fsdata ? 0 : -ENOMEM;
275 }
276 static void fuse_dentry_release(struct dentry *dentry)
277 {
278 	union fuse_dentry *fd = dentry->d_fsdata;
279 
280 	kfree_rcu(fd, rcu);
281 }
282 
283 const struct dentry_operations fuse_dentry_operations = {
284 	.d_revalidate	= fuse_dentry_revalidate,
285 	.d_init		= fuse_dentry_init,
286 	.d_release	= fuse_dentry_release,
287 };
288 
289 const struct dentry_operations fuse_root_dentry_operations = {
290 	.d_init		= fuse_dentry_init,
291 	.d_release	= fuse_dentry_release,
292 };
293 
294 int fuse_valid_type(int m)
295 {
296 	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
297 		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
298 }
299 
300 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
301 		     struct fuse_entry_out *outarg, struct inode **inode)
302 {
303 	struct fuse_conn *fc = get_fuse_conn_super(sb);
304 	FUSE_ARGS(args);
305 	struct fuse_forget_link *forget;
306 	u64 attr_version;
307 	int err;
308 
309 	*inode = NULL;
310 	err = -ENAMETOOLONG;
311 	if (name->len > FUSE_NAME_MAX)
312 		goto out;
313 
314 
315 	forget = fuse_alloc_forget();
316 	err = -ENOMEM;
317 	if (!forget)
318 		goto out;
319 
320 	attr_version = fuse_get_attr_version(fc);
321 
322 	fuse_lookup_init(fc, &args, nodeid, name, outarg);
323 	err = fuse_simple_request(fc, &args);
324 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
325 	if (err || !outarg->nodeid)
326 		goto out_put_forget;
327 
328 	err = -EIO;
329 	if (!outarg->nodeid)
330 		goto out_put_forget;
331 	if (!fuse_valid_type(outarg->attr.mode))
332 		goto out_put_forget;
333 
334 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
335 			   &outarg->attr, entry_attr_timeout(outarg),
336 			   attr_version);
337 	err = -ENOMEM;
338 	if (!*inode) {
339 		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
340 		goto out;
341 	}
342 	err = 0;
343 
344  out_put_forget:
345 	kfree(forget);
346  out:
347 	return err;
348 }
349 
350 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
351 				  unsigned int flags)
352 {
353 	int err;
354 	struct fuse_entry_out outarg;
355 	struct inode *inode;
356 	struct dentry *newent;
357 	bool outarg_valid = true;
358 	bool locked;
359 
360 	locked = fuse_lock_inode(dir);
361 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
362 			       &outarg, &inode);
363 	fuse_unlock_inode(dir, locked);
364 	if (err == -ENOENT) {
365 		outarg_valid = false;
366 		err = 0;
367 	}
368 	if (err)
369 		goto out_err;
370 
371 	err = -EIO;
372 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
373 		goto out_iput;
374 
375 	newent = d_splice_alias(inode, entry);
376 	err = PTR_ERR(newent);
377 	if (IS_ERR(newent))
378 		goto out_err;
379 
380 	entry = newent ? newent : entry;
381 	if (outarg_valid)
382 		fuse_change_entry_timeout(entry, &outarg);
383 	else
384 		fuse_invalidate_entry_cache(entry);
385 
386 	fuse_advise_use_readdirplus(dir);
387 	return newent;
388 
389  out_iput:
390 	iput(inode);
391  out_err:
392 	return ERR_PTR(err);
393 }
394 
395 /*
396  * Atomic create+open operation
397  *
398  * If the filesystem doesn't support this, then fall back to separate
399  * 'mknod' + 'open' requests.
400  */
401 static int fuse_create_open(struct inode *dir, struct dentry *entry,
402 			    struct file *file, unsigned flags,
403 			    umode_t mode)
404 {
405 	int err;
406 	struct inode *inode;
407 	struct fuse_conn *fc = get_fuse_conn(dir);
408 	FUSE_ARGS(args);
409 	struct fuse_forget_link *forget;
410 	struct fuse_create_in inarg;
411 	struct fuse_open_out outopen;
412 	struct fuse_entry_out outentry;
413 	struct fuse_file *ff;
414 
415 	/* Userspace expects S_IFREG in create mode */
416 	BUG_ON((mode & S_IFMT) != S_IFREG);
417 
418 	forget = fuse_alloc_forget();
419 	err = -ENOMEM;
420 	if (!forget)
421 		goto out_err;
422 
423 	err = -ENOMEM;
424 	ff = fuse_file_alloc(fc);
425 	if (!ff)
426 		goto out_put_forget_req;
427 
428 	if (!fc->dont_mask)
429 		mode &= ~current_umask();
430 
431 	flags &= ~O_NOCTTY;
432 	memset(&inarg, 0, sizeof(inarg));
433 	memset(&outentry, 0, sizeof(outentry));
434 	inarg.flags = flags;
435 	inarg.mode = mode;
436 	inarg.umask = current_umask();
437 	args.in.h.opcode = FUSE_CREATE;
438 	args.in.h.nodeid = get_node_id(dir);
439 	args.in.numargs = 2;
440 	args.in.args[0].size = sizeof(inarg);
441 	args.in.args[0].value = &inarg;
442 	args.in.args[1].size = entry->d_name.len + 1;
443 	args.in.args[1].value = entry->d_name.name;
444 	args.out.numargs = 2;
445 	args.out.args[0].size = sizeof(outentry);
446 	args.out.args[0].value = &outentry;
447 	args.out.args[1].size = sizeof(outopen);
448 	args.out.args[1].value = &outopen;
449 	err = fuse_simple_request(fc, &args);
450 	if (err)
451 		goto out_free_ff;
452 
453 	err = -EIO;
454 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
455 		goto out_free_ff;
456 
457 	ff->fh = outopen.fh;
458 	ff->nodeid = outentry.nodeid;
459 	ff->open_flags = outopen.open_flags;
460 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
461 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
462 	if (!inode) {
463 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
464 		fuse_sync_release(ff, flags);
465 		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
466 		err = -ENOMEM;
467 		goto out_err;
468 	}
469 	kfree(forget);
470 	d_instantiate(entry, inode);
471 	fuse_change_entry_timeout(entry, &outentry);
472 	fuse_invalidate_attr(dir);
473 	err = finish_open(file, entry, generic_file_open);
474 	if (err) {
475 		fuse_sync_release(ff, flags);
476 	} else {
477 		file->private_data = ff;
478 		fuse_finish_open(inode, file);
479 	}
480 	return err;
481 
482 out_free_ff:
483 	fuse_file_free(ff);
484 out_put_forget_req:
485 	kfree(forget);
486 out_err:
487 	return err;
488 }
489 
490 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
491 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
492 			    struct file *file, unsigned flags,
493 			    umode_t mode)
494 {
495 	int err;
496 	struct fuse_conn *fc = get_fuse_conn(dir);
497 	struct dentry *res = NULL;
498 
499 	if (d_in_lookup(entry)) {
500 		res = fuse_lookup(dir, entry, 0);
501 		if (IS_ERR(res))
502 			return PTR_ERR(res);
503 
504 		if (res)
505 			entry = res;
506 	}
507 
508 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
509 		goto no_open;
510 
511 	/* Only creates */
512 	file->f_mode |= FMODE_CREATED;
513 
514 	if (fc->no_create)
515 		goto mknod;
516 
517 	err = fuse_create_open(dir, entry, file, flags, mode);
518 	if (err == -ENOSYS) {
519 		fc->no_create = 1;
520 		goto mknod;
521 	}
522 out_dput:
523 	dput(res);
524 	return err;
525 
526 mknod:
527 	err = fuse_mknod(dir, entry, mode, 0);
528 	if (err)
529 		goto out_dput;
530 no_open:
531 	return finish_no_open(file, res);
532 }
533 
534 /*
535  * Code shared between mknod, mkdir, symlink and link
536  */
537 static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
538 			    struct inode *dir, struct dentry *entry,
539 			    umode_t mode)
540 {
541 	struct fuse_entry_out outarg;
542 	struct inode *inode;
543 	struct dentry *d;
544 	int err;
545 	struct fuse_forget_link *forget;
546 
547 	forget = fuse_alloc_forget();
548 	if (!forget)
549 		return -ENOMEM;
550 
551 	memset(&outarg, 0, sizeof(outarg));
552 	args->in.h.nodeid = get_node_id(dir);
553 	args->out.numargs = 1;
554 	args->out.args[0].size = sizeof(outarg);
555 	args->out.args[0].value = &outarg;
556 	err = fuse_simple_request(fc, args);
557 	if (err)
558 		goto out_put_forget_req;
559 
560 	err = -EIO;
561 	if (invalid_nodeid(outarg.nodeid))
562 		goto out_put_forget_req;
563 
564 	if ((outarg.attr.mode ^ mode) & S_IFMT)
565 		goto out_put_forget_req;
566 
567 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
568 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
569 	if (!inode) {
570 		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
571 		return -ENOMEM;
572 	}
573 	kfree(forget);
574 
575 	d_drop(entry);
576 	d = d_splice_alias(inode, entry);
577 	if (IS_ERR(d))
578 		return PTR_ERR(d);
579 
580 	if (d) {
581 		fuse_change_entry_timeout(d, &outarg);
582 		dput(d);
583 	} else {
584 		fuse_change_entry_timeout(entry, &outarg);
585 	}
586 	fuse_invalidate_attr(dir);
587 	return 0;
588 
589  out_put_forget_req:
590 	kfree(forget);
591 	return err;
592 }
593 
594 static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
595 		      dev_t rdev)
596 {
597 	struct fuse_mknod_in inarg;
598 	struct fuse_conn *fc = get_fuse_conn(dir);
599 	FUSE_ARGS(args);
600 
601 	if (!fc->dont_mask)
602 		mode &= ~current_umask();
603 
604 	memset(&inarg, 0, sizeof(inarg));
605 	inarg.mode = mode;
606 	inarg.rdev = new_encode_dev(rdev);
607 	inarg.umask = current_umask();
608 	args.in.h.opcode = FUSE_MKNOD;
609 	args.in.numargs = 2;
610 	args.in.args[0].size = sizeof(inarg);
611 	args.in.args[0].value = &inarg;
612 	args.in.args[1].size = entry->d_name.len + 1;
613 	args.in.args[1].value = entry->d_name.name;
614 	return create_new_entry(fc, &args, dir, entry, mode);
615 }
616 
617 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
618 		       bool excl)
619 {
620 	return fuse_mknod(dir, entry, mode, 0);
621 }
622 
623 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
624 {
625 	struct fuse_mkdir_in inarg;
626 	struct fuse_conn *fc = get_fuse_conn(dir);
627 	FUSE_ARGS(args);
628 
629 	if (!fc->dont_mask)
630 		mode &= ~current_umask();
631 
632 	memset(&inarg, 0, sizeof(inarg));
633 	inarg.mode = mode;
634 	inarg.umask = current_umask();
635 	args.in.h.opcode = FUSE_MKDIR;
636 	args.in.numargs = 2;
637 	args.in.args[0].size = sizeof(inarg);
638 	args.in.args[0].value = &inarg;
639 	args.in.args[1].size = entry->d_name.len + 1;
640 	args.in.args[1].value = entry->d_name.name;
641 	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
642 }
643 
644 static int fuse_symlink(struct inode *dir, struct dentry *entry,
645 			const char *link)
646 {
647 	struct fuse_conn *fc = get_fuse_conn(dir);
648 	unsigned len = strlen(link) + 1;
649 	FUSE_ARGS(args);
650 
651 	args.in.h.opcode = FUSE_SYMLINK;
652 	args.in.numargs = 2;
653 	args.in.args[0].size = entry->d_name.len + 1;
654 	args.in.args[0].value = entry->d_name.name;
655 	args.in.args[1].size = len;
656 	args.in.args[1].value = link;
657 	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
658 }
659 
660 void fuse_update_ctime(struct inode *inode)
661 {
662 	if (!IS_NOCMTIME(inode)) {
663 		inode->i_ctime = current_time(inode);
664 		mark_inode_dirty_sync(inode);
665 	}
666 }
667 
668 static int fuse_unlink(struct inode *dir, struct dentry *entry)
669 {
670 	int err;
671 	struct fuse_conn *fc = get_fuse_conn(dir);
672 	FUSE_ARGS(args);
673 
674 	args.in.h.opcode = FUSE_UNLINK;
675 	args.in.h.nodeid = get_node_id(dir);
676 	args.in.numargs = 1;
677 	args.in.args[0].size = entry->d_name.len + 1;
678 	args.in.args[0].value = entry->d_name.name;
679 	err = fuse_simple_request(fc, &args);
680 	if (!err) {
681 		struct inode *inode = d_inode(entry);
682 		struct fuse_inode *fi = get_fuse_inode(inode);
683 
684 		spin_lock(&fc->lock);
685 		fi->attr_version = ++fc->attr_version;
686 		/*
687 		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
688 		 * happen if userspace filesystem is careless.  It would be
689 		 * difficult to enforce correct nlink usage so just ignore this
690 		 * condition here
691 		 */
692 		if (inode->i_nlink > 0)
693 			drop_nlink(inode);
694 		spin_unlock(&fc->lock);
695 		fuse_invalidate_attr(inode);
696 		fuse_invalidate_attr(dir);
697 		fuse_invalidate_entry_cache(entry);
698 		fuse_update_ctime(inode);
699 	} else if (err == -EINTR)
700 		fuse_invalidate_entry(entry);
701 	return err;
702 }
703 
704 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
705 {
706 	int err;
707 	struct fuse_conn *fc = get_fuse_conn(dir);
708 	FUSE_ARGS(args);
709 
710 	args.in.h.opcode = FUSE_RMDIR;
711 	args.in.h.nodeid = get_node_id(dir);
712 	args.in.numargs = 1;
713 	args.in.args[0].size = entry->d_name.len + 1;
714 	args.in.args[0].value = entry->d_name.name;
715 	err = fuse_simple_request(fc, &args);
716 	if (!err) {
717 		clear_nlink(d_inode(entry));
718 		fuse_invalidate_attr(dir);
719 		fuse_invalidate_entry_cache(entry);
720 	} else if (err == -EINTR)
721 		fuse_invalidate_entry(entry);
722 	return err;
723 }
724 
725 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
726 			      struct inode *newdir, struct dentry *newent,
727 			      unsigned int flags, int opcode, size_t argsize)
728 {
729 	int err;
730 	struct fuse_rename2_in inarg;
731 	struct fuse_conn *fc = get_fuse_conn(olddir);
732 	FUSE_ARGS(args);
733 
734 	memset(&inarg, 0, argsize);
735 	inarg.newdir = get_node_id(newdir);
736 	inarg.flags = flags;
737 	args.in.h.opcode = opcode;
738 	args.in.h.nodeid = get_node_id(olddir);
739 	args.in.numargs = 3;
740 	args.in.args[0].size = argsize;
741 	args.in.args[0].value = &inarg;
742 	args.in.args[1].size = oldent->d_name.len + 1;
743 	args.in.args[1].value = oldent->d_name.name;
744 	args.in.args[2].size = newent->d_name.len + 1;
745 	args.in.args[2].value = newent->d_name.name;
746 	err = fuse_simple_request(fc, &args);
747 	if (!err) {
748 		/* ctime changes */
749 		fuse_invalidate_attr(d_inode(oldent));
750 		fuse_update_ctime(d_inode(oldent));
751 
752 		if (flags & RENAME_EXCHANGE) {
753 			fuse_invalidate_attr(d_inode(newent));
754 			fuse_update_ctime(d_inode(newent));
755 		}
756 
757 		fuse_invalidate_attr(olddir);
758 		if (olddir != newdir)
759 			fuse_invalidate_attr(newdir);
760 
761 		/* newent will end up negative */
762 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
763 			fuse_invalidate_attr(d_inode(newent));
764 			fuse_invalidate_entry_cache(newent);
765 			fuse_update_ctime(d_inode(newent));
766 		}
767 	} else if (err == -EINTR) {
768 		/* If request was interrupted, DEITY only knows if the
769 		   rename actually took place.  If the invalidation
770 		   fails (e.g. some process has CWD under the renamed
771 		   directory), then there can be inconsistency between
772 		   the dcache and the real filesystem.  Tough luck. */
773 		fuse_invalidate_entry(oldent);
774 		if (d_really_is_positive(newent))
775 			fuse_invalidate_entry(newent);
776 	}
777 
778 	return err;
779 }
780 
781 static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
782 			struct inode *newdir, struct dentry *newent,
783 			unsigned int flags)
784 {
785 	struct fuse_conn *fc = get_fuse_conn(olddir);
786 	int err;
787 
788 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
789 		return -EINVAL;
790 
791 	if (flags) {
792 		if (fc->no_rename2 || fc->minor < 23)
793 			return -EINVAL;
794 
795 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
796 					 FUSE_RENAME2,
797 					 sizeof(struct fuse_rename2_in));
798 		if (err == -ENOSYS) {
799 			fc->no_rename2 = 1;
800 			err = -EINVAL;
801 		}
802 	} else {
803 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
804 					 FUSE_RENAME,
805 					 sizeof(struct fuse_rename_in));
806 	}
807 
808 	return err;
809 }
810 
811 static int fuse_link(struct dentry *entry, struct inode *newdir,
812 		     struct dentry *newent)
813 {
814 	int err;
815 	struct fuse_link_in inarg;
816 	struct inode *inode = d_inode(entry);
817 	struct fuse_conn *fc = get_fuse_conn(inode);
818 	FUSE_ARGS(args);
819 
820 	memset(&inarg, 0, sizeof(inarg));
821 	inarg.oldnodeid = get_node_id(inode);
822 	args.in.h.opcode = FUSE_LINK;
823 	args.in.numargs = 2;
824 	args.in.args[0].size = sizeof(inarg);
825 	args.in.args[0].value = &inarg;
826 	args.in.args[1].size = newent->d_name.len + 1;
827 	args.in.args[1].value = newent->d_name.name;
828 	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
829 	/* Contrary to "normal" filesystems it can happen that link
830 	   makes two "logical" inodes point to the same "physical"
831 	   inode.  We invalidate the attributes of the old one, so it
832 	   will reflect changes in the backing inode (link count,
833 	   etc.)
834 	*/
835 	if (!err) {
836 		struct fuse_inode *fi = get_fuse_inode(inode);
837 
838 		spin_lock(&fc->lock);
839 		fi->attr_version = ++fc->attr_version;
840 		inc_nlink(inode);
841 		spin_unlock(&fc->lock);
842 		fuse_invalidate_attr(inode);
843 		fuse_update_ctime(inode);
844 	} else if (err == -EINTR) {
845 		fuse_invalidate_attr(inode);
846 	}
847 	return err;
848 }
849 
850 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
851 			  struct kstat *stat)
852 {
853 	unsigned int blkbits;
854 	struct fuse_conn *fc = get_fuse_conn(inode);
855 
856 	/* see the comment in fuse_change_attributes() */
857 	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
858 		attr->size = i_size_read(inode);
859 		attr->mtime = inode->i_mtime.tv_sec;
860 		attr->mtimensec = inode->i_mtime.tv_nsec;
861 		attr->ctime = inode->i_ctime.tv_sec;
862 		attr->ctimensec = inode->i_ctime.tv_nsec;
863 	}
864 
865 	stat->dev = inode->i_sb->s_dev;
866 	stat->ino = attr->ino;
867 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
868 	stat->nlink = attr->nlink;
869 	stat->uid = make_kuid(fc->user_ns, attr->uid);
870 	stat->gid = make_kgid(fc->user_ns, attr->gid);
871 	stat->rdev = inode->i_rdev;
872 	stat->atime.tv_sec = attr->atime;
873 	stat->atime.tv_nsec = attr->atimensec;
874 	stat->mtime.tv_sec = attr->mtime;
875 	stat->mtime.tv_nsec = attr->mtimensec;
876 	stat->ctime.tv_sec = attr->ctime;
877 	stat->ctime.tv_nsec = attr->ctimensec;
878 	stat->size = attr->size;
879 	stat->blocks = attr->blocks;
880 
881 	if (attr->blksize != 0)
882 		blkbits = ilog2(attr->blksize);
883 	else
884 		blkbits = inode->i_sb->s_blocksize_bits;
885 
886 	stat->blksize = 1 << blkbits;
887 }
888 
889 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
890 			   struct file *file)
891 {
892 	int err;
893 	struct fuse_getattr_in inarg;
894 	struct fuse_attr_out outarg;
895 	struct fuse_conn *fc = get_fuse_conn(inode);
896 	FUSE_ARGS(args);
897 	u64 attr_version;
898 
899 	attr_version = fuse_get_attr_version(fc);
900 
901 	memset(&inarg, 0, sizeof(inarg));
902 	memset(&outarg, 0, sizeof(outarg));
903 	/* Directories have separate file-handle space */
904 	if (file && S_ISREG(inode->i_mode)) {
905 		struct fuse_file *ff = file->private_data;
906 
907 		inarg.getattr_flags |= FUSE_GETATTR_FH;
908 		inarg.fh = ff->fh;
909 	}
910 	args.in.h.opcode = FUSE_GETATTR;
911 	args.in.h.nodeid = get_node_id(inode);
912 	args.in.numargs = 1;
913 	args.in.args[0].size = sizeof(inarg);
914 	args.in.args[0].value = &inarg;
915 	args.out.numargs = 1;
916 	args.out.args[0].size = sizeof(outarg);
917 	args.out.args[0].value = &outarg;
918 	err = fuse_simple_request(fc, &args);
919 	if (!err) {
920 		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
921 			make_bad_inode(inode);
922 			err = -EIO;
923 		} else {
924 			fuse_change_attributes(inode, &outarg.attr,
925 					       attr_timeout(&outarg),
926 					       attr_version);
927 			if (stat)
928 				fuse_fillattr(inode, &outarg.attr, stat);
929 		}
930 	}
931 	return err;
932 }
933 
934 static int fuse_update_get_attr(struct inode *inode, struct file *file,
935 				struct kstat *stat, unsigned int flags)
936 {
937 	struct fuse_inode *fi = get_fuse_inode(inode);
938 	int err = 0;
939 	bool sync;
940 
941 	if (flags & AT_STATX_FORCE_SYNC)
942 		sync = true;
943 	else if (flags & AT_STATX_DONT_SYNC)
944 		sync = false;
945 	else
946 		sync = time_before64(fi->i_time, get_jiffies_64());
947 
948 	if (sync) {
949 		forget_all_cached_acls(inode);
950 		err = fuse_do_getattr(inode, stat, file);
951 	} else if (stat) {
952 		generic_fillattr(inode, stat);
953 		stat->mode = fi->orig_i_mode;
954 		stat->ino = fi->orig_ino;
955 	}
956 
957 	return err;
958 }
959 
960 int fuse_update_attributes(struct inode *inode, struct file *file)
961 {
962 	return fuse_update_get_attr(inode, file, NULL, 0);
963 }
964 
965 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
966 			     u64 child_nodeid, struct qstr *name)
967 {
968 	int err = -ENOTDIR;
969 	struct inode *parent;
970 	struct dentry *dir;
971 	struct dentry *entry;
972 
973 	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
974 	if (!parent)
975 		return -ENOENT;
976 
977 	inode_lock(parent);
978 	if (!S_ISDIR(parent->i_mode))
979 		goto unlock;
980 
981 	err = -ENOENT;
982 	dir = d_find_alias(parent);
983 	if (!dir)
984 		goto unlock;
985 
986 	name->hash = full_name_hash(dir, name->name, name->len);
987 	entry = d_lookup(dir, name);
988 	dput(dir);
989 	if (!entry)
990 		goto unlock;
991 
992 	fuse_invalidate_attr(parent);
993 	fuse_invalidate_entry(entry);
994 
995 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
996 		inode_lock(d_inode(entry));
997 		if (get_node_id(d_inode(entry)) != child_nodeid) {
998 			err = -ENOENT;
999 			goto badentry;
1000 		}
1001 		if (d_mountpoint(entry)) {
1002 			err = -EBUSY;
1003 			goto badentry;
1004 		}
1005 		if (d_is_dir(entry)) {
1006 			shrink_dcache_parent(entry);
1007 			if (!simple_empty(entry)) {
1008 				err = -ENOTEMPTY;
1009 				goto badentry;
1010 			}
1011 			d_inode(entry)->i_flags |= S_DEAD;
1012 		}
1013 		dont_mount(entry);
1014 		clear_nlink(d_inode(entry));
1015 		err = 0;
1016  badentry:
1017 		inode_unlock(d_inode(entry));
1018 		if (!err)
1019 			d_delete(entry);
1020 	} else {
1021 		err = 0;
1022 	}
1023 	dput(entry);
1024 
1025  unlock:
1026 	inode_unlock(parent);
1027 	iput(parent);
1028 	return err;
1029 }
1030 
1031 /*
1032  * Calling into a user-controlled filesystem gives the filesystem
1033  * daemon ptrace-like capabilities over the current process.  This
1034  * means, that the filesystem daemon is able to record the exact
1035  * filesystem operations performed, and can also control the behavior
1036  * of the requester process in otherwise impossible ways.  For example
1037  * it can delay the operation for arbitrary length of time allowing
1038  * DoS against the requester.
1039  *
1040  * For this reason only those processes can call into the filesystem,
1041  * for which the owner of the mount has ptrace privilege.  This
1042  * excludes processes started by other users, suid or sgid processes.
1043  */
1044 int fuse_allow_current_process(struct fuse_conn *fc)
1045 {
1046 	const struct cred *cred;
1047 
1048 	if (fc->allow_other)
1049 		return current_in_userns(fc->user_ns);
1050 
1051 	cred = current_cred();
1052 	if (uid_eq(cred->euid, fc->user_id) &&
1053 	    uid_eq(cred->suid, fc->user_id) &&
1054 	    uid_eq(cred->uid,  fc->user_id) &&
1055 	    gid_eq(cred->egid, fc->group_id) &&
1056 	    gid_eq(cred->sgid, fc->group_id) &&
1057 	    gid_eq(cred->gid,  fc->group_id))
1058 		return 1;
1059 
1060 	return 0;
1061 }
1062 
1063 static int fuse_access(struct inode *inode, int mask)
1064 {
1065 	struct fuse_conn *fc = get_fuse_conn(inode);
1066 	FUSE_ARGS(args);
1067 	struct fuse_access_in inarg;
1068 	int err;
1069 
1070 	BUG_ON(mask & MAY_NOT_BLOCK);
1071 
1072 	if (fc->no_access)
1073 		return 0;
1074 
1075 	memset(&inarg, 0, sizeof(inarg));
1076 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1077 	args.in.h.opcode = FUSE_ACCESS;
1078 	args.in.h.nodeid = get_node_id(inode);
1079 	args.in.numargs = 1;
1080 	args.in.args[0].size = sizeof(inarg);
1081 	args.in.args[0].value = &inarg;
1082 	err = fuse_simple_request(fc, &args);
1083 	if (err == -ENOSYS) {
1084 		fc->no_access = 1;
1085 		err = 0;
1086 	}
1087 	return err;
1088 }
1089 
1090 static int fuse_perm_getattr(struct inode *inode, int mask)
1091 {
1092 	if (mask & MAY_NOT_BLOCK)
1093 		return -ECHILD;
1094 
1095 	forget_all_cached_acls(inode);
1096 	return fuse_do_getattr(inode, NULL, NULL);
1097 }
1098 
1099 /*
1100  * Check permission.  The two basic access models of FUSE are:
1101  *
1102  * 1) Local access checking ('default_permissions' mount option) based
1103  * on file mode.  This is the plain old disk filesystem permission
1104  * modell.
1105  *
1106  * 2) "Remote" access checking, where server is responsible for
1107  * checking permission in each inode operation.  An exception to this
1108  * is if ->permission() was invoked from sys_access() in which case an
1109  * access request is sent.  Execute permission is still checked
1110  * locally based on file mode.
1111  */
1112 static int fuse_permission(struct inode *inode, int mask)
1113 {
1114 	struct fuse_conn *fc = get_fuse_conn(inode);
1115 	bool refreshed = false;
1116 	int err = 0;
1117 
1118 	if (!fuse_allow_current_process(fc))
1119 		return -EACCES;
1120 
1121 	/*
1122 	 * If attributes are needed, refresh them before proceeding
1123 	 */
1124 	if (fc->default_permissions ||
1125 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1126 		struct fuse_inode *fi = get_fuse_inode(inode);
1127 
1128 		if (time_before64(fi->i_time, get_jiffies_64())) {
1129 			refreshed = true;
1130 
1131 			err = fuse_perm_getattr(inode, mask);
1132 			if (err)
1133 				return err;
1134 		}
1135 	}
1136 
1137 	if (fc->default_permissions) {
1138 		err = generic_permission(inode, mask);
1139 
1140 		/* If permission is denied, try to refresh file
1141 		   attributes.  This is also needed, because the root
1142 		   node will at first have no permissions */
1143 		if (err == -EACCES && !refreshed) {
1144 			err = fuse_perm_getattr(inode, mask);
1145 			if (!err)
1146 				err = generic_permission(inode, mask);
1147 		}
1148 
1149 		/* Note: the opposite of the above test does not
1150 		   exist.  So if permissions are revoked this won't be
1151 		   noticed immediately, only after the attribute
1152 		   timeout has expired */
1153 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1154 		err = fuse_access(inode, mask);
1155 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1156 		if (!(inode->i_mode & S_IXUGO)) {
1157 			if (refreshed)
1158 				return -EACCES;
1159 
1160 			err = fuse_perm_getattr(inode, mask);
1161 			if (!err && !(inode->i_mode & S_IXUGO))
1162 				return -EACCES;
1163 		}
1164 	}
1165 	return err;
1166 }
1167 
1168 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1169 			 struct dir_context *ctx)
1170 {
1171 	while (nbytes >= FUSE_NAME_OFFSET) {
1172 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1173 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
1174 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1175 			return -EIO;
1176 		if (reclen > nbytes)
1177 			break;
1178 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1179 			return -EIO;
1180 
1181 		if (!dir_emit(ctx, dirent->name, dirent->namelen,
1182 			       dirent->ino, dirent->type))
1183 			break;
1184 
1185 		buf += reclen;
1186 		nbytes -= reclen;
1187 		ctx->pos = dirent->off;
1188 	}
1189 
1190 	return 0;
1191 }
1192 
1193 static int fuse_direntplus_link(struct file *file,
1194 				struct fuse_direntplus *direntplus,
1195 				u64 attr_version)
1196 {
1197 	struct fuse_entry_out *o = &direntplus->entry_out;
1198 	struct fuse_dirent *dirent = &direntplus->dirent;
1199 	struct dentry *parent = file->f_path.dentry;
1200 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1201 	struct dentry *dentry;
1202 	struct dentry *alias;
1203 	struct inode *dir = d_inode(parent);
1204 	struct fuse_conn *fc;
1205 	struct inode *inode;
1206 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1207 
1208 	if (!o->nodeid) {
1209 		/*
1210 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1211 		 * ENOENT. Instead, it only means the userspace filesystem did
1212 		 * not want to return attributes/handle for this entry.
1213 		 *
1214 		 * So do nothing.
1215 		 */
1216 		return 0;
1217 	}
1218 
1219 	if (name.name[0] == '.') {
1220 		/*
1221 		 * We could potentially refresh the attributes of the directory
1222 		 * and its parent?
1223 		 */
1224 		if (name.len == 1)
1225 			return 0;
1226 		if (name.name[1] == '.' && name.len == 2)
1227 			return 0;
1228 	}
1229 
1230 	if (invalid_nodeid(o->nodeid))
1231 		return -EIO;
1232 	if (!fuse_valid_type(o->attr.mode))
1233 		return -EIO;
1234 
1235 	fc = get_fuse_conn(dir);
1236 
1237 	name.hash = full_name_hash(parent, name.name, name.len);
1238 	dentry = d_lookup(parent, &name);
1239 	if (!dentry) {
1240 retry:
1241 		dentry = d_alloc_parallel(parent, &name, &wq);
1242 		if (IS_ERR(dentry))
1243 			return PTR_ERR(dentry);
1244 	}
1245 	if (!d_in_lookup(dentry)) {
1246 		struct fuse_inode *fi;
1247 		inode = d_inode(dentry);
1248 		if (!inode ||
1249 		    get_node_id(inode) != o->nodeid ||
1250 		    ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
1251 			d_invalidate(dentry);
1252 			dput(dentry);
1253 			goto retry;
1254 		}
1255 		if (is_bad_inode(inode)) {
1256 			dput(dentry);
1257 			return -EIO;
1258 		}
1259 
1260 		fi = get_fuse_inode(inode);
1261 		spin_lock(&fc->lock);
1262 		fi->nlookup++;
1263 		spin_unlock(&fc->lock);
1264 
1265 		forget_all_cached_acls(inode);
1266 		fuse_change_attributes(inode, &o->attr,
1267 				       entry_attr_timeout(o),
1268 				       attr_version);
1269 		/*
1270 		 * The other branch comes via fuse_iget()
1271 		 * which bumps nlookup inside
1272 		 */
1273 	} else {
1274 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1275 				  &o->attr, entry_attr_timeout(o),
1276 				  attr_version);
1277 		if (!inode)
1278 			inode = ERR_PTR(-ENOMEM);
1279 
1280 		alias = d_splice_alias(inode, dentry);
1281 		d_lookup_done(dentry);
1282 		if (alias) {
1283 			dput(dentry);
1284 			dentry = alias;
1285 		}
1286 		if (IS_ERR(dentry))
1287 			return PTR_ERR(dentry);
1288 	}
1289 	if (fc->readdirplus_auto)
1290 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
1291 	fuse_change_entry_timeout(dentry, o);
1292 
1293 	dput(dentry);
1294 	return 0;
1295 }
1296 
1297 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1298 			     struct dir_context *ctx, u64 attr_version)
1299 {
1300 	struct fuse_direntplus *direntplus;
1301 	struct fuse_dirent *dirent;
1302 	size_t reclen;
1303 	int over = 0;
1304 	int ret;
1305 
1306 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1307 		direntplus = (struct fuse_direntplus *) buf;
1308 		dirent = &direntplus->dirent;
1309 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1310 
1311 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1312 			return -EIO;
1313 		if (reclen > nbytes)
1314 			break;
1315 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1316 			return -EIO;
1317 
1318 		if (!over) {
1319 			/* We fill entries into dstbuf only as much as
1320 			   it can hold. But we still continue iterating
1321 			   over remaining entries to link them. If not,
1322 			   we need to send a FORGET for each of those
1323 			   which we did not link.
1324 			*/
1325 			over = !dir_emit(ctx, dirent->name, dirent->namelen,
1326 				       dirent->ino, dirent->type);
1327 			if (!over)
1328 				ctx->pos = dirent->off;
1329 		}
1330 
1331 		buf += reclen;
1332 		nbytes -= reclen;
1333 
1334 		ret = fuse_direntplus_link(file, direntplus, attr_version);
1335 		if (ret)
1336 			fuse_force_forget(file, direntplus->entry_out.nodeid);
1337 	}
1338 
1339 	return 0;
1340 }
1341 
1342 static int fuse_readdir(struct file *file, struct dir_context *ctx)
1343 {
1344 	int plus, err;
1345 	size_t nbytes;
1346 	struct page *page;
1347 	struct inode *inode = file_inode(file);
1348 	struct fuse_conn *fc = get_fuse_conn(inode);
1349 	struct fuse_req *req;
1350 	u64 attr_version = 0;
1351 	bool locked;
1352 
1353 	if (is_bad_inode(inode))
1354 		return -EIO;
1355 
1356 	req = fuse_get_req(fc, 1);
1357 	if (IS_ERR(req))
1358 		return PTR_ERR(req);
1359 
1360 	page = alloc_page(GFP_KERNEL);
1361 	if (!page) {
1362 		fuse_put_request(fc, req);
1363 		return -ENOMEM;
1364 	}
1365 
1366 	plus = fuse_use_readdirplus(inode, ctx);
1367 	req->out.argpages = 1;
1368 	req->num_pages = 1;
1369 	req->pages[0] = page;
1370 	req->page_descs[0].length = PAGE_SIZE;
1371 	if (plus) {
1372 		attr_version = fuse_get_attr_version(fc);
1373 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1374 			       FUSE_READDIRPLUS);
1375 	} else {
1376 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1377 			       FUSE_READDIR);
1378 	}
1379 	locked = fuse_lock_inode(inode);
1380 	fuse_request_send(fc, req);
1381 	fuse_unlock_inode(inode, locked);
1382 	nbytes = req->out.args[0].size;
1383 	err = req->out.h.error;
1384 	fuse_put_request(fc, req);
1385 	if (!err) {
1386 		if (plus) {
1387 			err = parse_dirplusfile(page_address(page), nbytes,
1388 						file, ctx,
1389 						attr_version);
1390 		} else {
1391 			err = parse_dirfile(page_address(page), nbytes, file,
1392 					    ctx);
1393 		}
1394 	}
1395 
1396 	__free_page(page);
1397 	fuse_invalidate_atime(inode);
1398 	return err;
1399 }
1400 
1401 static const char *fuse_get_link(struct dentry *dentry,
1402 				 struct inode *inode,
1403 				 struct delayed_call *done)
1404 {
1405 	struct fuse_conn *fc = get_fuse_conn(inode);
1406 	FUSE_ARGS(args);
1407 	char *link;
1408 	ssize_t ret;
1409 
1410 	if (!dentry)
1411 		return ERR_PTR(-ECHILD);
1412 
1413 	link = kmalloc(PAGE_SIZE, GFP_KERNEL);
1414 	if (!link)
1415 		return ERR_PTR(-ENOMEM);
1416 
1417 	args.in.h.opcode = FUSE_READLINK;
1418 	args.in.h.nodeid = get_node_id(inode);
1419 	args.out.argvar = 1;
1420 	args.out.numargs = 1;
1421 	args.out.args[0].size = PAGE_SIZE - 1;
1422 	args.out.args[0].value = link;
1423 	ret = fuse_simple_request(fc, &args);
1424 	if (ret < 0) {
1425 		kfree(link);
1426 		link = ERR_PTR(ret);
1427 	} else {
1428 		link[ret] = '\0';
1429 		set_delayed_call(done, kfree_link, link);
1430 	}
1431 	fuse_invalidate_atime(inode);
1432 	return link;
1433 }
1434 
1435 static int fuse_dir_open(struct inode *inode, struct file *file)
1436 {
1437 	return fuse_open_common(inode, file, true);
1438 }
1439 
1440 static int fuse_dir_release(struct inode *inode, struct file *file)
1441 {
1442 	fuse_release_common(file, FUSE_RELEASEDIR);
1443 
1444 	return 0;
1445 }
1446 
1447 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1448 			  int datasync)
1449 {
1450 	return fuse_fsync_common(file, start, end, datasync, 1);
1451 }
1452 
1453 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1454 			    unsigned long arg)
1455 {
1456 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1457 
1458 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1459 	if (fc->minor < 18)
1460 		return -ENOTTY;
1461 
1462 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1463 }
1464 
1465 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1466 				   unsigned long arg)
1467 {
1468 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1469 
1470 	if (fc->minor < 18)
1471 		return -ENOTTY;
1472 
1473 	return fuse_ioctl_common(file, cmd, arg,
1474 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1475 }
1476 
1477 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1478 {
1479 	/* Always update if mtime is explicitly set  */
1480 	if (ivalid & ATTR_MTIME_SET)
1481 		return true;
1482 
1483 	/* Or if kernel i_mtime is the official one */
1484 	if (trust_local_mtime)
1485 		return true;
1486 
1487 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1488 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1489 		return false;
1490 
1491 	/* In all other cases update */
1492 	return true;
1493 }
1494 
1495 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1496 			   struct fuse_setattr_in *arg, bool trust_local_cmtime)
1497 {
1498 	unsigned ivalid = iattr->ia_valid;
1499 
1500 	if (ivalid & ATTR_MODE)
1501 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1502 	if (ivalid & ATTR_UID)
1503 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1504 	if (ivalid & ATTR_GID)
1505 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1506 	if (ivalid & ATTR_SIZE)
1507 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1508 	if (ivalid & ATTR_ATIME) {
1509 		arg->valid |= FATTR_ATIME;
1510 		arg->atime = iattr->ia_atime.tv_sec;
1511 		arg->atimensec = iattr->ia_atime.tv_nsec;
1512 		if (!(ivalid & ATTR_ATIME_SET))
1513 			arg->valid |= FATTR_ATIME_NOW;
1514 	}
1515 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1516 		arg->valid |= FATTR_MTIME;
1517 		arg->mtime = iattr->ia_mtime.tv_sec;
1518 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1519 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1520 			arg->valid |= FATTR_MTIME_NOW;
1521 	}
1522 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1523 		arg->valid |= FATTR_CTIME;
1524 		arg->ctime = iattr->ia_ctime.tv_sec;
1525 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1526 	}
1527 }
1528 
1529 /*
1530  * Prevent concurrent writepages on inode
1531  *
1532  * This is done by adding a negative bias to the inode write counter
1533  * and waiting for all pending writes to finish.
1534  */
1535 void fuse_set_nowrite(struct inode *inode)
1536 {
1537 	struct fuse_conn *fc = get_fuse_conn(inode);
1538 	struct fuse_inode *fi = get_fuse_inode(inode);
1539 
1540 	BUG_ON(!inode_is_locked(inode));
1541 
1542 	spin_lock(&fc->lock);
1543 	BUG_ON(fi->writectr < 0);
1544 	fi->writectr += FUSE_NOWRITE;
1545 	spin_unlock(&fc->lock);
1546 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1547 }
1548 
1549 /*
1550  * Allow writepages on inode
1551  *
1552  * Remove the bias from the writecounter and send any queued
1553  * writepages.
1554  */
1555 static void __fuse_release_nowrite(struct inode *inode)
1556 {
1557 	struct fuse_inode *fi = get_fuse_inode(inode);
1558 
1559 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1560 	fi->writectr = 0;
1561 	fuse_flush_writepages(inode);
1562 }
1563 
1564 void fuse_release_nowrite(struct inode *inode)
1565 {
1566 	struct fuse_conn *fc = get_fuse_conn(inode);
1567 
1568 	spin_lock(&fc->lock);
1569 	__fuse_release_nowrite(inode);
1570 	spin_unlock(&fc->lock);
1571 }
1572 
1573 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1574 			      struct inode *inode,
1575 			      struct fuse_setattr_in *inarg_p,
1576 			      struct fuse_attr_out *outarg_p)
1577 {
1578 	args->in.h.opcode = FUSE_SETATTR;
1579 	args->in.h.nodeid = get_node_id(inode);
1580 	args->in.numargs = 1;
1581 	args->in.args[0].size = sizeof(*inarg_p);
1582 	args->in.args[0].value = inarg_p;
1583 	args->out.numargs = 1;
1584 	args->out.args[0].size = sizeof(*outarg_p);
1585 	args->out.args[0].value = outarg_p;
1586 }
1587 
1588 /*
1589  * Flush inode->i_mtime to the server
1590  */
1591 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1592 {
1593 	struct fuse_conn *fc = get_fuse_conn(inode);
1594 	FUSE_ARGS(args);
1595 	struct fuse_setattr_in inarg;
1596 	struct fuse_attr_out outarg;
1597 
1598 	memset(&inarg, 0, sizeof(inarg));
1599 	memset(&outarg, 0, sizeof(outarg));
1600 
1601 	inarg.valid = FATTR_MTIME;
1602 	inarg.mtime = inode->i_mtime.tv_sec;
1603 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1604 	if (fc->minor >= 23) {
1605 		inarg.valid |= FATTR_CTIME;
1606 		inarg.ctime = inode->i_ctime.tv_sec;
1607 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1608 	}
1609 	if (ff) {
1610 		inarg.valid |= FATTR_FH;
1611 		inarg.fh = ff->fh;
1612 	}
1613 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1614 
1615 	return fuse_simple_request(fc, &args);
1616 }
1617 
1618 /*
1619  * Set attributes, and at the same time refresh them.
1620  *
1621  * Truncation is slightly complicated, because the 'truncate' request
1622  * may fail, in which case we don't want to touch the mapping.
1623  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1624  * and the actual truncation by hand.
1625  */
1626 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1627 		    struct file *file)
1628 {
1629 	struct inode *inode = d_inode(dentry);
1630 	struct fuse_conn *fc = get_fuse_conn(inode);
1631 	struct fuse_inode *fi = get_fuse_inode(inode);
1632 	FUSE_ARGS(args);
1633 	struct fuse_setattr_in inarg;
1634 	struct fuse_attr_out outarg;
1635 	bool is_truncate = false;
1636 	bool is_wb = fc->writeback_cache;
1637 	loff_t oldsize;
1638 	int err;
1639 	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1640 
1641 	if (!fc->default_permissions)
1642 		attr->ia_valid |= ATTR_FORCE;
1643 
1644 	err = setattr_prepare(dentry, attr);
1645 	if (err)
1646 		return err;
1647 
1648 	if (attr->ia_valid & ATTR_OPEN) {
1649 		/* This is coming from open(..., ... | O_TRUNC); */
1650 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1651 		WARN_ON(attr->ia_size != 0);
1652 		if (fc->atomic_o_trunc) {
1653 			/*
1654 			 * No need to send request to userspace, since actual
1655 			 * truncation has already been done by OPEN.  But still
1656 			 * need to truncate page cache.
1657 			 */
1658 			i_size_write(inode, 0);
1659 			truncate_pagecache(inode, 0);
1660 			return 0;
1661 		}
1662 		file = NULL;
1663 	}
1664 
1665 	if (attr->ia_valid & ATTR_SIZE)
1666 		is_truncate = true;
1667 
1668 	if (is_truncate) {
1669 		fuse_set_nowrite(inode);
1670 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1671 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1672 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1673 	}
1674 
1675 	memset(&inarg, 0, sizeof(inarg));
1676 	memset(&outarg, 0, sizeof(outarg));
1677 	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1678 	if (file) {
1679 		struct fuse_file *ff = file->private_data;
1680 		inarg.valid |= FATTR_FH;
1681 		inarg.fh = ff->fh;
1682 	}
1683 	if (attr->ia_valid & ATTR_SIZE) {
1684 		/* For mandatory locking in truncate */
1685 		inarg.valid |= FATTR_LOCKOWNER;
1686 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1687 	}
1688 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1689 	err = fuse_simple_request(fc, &args);
1690 	if (err) {
1691 		if (err == -EINTR)
1692 			fuse_invalidate_attr(inode);
1693 		goto error;
1694 	}
1695 
1696 	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1697 		make_bad_inode(inode);
1698 		err = -EIO;
1699 		goto error;
1700 	}
1701 
1702 	spin_lock(&fc->lock);
1703 	/* the kernel maintains i_mtime locally */
1704 	if (trust_local_cmtime) {
1705 		if (attr->ia_valid & ATTR_MTIME)
1706 			inode->i_mtime = attr->ia_mtime;
1707 		if (attr->ia_valid & ATTR_CTIME)
1708 			inode->i_ctime = attr->ia_ctime;
1709 		/* FIXME: clear I_DIRTY_SYNC? */
1710 	}
1711 
1712 	fuse_change_attributes_common(inode, &outarg.attr,
1713 				      attr_timeout(&outarg));
1714 	oldsize = inode->i_size;
1715 	/* see the comment in fuse_change_attributes() */
1716 	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1717 		i_size_write(inode, outarg.attr.size);
1718 
1719 	if (is_truncate) {
1720 		/* NOTE: this may release/reacquire fc->lock */
1721 		__fuse_release_nowrite(inode);
1722 	}
1723 	spin_unlock(&fc->lock);
1724 
1725 	/*
1726 	 * Only call invalidate_inode_pages2() after removing
1727 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1728 	 */
1729 	if ((is_truncate || !is_wb) &&
1730 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1731 		truncate_pagecache(inode, outarg.attr.size);
1732 		invalidate_inode_pages2(inode->i_mapping);
1733 	}
1734 
1735 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1736 	return 0;
1737 
1738 error:
1739 	if (is_truncate)
1740 		fuse_release_nowrite(inode);
1741 
1742 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1743 	return err;
1744 }
1745 
1746 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1747 {
1748 	struct inode *inode = d_inode(entry);
1749 	struct fuse_conn *fc = get_fuse_conn(inode);
1750 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1751 	int ret;
1752 
1753 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1754 		return -EACCES;
1755 
1756 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1757 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1758 				    ATTR_MODE);
1759 
1760 		/*
1761 		 * The only sane way to reliably kill suid/sgid is to do it in
1762 		 * the userspace filesystem
1763 		 *
1764 		 * This should be done on write(), truncate() and chown().
1765 		 */
1766 		if (!fc->handle_killpriv) {
1767 			/*
1768 			 * ia_mode calculation may have used stale i_mode.
1769 			 * Refresh and recalculate.
1770 			 */
1771 			ret = fuse_do_getattr(inode, NULL, file);
1772 			if (ret)
1773 				return ret;
1774 
1775 			attr->ia_mode = inode->i_mode;
1776 			if (inode->i_mode & S_ISUID) {
1777 				attr->ia_valid |= ATTR_MODE;
1778 				attr->ia_mode &= ~S_ISUID;
1779 			}
1780 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1781 				attr->ia_valid |= ATTR_MODE;
1782 				attr->ia_mode &= ~S_ISGID;
1783 			}
1784 		}
1785 	}
1786 	if (!attr->ia_valid)
1787 		return 0;
1788 
1789 	ret = fuse_do_setattr(entry, attr, file);
1790 	if (!ret) {
1791 		/*
1792 		 * If filesystem supports acls it may have updated acl xattrs in
1793 		 * the filesystem, so forget cached acls for the inode.
1794 		 */
1795 		if (fc->posix_acl)
1796 			forget_all_cached_acls(inode);
1797 
1798 		/* Directory mode changed, may need to revalidate access */
1799 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1800 			fuse_invalidate_entry_cache(entry);
1801 	}
1802 	return ret;
1803 }
1804 
1805 static int fuse_getattr(const struct path *path, struct kstat *stat,
1806 			u32 request_mask, unsigned int flags)
1807 {
1808 	struct inode *inode = d_inode(path->dentry);
1809 	struct fuse_conn *fc = get_fuse_conn(inode);
1810 
1811 	if (!fuse_allow_current_process(fc))
1812 		return -EACCES;
1813 
1814 	return fuse_update_get_attr(inode, NULL, stat, flags);
1815 }
1816 
1817 static const struct inode_operations fuse_dir_inode_operations = {
1818 	.lookup		= fuse_lookup,
1819 	.mkdir		= fuse_mkdir,
1820 	.symlink	= fuse_symlink,
1821 	.unlink		= fuse_unlink,
1822 	.rmdir		= fuse_rmdir,
1823 	.rename		= fuse_rename2,
1824 	.link		= fuse_link,
1825 	.setattr	= fuse_setattr,
1826 	.create		= fuse_create,
1827 	.atomic_open	= fuse_atomic_open,
1828 	.mknod		= fuse_mknod,
1829 	.permission	= fuse_permission,
1830 	.getattr	= fuse_getattr,
1831 	.listxattr	= fuse_listxattr,
1832 	.get_acl	= fuse_get_acl,
1833 	.set_acl	= fuse_set_acl,
1834 };
1835 
1836 static const struct file_operations fuse_dir_operations = {
1837 	.llseek		= generic_file_llseek,
1838 	.read		= generic_read_dir,
1839 	.iterate_shared	= fuse_readdir,
1840 	.open		= fuse_dir_open,
1841 	.release	= fuse_dir_release,
1842 	.fsync		= fuse_dir_fsync,
1843 	.unlocked_ioctl	= fuse_dir_ioctl,
1844 	.compat_ioctl	= fuse_dir_compat_ioctl,
1845 };
1846 
1847 static const struct inode_operations fuse_common_inode_operations = {
1848 	.setattr	= fuse_setattr,
1849 	.permission	= fuse_permission,
1850 	.getattr	= fuse_getattr,
1851 	.listxattr	= fuse_listxattr,
1852 	.get_acl	= fuse_get_acl,
1853 	.set_acl	= fuse_set_acl,
1854 };
1855 
1856 static const struct inode_operations fuse_symlink_inode_operations = {
1857 	.setattr	= fuse_setattr,
1858 	.get_link	= fuse_get_link,
1859 	.getattr	= fuse_getattr,
1860 	.listxattr	= fuse_listxattr,
1861 };
1862 
1863 void fuse_init_common(struct inode *inode)
1864 {
1865 	inode->i_op = &fuse_common_inode_operations;
1866 }
1867 
1868 void fuse_init_dir(struct inode *inode)
1869 {
1870 	inode->i_op = &fuse_dir_inode_operations;
1871 	inode->i_fop = &fuse_dir_operations;
1872 }
1873 
1874 void fuse_init_symlink(struct inode *inode)
1875 {
1876 	inode->i_op = &fuse_symlink_inode_operations;
1877 }
1878