xref: /openbmc/linux/fs/fuse/dir.c (revision f3a8b664)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/sched.h>
14 #include <linux/namei.h>
15 #include <linux/slab.h>
16 #include <linux/xattr.h>
17 #include <linux/posix_acl.h>
18 
19 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
20 {
21 	struct fuse_conn *fc = get_fuse_conn(dir);
22 	struct fuse_inode *fi = get_fuse_inode(dir);
23 
24 	if (!fc->do_readdirplus)
25 		return false;
26 	if (!fc->readdirplus_auto)
27 		return true;
28 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
29 		return true;
30 	if (ctx->pos == 0)
31 		return true;
32 	return false;
33 }
34 
35 static void fuse_advise_use_readdirplus(struct inode *dir)
36 {
37 	struct fuse_inode *fi = get_fuse_inode(dir);
38 
39 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
40 }
41 
42 union fuse_dentry {
43 	u64 time;
44 	struct rcu_head rcu;
45 };
46 
47 static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
48 {
49 	((union fuse_dentry *) entry->d_fsdata)->time = time;
50 }
51 
52 static inline u64 fuse_dentry_time(struct dentry *entry)
53 {
54 	return ((union fuse_dentry *) entry->d_fsdata)->time;
55 }
56 
57 /*
58  * FUSE caches dentries and attributes with separate timeout.  The
59  * time in jiffies until the dentry/attributes are valid is stored in
60  * dentry->d_fsdata and fuse_inode->i_time respectively.
61  */
62 
63 /*
64  * Calculate the time in jiffies until a dentry/attributes are valid
65  */
66 static u64 time_to_jiffies(u64 sec, u32 nsec)
67 {
68 	if (sec || nsec) {
69 		struct timespec64 ts = {
70 			sec,
71 			max_t(u32, nsec, NSEC_PER_SEC - 1)
72 		};
73 
74 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
75 	} else
76 		return 0;
77 }
78 
79 /*
80  * Set dentry and possibly attribute timeouts from the lookup/mk*
81  * replies
82  */
83 static void fuse_change_entry_timeout(struct dentry *entry,
84 				      struct fuse_entry_out *o)
85 {
86 	fuse_dentry_settime(entry,
87 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
88 }
89 
90 static u64 attr_timeout(struct fuse_attr_out *o)
91 {
92 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
93 }
94 
95 static u64 entry_attr_timeout(struct fuse_entry_out *o)
96 {
97 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
98 }
99 
100 /*
101  * Mark the attributes as stale, so that at the next call to
102  * ->getattr() they will be fetched from userspace
103  */
104 void fuse_invalidate_attr(struct inode *inode)
105 {
106 	get_fuse_inode(inode)->i_time = 0;
107 }
108 
/*
 * Mark the attributes as stale because of an atime change.  Nothing is
 * invalidated when IS_RDONLY() is true for the inode.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
118 
119 /*
120  * Just mark the entry as stale, so that a next attempt to look it up
121  * will result in a new lookup call to userspace
122  *
123  * This is called when a dentry is about to become negative and the
124  * timeout is unknown (unlink, rmdir, rename and in some cases
125  * lookup)
126  */
127 void fuse_invalidate_entry_cache(struct dentry *entry)
128 {
129 	fuse_dentry_settime(entry, 0);
130 }
131 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the VFS to
 * invalidate the dentry as well (try to remove it from the hash).
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);

	/* Same effect as fuse_invalidate_entry_cache(): zero the timeout */
	fuse_dentry_settime(entry, 0);
}
141 
142 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
143 			     u64 nodeid, const struct qstr *name,
144 			     struct fuse_entry_out *outarg)
145 {
146 	memset(outarg, 0, sizeof(struct fuse_entry_out));
147 	args->in.h.opcode = FUSE_LOOKUP;
148 	args->in.h.nodeid = nodeid;
149 	args->in.numargs = 1;
150 	args->in.args[0].size = name->len + 1;
151 	args->in.args[0].value = name->name;
152 	args->out.numargs = 1;
153 	args->out.args[0].size = sizeof(struct fuse_entry_out);
154 	args->out.args[0].value = outarg;
155 }
156 
157 u64 fuse_get_attr_version(struct fuse_conn *fc)
158 {
159 	u64 curr_version;
160 
161 	/*
162 	 * The spin lock isn't actually needed on 64bit archs, but we
163 	 * don't yet care too much about such optimizations.
164 	 */
165 	spin_lock(&fc->lock);
166 	curr_version = fc->attr_version;
167 	spin_unlock(&fc->lock);
168 
169 	return curr_version;
170 }
171 
172 /*
173  * Check whether the dentry is still valid
174  *
175  * If the entry validity timeout has expired and the dentry is
176  * positive, try to redo the lookup.  If the lookup results in a
177  * different inode, then let the VFS invalidate the dentry and redo
178  * the lookup once more.  If the lookup results in the same inode,
179  * then refresh the attributes, timeouts and mark the dentry valid.
180  */
181 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
182 {
183 	struct inode *inode;
184 	struct dentry *parent;
185 	struct fuse_conn *fc;
186 	struct fuse_inode *fi;
187 	int ret;
188 
189 	inode = d_inode_rcu(entry);
190 	if (inode && is_bad_inode(inode))
191 		goto invalid;
192 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
193 		 (flags & LOOKUP_REVAL)) {
194 		struct fuse_entry_out outarg;
195 		FUSE_ARGS(args);
196 		struct fuse_forget_link *forget;
197 		u64 attr_version;
198 
199 		/* For negative dentries, always do a fresh lookup */
200 		if (!inode)
201 			goto invalid;
202 
203 		ret = -ECHILD;
204 		if (flags & LOOKUP_RCU)
205 			goto out;
206 
207 		fc = get_fuse_conn(inode);
208 
209 		forget = fuse_alloc_forget();
210 		ret = -ENOMEM;
211 		if (!forget)
212 			goto out;
213 
214 		attr_version = fuse_get_attr_version(fc);
215 
216 		parent = dget_parent(entry);
217 		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
218 				 &entry->d_name, &outarg);
219 		ret = fuse_simple_request(fc, &args);
220 		dput(parent);
221 		/* Zero nodeid is same as -ENOENT */
222 		if (!ret && !outarg.nodeid)
223 			ret = -ENOENT;
224 		if (!ret) {
225 			fi = get_fuse_inode(inode);
226 			if (outarg.nodeid != get_node_id(inode)) {
227 				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
228 				goto invalid;
229 			}
230 			spin_lock(&fc->lock);
231 			fi->nlookup++;
232 			spin_unlock(&fc->lock);
233 		}
234 		kfree(forget);
235 		if (ret == -ENOMEM)
236 			goto out;
237 		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
238 			goto invalid;
239 
240 		forget_all_cached_acls(inode);
241 		fuse_change_attributes(inode, &outarg.attr,
242 				       entry_attr_timeout(&outarg),
243 				       attr_version);
244 		fuse_change_entry_timeout(entry, &outarg);
245 	} else if (inode) {
246 		fi = get_fuse_inode(inode);
247 		if (flags & LOOKUP_RCU) {
248 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
249 				return -ECHILD;
250 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
251 			parent = dget_parent(entry);
252 			fuse_advise_use_readdirplus(d_inode(parent));
253 			dput(parent);
254 		}
255 	}
256 	ret = 1;
257 out:
258 	return ret;
259 
260 invalid:
261 	ret = 0;
262 	goto out;
263 }
264 
265 static int invalid_nodeid(u64 nodeid)
266 {
267 	return !nodeid || nodeid == FUSE_ROOT_ID;
268 }
269 
270 static int fuse_dentry_init(struct dentry *dentry)
271 {
272 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
273 
274 	return dentry->d_fsdata ? 0 : -ENOMEM;
275 }
276 static void fuse_dentry_release(struct dentry *dentry)
277 {
278 	union fuse_dentry *fd = dentry->d_fsdata;
279 
280 	kfree_rcu(fd, rcu);
281 }
282 
283 const struct dentry_operations fuse_dentry_operations = {
284 	.d_revalidate	= fuse_dentry_revalidate,
285 	.d_init		= fuse_dentry_init,
286 	.d_release	= fuse_dentry_release,
287 };
288 
/*
 * Return 1 if the mode @m describes a regular file, directory, symlink,
 * character device, block device, FIFO or socket; otherwise return 0.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;
	if (S_ISFIFO(m) || S_ISSOCK(m))
		return 1;

	return 0;
}
294 
295 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
296 		     struct fuse_entry_out *outarg, struct inode **inode)
297 {
298 	struct fuse_conn *fc = get_fuse_conn_super(sb);
299 	FUSE_ARGS(args);
300 	struct fuse_forget_link *forget;
301 	u64 attr_version;
302 	int err;
303 
304 	*inode = NULL;
305 	err = -ENAMETOOLONG;
306 	if (name->len > FUSE_NAME_MAX)
307 		goto out;
308 
309 
310 	forget = fuse_alloc_forget();
311 	err = -ENOMEM;
312 	if (!forget)
313 		goto out;
314 
315 	attr_version = fuse_get_attr_version(fc);
316 
317 	fuse_lookup_init(fc, &args, nodeid, name, outarg);
318 	err = fuse_simple_request(fc, &args);
319 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
320 	if (err || !outarg->nodeid)
321 		goto out_put_forget;
322 
323 	err = -EIO;
324 	if (!outarg->nodeid)
325 		goto out_put_forget;
326 	if (!fuse_valid_type(outarg->attr.mode))
327 		goto out_put_forget;
328 
329 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
330 			   &outarg->attr, entry_attr_timeout(outarg),
331 			   attr_version);
332 	err = -ENOMEM;
333 	if (!*inode) {
334 		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
335 		goto out;
336 	}
337 	err = 0;
338 
339  out_put_forget:
340 	kfree(forget);
341  out:
342 	return err;
343 }
344 
345 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
346 				  unsigned int flags)
347 {
348 	int err;
349 	struct fuse_entry_out outarg;
350 	struct inode *inode;
351 	struct dentry *newent;
352 	bool outarg_valid = true;
353 
354 	fuse_lock_inode(dir);
355 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
356 			       &outarg, &inode);
357 	fuse_unlock_inode(dir);
358 	if (err == -ENOENT) {
359 		outarg_valid = false;
360 		err = 0;
361 	}
362 	if (err)
363 		goto out_err;
364 
365 	err = -EIO;
366 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
367 		goto out_iput;
368 
369 	newent = d_splice_alias(inode, entry);
370 	err = PTR_ERR(newent);
371 	if (IS_ERR(newent))
372 		goto out_err;
373 
374 	entry = newent ? newent : entry;
375 	if (outarg_valid)
376 		fuse_change_entry_timeout(entry, &outarg);
377 	else
378 		fuse_invalidate_entry_cache(entry);
379 
380 	fuse_advise_use_readdirplus(dir);
381 	return newent;
382 
383  out_iput:
384 	iput(inode);
385  out_err:
386 	return ERR_PTR(err);
387 }
388 
389 /*
390  * Atomic create+open operation
391  *
392  * If the filesystem doesn't support this, then fall back to separate
393  * 'mknod' + 'open' requests.
394  */
395 static int fuse_create_open(struct inode *dir, struct dentry *entry,
396 			    struct file *file, unsigned flags,
397 			    umode_t mode, int *opened)
398 {
399 	int err;
400 	struct inode *inode;
401 	struct fuse_conn *fc = get_fuse_conn(dir);
402 	FUSE_ARGS(args);
403 	struct fuse_forget_link *forget;
404 	struct fuse_create_in inarg;
405 	struct fuse_open_out outopen;
406 	struct fuse_entry_out outentry;
407 	struct fuse_file *ff;
408 
409 	/* Userspace expects S_IFREG in create mode */
410 	BUG_ON((mode & S_IFMT) != S_IFREG);
411 
412 	forget = fuse_alloc_forget();
413 	err = -ENOMEM;
414 	if (!forget)
415 		goto out_err;
416 
417 	err = -ENOMEM;
418 	ff = fuse_file_alloc(fc);
419 	if (!ff)
420 		goto out_put_forget_req;
421 
422 	if (!fc->dont_mask)
423 		mode &= ~current_umask();
424 
425 	flags &= ~O_NOCTTY;
426 	memset(&inarg, 0, sizeof(inarg));
427 	memset(&outentry, 0, sizeof(outentry));
428 	inarg.flags = flags;
429 	inarg.mode = mode;
430 	inarg.umask = current_umask();
431 	args.in.h.opcode = FUSE_CREATE;
432 	args.in.h.nodeid = get_node_id(dir);
433 	args.in.numargs = 2;
434 	args.in.args[0].size = sizeof(inarg);
435 	args.in.args[0].value = &inarg;
436 	args.in.args[1].size = entry->d_name.len + 1;
437 	args.in.args[1].value = entry->d_name.name;
438 	args.out.numargs = 2;
439 	args.out.args[0].size = sizeof(outentry);
440 	args.out.args[0].value = &outentry;
441 	args.out.args[1].size = sizeof(outopen);
442 	args.out.args[1].value = &outopen;
443 	err = fuse_simple_request(fc, &args);
444 	if (err)
445 		goto out_free_ff;
446 
447 	err = -EIO;
448 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
449 		goto out_free_ff;
450 
451 	ff->fh = outopen.fh;
452 	ff->nodeid = outentry.nodeid;
453 	ff->open_flags = outopen.open_flags;
454 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
455 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
456 	if (!inode) {
457 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
458 		fuse_sync_release(ff, flags);
459 		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
460 		err = -ENOMEM;
461 		goto out_err;
462 	}
463 	kfree(forget);
464 	d_instantiate(entry, inode);
465 	fuse_change_entry_timeout(entry, &outentry);
466 	fuse_invalidate_attr(dir);
467 	err = finish_open(file, entry, generic_file_open, opened);
468 	if (err) {
469 		fuse_sync_release(ff, flags);
470 	} else {
471 		file->private_data = fuse_file_get(ff);
472 		fuse_finish_open(inode, file);
473 	}
474 	return err;
475 
476 out_free_ff:
477 	fuse_file_free(ff);
478 out_put_forget_req:
479 	kfree(forget);
480 out_err:
481 	return err;
482 }
483 
484 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
485 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
486 			    struct file *file, unsigned flags,
487 			    umode_t mode, int *opened)
488 {
489 	int err;
490 	struct fuse_conn *fc = get_fuse_conn(dir);
491 	struct dentry *res = NULL;
492 
493 	if (d_in_lookup(entry)) {
494 		res = fuse_lookup(dir, entry, 0);
495 		if (IS_ERR(res))
496 			return PTR_ERR(res);
497 
498 		if (res)
499 			entry = res;
500 	}
501 
502 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
503 		goto no_open;
504 
505 	/* Only creates */
506 	*opened |= FILE_CREATED;
507 
508 	if (fc->no_create)
509 		goto mknod;
510 
511 	err = fuse_create_open(dir, entry, file, flags, mode, opened);
512 	if (err == -ENOSYS) {
513 		fc->no_create = 1;
514 		goto mknod;
515 	}
516 out_dput:
517 	dput(res);
518 	return err;
519 
520 mknod:
521 	err = fuse_mknod(dir, entry, mode, 0);
522 	if (err)
523 		goto out_dput;
524 no_open:
525 	return finish_no_open(file, res);
526 }
527 
528 /*
529  * Code shared between mknod, mkdir, symlink and link
530  */
531 static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
532 			    struct inode *dir, struct dentry *entry,
533 			    umode_t mode)
534 {
535 	struct fuse_entry_out outarg;
536 	struct inode *inode;
537 	int err;
538 	struct fuse_forget_link *forget;
539 
540 	forget = fuse_alloc_forget();
541 	if (!forget)
542 		return -ENOMEM;
543 
544 	memset(&outarg, 0, sizeof(outarg));
545 	args->in.h.nodeid = get_node_id(dir);
546 	args->out.numargs = 1;
547 	args->out.args[0].size = sizeof(outarg);
548 	args->out.args[0].value = &outarg;
549 	err = fuse_simple_request(fc, args);
550 	if (err)
551 		goto out_put_forget_req;
552 
553 	err = -EIO;
554 	if (invalid_nodeid(outarg.nodeid))
555 		goto out_put_forget_req;
556 
557 	if ((outarg.attr.mode ^ mode) & S_IFMT)
558 		goto out_put_forget_req;
559 
560 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
561 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
562 	if (!inode) {
563 		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
564 		return -ENOMEM;
565 	}
566 	kfree(forget);
567 
568 	err = d_instantiate_no_diralias(entry, inode);
569 	if (err)
570 		return err;
571 
572 	fuse_change_entry_timeout(entry, &outarg);
573 	fuse_invalidate_attr(dir);
574 	return 0;
575 
576  out_put_forget_req:
577 	kfree(forget);
578 	return err;
579 }
580 
581 static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
582 		      dev_t rdev)
583 {
584 	struct fuse_mknod_in inarg;
585 	struct fuse_conn *fc = get_fuse_conn(dir);
586 	FUSE_ARGS(args);
587 
588 	if (!fc->dont_mask)
589 		mode &= ~current_umask();
590 
591 	memset(&inarg, 0, sizeof(inarg));
592 	inarg.mode = mode;
593 	inarg.rdev = new_encode_dev(rdev);
594 	inarg.umask = current_umask();
595 	args.in.h.opcode = FUSE_MKNOD;
596 	args.in.numargs = 2;
597 	args.in.args[0].size = sizeof(inarg);
598 	args.in.args[0].value = &inarg;
599 	args.in.args[1].size = entry->d_name.len + 1;
600 	args.in.args[1].value = entry->d_name.name;
601 	return create_new_entry(fc, &args, dir, entry, mode);
602 }
603 
604 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
605 		       bool excl)
606 {
607 	return fuse_mknod(dir, entry, mode, 0);
608 }
609 
610 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
611 {
612 	struct fuse_mkdir_in inarg;
613 	struct fuse_conn *fc = get_fuse_conn(dir);
614 	FUSE_ARGS(args);
615 
616 	if (!fc->dont_mask)
617 		mode &= ~current_umask();
618 
619 	memset(&inarg, 0, sizeof(inarg));
620 	inarg.mode = mode;
621 	inarg.umask = current_umask();
622 	args.in.h.opcode = FUSE_MKDIR;
623 	args.in.numargs = 2;
624 	args.in.args[0].size = sizeof(inarg);
625 	args.in.args[0].value = &inarg;
626 	args.in.args[1].size = entry->d_name.len + 1;
627 	args.in.args[1].value = entry->d_name.name;
628 	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
629 }
630 
631 static int fuse_symlink(struct inode *dir, struct dentry *entry,
632 			const char *link)
633 {
634 	struct fuse_conn *fc = get_fuse_conn(dir);
635 	unsigned len = strlen(link) + 1;
636 	FUSE_ARGS(args);
637 
638 	args.in.h.opcode = FUSE_SYMLINK;
639 	args.in.numargs = 2;
640 	args.in.args[0].size = entry->d_name.len + 1;
641 	args.in.args[0].value = entry->d_name.name;
642 	args.in.args[1].size = len;
643 	args.in.args[1].value = link;
644 	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
645 }
646 
647 void fuse_update_ctime(struct inode *inode)
648 {
649 	if (!IS_NOCMTIME(inode)) {
650 		inode->i_ctime = current_time(inode);
651 		mark_inode_dirty_sync(inode);
652 	}
653 }
654 
655 static int fuse_unlink(struct inode *dir, struct dentry *entry)
656 {
657 	int err;
658 	struct fuse_conn *fc = get_fuse_conn(dir);
659 	FUSE_ARGS(args);
660 
661 	args.in.h.opcode = FUSE_UNLINK;
662 	args.in.h.nodeid = get_node_id(dir);
663 	args.in.numargs = 1;
664 	args.in.args[0].size = entry->d_name.len + 1;
665 	args.in.args[0].value = entry->d_name.name;
666 	err = fuse_simple_request(fc, &args);
667 	if (!err) {
668 		struct inode *inode = d_inode(entry);
669 		struct fuse_inode *fi = get_fuse_inode(inode);
670 
671 		spin_lock(&fc->lock);
672 		fi->attr_version = ++fc->attr_version;
673 		/*
674 		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
675 		 * happen if userspace filesystem is careless.  It would be
676 		 * difficult to enforce correct nlink usage so just ignore this
677 		 * condition here
678 		 */
679 		if (inode->i_nlink > 0)
680 			drop_nlink(inode);
681 		spin_unlock(&fc->lock);
682 		fuse_invalidate_attr(inode);
683 		fuse_invalidate_attr(dir);
684 		fuse_invalidate_entry_cache(entry);
685 		fuse_update_ctime(inode);
686 	} else if (err == -EINTR)
687 		fuse_invalidate_entry(entry);
688 	return err;
689 }
690 
691 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
692 {
693 	int err;
694 	struct fuse_conn *fc = get_fuse_conn(dir);
695 	FUSE_ARGS(args);
696 
697 	args.in.h.opcode = FUSE_RMDIR;
698 	args.in.h.nodeid = get_node_id(dir);
699 	args.in.numargs = 1;
700 	args.in.args[0].size = entry->d_name.len + 1;
701 	args.in.args[0].value = entry->d_name.name;
702 	err = fuse_simple_request(fc, &args);
703 	if (!err) {
704 		clear_nlink(d_inode(entry));
705 		fuse_invalidate_attr(dir);
706 		fuse_invalidate_entry_cache(entry);
707 	} else if (err == -EINTR)
708 		fuse_invalidate_entry(entry);
709 	return err;
710 }
711 
712 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
713 			      struct inode *newdir, struct dentry *newent,
714 			      unsigned int flags, int opcode, size_t argsize)
715 {
716 	int err;
717 	struct fuse_rename2_in inarg;
718 	struct fuse_conn *fc = get_fuse_conn(olddir);
719 	FUSE_ARGS(args);
720 
721 	memset(&inarg, 0, argsize);
722 	inarg.newdir = get_node_id(newdir);
723 	inarg.flags = flags;
724 	args.in.h.opcode = opcode;
725 	args.in.h.nodeid = get_node_id(olddir);
726 	args.in.numargs = 3;
727 	args.in.args[0].size = argsize;
728 	args.in.args[0].value = &inarg;
729 	args.in.args[1].size = oldent->d_name.len + 1;
730 	args.in.args[1].value = oldent->d_name.name;
731 	args.in.args[2].size = newent->d_name.len + 1;
732 	args.in.args[2].value = newent->d_name.name;
733 	err = fuse_simple_request(fc, &args);
734 	if (!err) {
735 		/* ctime changes */
736 		fuse_invalidate_attr(d_inode(oldent));
737 		fuse_update_ctime(d_inode(oldent));
738 
739 		if (flags & RENAME_EXCHANGE) {
740 			fuse_invalidate_attr(d_inode(newent));
741 			fuse_update_ctime(d_inode(newent));
742 		}
743 
744 		fuse_invalidate_attr(olddir);
745 		if (olddir != newdir)
746 			fuse_invalidate_attr(newdir);
747 
748 		/* newent will end up negative */
749 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
750 			fuse_invalidate_attr(d_inode(newent));
751 			fuse_invalidate_entry_cache(newent);
752 			fuse_update_ctime(d_inode(newent));
753 		}
754 	} else if (err == -EINTR) {
755 		/* If request was interrupted, DEITY only knows if the
756 		   rename actually took place.  If the invalidation
757 		   fails (e.g. some process has CWD under the renamed
758 		   directory), then there can be inconsistency between
759 		   the dcache and the real filesystem.  Tough luck. */
760 		fuse_invalidate_entry(oldent);
761 		if (d_really_is_positive(newent))
762 			fuse_invalidate_entry(newent);
763 	}
764 
765 	return err;
766 }
767 
768 static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
769 			struct inode *newdir, struct dentry *newent,
770 			unsigned int flags)
771 {
772 	struct fuse_conn *fc = get_fuse_conn(olddir);
773 	int err;
774 
775 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
776 		return -EINVAL;
777 
778 	if (flags) {
779 		if (fc->no_rename2 || fc->minor < 23)
780 			return -EINVAL;
781 
782 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
783 					 FUSE_RENAME2,
784 					 sizeof(struct fuse_rename2_in));
785 		if (err == -ENOSYS) {
786 			fc->no_rename2 = 1;
787 			err = -EINVAL;
788 		}
789 	} else {
790 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
791 					 FUSE_RENAME,
792 					 sizeof(struct fuse_rename_in));
793 	}
794 
795 	return err;
796 }
797 
798 static int fuse_link(struct dentry *entry, struct inode *newdir,
799 		     struct dentry *newent)
800 {
801 	int err;
802 	struct fuse_link_in inarg;
803 	struct inode *inode = d_inode(entry);
804 	struct fuse_conn *fc = get_fuse_conn(inode);
805 	FUSE_ARGS(args);
806 
807 	memset(&inarg, 0, sizeof(inarg));
808 	inarg.oldnodeid = get_node_id(inode);
809 	args.in.h.opcode = FUSE_LINK;
810 	args.in.numargs = 2;
811 	args.in.args[0].size = sizeof(inarg);
812 	args.in.args[0].value = &inarg;
813 	args.in.args[1].size = newent->d_name.len + 1;
814 	args.in.args[1].value = newent->d_name.name;
815 	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
816 	/* Contrary to "normal" filesystems it can happen that link
817 	   makes two "logical" inodes point to the same "physical"
818 	   inode.  We invalidate the attributes of the old one, so it
819 	   will reflect changes in the backing inode (link count,
820 	   etc.)
821 	*/
822 	if (!err) {
823 		struct fuse_inode *fi = get_fuse_inode(inode);
824 
825 		spin_lock(&fc->lock);
826 		fi->attr_version = ++fc->attr_version;
827 		inc_nlink(inode);
828 		spin_unlock(&fc->lock);
829 		fuse_invalidate_attr(inode);
830 		fuse_update_ctime(inode);
831 	} else if (err == -EINTR) {
832 		fuse_invalidate_attr(inode);
833 	}
834 	return err;
835 }
836 
837 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
838 			  struct kstat *stat)
839 {
840 	unsigned int blkbits;
841 	struct fuse_conn *fc = get_fuse_conn(inode);
842 
843 	/* see the comment in fuse_change_attributes() */
844 	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
845 		attr->size = i_size_read(inode);
846 		attr->mtime = inode->i_mtime.tv_sec;
847 		attr->mtimensec = inode->i_mtime.tv_nsec;
848 		attr->ctime = inode->i_ctime.tv_sec;
849 		attr->ctimensec = inode->i_ctime.tv_nsec;
850 	}
851 
852 	stat->dev = inode->i_sb->s_dev;
853 	stat->ino = attr->ino;
854 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
855 	stat->nlink = attr->nlink;
856 	stat->uid = make_kuid(&init_user_ns, attr->uid);
857 	stat->gid = make_kgid(&init_user_ns, attr->gid);
858 	stat->rdev = inode->i_rdev;
859 	stat->atime.tv_sec = attr->atime;
860 	stat->atime.tv_nsec = attr->atimensec;
861 	stat->mtime.tv_sec = attr->mtime;
862 	stat->mtime.tv_nsec = attr->mtimensec;
863 	stat->ctime.tv_sec = attr->ctime;
864 	stat->ctime.tv_nsec = attr->ctimensec;
865 	stat->size = attr->size;
866 	stat->blocks = attr->blocks;
867 
868 	if (attr->blksize != 0)
869 		blkbits = ilog2(attr->blksize);
870 	else
871 		blkbits = inode->i_sb->s_blocksize_bits;
872 
873 	stat->blksize = 1 << blkbits;
874 }
875 
876 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
877 			   struct file *file)
878 {
879 	int err;
880 	struct fuse_getattr_in inarg;
881 	struct fuse_attr_out outarg;
882 	struct fuse_conn *fc = get_fuse_conn(inode);
883 	FUSE_ARGS(args);
884 	u64 attr_version;
885 
886 	attr_version = fuse_get_attr_version(fc);
887 
888 	memset(&inarg, 0, sizeof(inarg));
889 	memset(&outarg, 0, sizeof(outarg));
890 	/* Directories have separate file-handle space */
891 	if (file && S_ISREG(inode->i_mode)) {
892 		struct fuse_file *ff = file->private_data;
893 
894 		inarg.getattr_flags |= FUSE_GETATTR_FH;
895 		inarg.fh = ff->fh;
896 	}
897 	args.in.h.opcode = FUSE_GETATTR;
898 	args.in.h.nodeid = get_node_id(inode);
899 	args.in.numargs = 1;
900 	args.in.args[0].size = sizeof(inarg);
901 	args.in.args[0].value = &inarg;
902 	args.out.numargs = 1;
903 	args.out.args[0].size = sizeof(outarg);
904 	args.out.args[0].value = &outarg;
905 	err = fuse_simple_request(fc, &args);
906 	if (!err) {
907 		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
908 			make_bad_inode(inode);
909 			err = -EIO;
910 		} else {
911 			fuse_change_attributes(inode, &outarg.attr,
912 					       attr_timeout(&outarg),
913 					       attr_version);
914 			if (stat)
915 				fuse_fillattr(inode, &outarg.attr, stat);
916 		}
917 	}
918 	return err;
919 }
920 
921 int fuse_update_attributes(struct inode *inode, struct kstat *stat,
922 			   struct file *file, bool *refreshed)
923 {
924 	struct fuse_inode *fi = get_fuse_inode(inode);
925 	int err;
926 	bool r;
927 
928 	if (time_before64(fi->i_time, get_jiffies_64())) {
929 		r = true;
930 		forget_all_cached_acls(inode);
931 		err = fuse_do_getattr(inode, stat, file);
932 	} else {
933 		r = false;
934 		err = 0;
935 		if (stat) {
936 			generic_fillattr(inode, stat);
937 			stat->mode = fi->orig_i_mode;
938 			stat->ino = fi->orig_ino;
939 		}
940 	}
941 
942 	if (refreshed != NULL)
943 		*refreshed = r;
944 
945 	return err;
946 }
947 
948 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
949 			     u64 child_nodeid, struct qstr *name)
950 {
951 	int err = -ENOTDIR;
952 	struct inode *parent;
953 	struct dentry *dir;
954 	struct dentry *entry;
955 
956 	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
957 	if (!parent)
958 		return -ENOENT;
959 
960 	inode_lock(parent);
961 	if (!S_ISDIR(parent->i_mode))
962 		goto unlock;
963 
964 	err = -ENOENT;
965 	dir = d_find_alias(parent);
966 	if (!dir)
967 		goto unlock;
968 
969 	name->hash = full_name_hash(dir, name->name, name->len);
970 	entry = d_lookup(dir, name);
971 	dput(dir);
972 	if (!entry)
973 		goto unlock;
974 
975 	fuse_invalidate_attr(parent);
976 	fuse_invalidate_entry(entry);
977 
978 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
979 		inode_lock(d_inode(entry));
980 		if (get_node_id(d_inode(entry)) != child_nodeid) {
981 			err = -ENOENT;
982 			goto badentry;
983 		}
984 		if (d_mountpoint(entry)) {
985 			err = -EBUSY;
986 			goto badentry;
987 		}
988 		if (d_is_dir(entry)) {
989 			shrink_dcache_parent(entry);
990 			if (!simple_empty(entry)) {
991 				err = -ENOTEMPTY;
992 				goto badentry;
993 			}
994 			d_inode(entry)->i_flags |= S_DEAD;
995 		}
996 		dont_mount(entry);
997 		clear_nlink(d_inode(entry));
998 		err = 0;
999  badentry:
1000 		inode_unlock(d_inode(entry));
1001 		if (!err)
1002 			d_delete(entry);
1003 	} else {
1004 		err = 0;
1005 	}
1006 	dput(entry);
1007 
1008  unlock:
1009 	inode_unlock(parent);
1010 	iput(parent);
1011 	return err;
1012 }
1013 
1014 /*
1015  * Calling into a user-controlled filesystem gives the filesystem
1016  * daemon ptrace-like capabilities over the current process.  This
1017  * means, that the filesystem daemon is able to record the exact
1018  * filesystem operations performed, and can also control the behavior
1019  * of the requester process in otherwise impossible ways.  For example
1020  * it can delay the operation for arbitrary length of time allowing
1021  * DoS against the requester.
1022  *
1023  * For this reason only those processes can call into the filesystem,
1024  * for which the owner of the mount has ptrace privilege.  This
1025  * excludes processes started by other users, suid or sgid processes.
1026  */
1027 int fuse_allow_current_process(struct fuse_conn *fc)
1028 {
1029 	const struct cred *cred;
1030 
1031 	if (fc->allow_other)
1032 		return 1;
1033 
1034 	cred = current_cred();
1035 	if (uid_eq(cred->euid, fc->user_id) &&
1036 	    uid_eq(cred->suid, fc->user_id) &&
1037 	    uid_eq(cred->uid,  fc->user_id) &&
1038 	    gid_eq(cred->egid, fc->group_id) &&
1039 	    gid_eq(cred->sgid, fc->group_id) &&
1040 	    gid_eq(cred->gid,  fc->group_id))
1041 		return 1;
1042 
1043 	return 0;
1044 }
1045 
1046 static int fuse_access(struct inode *inode, int mask)
1047 {
1048 	struct fuse_conn *fc = get_fuse_conn(inode);
1049 	FUSE_ARGS(args);
1050 	struct fuse_access_in inarg;
1051 	int err;
1052 
1053 	BUG_ON(mask & MAY_NOT_BLOCK);
1054 
1055 	if (fc->no_access)
1056 		return 0;
1057 
1058 	memset(&inarg, 0, sizeof(inarg));
1059 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1060 	args.in.h.opcode = FUSE_ACCESS;
1061 	args.in.h.nodeid = get_node_id(inode);
1062 	args.in.numargs = 1;
1063 	args.in.args[0].size = sizeof(inarg);
1064 	args.in.args[0].value = &inarg;
1065 	err = fuse_simple_request(fc, &args);
1066 	if (err == -ENOSYS) {
1067 		fc->no_access = 1;
1068 		err = 0;
1069 	}
1070 	return err;
1071 }
1072 
1073 static int fuse_perm_getattr(struct inode *inode, int mask)
1074 {
1075 	if (mask & MAY_NOT_BLOCK)
1076 		return -ECHILD;
1077 
1078 	forget_all_cached_acls(inode);
1079 	return fuse_do_getattr(inode, NULL, NULL);
1080 }
1081 
/*
 * Check permission.  The two basic access models of FUSE are:
 *
 * 1) Local access checking ('default_permissions' mount option) based
 * on file mode.  This is the plain old disk filesystem permission
 * model.
 *
 * 2) "Remote" access checking, where server is responsible for
 * checking permission in each inode operation.  An exception to this
 * is if ->permission() was invoked from sys_access() in which case an
 * access request is sent.  Execute permission is still checked
 * locally based on file mode.
 *
 * Returns 0 when access is granted, -EACCES when it is denied, or the
 * error from the attribute refresh / access request (this includes
 * -ECHILD from fuse_perm_getattr() when MAY_NOT_BLOCK is set and a
 * refresh would be needed).
 */
static int fuse_permission(struct inode *inode, int mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	/* true once the attributes were re-fetched during this call */
	bool refreshed = false;
	int err = 0;

	if (!fuse_allow_current_process(fc))
		return -EACCES;

	/*
	 * If attributes are needed, refresh them before proceeding
	 */
	if (fc->default_permissions ||
	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		/* fi->i_time lies in the past: cached attributes are stale */
		if (time_before64(fi->i_time, get_jiffies_64())) {
			refreshed = true;

			err = fuse_perm_getattr(inode, mask);
			if (err)
				return err;
		}
	}

	if (fc->default_permissions) {
		err = generic_permission(inode, mask);

		/* If permission is denied, try to refresh file
		   attributes.  This is also needed, because the root
		   node will at first have no permissions */
		if (err == -EACCES && !refreshed) {
			err = fuse_perm_getattr(inode, mask);
			if (!err)
				err = generic_permission(inode, mask);
		}

		/* Note: the opposite of the above test does not
		   exist.  So if permissions are revoked this won't be
		   noticed immediately, only after the attribute
		   timeout has expired */
	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
		/* access()/chdir() style check: ask the server */
		err = fuse_access(inode, mask);
	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		/* Exec of a regular file needs at least one x bit */
		if (!(inode->i_mode & S_IXUGO)) {
			/* Mode is already fresh, no point in asking again */
			if (refreshed)
				return -EACCES;

			err = fuse_perm_getattr(inode, mask);
			if (!err && !(inode->i_mode & S_IXUGO))
				return -EACCES;
		}
	}
	return err;
}
1150 
1151 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1152 			 struct dir_context *ctx)
1153 {
1154 	while (nbytes >= FUSE_NAME_OFFSET) {
1155 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1156 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
1157 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1158 			return -EIO;
1159 		if (reclen > nbytes)
1160 			break;
1161 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1162 			return -EIO;
1163 
1164 		if (!dir_emit(ctx, dirent->name, dirent->namelen,
1165 			       dirent->ino, dirent->type))
1166 			break;
1167 
1168 		buf += reclen;
1169 		nbytes -= reclen;
1170 		ctx->pos = dirent->off;
1171 	}
1172 
1173 	return 0;
1174 }
1175 
/*
 * Instantiate or refresh the dentry/inode described by one
 * fuse_direntplus record returned for the directory open as 'file'.
 *
 * 'attr_version' is passed through to fuse_change_attributes() /
 * fuse_iget() together with the attributes from the record.
 *
 * Returns 0 on success or when the record is deliberately ignored
 * (zero nodeid, "." or ".."), -EIO for an invalid nodeid/type or a bad
 * inode, or the error from dentry allocation / splicing.  The caller in
 * this file sends a forget for the nodeid when a non-zero value is
 * returned.
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (!fuse_valid_type(o->attr.mode))
		return -EIO;

	fc = get_fuse_conn(dir);

	/* Try the dcache first; allocate an in-lookup dentry on a miss */
	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		/* An already established dentry exists for this name */
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		/*
		 * Negative dentry, different nodeid or different file type:
		 * the cached entry no longer matches what the server
		 * reported, so invalidate it and start over.
		 */
		if (!inode ||
		    get_node_id(inode) != o->nodeid ||
		    ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (is_bad_inode(inode)) {
			dput(dentry);
			return -EIO;
		}

		/* Account for the lookup this record represents */
		fi = get_fuse_inode(inode);
		spin_lock(&fc->lock);
		fi->nlookup++;
		spin_unlock(&fc->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr,
				       entry_attr_timeout(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		/* Fresh in-lookup dentry: create the inode and attach it */
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, entry_attr_timeout(o),
				  attr_version);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		/* A non-NULL result (alias or error) replaces our dentry */
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}
1279 
1280 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1281 			     struct dir_context *ctx, u64 attr_version)
1282 {
1283 	struct fuse_direntplus *direntplus;
1284 	struct fuse_dirent *dirent;
1285 	size_t reclen;
1286 	int over = 0;
1287 	int ret;
1288 
1289 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1290 		direntplus = (struct fuse_direntplus *) buf;
1291 		dirent = &direntplus->dirent;
1292 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1293 
1294 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1295 			return -EIO;
1296 		if (reclen > nbytes)
1297 			break;
1298 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1299 			return -EIO;
1300 
1301 		if (!over) {
1302 			/* We fill entries into dstbuf only as much as
1303 			   it can hold. But we still continue iterating
1304 			   over remaining entries to link them. If not,
1305 			   we need to send a FORGET for each of those
1306 			   which we did not link.
1307 			*/
1308 			over = !dir_emit(ctx, dirent->name, dirent->namelen,
1309 				       dirent->ino, dirent->type);
1310 			ctx->pos = dirent->off;
1311 		}
1312 
1313 		buf += reclen;
1314 		nbytes -= reclen;
1315 
1316 		ret = fuse_direntplus_link(file, direntplus, attr_version);
1317 		if (ret)
1318 			fuse_force_forget(file, direntplus->entry_out.nodeid);
1319 	}
1320 
1321 	return 0;
1322 }
1323 
/*
 * ->iterate_shared() for FUSE directories.
 *
 * Sends a single FUSE_READDIR or FUSE_READDIRPLUS request (chosen by
 * fuse_use_readdirplus()) starting at ctx->pos, with a one-page reply
 * buffer, and feeds the reply to parse_dirfile()/parse_dirplusfile().
 *
 * Returns 0 on success, -EIO for a bad inode, -ENOMEM if the page
 * cannot be allocated, or the error from request allocation, the
 * request itself, or reply parsing.
 */
static int fuse_readdir(struct file *file, struct dir_context *ctx)
{
	int plus, err;
	size_t nbytes;
	struct page *page;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	u64 attr_version = 0;

	if (is_bad_inode(inode))
		return -EIO;

	req = fuse_get_req(fc, 1);
	if (IS_ERR(req))
		return PTR_ERR(req);

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		fuse_put_request(fc, req);
		return -ENOMEM;
	}

	plus = fuse_use_readdirplus(inode, ctx);
	/* The reply is received directly into the single page */
	req->out.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = page;
	req->page_descs[0].length = PAGE_SIZE;
	if (plus) {
		/* Sampled before sending; handed to the parser afterwards */
		attr_version = fuse_get_attr_version(fc);
		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
			       FUSE_READDIRPLUS);
	} else {
		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
			       FUSE_READDIR);
	}
	fuse_lock_inode(inode);
	fuse_request_send(fc, req);
	fuse_unlock_inode(inode);
	/* Copy out what is needed from req before it is released */
	nbytes = req->out.args[0].size;
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (!err) {
		if (plus) {
			err = parse_dirplusfile(page_address(page), nbytes,
						file, ctx,
						attr_version);
		} else {
			err = parse_dirfile(page_address(page), nbytes, file,
					    ctx);
		}
	}

	__free_page(page);
	fuse_invalidate_atime(inode);
	return err;
}
1381 
1382 static const char *fuse_get_link(struct dentry *dentry,
1383 				 struct inode *inode,
1384 				 struct delayed_call *done)
1385 {
1386 	struct fuse_conn *fc = get_fuse_conn(inode);
1387 	FUSE_ARGS(args);
1388 	char *link;
1389 	ssize_t ret;
1390 
1391 	if (!dentry)
1392 		return ERR_PTR(-ECHILD);
1393 
1394 	link = kmalloc(PAGE_SIZE, GFP_KERNEL);
1395 	if (!link)
1396 		return ERR_PTR(-ENOMEM);
1397 
1398 	args.in.h.opcode = FUSE_READLINK;
1399 	args.in.h.nodeid = get_node_id(inode);
1400 	args.out.argvar = 1;
1401 	args.out.numargs = 1;
1402 	args.out.args[0].size = PAGE_SIZE - 1;
1403 	args.out.args[0].value = link;
1404 	ret = fuse_simple_request(fc, &args);
1405 	if (ret < 0) {
1406 		kfree(link);
1407 		link = ERR_PTR(ret);
1408 	} else {
1409 		link[ret] = '\0';
1410 		set_delayed_call(done, kfree_link, link);
1411 	}
1412 	fuse_invalidate_atime(inode);
1413 	return link;
1414 }
1415 
/*
 * ->open() for directories: defers to fuse_open_common() with the
 * directory flag set.
 */
static int fuse_dir_open(struct inode *inode, struct file *file)
{
	const bool isdir = true;

	return fuse_open_common(inode, file, isdir);
}
1420 
/*
 * ->release() for directories: releases the open file through
 * fuse_release_common() using the FUSE_RELEASEDIR opcode.  Always
 * returns 0; 'inode' is not used.
 */
static int fuse_dir_release(struct inode *inode, struct file *file)
{
	fuse_release_common(file, FUSE_RELEASEDIR);

	return 0;
}
1427 
1428 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1429 			  int datasync)
1430 {
1431 	return fuse_fsync_common(file, start, end, datasync, 1);
1432 }
1433 
1434 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1435 			    unsigned long arg)
1436 {
1437 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1438 
1439 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1440 	if (fc->minor < 18)
1441 		return -ENOTTY;
1442 
1443 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1444 }
1445 
1446 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1447 				   unsigned long arg)
1448 {
1449 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1450 
1451 	if (fc->minor < 18)
1452 		return -ENOTTY;
1453 
1454 	return fuse_ioctl_common(file, cmd, arg,
1455 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1456 }
1457 
1458 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1459 {
1460 	/* Always update if mtime is explicitly set  */
1461 	if (ivalid & ATTR_MTIME_SET)
1462 		return true;
1463 
1464 	/* Or if kernel i_mtime is the official one */
1465 	if (trust_local_mtime)
1466 		return true;
1467 
1468 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1469 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1470 		return false;
1471 
1472 	/* In all other cases update */
1473 	return true;
1474 }
1475 
1476 static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1477 			   bool trust_local_cmtime)
1478 {
1479 	unsigned ivalid = iattr->ia_valid;
1480 
1481 	if (ivalid & ATTR_MODE)
1482 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1483 	if (ivalid & ATTR_UID)
1484 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1485 	if (ivalid & ATTR_GID)
1486 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1487 	if (ivalid & ATTR_SIZE)
1488 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1489 	if (ivalid & ATTR_ATIME) {
1490 		arg->valid |= FATTR_ATIME;
1491 		arg->atime = iattr->ia_atime.tv_sec;
1492 		arg->atimensec = iattr->ia_atime.tv_nsec;
1493 		if (!(ivalid & ATTR_ATIME_SET))
1494 			arg->valid |= FATTR_ATIME_NOW;
1495 	}
1496 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1497 		arg->valid |= FATTR_MTIME;
1498 		arg->mtime = iattr->ia_mtime.tv_sec;
1499 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1500 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1501 			arg->valid |= FATTR_MTIME_NOW;
1502 	}
1503 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1504 		arg->valid |= FATTR_CTIME;
1505 		arg->ctime = iattr->ia_ctime.tv_sec;
1506 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1507 	}
1508 }
1509 
/*
 * Prevent concurrent writepages on inode
 *
 * This is done by adding a negative bias to the inode write counter
 * and waiting for all pending writes to finish.
 *
 * The inode must be locked by the caller (checked with BUG_ON), and
 * the write counter must not already be negative on entry.
 */
void fuse_set_nowrite(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(!inode_is_locked(inode));

	/* The counter is modified under fc->lock */
	spin_lock(&fc->lock);
	BUG_ON(fi->writectr < 0);
	fi->writectr += FUSE_NOWRITE;
	spin_unlock(&fc->lock);
	/* Sleep until only the bias itself is left in the counter */
	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
}
1529 
/*
 * Allow writepages on inode
 *
 * Remove the bias from the writecounter and send any queued
 * writepages.
 *
 * The counter must be exactly FUSE_NOWRITE on entry (checked with
 * BUG_ON).  The callers visible in this file invoke this with
 * fc->lock held.
 */
static void __fuse_release_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(fi->writectr != FUSE_NOWRITE);
	fi->writectr = 0;
	fuse_flush_writepages(inode);
}
1544 
1545 void fuse_release_nowrite(struct inode *inode)
1546 {
1547 	struct fuse_conn *fc = get_fuse_conn(inode);
1548 
1549 	spin_lock(&fc->lock);
1550 	__fuse_release_nowrite(inode);
1551 	spin_unlock(&fc->lock);
1552 }
1553 
1554 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1555 			      struct inode *inode,
1556 			      struct fuse_setattr_in *inarg_p,
1557 			      struct fuse_attr_out *outarg_p)
1558 {
1559 	args->in.h.opcode = FUSE_SETATTR;
1560 	args->in.h.nodeid = get_node_id(inode);
1561 	args->in.numargs = 1;
1562 	args->in.args[0].size = sizeof(*inarg_p);
1563 	args->in.args[0].value = inarg_p;
1564 	args->out.numargs = 1;
1565 	args->out.args[0].size = sizeof(*outarg_p);
1566 	args->out.args[0].value = outarg_p;
1567 }
1568 
1569 /*
1570  * Flush inode->i_mtime to the server
1571  */
1572 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1573 {
1574 	struct fuse_conn *fc = get_fuse_conn(inode);
1575 	FUSE_ARGS(args);
1576 	struct fuse_setattr_in inarg;
1577 	struct fuse_attr_out outarg;
1578 
1579 	memset(&inarg, 0, sizeof(inarg));
1580 	memset(&outarg, 0, sizeof(outarg));
1581 
1582 	inarg.valid = FATTR_MTIME;
1583 	inarg.mtime = inode->i_mtime.tv_sec;
1584 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1585 	if (fc->minor >= 23) {
1586 		inarg.valid |= FATTR_CTIME;
1587 		inarg.ctime = inode->i_ctime.tv_sec;
1588 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1589 	}
1590 	if (ff) {
1591 		inarg.valid |= FATTR_FH;
1592 		inarg.fh = ff->fh;
1593 	}
1594 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1595 
1596 	return fuse_simple_request(fc, &args);
1597 }
1598 
/*
 * Set attributes, and at the same time refresh them.
 *
 * Truncation is slightly complicated, because the 'truncate' request
 * may fail, in which case we don't want to touch the mapping.
 * vmtruncate() doesn't allow for this case, so do the rlimit checking
 * and the actual truncation by hand.
 *
 * 'file', when non-NULL, supplies the file handle sent with the
 * request.  Returns 0 on success, the error from setattr_prepare() or
 * from the request, or -EIO if the reply changes the file type (the
 * inode is then marked bad).
 */
int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
		    struct file *file)
{
	struct inode *inode = d_inode(dentry);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	FUSE_ARGS(args);
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;
	bool is_truncate = false;
	bool is_wb = fc->writeback_cache;
	loff_t oldsize;
	int err;
	/* With writeback cache, c/mtime of regular files are kept locally */
	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);

	if (!fc->default_permissions)
		attr->ia_valid |= ATTR_FORCE;

	err = setattr_prepare(dentry, attr);
	if (err)
		return err;

	if (attr->ia_valid & ATTR_OPEN) {
		/* Nothing to send when the connection has atomic_o_trunc set */
		if (fc->atomic_o_trunc)
			return 0;
		file = NULL;
	}

	if (attr->ia_valid & ATTR_SIZE)
		is_truncate = true;

	if (is_truncate) {
		/* Block writepages until the request is done */
		fuse_set_nowrite(inode);
		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
		if (trust_local_cmtime && attr->ia_size != inode->i_size)
			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
	}

	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));
	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
	if (file) {
		struct fuse_file *ff = file->private_data;
		inarg.valid |= FATTR_FH;
		inarg.fh = ff->fh;
	}
	if (attr->ia_valid & ATTR_SIZE) {
		/* For mandatory locking in truncate */
		inarg.valid |= FATTR_LOCKOWNER;
		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
	}
	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
	err = fuse_simple_request(fc, &args);
	if (err) {
		/* Interrupted: the outcome is unknown, so drop cached attrs */
		if (err == -EINTR)
			fuse_invalidate_attr(inode);
		goto error;
	}

	/* The reply must not change the file type of the inode */
	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
		make_bad_inode(inode);
		err = -EIO;
		goto error;
	}

	spin_lock(&fc->lock);
	/* the kernel maintains i_mtime locally */
	if (trust_local_cmtime) {
		if (attr->ia_valid & ATTR_MTIME)
			inode->i_mtime = attr->ia_mtime;
		if (attr->ia_valid & ATTR_CTIME)
			inode->i_ctime = attr->ia_ctime;
		/* FIXME: clear I_DIRTY_SYNC? */
	}

	fuse_change_attributes_common(inode, &outarg.attr,
				      attr_timeout(&outarg));
	/* Remember the size before it is overwritten, for the check below */
	oldsize = inode->i_size;
	/* see the comment in fuse_change_attributes() */
	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
		i_size_write(inode, outarg.attr.size);

	if (is_truncate) {
		/* NOTE: this may release/reacquire fc->lock */
		__fuse_release_nowrite(inode);
	}
	spin_unlock(&fc->lock);

	/*
	 * Only call invalidate_inode_pages2() after removing
	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
	 */
	if ((is_truncate || !is_wb) &&
	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
		truncate_pagecache(inode, outarg.attr.size);
		invalidate_inode_pages2(inode->i_mapping);
	}

	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
	return 0;

error:
	/* Undo the nowrite bias taken for the truncate case */
	if (is_truncate)
		fuse_release_nowrite(inode);

	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
	return err;
}
1715 
1716 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1717 {
1718 	struct inode *inode = d_inode(entry);
1719 	struct fuse_conn *fc = get_fuse_conn(inode);
1720 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1721 	int ret;
1722 
1723 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1724 		return -EACCES;
1725 
1726 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1727 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1728 				    ATTR_MODE);
1729 
1730 		/*
1731 		 * The only sane way to reliably kill suid/sgid is to do it in
1732 		 * the userspace filesystem
1733 		 *
1734 		 * This should be done on write(), truncate() and chown().
1735 		 */
1736 		if (!fc->handle_killpriv) {
1737 			int kill;
1738 
1739 			/*
1740 			 * ia_mode calculation may have used stale i_mode.
1741 			 * Refresh and recalculate.
1742 			 */
1743 			ret = fuse_do_getattr(inode, NULL, file);
1744 			if (ret)
1745 				return ret;
1746 
1747 			attr->ia_mode = inode->i_mode;
1748 			kill = should_remove_suid(entry);
1749 			if (kill & ATTR_KILL_SUID) {
1750 				attr->ia_valid |= ATTR_MODE;
1751 				attr->ia_mode &= ~S_ISUID;
1752 			}
1753 			if (kill & ATTR_KILL_SGID) {
1754 				attr->ia_valid |= ATTR_MODE;
1755 				attr->ia_mode &= ~S_ISGID;
1756 			}
1757 		}
1758 	}
1759 	if (!attr->ia_valid)
1760 		return 0;
1761 
1762 	ret = fuse_do_setattr(entry, attr, file);
1763 	if (!ret) {
1764 		/*
1765 		 * If filesystem supports acls it may have updated acl xattrs in
1766 		 * the filesystem, so forget cached acls for the inode.
1767 		 */
1768 		if (fc->posix_acl)
1769 			forget_all_cached_acls(inode);
1770 
1771 		/* Directory mode changed, may need to revalidate access */
1772 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1773 			fuse_invalidate_entry_cache(entry);
1774 	}
1775 	return ret;
1776 }
1777 
/*
 * ->getattr() for FUSE inodes.
 *
 * Returns -EACCES if the current process may not access the
 * filesystem, otherwise the result of fuse_update_attributes(), which
 * is given 'stat' to fill in.  'mnt' is not used.
 */
static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
			struct kstat *stat)
{
	struct inode *inode = d_inode(entry);

	if (fuse_allow_current_process(get_fuse_conn(inode)))
		return fuse_update_attributes(inode, stat, NULL, NULL);

	return -EACCES;
}
1789 
/* Inode operations installed on directories by fuse_init_dir() */
static const struct inode_operations fuse_dir_inode_operations = {
	.lookup		= fuse_lookup,
	.mkdir		= fuse_mkdir,
	.symlink	= fuse_symlink,
	.unlink		= fuse_unlink,
	.rmdir		= fuse_rmdir,
	.rename		= fuse_rename2,
	.link		= fuse_link,
	.setattr	= fuse_setattr,
	.create		= fuse_create,
	.atomic_open	= fuse_atomic_open,
	.mknod		= fuse_mknod,
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
	.listxattr	= fuse_listxattr,
	.get_acl	= fuse_get_acl,
	.set_acl	= fuse_set_acl,
};
1808 
/* File operations installed on directories by fuse_init_dir() */
static const struct file_operations fuse_dir_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= fuse_readdir,
	.open		= fuse_dir_open,
	.release	= fuse_dir_release,
	.fsync		= fuse_dir_fsync,
	.unlocked_ioctl	= fuse_dir_ioctl,
	.compat_ioctl	= fuse_dir_compat_ioctl,
};
1819 
/* Inode operations installed by fuse_init_common() */
static const struct inode_operations fuse_common_inode_operations = {
	.setattr	= fuse_setattr,
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
	.listxattr	= fuse_listxattr,
	.get_acl	= fuse_get_acl,
	.set_acl	= fuse_set_acl,
};
1828 
/*
 * Inode operations installed on symlinks by fuse_init_symlink().
 * Unlike the other tables here there is no .permission or ACL hook.
 */
static const struct inode_operations fuse_symlink_inode_operations = {
	.setattr	= fuse_setattr,
	.get_link	= fuse_get_link,
	.readlink	= generic_readlink,
	.getattr	= fuse_getattr,
	.listxattr	= fuse_listxattr,
};
1836 
/* Install the common inode operations on 'inode' */
void fuse_init_common(struct inode *inode)
{
	inode->i_op = &fuse_common_inode_operations;
}
1841 
/* Install the directory inode and file operations on 'inode' */
void fuse_init_dir(struct inode *inode)
{
	inode->i_op = &fuse_dir_inode_operations;
	inode->i_fop = &fuse_dir_operations;
}
1847 
/* Install the symlink inode operations on 'inode' */
void fuse_init_symlink(struct inode *inode)
{
	inode->i_op = &fuse_symlink_inode_operations;
}
1852