xref: /openbmc/linux/fs/fuse/dir.c (revision 60bcc88ad185d512f5718f2f8dcccb483ea8fb73)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/sched.h>
14 #include <linux/namei.h>
15 #include <linux/slab.h>
16 #include <linux/xattr.h>
17 #include <linux/posix_acl.h>
18 
19 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
20 {
21 	struct fuse_conn *fc = get_fuse_conn(dir);
22 	struct fuse_inode *fi = get_fuse_inode(dir);
23 
24 	if (!fc->do_readdirplus)
25 		return false;
26 	if (!fc->readdirplus_auto)
27 		return true;
28 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
29 		return true;
30 	if (ctx->pos == 0)
31 		return true;
32 	return false;
33 }
34 
35 static void fuse_advise_use_readdirplus(struct inode *dir)
36 {
37 	struct fuse_inode *fi = get_fuse_inode(dir);
38 
39 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
40 }
41 
42 #if BITS_PER_LONG >= 64
43 static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
44 {
45 	entry->d_time = time;
46 }
47 
48 static inline u64 fuse_dentry_time(struct dentry *entry)
49 {
50 	return entry->d_time;
51 }
52 #else
53 /*
54  * On 32 bit archs store the high 32 bits of time in d_fsdata
55  */
56 static void fuse_dentry_settime(struct dentry *entry, u64 time)
57 {
58 	entry->d_time = time;
59 	entry->d_fsdata = (void *) (unsigned long) (time >> 32);
60 }
61 
62 static u64 fuse_dentry_time(struct dentry *entry)
63 {
64 	return (u64) entry->d_time +
65 		((u64) (unsigned long) entry->d_fsdata << 32);
66 }
67 #endif
68 
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until which the dentry/attributes are valid is stored
 * in dentry->d_time and fuse_inode->i_time respectively.
 */
74 
75 /*
76  * Calculate the time in jiffies until a dentry/attributes are valid
77  */
78 static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
79 {
80 	if (sec || nsec) {
81 		struct timespec ts = {sec, nsec};
82 		return get_jiffies_64() + timespec_to_jiffies(&ts);
83 	} else
84 		return 0;
85 }
86 
87 /*
88  * Set dentry and possibly attribute timeouts from the lookup/mk*
89  * replies
90  */
91 static void fuse_change_entry_timeout(struct dentry *entry,
92 				      struct fuse_entry_out *o)
93 {
94 	fuse_dentry_settime(entry,
95 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
96 }
97 
98 static u64 attr_timeout(struct fuse_attr_out *o)
99 {
100 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
101 }
102 
103 static u64 entry_attr_timeout(struct fuse_entry_out *o)
104 {
105 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
106 }
107 
108 /*
109  * Mark the attributes as stale, so that at the next call to
110  * ->getattr() they will be fetched from userspace
111  */
112 void fuse_invalidate_attr(struct inode *inode)
113 {
114 	get_fuse_inode(inode)->i_time = 0;
115 }
116 
/*
 * Invalidate the attributes because of an atime change.  Inodes for
 * which IS_RDONLY() holds are left alone, since atime is not used there.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
126 
127 /*
128  * Just mark the entry as stale, so that a next attempt to look it up
129  * will result in a new lookup call to userspace
130  *
131  * This is called when a dentry is about to become negative and the
132  * timeout is unknown (unlink, rmdir, rename and in some cases
133  * lookup)
134  */
135 void fuse_invalidate_entry_cache(struct dentry *entry)
136 {
137 	fuse_dentry_settime(entry, 0);
138 }
139 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the dcache to
 * invalidate the dentry via d_invalidate().
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	/* Zero the timeout, exactly as fuse_invalidate_entry_cache() does */
	fuse_dentry_settime(entry, 0);
}
149 
150 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
151 			     u64 nodeid, const struct qstr *name,
152 			     struct fuse_entry_out *outarg)
153 {
154 	memset(outarg, 0, sizeof(struct fuse_entry_out));
155 	args->in.h.opcode = FUSE_LOOKUP;
156 	args->in.h.nodeid = nodeid;
157 	args->in.numargs = 1;
158 	args->in.args[0].size = name->len + 1;
159 	args->in.args[0].value = name->name;
160 	args->out.numargs = 1;
161 	args->out.args[0].size = sizeof(struct fuse_entry_out);
162 	args->out.args[0].value = outarg;
163 }
164 
165 u64 fuse_get_attr_version(struct fuse_conn *fc)
166 {
167 	u64 curr_version;
168 
169 	/*
170 	 * The spin lock isn't actually needed on 64bit archs, but we
171 	 * don't yet care too much about such optimizations.
172 	 */
173 	spin_lock(&fc->lock);
174 	curr_version = fc->attr_version;
175 	spin_unlock(&fc->lock);
176 
177 	return curr_version;
178 }
179 
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 *
 * Returns 1 if the dentry is valid, 0 if it is invalid, -ECHILD if the
 * check cannot be completed under LOOKUP_RCU, or -ENOMEM.
 */
static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct dentry *parent;
	struct fuse_conn *fc;
	struct fuse_inode *fi;
	int ret;

	inode = d_inode_rcu(entry);
	if (inode && is_bad_inode(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & LOOKUP_REVAL)) {
		/* Timeout expired or revalidation was forced by the caller */
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* A request to userspace is needed; not possible in RCU mode */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fc = get_fuse_conn(inode);

		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request; passed to fuse_change_attributes() */
		attr_version = fuse_get_attr_version(fc);

		parent = dget_parent(entry);
		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
				 &entry->d_name, &outarg);
		ret = fuse_simple_request(fc, &args);
		dput(parent);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			if (outarg.nodeid != get_node_id(inode)) {
				/*
				 * The name now refers to a different node: queue a
				 * forget for the returned nodeid (the forget link is
				 * handed over, not freed here) and report invalid.
				 */
				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
				goto invalid;
			}
			/* Same node: account for the additional lookup */
			spin_lock(&fc->lock);
			fi->nlookup++;
			spin_unlock(&fc->lock);
		}
		/* The forget link was not queued on this path */
		kfree(forget);
		if (ret == -ENOMEM)
			goto out;
		/* Any other error, or a changed file type, invalidates the dentry */
		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
			goto invalid;

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &outarg.attr,
				       entry_attr_timeout(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* Still within the timeout: only handle FUSE_I_INIT_RDPLUS */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			/* Only test the bit here; -ECHILD makes the caller retry */
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			parent = dget_parent(entry);
			fuse_advise_use_readdirplus(d_inode(parent));
			dput(parent);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
272 
273 static int invalid_nodeid(u64 nodeid)
274 {
275 	return !nodeid || nodeid == FUSE_ROOT_ID;
276 }
277 
/* Dentry operations table: only ->d_revalidate is provided */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
};
281 
/*
 * Return 1 if the mode @m describes one of the supported file types
 * (regular, directory, symlink, char/block device, fifo or socket),
 * 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
287 
288 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
289 		     struct fuse_entry_out *outarg, struct inode **inode)
290 {
291 	struct fuse_conn *fc = get_fuse_conn_super(sb);
292 	FUSE_ARGS(args);
293 	struct fuse_forget_link *forget;
294 	u64 attr_version;
295 	int err;
296 
297 	*inode = NULL;
298 	err = -ENAMETOOLONG;
299 	if (name->len > FUSE_NAME_MAX)
300 		goto out;
301 
302 
303 	forget = fuse_alloc_forget();
304 	err = -ENOMEM;
305 	if (!forget)
306 		goto out;
307 
308 	attr_version = fuse_get_attr_version(fc);
309 
310 	fuse_lookup_init(fc, &args, nodeid, name, outarg);
311 	err = fuse_simple_request(fc, &args);
312 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
313 	if (err || !outarg->nodeid)
314 		goto out_put_forget;
315 
316 	err = -EIO;
317 	if (!outarg->nodeid)
318 		goto out_put_forget;
319 	if (!fuse_valid_type(outarg->attr.mode))
320 		goto out_put_forget;
321 
322 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
323 			   &outarg->attr, entry_attr_timeout(outarg),
324 			   attr_version);
325 	err = -ENOMEM;
326 	if (!*inode) {
327 		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
328 		goto out;
329 	}
330 	err = 0;
331 
332  out_put_forget:
333 	kfree(forget);
334  out:
335 	return err;
336 }
337 
338 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
339 				  unsigned int flags)
340 {
341 	int err;
342 	struct fuse_entry_out outarg;
343 	struct inode *inode;
344 	struct dentry *newent;
345 	bool outarg_valid = true;
346 
347 	fuse_lock_inode(dir);
348 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
349 			       &outarg, &inode);
350 	fuse_unlock_inode(dir);
351 	if (err == -ENOENT) {
352 		outarg_valid = false;
353 		err = 0;
354 	}
355 	if (err)
356 		goto out_err;
357 
358 	err = -EIO;
359 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
360 		goto out_iput;
361 
362 	newent = d_splice_alias(inode, entry);
363 	err = PTR_ERR(newent);
364 	if (IS_ERR(newent))
365 		goto out_err;
366 
367 	entry = newent ? newent : entry;
368 	if (outarg_valid)
369 		fuse_change_entry_timeout(entry, &outarg);
370 	else
371 		fuse_invalidate_entry_cache(entry);
372 
373 	fuse_advise_use_readdirplus(dir);
374 	return newent;
375 
376  out_iput:
377 	iput(inode);
378  out_err:
379 	return ERR_PTR(err);
380 }
381 
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 *
 * Sends FUSE_CREATE for @entry in @dir and, on success, instantiates the
 * dentry and finishes opening @file.  Returns 0 or a negative errno; the
 * caller in this file treats -ENOSYS as "not supported".
 */
static int fuse_create_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode, int *opened)
{
	int err;
	struct inode *inode;
	struct fuse_conn *fc = get_fuse_conn(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out outopen;
	struct fuse_entry_out outentry;
	struct fuse_file *ff;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	/* Allocated up front so a forget can be queued if fuse_iget() fails */
	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	err = -ENOMEM;
	ff = fuse_file_alloc(fc);
	if (!ff)
		goto out_put_forget_req;

	/* Apply the umask here unless the connection has dont_mask set */
	if (!fc->dont_mask)
		mode &= ~current_umask();

	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();
	/* Request: create-in struct plus the name; reply: entry plus open-out */
	args.in.h.opcode = FUSE_CREATE;
	args.in.h.nodeid = get_node_id(dir);
	args.in.numargs = 2;
	args.in.args[0].size = sizeof(inarg);
	args.in.args[0].value = &inarg;
	args.in.args[1].size = entry->d_name.len + 1;
	args.in.args[1].value = entry->d_name.name;
	args.out.numargs = 2;
	args.out.args[0].size = sizeof(outentry);
	args.out.args[0].value = &outentry;
	args.out.args[1].size = sizeof(outopen);
	args.out.args[1].value = &outopen;
	err = fuse_simple_request(fc, &args);
	if (err)
		goto out_free_ff;

	/* The reply must describe a regular file with a usable nodeid */
	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
		goto out_free_ff;

	ff->fh = outopen.fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopen.open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, entry_attr_timeout(&outentry), 0);
	if (!inode) {
		/*
		 * No inode could be set up: release the file that userspace
		 * already opened (with the creation flags stripped) and queue
		 * a forget for the node.  Neither ff nor forget is freed here
		 * because both were handed to the calls below.
		 */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(ff, flags);
		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	kfree(forget);
	d_instantiate(entry, inode);
	fuse_change_entry_timeout(entry, &outentry);
	/* The directory's attributes are stale after adding an entry */
	fuse_invalidate_attr(dir);
	err = finish_open(file, entry, generic_file_open, opened);
	if (err) {
		fuse_sync_release(ff, flags);
	} else {
		file->private_data = fuse_file_get(ff);
		fuse_finish_open(inode, file);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}
476 
/* Defined further down; needed here for the mknod fallback */
static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);

/*
 * Look up @entry if that is still pending, then create and open it when
 * O_CREAT is set and the entry is negative.
 *
 * If O_CREAT is not set or the entry is positive, no open is done here
 * and the result of finish_no_open() is returned.  When the filesystem
 * lacks FUSE_CREATE (fc->no_create, or -ENOSYS from the request) the file
 * is created with fuse_mknod() and again no open is performed here.
 */
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode, int *opened)
{
	int err;
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct dentry *res = NULL;

	if (d_in_lookup(entry)) {
		res = fuse_lookup(dir, entry, 0);
		if (IS_ERR(res))
			return PTR_ERR(res);

		/* Continue with the dentry returned by the lookup, if any */
		if (res)
			entry = res;
	}

	if (!(flags & O_CREAT) || d_really_is_positive(entry))
		goto no_open;

	/* Only creates */
	*opened |= FILE_CREATED;

	if (fc->no_create)
		goto mknod;

	err = fuse_create_open(dir, entry, file, flags, mode, opened);
	if (err == -ENOSYS) {
		/* Remember that FUSE_CREATE is unsupported on this connection */
		fc->no_create = 1;
		goto mknod;
	}
out_dput:
	/* Drop the dentry obtained from fuse_lookup() (res may be NULL) */
	dput(res);
	return err;

mknod:
	err = fuse_mknod(dir, entry, mode, 0);
	if (err)
		goto out_dput;
no_open:
	/* res is passed on to finish_no_open() instead of being dropped */
	return finish_no_open(file, res);
}
520 
521 /*
522  * Code shared between mknod, mkdir, symlink and link
523  */
524 static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
525 			    struct inode *dir, struct dentry *entry,
526 			    umode_t mode)
527 {
528 	struct fuse_entry_out outarg;
529 	struct inode *inode;
530 	int err;
531 	struct fuse_forget_link *forget;
532 
533 	forget = fuse_alloc_forget();
534 	if (!forget)
535 		return -ENOMEM;
536 
537 	memset(&outarg, 0, sizeof(outarg));
538 	args->in.h.nodeid = get_node_id(dir);
539 	args->out.numargs = 1;
540 	args->out.args[0].size = sizeof(outarg);
541 	args->out.args[0].value = &outarg;
542 	err = fuse_simple_request(fc, args);
543 	if (err)
544 		goto out_put_forget_req;
545 
546 	err = -EIO;
547 	if (invalid_nodeid(outarg.nodeid))
548 		goto out_put_forget_req;
549 
550 	if ((outarg.attr.mode ^ mode) & S_IFMT)
551 		goto out_put_forget_req;
552 
553 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
554 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
555 	if (!inode) {
556 		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
557 		return -ENOMEM;
558 	}
559 	kfree(forget);
560 
561 	err = d_instantiate_no_diralias(entry, inode);
562 	if (err)
563 		return err;
564 
565 	fuse_change_entry_timeout(entry, &outarg);
566 	fuse_invalidate_attr(dir);
567 	return 0;
568 
569  out_put_forget_req:
570 	kfree(forget);
571 	return err;
572 }
573 
574 static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
575 		      dev_t rdev)
576 {
577 	struct fuse_mknod_in inarg;
578 	struct fuse_conn *fc = get_fuse_conn(dir);
579 	FUSE_ARGS(args);
580 
581 	if (!fc->dont_mask)
582 		mode &= ~current_umask();
583 
584 	memset(&inarg, 0, sizeof(inarg));
585 	inarg.mode = mode;
586 	inarg.rdev = new_encode_dev(rdev);
587 	inarg.umask = current_umask();
588 	args.in.h.opcode = FUSE_MKNOD;
589 	args.in.numargs = 2;
590 	args.in.args[0].size = sizeof(inarg);
591 	args.in.args[0].value = &inarg;
592 	args.in.args[1].size = entry->d_name.len + 1;
593 	args.in.args[1].value = entry->d_name.name;
594 	return create_new_entry(fc, &args, dir, entry, mode);
595 }
596 
597 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
598 		       bool excl)
599 {
600 	return fuse_mknod(dir, entry, mode, 0);
601 }
602 
603 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
604 {
605 	struct fuse_mkdir_in inarg;
606 	struct fuse_conn *fc = get_fuse_conn(dir);
607 	FUSE_ARGS(args);
608 
609 	if (!fc->dont_mask)
610 		mode &= ~current_umask();
611 
612 	memset(&inarg, 0, sizeof(inarg));
613 	inarg.mode = mode;
614 	inarg.umask = current_umask();
615 	args.in.h.opcode = FUSE_MKDIR;
616 	args.in.numargs = 2;
617 	args.in.args[0].size = sizeof(inarg);
618 	args.in.args[0].value = &inarg;
619 	args.in.args[1].size = entry->d_name.len + 1;
620 	args.in.args[1].value = entry->d_name.name;
621 	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
622 }
623 
624 static int fuse_symlink(struct inode *dir, struct dentry *entry,
625 			const char *link)
626 {
627 	struct fuse_conn *fc = get_fuse_conn(dir);
628 	unsigned len = strlen(link) + 1;
629 	FUSE_ARGS(args);
630 
631 	args.in.h.opcode = FUSE_SYMLINK;
632 	args.in.numargs = 2;
633 	args.in.args[0].size = entry->d_name.len + 1;
634 	args.in.args[0].value = entry->d_name.name;
635 	args.in.args[1].size = len;
636 	args.in.args[1].value = link;
637 	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
638 }
639 
640 void fuse_update_ctime(struct inode *inode)
641 {
642 	if (!IS_NOCMTIME(inode)) {
643 		inode->i_ctime = current_fs_time(inode->i_sb);
644 		mark_inode_dirty_sync(inode);
645 	}
646 }
647 
648 static int fuse_unlink(struct inode *dir, struct dentry *entry)
649 {
650 	int err;
651 	struct fuse_conn *fc = get_fuse_conn(dir);
652 	FUSE_ARGS(args);
653 
654 	args.in.h.opcode = FUSE_UNLINK;
655 	args.in.h.nodeid = get_node_id(dir);
656 	args.in.numargs = 1;
657 	args.in.args[0].size = entry->d_name.len + 1;
658 	args.in.args[0].value = entry->d_name.name;
659 	err = fuse_simple_request(fc, &args);
660 	if (!err) {
661 		struct inode *inode = d_inode(entry);
662 		struct fuse_inode *fi = get_fuse_inode(inode);
663 
664 		spin_lock(&fc->lock);
665 		fi->attr_version = ++fc->attr_version;
666 		/*
667 		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
668 		 * happen if userspace filesystem is careless.  It would be
669 		 * difficult to enforce correct nlink usage so just ignore this
670 		 * condition here
671 		 */
672 		if (inode->i_nlink > 0)
673 			drop_nlink(inode);
674 		spin_unlock(&fc->lock);
675 		fuse_invalidate_attr(inode);
676 		fuse_invalidate_attr(dir);
677 		fuse_invalidate_entry_cache(entry);
678 		fuse_update_ctime(inode);
679 	} else if (err == -EINTR)
680 		fuse_invalidate_entry(entry);
681 	return err;
682 }
683 
684 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
685 {
686 	int err;
687 	struct fuse_conn *fc = get_fuse_conn(dir);
688 	FUSE_ARGS(args);
689 
690 	args.in.h.opcode = FUSE_RMDIR;
691 	args.in.h.nodeid = get_node_id(dir);
692 	args.in.numargs = 1;
693 	args.in.args[0].size = entry->d_name.len + 1;
694 	args.in.args[0].value = entry->d_name.name;
695 	err = fuse_simple_request(fc, &args);
696 	if (!err) {
697 		clear_nlink(d_inode(entry));
698 		fuse_invalidate_attr(dir);
699 		fuse_invalidate_entry_cache(entry);
700 	} else if (err == -EINTR)
701 		fuse_invalidate_entry(entry);
702 	return err;
703 }
704 
705 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
706 			      struct inode *newdir, struct dentry *newent,
707 			      unsigned int flags, int opcode, size_t argsize)
708 {
709 	int err;
710 	struct fuse_rename2_in inarg;
711 	struct fuse_conn *fc = get_fuse_conn(olddir);
712 	FUSE_ARGS(args);
713 
714 	memset(&inarg, 0, argsize);
715 	inarg.newdir = get_node_id(newdir);
716 	inarg.flags = flags;
717 	args.in.h.opcode = opcode;
718 	args.in.h.nodeid = get_node_id(olddir);
719 	args.in.numargs = 3;
720 	args.in.args[0].size = argsize;
721 	args.in.args[0].value = &inarg;
722 	args.in.args[1].size = oldent->d_name.len + 1;
723 	args.in.args[1].value = oldent->d_name.name;
724 	args.in.args[2].size = newent->d_name.len + 1;
725 	args.in.args[2].value = newent->d_name.name;
726 	err = fuse_simple_request(fc, &args);
727 	if (!err) {
728 		/* ctime changes */
729 		fuse_invalidate_attr(d_inode(oldent));
730 		fuse_update_ctime(d_inode(oldent));
731 
732 		if (flags & RENAME_EXCHANGE) {
733 			fuse_invalidate_attr(d_inode(newent));
734 			fuse_update_ctime(d_inode(newent));
735 		}
736 
737 		fuse_invalidate_attr(olddir);
738 		if (olddir != newdir)
739 			fuse_invalidate_attr(newdir);
740 
741 		/* newent will end up negative */
742 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
743 			fuse_invalidate_attr(d_inode(newent));
744 			fuse_invalidate_entry_cache(newent);
745 			fuse_update_ctime(d_inode(newent));
746 		}
747 	} else if (err == -EINTR) {
748 		/* If request was interrupted, DEITY only knows if the
749 		   rename actually took place.  If the invalidation
750 		   fails (e.g. some process has CWD under the renamed
751 		   directory), then there can be inconsistency between
752 		   the dcache and the real filesystem.  Tough luck. */
753 		fuse_invalidate_entry(oldent);
754 		if (d_really_is_positive(newent))
755 			fuse_invalidate_entry(newent);
756 	}
757 
758 	return err;
759 }
760 
761 static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
762 			struct inode *newdir, struct dentry *newent,
763 			unsigned int flags)
764 {
765 	struct fuse_conn *fc = get_fuse_conn(olddir);
766 	int err;
767 
768 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
769 		return -EINVAL;
770 
771 	if (flags) {
772 		if (fc->no_rename2 || fc->minor < 23)
773 			return -EINVAL;
774 
775 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
776 					 FUSE_RENAME2,
777 					 sizeof(struct fuse_rename2_in));
778 		if (err == -ENOSYS) {
779 			fc->no_rename2 = 1;
780 			err = -EINVAL;
781 		}
782 	} else {
783 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
784 					 FUSE_RENAME,
785 					 sizeof(struct fuse_rename_in));
786 	}
787 
788 	return err;
789 }
790 
791 static int fuse_link(struct dentry *entry, struct inode *newdir,
792 		     struct dentry *newent)
793 {
794 	int err;
795 	struct fuse_link_in inarg;
796 	struct inode *inode = d_inode(entry);
797 	struct fuse_conn *fc = get_fuse_conn(inode);
798 	FUSE_ARGS(args);
799 
800 	memset(&inarg, 0, sizeof(inarg));
801 	inarg.oldnodeid = get_node_id(inode);
802 	args.in.h.opcode = FUSE_LINK;
803 	args.in.numargs = 2;
804 	args.in.args[0].size = sizeof(inarg);
805 	args.in.args[0].value = &inarg;
806 	args.in.args[1].size = newent->d_name.len + 1;
807 	args.in.args[1].value = newent->d_name.name;
808 	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
809 	/* Contrary to "normal" filesystems it can happen that link
810 	   makes two "logical" inodes point to the same "physical"
811 	   inode.  We invalidate the attributes of the old one, so it
812 	   will reflect changes in the backing inode (link count,
813 	   etc.)
814 	*/
815 	if (!err) {
816 		struct fuse_inode *fi = get_fuse_inode(inode);
817 
818 		spin_lock(&fc->lock);
819 		fi->attr_version = ++fc->attr_version;
820 		inc_nlink(inode);
821 		spin_unlock(&fc->lock);
822 		fuse_invalidate_attr(inode);
823 		fuse_update_ctime(inode);
824 	} else if (err == -EINTR) {
825 		fuse_invalidate_attr(inode);
826 	}
827 	return err;
828 }
829 
830 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
831 			  struct kstat *stat)
832 {
833 	unsigned int blkbits;
834 	struct fuse_conn *fc = get_fuse_conn(inode);
835 
836 	/* see the comment in fuse_change_attributes() */
837 	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
838 		attr->size = i_size_read(inode);
839 		attr->mtime = inode->i_mtime.tv_sec;
840 		attr->mtimensec = inode->i_mtime.tv_nsec;
841 		attr->ctime = inode->i_ctime.tv_sec;
842 		attr->ctimensec = inode->i_ctime.tv_nsec;
843 	}
844 
845 	stat->dev = inode->i_sb->s_dev;
846 	stat->ino = attr->ino;
847 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
848 	stat->nlink = attr->nlink;
849 	stat->uid = make_kuid(&init_user_ns, attr->uid);
850 	stat->gid = make_kgid(&init_user_ns, attr->gid);
851 	stat->rdev = inode->i_rdev;
852 	stat->atime.tv_sec = attr->atime;
853 	stat->atime.tv_nsec = attr->atimensec;
854 	stat->mtime.tv_sec = attr->mtime;
855 	stat->mtime.tv_nsec = attr->mtimensec;
856 	stat->ctime.tv_sec = attr->ctime;
857 	stat->ctime.tv_nsec = attr->ctimensec;
858 	stat->size = attr->size;
859 	stat->blocks = attr->blocks;
860 
861 	if (attr->blksize != 0)
862 		blkbits = ilog2(attr->blksize);
863 	else
864 		blkbits = inode->i_sb->s_blocksize_bits;
865 
866 	stat->blksize = 1 << blkbits;
867 }
868 
869 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
870 			   struct file *file)
871 {
872 	int err;
873 	struct fuse_getattr_in inarg;
874 	struct fuse_attr_out outarg;
875 	struct fuse_conn *fc = get_fuse_conn(inode);
876 	FUSE_ARGS(args);
877 	u64 attr_version;
878 
879 	attr_version = fuse_get_attr_version(fc);
880 
881 	memset(&inarg, 0, sizeof(inarg));
882 	memset(&outarg, 0, sizeof(outarg));
883 	/* Directories have separate file-handle space */
884 	if (file && S_ISREG(inode->i_mode)) {
885 		struct fuse_file *ff = file->private_data;
886 
887 		inarg.getattr_flags |= FUSE_GETATTR_FH;
888 		inarg.fh = ff->fh;
889 	}
890 	args.in.h.opcode = FUSE_GETATTR;
891 	args.in.h.nodeid = get_node_id(inode);
892 	args.in.numargs = 1;
893 	args.in.args[0].size = sizeof(inarg);
894 	args.in.args[0].value = &inarg;
895 	args.out.numargs = 1;
896 	args.out.args[0].size = sizeof(outarg);
897 	args.out.args[0].value = &outarg;
898 	err = fuse_simple_request(fc, &args);
899 	if (!err) {
900 		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
901 			make_bad_inode(inode);
902 			err = -EIO;
903 		} else {
904 			fuse_change_attributes(inode, &outarg.attr,
905 					       attr_timeout(&outarg),
906 					       attr_version);
907 			if (stat)
908 				fuse_fillattr(inode, &outarg.attr, stat);
909 		}
910 	}
911 	return err;
912 }
913 
914 int fuse_update_attributes(struct inode *inode, struct kstat *stat,
915 			   struct file *file, bool *refreshed)
916 {
917 	struct fuse_inode *fi = get_fuse_inode(inode);
918 	int err;
919 	bool r;
920 
921 	if (time_before64(fi->i_time, get_jiffies_64())) {
922 		r = true;
923 		forget_all_cached_acls(inode);
924 		err = fuse_do_getattr(inode, stat, file);
925 	} else {
926 		r = false;
927 		err = 0;
928 		if (stat) {
929 			generic_fillattr(inode, stat);
930 			stat->mode = fi->orig_i_mode;
931 			stat->ino = fi->orig_ino;
932 		}
933 	}
934 
935 	if (refreshed != NULL)
936 		*refreshed = r;
937 
938 	return err;
939 }
940 
/*
 * Invalidate the cached entry @name in the directory with node id
 * @parent_nodeid.
 *
 * If @child_nodeid is nonzero and the entry is positive, the entry is
 * additionally deleted from the dcache, provided it refers to that node,
 * is not a mountpoint and (for a directory) is empty.
 *
 * Returns 0 on success, -ENOENT if the parent, its alias, the entry or a
 * matching child cannot be found, -ENOTDIR if the parent is not a
 * directory, -EBUSY for a mountpoint, -ENOTEMPTY for a non-empty
 * directory.
 */
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
	if (!parent)
		return -ENOENT;

	inode_lock(parent);
	if (!S_ISDIR(parent->i_mode))
		goto unlock;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto unlock;

	/* d_lookup() needs the hash of the name; @name is updated in place */
	name->hash = full_name_hash(dir, name->name, name->len);
	entry = d_lookup(dir, name);
	dput(dir);
	if (!entry)
		goto unlock;

	fuse_invalidate_attr(parent);
	fuse_invalidate_entry(entry);

	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		inode_lock(d_inode(entry));
		/* Only delete the entry if it still names the expected node */
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			/* Drop unused cached children before the emptiness check */
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		inode_unlock(d_inode(entry));
		/* d_delete() is called after the child inode lock is released */
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}
	dput(entry);

 unlock:
	inode_unlock(parent);
	iput(parent);
	return err;
}
1006 
1007 /*
1008  * Calling into a user-controlled filesystem gives the filesystem
1009  * daemon ptrace-like capabilities over the current process.  This
1010  * means, that the filesystem daemon is able to record the exact
1011  * filesystem operations performed, and can also control the behavior
1012  * of the requester process in otherwise impossible ways.  For example
1013  * it can delay the operation for arbitrary length of time allowing
1014  * DoS against the requester.
1015  *
1016  * For this reason only those processes can call into the filesystem,
1017  * for which the owner of the mount has ptrace privilege.  This
1018  * excludes processes started by other users, suid or sgid processes.
1019  */
1020 int fuse_allow_current_process(struct fuse_conn *fc)
1021 {
1022 	const struct cred *cred;
1023 
1024 	if (fc->flags & FUSE_ALLOW_OTHER)
1025 		return 1;
1026 
1027 	cred = current_cred();
1028 	if (uid_eq(cred->euid, fc->user_id) &&
1029 	    uid_eq(cred->suid, fc->user_id) &&
1030 	    uid_eq(cred->uid,  fc->user_id) &&
1031 	    gid_eq(cred->egid, fc->group_id) &&
1032 	    gid_eq(cred->sgid, fc->group_id) &&
1033 	    gid_eq(cred->gid,  fc->group_id))
1034 		return 1;
1035 
1036 	return 0;
1037 }
1038 
1039 static int fuse_access(struct inode *inode, int mask)
1040 {
1041 	struct fuse_conn *fc = get_fuse_conn(inode);
1042 	FUSE_ARGS(args);
1043 	struct fuse_access_in inarg;
1044 	int err;
1045 
1046 	BUG_ON(mask & MAY_NOT_BLOCK);
1047 
1048 	if (fc->no_access)
1049 		return 0;
1050 
1051 	memset(&inarg, 0, sizeof(inarg));
1052 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1053 	args.in.h.opcode = FUSE_ACCESS;
1054 	args.in.h.nodeid = get_node_id(inode);
1055 	args.in.numargs = 1;
1056 	args.in.args[0].size = sizeof(inarg);
1057 	args.in.args[0].value = &inarg;
1058 	err = fuse_simple_request(fc, &args);
1059 	if (err == -ENOSYS) {
1060 		fc->no_access = 1;
1061 		err = 0;
1062 	}
1063 	return err;
1064 }
1065 
1066 static int fuse_perm_getattr(struct inode *inode, int mask)
1067 {
1068 	if (mask & MAY_NOT_BLOCK)
1069 		return -ECHILD;
1070 
1071 	forget_all_cached_acls(inode);
1072 	return fuse_do_getattr(inode, NULL, NULL);
1073 }
1074 
/*
 * Check permission.  The two basic access models of FUSE are:
 *
 * 1) Local access checking ('default_permissions' mount option) based
 * on file mode.  This is the plain old disk filesystem permission
 * model.
 *
 * 2) "Remote" access checking, where server is responsible for
 * checking permission in each inode operation.  An exception to this
 * is if ->permission() was invoked from sys_access() in which case an
 * access request is sent.  Execute permission is still checked
 * locally based on file mode.
 *
 * Returns 0 if access is granted, -EACCES if it is denied, or another
 * negative errno from refreshing the attributes or from the access
 * request.
 */
static int fuse_permission(struct inode *inode, int mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	bool refreshed = false;
	int err = 0;

	if (!fuse_allow_current_process(fc))
		return -EACCES;

	/*
	 * If attributes are needed, refresh them before proceeding
	 */
	if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		/* Only fetch when the attribute timeout has expired */
		if (time_before64(fi->i_time, get_jiffies_64())) {
			refreshed = true;

			err = fuse_perm_getattr(inode, mask);
			if (err)
				return err;
		}
	}

	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
		err = generic_permission(inode, mask);

		/* If permission is denied, try to refresh file
		   attributes.  This is also needed, because the root
		   node will at first have no permissions */
		if (err == -EACCES && !refreshed) {
			err = fuse_perm_getattr(inode, mask);
			if (!err)
				err = generic_permission(inode, mask);
		}

		/* Note: the opposite of the above test does not
		   exist.  So if permissions are revoked this won't be
		   noticed immediately, only after the attribute
		   timeout has expired */
	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
		/* Remote model: let the server decide via an access request */
		err = fuse_access(inode, mask);
	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		/* Remote model: execute permission is still checked locally */
		if (!(inode->i_mode & S_IXUGO)) {
			/* Attributes are already fresh, so the denial is final */
			if (refreshed)
				return -EACCES;

			err = fuse_perm_getattr(inode, mask);
			if (!err && !(inode->i_mode & S_IXUGO))
				return -EACCES;
		}
	}
	return err;
}
1143 
1144 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1145 			 struct dir_context *ctx)
1146 {
1147 	while (nbytes >= FUSE_NAME_OFFSET) {
1148 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1149 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
1150 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1151 			return -EIO;
1152 		if (reclen > nbytes)
1153 			break;
1154 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1155 			return -EIO;
1156 
1157 		if (!dir_emit(ctx, dirent->name, dirent->namelen,
1158 			       dirent->ino, dirent->type))
1159 			break;
1160 
1161 		buf += reclen;
1162 		nbytes -= reclen;
1163 		ctx->pos = dirent->off;
1164 	}
1165 
1166 	return 0;
1167 }
1168 
1169 static int fuse_direntplus_link(struct file *file,
1170 				struct fuse_direntplus *direntplus,
1171 				u64 attr_version)
1172 {
1173 	struct fuse_entry_out *o = &direntplus->entry_out;
1174 	struct fuse_dirent *dirent = &direntplus->dirent;
1175 	struct dentry *parent = file->f_path.dentry;
1176 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1177 	struct dentry *dentry;
1178 	struct dentry *alias;
1179 	struct inode *dir = d_inode(parent);
1180 	struct fuse_conn *fc;
1181 	struct inode *inode;
1182 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1183 
1184 	if (!o->nodeid) {
1185 		/*
1186 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1187 		 * ENOENT. Instead, it only means the userspace filesystem did
1188 		 * not want to return attributes/handle for this entry.
1189 		 *
1190 		 * So do nothing.
1191 		 */
1192 		return 0;
1193 	}
1194 
1195 	if (name.name[0] == '.') {
1196 		/*
1197 		 * We could potentially refresh the attributes of the directory
1198 		 * and its parent?
1199 		 */
1200 		if (name.len == 1)
1201 			return 0;
1202 		if (name.name[1] == '.' && name.len == 2)
1203 			return 0;
1204 	}
1205 
1206 	if (invalid_nodeid(o->nodeid))
1207 		return -EIO;
1208 	if (!fuse_valid_type(o->attr.mode))
1209 		return -EIO;
1210 
1211 	fc = get_fuse_conn(dir);
1212 
1213 	name.hash = full_name_hash(parent, name.name, name.len);
1214 	dentry = d_lookup(parent, &name);
1215 	if (!dentry) {
1216 retry:
1217 		dentry = d_alloc_parallel(parent, &name, &wq);
1218 		if (IS_ERR(dentry))
1219 			return PTR_ERR(dentry);
1220 	}
1221 	if (!d_in_lookup(dentry)) {
1222 		struct fuse_inode *fi;
1223 		inode = d_inode(dentry);
1224 		if (!inode ||
1225 		    get_node_id(inode) != o->nodeid ||
1226 		    ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
1227 			d_invalidate(dentry);
1228 			dput(dentry);
1229 			goto retry;
1230 		}
1231 		if (is_bad_inode(inode)) {
1232 			dput(dentry);
1233 			return -EIO;
1234 		}
1235 
1236 		fi = get_fuse_inode(inode);
1237 		spin_lock(&fc->lock);
1238 		fi->nlookup++;
1239 		spin_unlock(&fc->lock);
1240 
1241 		forget_all_cached_acls(inode);
1242 		fuse_change_attributes(inode, &o->attr,
1243 				       entry_attr_timeout(o),
1244 				       attr_version);
1245 		/*
1246 		 * The other branch comes via fuse_iget()
1247 		 * which bumps nlookup inside
1248 		 */
1249 	} else {
1250 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1251 				  &o->attr, entry_attr_timeout(o),
1252 				  attr_version);
1253 		if (!inode)
1254 			inode = ERR_PTR(-ENOMEM);
1255 
1256 		alias = d_splice_alias(inode, dentry);
1257 		d_lookup_done(dentry);
1258 		if (alias) {
1259 			dput(dentry);
1260 			dentry = alias;
1261 		}
1262 		if (IS_ERR(dentry))
1263 			return PTR_ERR(dentry);
1264 	}
1265 	if (fc->readdirplus_auto)
1266 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
1267 	fuse_change_entry_timeout(dentry, o);
1268 
1269 	dput(dentry);
1270 	return 0;
1271 }
1272 
1273 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1274 			     struct dir_context *ctx, u64 attr_version)
1275 {
1276 	struct fuse_direntplus *direntplus;
1277 	struct fuse_dirent *dirent;
1278 	size_t reclen;
1279 	int over = 0;
1280 	int ret;
1281 
1282 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1283 		direntplus = (struct fuse_direntplus *) buf;
1284 		dirent = &direntplus->dirent;
1285 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1286 
1287 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1288 			return -EIO;
1289 		if (reclen > nbytes)
1290 			break;
1291 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1292 			return -EIO;
1293 
1294 		if (!over) {
1295 			/* We fill entries into dstbuf only as much as
1296 			   it can hold. But we still continue iterating
1297 			   over remaining entries to link them. If not,
1298 			   we need to send a FORGET for each of those
1299 			   which we did not link.
1300 			*/
1301 			over = !dir_emit(ctx, dirent->name, dirent->namelen,
1302 				       dirent->ino, dirent->type);
1303 			ctx->pos = dirent->off;
1304 		}
1305 
1306 		buf += reclen;
1307 		nbytes -= reclen;
1308 
1309 		ret = fuse_direntplus_link(file, direntplus, attr_version);
1310 		if (ret)
1311 			fuse_force_forget(file, direntplus->entry_out.nodeid);
1312 	}
1313 
1314 	return 0;
1315 }
1316 
1317 static int fuse_readdir(struct file *file, struct dir_context *ctx)
1318 {
1319 	int plus, err;
1320 	size_t nbytes;
1321 	struct page *page;
1322 	struct inode *inode = file_inode(file);
1323 	struct fuse_conn *fc = get_fuse_conn(inode);
1324 	struct fuse_req *req;
1325 	u64 attr_version = 0;
1326 
1327 	if (is_bad_inode(inode))
1328 		return -EIO;
1329 
1330 	req = fuse_get_req(fc, 1);
1331 	if (IS_ERR(req))
1332 		return PTR_ERR(req);
1333 
1334 	page = alloc_page(GFP_KERNEL);
1335 	if (!page) {
1336 		fuse_put_request(fc, req);
1337 		return -ENOMEM;
1338 	}
1339 
1340 	plus = fuse_use_readdirplus(inode, ctx);
1341 	req->out.argpages = 1;
1342 	req->num_pages = 1;
1343 	req->pages[0] = page;
1344 	req->page_descs[0].length = PAGE_SIZE;
1345 	if (plus) {
1346 		attr_version = fuse_get_attr_version(fc);
1347 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1348 			       FUSE_READDIRPLUS);
1349 	} else {
1350 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1351 			       FUSE_READDIR);
1352 	}
1353 	fuse_lock_inode(inode);
1354 	fuse_request_send(fc, req);
1355 	fuse_unlock_inode(inode);
1356 	nbytes = req->out.args[0].size;
1357 	err = req->out.h.error;
1358 	fuse_put_request(fc, req);
1359 	if (!err) {
1360 		if (plus) {
1361 			err = parse_dirplusfile(page_address(page), nbytes,
1362 						file, ctx,
1363 						attr_version);
1364 		} else {
1365 			err = parse_dirfile(page_address(page), nbytes, file,
1366 					    ctx);
1367 		}
1368 	}
1369 
1370 	__free_page(page);
1371 	fuse_invalidate_atime(inode);
1372 	return err;
1373 }
1374 
1375 static const char *fuse_get_link(struct dentry *dentry,
1376 				 struct inode *inode,
1377 				 struct delayed_call *done)
1378 {
1379 	struct fuse_conn *fc = get_fuse_conn(inode);
1380 	FUSE_ARGS(args);
1381 	char *link;
1382 	ssize_t ret;
1383 
1384 	if (!dentry)
1385 		return ERR_PTR(-ECHILD);
1386 
1387 	link = kmalloc(PAGE_SIZE, GFP_KERNEL);
1388 	if (!link)
1389 		return ERR_PTR(-ENOMEM);
1390 
1391 	args.in.h.opcode = FUSE_READLINK;
1392 	args.in.h.nodeid = get_node_id(inode);
1393 	args.out.argvar = 1;
1394 	args.out.numargs = 1;
1395 	args.out.args[0].size = PAGE_SIZE - 1;
1396 	args.out.args[0].value = link;
1397 	ret = fuse_simple_request(fc, &args);
1398 	if (ret < 0) {
1399 		kfree(link);
1400 		link = ERR_PTR(ret);
1401 	} else {
1402 		link[ret] = '\0';
1403 		set_delayed_call(done, kfree_link, link);
1404 	}
1405 	fuse_invalidate_atime(inode);
1406 	return link;
1407 }
1408 
/*
 * ->open() for directories: delegate to fuse_open_common() with its third
 * argument set to true (presumably the "is a directory" flag -- confirm
 * against fuse_open_common()).
 */
static int fuse_dir_open(struct inode *inode, struct file *file)
{
	int err = fuse_open_common(inode, file, true);

	return err;
}
1413 
1414 static int fuse_dir_release(struct inode *inode, struct file *file)
1415 {
1416 	fuse_release_common(file, FUSE_RELEASEDIR);
1417 
1418 	return 0;
1419 }
1420 
1421 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1422 			  int datasync)
1423 {
1424 	return fuse_fsync_common(file, start, end, datasync, 1);
1425 }
1426 
1427 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1428 			    unsigned long arg)
1429 {
1430 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1431 
1432 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1433 	if (fc->minor < 18)
1434 		return -ENOTTY;
1435 
1436 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1437 }
1438 
1439 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1440 				   unsigned long arg)
1441 {
1442 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1443 
1444 	if (fc->minor < 18)
1445 		return -ENOTTY;
1446 
1447 	return fuse_ioctl_common(file, cmd, arg,
1448 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1449 }
1450 
1451 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1452 {
1453 	/* Always update if mtime is explicitly set  */
1454 	if (ivalid & ATTR_MTIME_SET)
1455 		return true;
1456 
1457 	/* Or if kernel i_mtime is the official one */
1458 	if (trust_local_mtime)
1459 		return true;
1460 
1461 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1462 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1463 		return false;
1464 
1465 	/* In all other cases update */
1466 	return true;
1467 }
1468 
1469 static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1470 			   bool trust_local_cmtime)
1471 {
1472 	unsigned ivalid = iattr->ia_valid;
1473 
1474 	if (ivalid & ATTR_MODE)
1475 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1476 	if (ivalid & ATTR_UID)
1477 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1478 	if (ivalid & ATTR_GID)
1479 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1480 	if (ivalid & ATTR_SIZE)
1481 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1482 	if (ivalid & ATTR_ATIME) {
1483 		arg->valid |= FATTR_ATIME;
1484 		arg->atime = iattr->ia_atime.tv_sec;
1485 		arg->atimensec = iattr->ia_atime.tv_nsec;
1486 		if (!(ivalid & ATTR_ATIME_SET))
1487 			arg->valid |= FATTR_ATIME_NOW;
1488 	}
1489 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1490 		arg->valid |= FATTR_MTIME;
1491 		arg->mtime = iattr->ia_mtime.tv_sec;
1492 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1493 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1494 			arg->valid |= FATTR_MTIME_NOW;
1495 	}
1496 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1497 		arg->valid |= FATTR_CTIME;
1498 		arg->ctime = iattr->ia_ctime.tv_sec;
1499 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1500 	}
1501 }
1502 
1503 /*
1504  * Prevent concurrent writepages on inode
1505  *
1506  * This is done by adding a negative bias to the inode write counter
1507  * and waiting for all pending writes to finish.
1508  */
1509 void fuse_set_nowrite(struct inode *inode)
1510 {
1511 	struct fuse_conn *fc = get_fuse_conn(inode);
1512 	struct fuse_inode *fi = get_fuse_inode(inode);
1513 
1514 	BUG_ON(!inode_is_locked(inode));
1515 
1516 	spin_lock(&fc->lock);
1517 	BUG_ON(fi->writectr < 0);
1518 	fi->writectr += FUSE_NOWRITE;
1519 	spin_unlock(&fc->lock);
1520 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1521 }
1522 
1523 /*
1524  * Allow writepages on inode
1525  *
1526  * Remove the bias from the writecounter and send any queued
1527  * writepages.
1528  */
1529 static void __fuse_release_nowrite(struct inode *inode)
1530 {
1531 	struct fuse_inode *fi = get_fuse_inode(inode);
1532 
1533 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1534 	fi->writectr = 0;
1535 	fuse_flush_writepages(inode);
1536 }
1537 
1538 void fuse_release_nowrite(struct inode *inode)
1539 {
1540 	struct fuse_conn *fc = get_fuse_conn(inode);
1541 
1542 	spin_lock(&fc->lock);
1543 	__fuse_release_nowrite(inode);
1544 	spin_unlock(&fc->lock);
1545 }
1546 
1547 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1548 			      struct inode *inode,
1549 			      struct fuse_setattr_in *inarg_p,
1550 			      struct fuse_attr_out *outarg_p)
1551 {
1552 	args->in.h.opcode = FUSE_SETATTR;
1553 	args->in.h.nodeid = get_node_id(inode);
1554 	args->in.numargs = 1;
1555 	args->in.args[0].size = sizeof(*inarg_p);
1556 	args->in.args[0].value = inarg_p;
1557 	args->out.numargs = 1;
1558 	args->out.args[0].size = sizeof(*outarg_p);
1559 	args->out.args[0].value = outarg_p;
1560 }
1561 
1562 /*
1563  * Flush inode->i_mtime to the server
1564  */
1565 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1566 {
1567 	struct fuse_conn *fc = get_fuse_conn(inode);
1568 	FUSE_ARGS(args);
1569 	struct fuse_setattr_in inarg;
1570 	struct fuse_attr_out outarg;
1571 
1572 	memset(&inarg, 0, sizeof(inarg));
1573 	memset(&outarg, 0, sizeof(outarg));
1574 
1575 	inarg.valid = FATTR_MTIME;
1576 	inarg.mtime = inode->i_mtime.tv_sec;
1577 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1578 	if (fc->minor >= 23) {
1579 		inarg.valid |= FATTR_CTIME;
1580 		inarg.ctime = inode->i_ctime.tv_sec;
1581 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1582 	}
1583 	if (ff) {
1584 		inarg.valid |= FATTR_FH;
1585 		inarg.fh = ff->fh;
1586 	}
1587 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1588 
1589 	return fuse_simple_request(fc, &args);
1590 }
1591 
1592 /*
1593  * Set attributes, and at the same time refresh them.
1594  *
1595  * Truncation is slightly complicated, because the 'truncate' request
1596  * may fail, in which case we don't want to touch the mapping.
1597  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1598  * and the actual truncation by hand.
1599  */
1600 int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1601 		    struct file *file)
1602 {
1603 	struct fuse_conn *fc = get_fuse_conn(inode);
1604 	struct fuse_inode *fi = get_fuse_inode(inode);
1605 	FUSE_ARGS(args);
1606 	struct fuse_setattr_in inarg;
1607 	struct fuse_attr_out outarg;
1608 	bool is_truncate = false;
1609 	bool is_wb = fc->writeback_cache;
1610 	loff_t oldsize;
1611 	int err;
1612 	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1613 
1614 	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
1615 		attr->ia_valid |= ATTR_FORCE;
1616 
1617 	err = inode_change_ok(inode, attr);
1618 	if (err)
1619 		return err;
1620 
1621 	if (attr->ia_valid & ATTR_OPEN) {
1622 		if (fc->atomic_o_trunc)
1623 			return 0;
1624 		file = NULL;
1625 	}
1626 
1627 	if (attr->ia_valid & ATTR_SIZE)
1628 		is_truncate = true;
1629 
1630 	if (is_truncate) {
1631 		fuse_set_nowrite(inode);
1632 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1633 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1634 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1635 	}
1636 
1637 	memset(&inarg, 0, sizeof(inarg));
1638 	memset(&outarg, 0, sizeof(outarg));
1639 	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
1640 	if (file) {
1641 		struct fuse_file *ff = file->private_data;
1642 		inarg.valid |= FATTR_FH;
1643 		inarg.fh = ff->fh;
1644 	}
1645 	if (attr->ia_valid & ATTR_SIZE) {
1646 		/* For mandatory locking in truncate */
1647 		inarg.valid |= FATTR_LOCKOWNER;
1648 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1649 	}
1650 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1651 	err = fuse_simple_request(fc, &args);
1652 	if (err) {
1653 		if (err == -EINTR)
1654 			fuse_invalidate_attr(inode);
1655 		goto error;
1656 	}
1657 
1658 	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1659 		make_bad_inode(inode);
1660 		err = -EIO;
1661 		goto error;
1662 	}
1663 
1664 	spin_lock(&fc->lock);
1665 	/* the kernel maintains i_mtime locally */
1666 	if (trust_local_cmtime) {
1667 		if (attr->ia_valid & ATTR_MTIME)
1668 			inode->i_mtime = attr->ia_mtime;
1669 		if (attr->ia_valid & ATTR_CTIME)
1670 			inode->i_ctime = attr->ia_ctime;
1671 		/* FIXME: clear I_DIRTY_SYNC? */
1672 	}
1673 
1674 	fuse_change_attributes_common(inode, &outarg.attr,
1675 				      attr_timeout(&outarg));
1676 	oldsize = inode->i_size;
1677 	/* see the comment in fuse_change_attributes() */
1678 	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1679 		i_size_write(inode, outarg.attr.size);
1680 
1681 	if (is_truncate) {
1682 		/* NOTE: this may release/reacquire fc->lock */
1683 		__fuse_release_nowrite(inode);
1684 	}
1685 	spin_unlock(&fc->lock);
1686 
1687 	/*
1688 	 * Only call invalidate_inode_pages2() after removing
1689 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1690 	 */
1691 	if ((is_truncate || !is_wb) &&
1692 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1693 		truncate_pagecache(inode, outarg.attr.size);
1694 		invalidate_inode_pages2(inode->i_mapping);
1695 	}
1696 
1697 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1698 	return 0;
1699 
1700 error:
1701 	if (is_truncate)
1702 		fuse_release_nowrite(inode);
1703 
1704 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1705 	return err;
1706 }
1707 
1708 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1709 {
1710 	struct inode *inode = d_inode(entry);
1711 	struct fuse_conn *fc = get_fuse_conn(inode);
1712 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1713 	int ret;
1714 
1715 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1716 		return -EACCES;
1717 
1718 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1719 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1720 				    ATTR_MODE);
1721 
1722 		/*
1723 		 * The only sane way to reliably kill suid/sgid is to do it in
1724 		 * the userspace filesystem
1725 		 *
1726 		 * This should be done on write(), truncate() and chown().
1727 		 */
1728 		if (!fc->handle_killpriv) {
1729 			int kill;
1730 
1731 			/*
1732 			 * ia_mode calculation may have used stale i_mode.
1733 			 * Refresh and recalculate.
1734 			 */
1735 			ret = fuse_do_getattr(inode, NULL, file);
1736 			if (ret)
1737 				return ret;
1738 
1739 			attr->ia_mode = inode->i_mode;
1740 			kill = should_remove_suid(entry);
1741 			if (kill & ATTR_KILL_SUID) {
1742 				attr->ia_valid |= ATTR_MODE;
1743 				attr->ia_mode &= ~S_ISUID;
1744 			}
1745 			if (kill & ATTR_KILL_SGID) {
1746 				attr->ia_valid |= ATTR_MODE;
1747 				attr->ia_mode &= ~S_ISGID;
1748 			}
1749 		}
1750 	}
1751 	if (!attr->ia_valid)
1752 		return 0;
1753 
1754 	ret = fuse_do_setattr(inode, attr, file);
1755 	if (!ret) {
1756 		/*
1757 		 * If filesystem supports acls it may have updated acl xattrs in
1758 		 * the filesystem, so forget cached acls for the inode.
1759 		 */
1760 		if (fc->posix_acl)
1761 			forget_all_cached_acls(inode);
1762 
1763 		/* Directory mode changed, may need to revalidate access */
1764 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1765 			fuse_invalidate_entry_cache(entry);
1766 	}
1767 	return ret;
1768 }
1769 
1770 static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1771 			struct kstat *stat)
1772 {
1773 	struct inode *inode = d_inode(entry);
1774 	struct fuse_conn *fc = get_fuse_conn(inode);
1775 
1776 	if (!fuse_allow_current_process(fc))
1777 		return -EACCES;
1778 
1779 	return fuse_update_attributes(inode, stat, NULL, NULL);
1780 }
1781 
1782 static const struct inode_operations fuse_dir_inode_operations = {
1783 	.lookup		= fuse_lookup,
1784 	.mkdir		= fuse_mkdir,
1785 	.symlink	= fuse_symlink,
1786 	.unlink		= fuse_unlink,
1787 	.rmdir		= fuse_rmdir,
1788 	.rename2	= fuse_rename2,
1789 	.link		= fuse_link,
1790 	.setattr	= fuse_setattr,
1791 	.create		= fuse_create,
1792 	.atomic_open	= fuse_atomic_open,
1793 	.mknod		= fuse_mknod,
1794 	.permission	= fuse_permission,
1795 	.getattr	= fuse_getattr,
1796 	.setxattr	= generic_setxattr,
1797 	.getxattr	= generic_getxattr,
1798 	.listxattr	= fuse_listxattr,
1799 	.removexattr	= generic_removexattr,
1800 	.get_acl	= fuse_get_acl,
1801 	.set_acl	= fuse_set_acl,
1802 };
1803 
1804 static const struct file_operations fuse_dir_operations = {
1805 	.llseek		= generic_file_llseek,
1806 	.read		= generic_read_dir,
1807 	.iterate_shared	= fuse_readdir,
1808 	.open		= fuse_dir_open,
1809 	.release	= fuse_dir_release,
1810 	.fsync		= fuse_dir_fsync,
1811 	.unlocked_ioctl	= fuse_dir_ioctl,
1812 	.compat_ioctl	= fuse_dir_compat_ioctl,
1813 };
1814 
1815 static const struct inode_operations fuse_common_inode_operations = {
1816 	.setattr	= fuse_setattr,
1817 	.permission	= fuse_permission,
1818 	.getattr	= fuse_getattr,
1819 	.setxattr	= generic_setxattr,
1820 	.getxattr	= generic_getxattr,
1821 	.listxattr	= fuse_listxattr,
1822 	.removexattr	= generic_removexattr,
1823 	.get_acl	= fuse_get_acl,
1824 	.set_acl	= fuse_set_acl,
1825 };
1826 
1827 static const struct inode_operations fuse_symlink_inode_operations = {
1828 	.setattr	= fuse_setattr,
1829 	.get_link	= fuse_get_link,
1830 	.readlink	= generic_readlink,
1831 	.getattr	= fuse_getattr,
1832 	.setxattr	= generic_setxattr,
1833 	.getxattr	= generic_getxattr,
1834 	.listxattr	= fuse_listxattr,
1835 	.removexattr	= generic_removexattr,
1836 };
1837 
1838 void fuse_init_common(struct inode *inode)
1839 {
1840 	inode->i_op = &fuse_common_inode_operations;
1841 }
1842 
1843 void fuse_init_dir(struct inode *inode)
1844 {
1845 	inode->i_op = &fuse_dir_inode_operations;
1846 	inode->i_fop = &fuse_dir_operations;
1847 }
1848 
1849 void fuse_init_symlink(struct inode *inode)
1850 {
1851 	inode->i_op = &fuse_symlink_inode_operations;
1852 }
1853