xref: /openbmc/linux/fs/fuse/dir.c (revision 4f205687)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/sched.h>
14 #include <linux/namei.h>
15 #include <linux/slab.h>
16 
17 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
18 {
19 	struct fuse_conn *fc = get_fuse_conn(dir);
20 	struct fuse_inode *fi = get_fuse_inode(dir);
21 
22 	if (!fc->do_readdirplus)
23 		return false;
24 	if (!fc->readdirplus_auto)
25 		return true;
26 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27 		return true;
28 	if (ctx->pos == 0)
29 		return true;
30 	return false;
31 }
32 
33 static void fuse_advise_use_readdirplus(struct inode *dir)
34 {
35 	struct fuse_inode *fi = get_fuse_inode(dir);
36 
37 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38 }
39 
40 #if BITS_PER_LONG >= 64
41 static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
42 {
43 	entry->d_time = time;
44 }
45 
46 static inline u64 fuse_dentry_time(struct dentry *entry)
47 {
48 	return entry->d_time;
49 }
50 #else
51 /*
52  * On 32 bit archs store the high 32 bits of time in d_fsdata
53  */
54 static void fuse_dentry_settime(struct dentry *entry, u64 time)
55 {
56 	entry->d_time = time;
57 	entry->d_fsdata = (void *) (unsigned long) (time >> 32);
58 }
59 
60 static u64 fuse_dentry_time(struct dentry *entry)
61 {
62 	return (u64) entry->d_time +
63 		((u64) (unsigned long) entry->d_fsdata << 32);
64 }
65 #endif
66 
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time, in jiffies, until which the dentry/attributes remain valid is
 * stored in dentry->d_time and fuse_inode->i_time respectively.
 */
72 
73 /*
74  * Calculate the time in jiffies until a dentry/attributes are valid
75  */
76 static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
77 {
78 	if (sec || nsec) {
79 		struct timespec ts = {sec, nsec};
80 		return get_jiffies_64() + timespec_to_jiffies(&ts);
81 	} else
82 		return 0;
83 }
84 
85 /*
86  * Set dentry and possibly attribute timeouts from the lookup/mk*
87  * replies
88  */
89 static void fuse_change_entry_timeout(struct dentry *entry,
90 				      struct fuse_entry_out *o)
91 {
92 	fuse_dentry_settime(entry,
93 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
94 }
95 
96 static u64 attr_timeout(struct fuse_attr_out *o)
97 {
98 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
99 }
100 
101 static u64 entry_attr_timeout(struct fuse_entry_out *o)
102 {
103 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
104 }
105 
106 /*
107  * Mark the attributes as stale, so that at the next call to
108  * ->getattr() they will be fetched from userspace
109  */
110 void fuse_invalidate_attr(struct inode *inode)
111 {
112 	get_fuse_inode(inode)->i_time = 0;
113 }
114 
/*
 * Expire the cached attributes because the access time may have
 * changed.  Nothing is done for inodes on which IS_RDONLY() is true.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
124 
125 /*
126  * Just mark the entry as stale, so that a next attempt to look it up
127  * will result in a new lookup call to userspace
128  *
129  * This is called when a dentry is about to become negative and the
130  * timeout is unknown (unlink, rmdir, rename and in some cases
131  * lookup)
132  */
133 void fuse_invalidate_entry_cache(struct dentry *entry)
134 {
135 	fuse_dentry_settime(entry, 0);
136 }
137 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the dcache to
 * invalidate the dentry as well (d_invalidate()).
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_dentry_settime(entry, 0);
}
147 
148 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
149 			     u64 nodeid, struct qstr *name,
150 			     struct fuse_entry_out *outarg)
151 {
152 	memset(outarg, 0, sizeof(struct fuse_entry_out));
153 	args->in.h.opcode = FUSE_LOOKUP;
154 	args->in.h.nodeid = nodeid;
155 	args->in.numargs = 1;
156 	args->in.args[0].size = name->len + 1;
157 	args->in.args[0].value = name->name;
158 	args->out.numargs = 1;
159 	args->out.args[0].size = sizeof(struct fuse_entry_out);
160 	args->out.args[0].value = outarg;
161 }
162 
163 u64 fuse_get_attr_version(struct fuse_conn *fc)
164 {
165 	u64 curr_version;
166 
167 	/*
168 	 * The spin lock isn't actually needed on 64bit archs, but we
169 	 * don't yet care too much about such optimizations.
170 	 */
171 	spin_lock(&fc->lock);
172 	curr_version = fc->attr_version;
173 	spin_unlock(&fc->lock);
174 
175 	return curr_version;
176 }
177 
178 /*
179  * Check whether the dentry is still valid
180  *
181  * If the entry validity timeout has expired and the dentry is
182  * positive, try to redo the lookup.  If the lookup results in a
183  * different inode, then let the VFS invalidate the dentry and redo
184  * the lookup once more.  If the lookup results in the same inode,
185  * then refresh the attributes, timeouts and mark the dentry valid.
186  */
187 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
188 {
189 	struct inode *inode;
190 	struct dentry *parent;
191 	struct fuse_conn *fc;
192 	struct fuse_inode *fi;
193 	int ret;
194 
195 	inode = d_inode_rcu(entry);
196 	if (inode && is_bad_inode(inode))
197 		goto invalid;
198 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
199 		 (flags & LOOKUP_REVAL)) {
200 		struct fuse_entry_out outarg;
201 		FUSE_ARGS(args);
202 		struct fuse_forget_link *forget;
203 		u64 attr_version;
204 
205 		/* For negative dentries, always do a fresh lookup */
206 		if (!inode)
207 			goto invalid;
208 
209 		ret = -ECHILD;
210 		if (flags & LOOKUP_RCU)
211 			goto out;
212 
213 		fc = get_fuse_conn(inode);
214 
215 		forget = fuse_alloc_forget();
216 		ret = -ENOMEM;
217 		if (!forget)
218 			goto out;
219 
220 		attr_version = fuse_get_attr_version(fc);
221 
222 		parent = dget_parent(entry);
223 		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
224 				 &entry->d_name, &outarg);
225 		ret = fuse_simple_request(fc, &args);
226 		dput(parent);
227 		/* Zero nodeid is same as -ENOENT */
228 		if (!ret && !outarg.nodeid)
229 			ret = -ENOENT;
230 		if (!ret) {
231 			fi = get_fuse_inode(inode);
232 			if (outarg.nodeid != get_node_id(inode)) {
233 				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
234 				goto invalid;
235 			}
236 			spin_lock(&fc->lock);
237 			fi->nlookup++;
238 			spin_unlock(&fc->lock);
239 		}
240 		kfree(forget);
241 		if (ret == -ENOMEM)
242 			goto out;
243 		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
244 			goto invalid;
245 
246 		fuse_change_attributes(inode, &outarg.attr,
247 				       entry_attr_timeout(&outarg),
248 				       attr_version);
249 		fuse_change_entry_timeout(entry, &outarg);
250 	} else if (inode) {
251 		fi = get_fuse_inode(inode);
252 		if (flags & LOOKUP_RCU) {
253 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
254 				return -ECHILD;
255 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
256 			parent = dget_parent(entry);
257 			fuse_advise_use_readdirplus(d_inode(parent));
258 			dput(parent);
259 		}
260 	}
261 	ret = 1;
262 out:
263 	return ret;
264 
265 invalid:
266 	ret = 0;
267 	goto out;
268 }
269 
270 static int invalid_nodeid(u64 nodeid)
271 {
272 	return !nodeid || nodeid == FUSE_ROOT_ID;
273 }
274 
275 const struct dentry_operations fuse_dentry_operations = {
276 	.d_revalidate	= fuse_dentry_revalidate,
277 };
278 
/*
 * Return 1 if the file type bits of mode @m denote a regular file,
 * directory, symlink, character or block device, FIFO or socket;
 * return 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
284 
285 int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
286 		     struct fuse_entry_out *outarg, struct inode **inode)
287 {
288 	struct fuse_conn *fc = get_fuse_conn_super(sb);
289 	FUSE_ARGS(args);
290 	struct fuse_forget_link *forget;
291 	u64 attr_version;
292 	int err;
293 
294 	*inode = NULL;
295 	err = -ENAMETOOLONG;
296 	if (name->len > FUSE_NAME_MAX)
297 		goto out;
298 
299 
300 	forget = fuse_alloc_forget();
301 	err = -ENOMEM;
302 	if (!forget)
303 		goto out;
304 
305 	attr_version = fuse_get_attr_version(fc);
306 
307 	fuse_lookup_init(fc, &args, nodeid, name, outarg);
308 	err = fuse_simple_request(fc, &args);
309 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
310 	if (err || !outarg->nodeid)
311 		goto out_put_forget;
312 
313 	err = -EIO;
314 	if (!outarg->nodeid)
315 		goto out_put_forget;
316 	if (!fuse_valid_type(outarg->attr.mode))
317 		goto out_put_forget;
318 
319 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
320 			   &outarg->attr, entry_attr_timeout(outarg),
321 			   attr_version);
322 	err = -ENOMEM;
323 	if (!*inode) {
324 		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
325 		goto out;
326 	}
327 	err = 0;
328 
329  out_put_forget:
330 	kfree(forget);
331  out:
332 	return err;
333 }
334 
335 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
336 				  unsigned int flags)
337 {
338 	int err;
339 	struct fuse_entry_out outarg;
340 	struct inode *inode;
341 	struct dentry *newent;
342 	bool outarg_valid = true;
343 
344 	fuse_lock_inode(dir);
345 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
346 			       &outarg, &inode);
347 	fuse_unlock_inode(dir);
348 	if (err == -ENOENT) {
349 		outarg_valid = false;
350 		err = 0;
351 	}
352 	if (err)
353 		goto out_err;
354 
355 	err = -EIO;
356 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
357 		goto out_iput;
358 
359 	newent = d_splice_alias(inode, entry);
360 	err = PTR_ERR(newent);
361 	if (IS_ERR(newent))
362 		goto out_err;
363 
364 	entry = newent ? newent : entry;
365 	if (outarg_valid)
366 		fuse_change_entry_timeout(entry, &outarg);
367 	else
368 		fuse_invalidate_entry_cache(entry);
369 
370 	fuse_advise_use_readdirplus(dir);
371 	return newent;
372 
373  out_iput:
374 	iput(inode);
375  out_err:
376 	return ERR_PTR(err);
377 }
378 
379 /*
380  * Atomic create+open operation
381  *
382  * If the filesystem doesn't support this, then fall back to separate
383  * 'mknod' + 'open' requests.
384  */
385 static int fuse_create_open(struct inode *dir, struct dentry *entry,
386 			    struct file *file, unsigned flags,
387 			    umode_t mode, int *opened)
388 {
389 	int err;
390 	struct inode *inode;
391 	struct fuse_conn *fc = get_fuse_conn(dir);
392 	FUSE_ARGS(args);
393 	struct fuse_forget_link *forget;
394 	struct fuse_create_in inarg;
395 	struct fuse_open_out outopen;
396 	struct fuse_entry_out outentry;
397 	struct fuse_file *ff;
398 
399 	/* Userspace expects S_IFREG in create mode */
400 	BUG_ON((mode & S_IFMT) != S_IFREG);
401 
402 	forget = fuse_alloc_forget();
403 	err = -ENOMEM;
404 	if (!forget)
405 		goto out_err;
406 
407 	err = -ENOMEM;
408 	ff = fuse_file_alloc(fc);
409 	if (!ff)
410 		goto out_put_forget_req;
411 
412 	if (!fc->dont_mask)
413 		mode &= ~current_umask();
414 
415 	flags &= ~O_NOCTTY;
416 	memset(&inarg, 0, sizeof(inarg));
417 	memset(&outentry, 0, sizeof(outentry));
418 	inarg.flags = flags;
419 	inarg.mode = mode;
420 	inarg.umask = current_umask();
421 	args.in.h.opcode = FUSE_CREATE;
422 	args.in.h.nodeid = get_node_id(dir);
423 	args.in.numargs = 2;
424 	args.in.args[0].size = sizeof(inarg);
425 	args.in.args[0].value = &inarg;
426 	args.in.args[1].size = entry->d_name.len + 1;
427 	args.in.args[1].value = entry->d_name.name;
428 	args.out.numargs = 2;
429 	args.out.args[0].size = sizeof(outentry);
430 	args.out.args[0].value = &outentry;
431 	args.out.args[1].size = sizeof(outopen);
432 	args.out.args[1].value = &outopen;
433 	err = fuse_simple_request(fc, &args);
434 	if (err)
435 		goto out_free_ff;
436 
437 	err = -EIO;
438 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
439 		goto out_free_ff;
440 
441 	ff->fh = outopen.fh;
442 	ff->nodeid = outentry.nodeid;
443 	ff->open_flags = outopen.open_flags;
444 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
445 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
446 	if (!inode) {
447 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
448 		fuse_sync_release(ff, flags);
449 		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
450 		err = -ENOMEM;
451 		goto out_err;
452 	}
453 	kfree(forget);
454 	d_instantiate(entry, inode);
455 	fuse_change_entry_timeout(entry, &outentry);
456 	fuse_invalidate_attr(dir);
457 	err = finish_open(file, entry, generic_file_open, opened);
458 	if (err) {
459 		fuse_sync_release(ff, flags);
460 	} else {
461 		file->private_data = fuse_file_get(ff);
462 		fuse_finish_open(inode, file);
463 	}
464 	return err;
465 
466 out_free_ff:
467 	fuse_file_free(ff);
468 out_put_forget_req:
469 	kfree(forget);
470 out_err:
471 	return err;
472 }
473 
474 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
475 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
476 			    struct file *file, unsigned flags,
477 			    umode_t mode, int *opened)
478 {
479 	int err;
480 	struct fuse_conn *fc = get_fuse_conn(dir);
481 	struct dentry *res = NULL;
482 
483 	if (d_in_lookup(entry)) {
484 		res = fuse_lookup(dir, entry, 0);
485 		if (IS_ERR(res))
486 			return PTR_ERR(res);
487 
488 		if (res)
489 			entry = res;
490 	}
491 
492 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
493 		goto no_open;
494 
495 	/* Only creates */
496 	*opened |= FILE_CREATED;
497 
498 	if (fc->no_create)
499 		goto mknod;
500 
501 	err = fuse_create_open(dir, entry, file, flags, mode, opened);
502 	if (err == -ENOSYS) {
503 		fc->no_create = 1;
504 		goto mknod;
505 	}
506 out_dput:
507 	dput(res);
508 	return err;
509 
510 mknod:
511 	err = fuse_mknod(dir, entry, mode, 0);
512 	if (err)
513 		goto out_dput;
514 no_open:
515 	return finish_no_open(file, res);
516 }
517 
518 /*
519  * Code shared between mknod, mkdir, symlink and link
520  */
521 static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
522 			    struct inode *dir, struct dentry *entry,
523 			    umode_t mode)
524 {
525 	struct fuse_entry_out outarg;
526 	struct inode *inode;
527 	int err;
528 	struct fuse_forget_link *forget;
529 
530 	forget = fuse_alloc_forget();
531 	if (!forget)
532 		return -ENOMEM;
533 
534 	memset(&outarg, 0, sizeof(outarg));
535 	args->in.h.nodeid = get_node_id(dir);
536 	args->out.numargs = 1;
537 	args->out.args[0].size = sizeof(outarg);
538 	args->out.args[0].value = &outarg;
539 	err = fuse_simple_request(fc, args);
540 	if (err)
541 		goto out_put_forget_req;
542 
543 	err = -EIO;
544 	if (invalid_nodeid(outarg.nodeid))
545 		goto out_put_forget_req;
546 
547 	if ((outarg.attr.mode ^ mode) & S_IFMT)
548 		goto out_put_forget_req;
549 
550 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
551 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
552 	if (!inode) {
553 		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
554 		return -ENOMEM;
555 	}
556 	kfree(forget);
557 
558 	err = d_instantiate_no_diralias(entry, inode);
559 	if (err)
560 		return err;
561 
562 	fuse_change_entry_timeout(entry, &outarg);
563 	fuse_invalidate_attr(dir);
564 	return 0;
565 
566  out_put_forget_req:
567 	kfree(forget);
568 	return err;
569 }
570 
571 static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
572 		      dev_t rdev)
573 {
574 	struct fuse_mknod_in inarg;
575 	struct fuse_conn *fc = get_fuse_conn(dir);
576 	FUSE_ARGS(args);
577 
578 	if (!fc->dont_mask)
579 		mode &= ~current_umask();
580 
581 	memset(&inarg, 0, sizeof(inarg));
582 	inarg.mode = mode;
583 	inarg.rdev = new_encode_dev(rdev);
584 	inarg.umask = current_umask();
585 	args.in.h.opcode = FUSE_MKNOD;
586 	args.in.numargs = 2;
587 	args.in.args[0].size = sizeof(inarg);
588 	args.in.args[0].value = &inarg;
589 	args.in.args[1].size = entry->d_name.len + 1;
590 	args.in.args[1].value = entry->d_name.name;
591 	return create_new_entry(fc, &args, dir, entry, mode);
592 }
593 
594 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
595 		       bool excl)
596 {
597 	return fuse_mknod(dir, entry, mode, 0);
598 }
599 
600 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
601 {
602 	struct fuse_mkdir_in inarg;
603 	struct fuse_conn *fc = get_fuse_conn(dir);
604 	FUSE_ARGS(args);
605 
606 	if (!fc->dont_mask)
607 		mode &= ~current_umask();
608 
609 	memset(&inarg, 0, sizeof(inarg));
610 	inarg.mode = mode;
611 	inarg.umask = current_umask();
612 	args.in.h.opcode = FUSE_MKDIR;
613 	args.in.numargs = 2;
614 	args.in.args[0].size = sizeof(inarg);
615 	args.in.args[0].value = &inarg;
616 	args.in.args[1].size = entry->d_name.len + 1;
617 	args.in.args[1].value = entry->d_name.name;
618 	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
619 }
620 
621 static int fuse_symlink(struct inode *dir, struct dentry *entry,
622 			const char *link)
623 {
624 	struct fuse_conn *fc = get_fuse_conn(dir);
625 	unsigned len = strlen(link) + 1;
626 	FUSE_ARGS(args);
627 
628 	args.in.h.opcode = FUSE_SYMLINK;
629 	args.in.numargs = 2;
630 	args.in.args[0].size = entry->d_name.len + 1;
631 	args.in.args[0].value = entry->d_name.name;
632 	args.in.args[1].size = len;
633 	args.in.args[1].value = link;
634 	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
635 }
636 
637 static inline void fuse_update_ctime(struct inode *inode)
638 {
639 	if (!IS_NOCMTIME(inode)) {
640 		inode->i_ctime = current_fs_time(inode->i_sb);
641 		mark_inode_dirty_sync(inode);
642 	}
643 }
644 
645 static int fuse_unlink(struct inode *dir, struct dentry *entry)
646 {
647 	int err;
648 	struct fuse_conn *fc = get_fuse_conn(dir);
649 	FUSE_ARGS(args);
650 
651 	args.in.h.opcode = FUSE_UNLINK;
652 	args.in.h.nodeid = get_node_id(dir);
653 	args.in.numargs = 1;
654 	args.in.args[0].size = entry->d_name.len + 1;
655 	args.in.args[0].value = entry->d_name.name;
656 	err = fuse_simple_request(fc, &args);
657 	if (!err) {
658 		struct inode *inode = d_inode(entry);
659 		struct fuse_inode *fi = get_fuse_inode(inode);
660 
661 		spin_lock(&fc->lock);
662 		fi->attr_version = ++fc->attr_version;
663 		/*
664 		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
665 		 * happen if userspace filesystem is careless.  It would be
666 		 * difficult to enforce correct nlink usage so just ignore this
667 		 * condition here
668 		 */
669 		if (inode->i_nlink > 0)
670 			drop_nlink(inode);
671 		spin_unlock(&fc->lock);
672 		fuse_invalidate_attr(inode);
673 		fuse_invalidate_attr(dir);
674 		fuse_invalidate_entry_cache(entry);
675 		fuse_update_ctime(inode);
676 	} else if (err == -EINTR)
677 		fuse_invalidate_entry(entry);
678 	return err;
679 }
680 
681 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
682 {
683 	int err;
684 	struct fuse_conn *fc = get_fuse_conn(dir);
685 	FUSE_ARGS(args);
686 
687 	args.in.h.opcode = FUSE_RMDIR;
688 	args.in.h.nodeid = get_node_id(dir);
689 	args.in.numargs = 1;
690 	args.in.args[0].size = entry->d_name.len + 1;
691 	args.in.args[0].value = entry->d_name.name;
692 	err = fuse_simple_request(fc, &args);
693 	if (!err) {
694 		clear_nlink(d_inode(entry));
695 		fuse_invalidate_attr(dir);
696 		fuse_invalidate_entry_cache(entry);
697 	} else if (err == -EINTR)
698 		fuse_invalidate_entry(entry);
699 	return err;
700 }
701 
702 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
703 			      struct inode *newdir, struct dentry *newent,
704 			      unsigned int flags, int opcode, size_t argsize)
705 {
706 	int err;
707 	struct fuse_rename2_in inarg;
708 	struct fuse_conn *fc = get_fuse_conn(olddir);
709 	FUSE_ARGS(args);
710 
711 	memset(&inarg, 0, argsize);
712 	inarg.newdir = get_node_id(newdir);
713 	inarg.flags = flags;
714 	args.in.h.opcode = opcode;
715 	args.in.h.nodeid = get_node_id(olddir);
716 	args.in.numargs = 3;
717 	args.in.args[0].size = argsize;
718 	args.in.args[0].value = &inarg;
719 	args.in.args[1].size = oldent->d_name.len + 1;
720 	args.in.args[1].value = oldent->d_name.name;
721 	args.in.args[2].size = newent->d_name.len + 1;
722 	args.in.args[2].value = newent->d_name.name;
723 	err = fuse_simple_request(fc, &args);
724 	if (!err) {
725 		/* ctime changes */
726 		fuse_invalidate_attr(d_inode(oldent));
727 		fuse_update_ctime(d_inode(oldent));
728 
729 		if (flags & RENAME_EXCHANGE) {
730 			fuse_invalidate_attr(d_inode(newent));
731 			fuse_update_ctime(d_inode(newent));
732 		}
733 
734 		fuse_invalidate_attr(olddir);
735 		if (olddir != newdir)
736 			fuse_invalidate_attr(newdir);
737 
738 		/* newent will end up negative */
739 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
740 			fuse_invalidate_attr(d_inode(newent));
741 			fuse_invalidate_entry_cache(newent);
742 			fuse_update_ctime(d_inode(newent));
743 		}
744 	} else if (err == -EINTR) {
745 		/* If request was interrupted, DEITY only knows if the
746 		   rename actually took place.  If the invalidation
747 		   fails (e.g. some process has CWD under the renamed
748 		   directory), then there can be inconsistency between
749 		   the dcache and the real filesystem.  Tough luck. */
750 		fuse_invalidate_entry(oldent);
751 		if (d_really_is_positive(newent))
752 			fuse_invalidate_entry(newent);
753 	}
754 
755 	return err;
756 }
757 
758 static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
759 			struct inode *newdir, struct dentry *newent,
760 			unsigned int flags)
761 {
762 	struct fuse_conn *fc = get_fuse_conn(olddir);
763 	int err;
764 
765 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
766 		return -EINVAL;
767 
768 	if (flags) {
769 		if (fc->no_rename2 || fc->minor < 23)
770 			return -EINVAL;
771 
772 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
773 					 FUSE_RENAME2,
774 					 sizeof(struct fuse_rename2_in));
775 		if (err == -ENOSYS) {
776 			fc->no_rename2 = 1;
777 			err = -EINVAL;
778 		}
779 	} else {
780 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
781 					 FUSE_RENAME,
782 					 sizeof(struct fuse_rename_in));
783 	}
784 
785 	return err;
786 }
787 
788 static int fuse_link(struct dentry *entry, struct inode *newdir,
789 		     struct dentry *newent)
790 {
791 	int err;
792 	struct fuse_link_in inarg;
793 	struct inode *inode = d_inode(entry);
794 	struct fuse_conn *fc = get_fuse_conn(inode);
795 	FUSE_ARGS(args);
796 
797 	memset(&inarg, 0, sizeof(inarg));
798 	inarg.oldnodeid = get_node_id(inode);
799 	args.in.h.opcode = FUSE_LINK;
800 	args.in.numargs = 2;
801 	args.in.args[0].size = sizeof(inarg);
802 	args.in.args[0].value = &inarg;
803 	args.in.args[1].size = newent->d_name.len + 1;
804 	args.in.args[1].value = newent->d_name.name;
805 	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
806 	/* Contrary to "normal" filesystems it can happen that link
807 	   makes two "logical" inodes point to the same "physical"
808 	   inode.  We invalidate the attributes of the old one, so it
809 	   will reflect changes in the backing inode (link count,
810 	   etc.)
811 	*/
812 	if (!err) {
813 		struct fuse_inode *fi = get_fuse_inode(inode);
814 
815 		spin_lock(&fc->lock);
816 		fi->attr_version = ++fc->attr_version;
817 		inc_nlink(inode);
818 		spin_unlock(&fc->lock);
819 		fuse_invalidate_attr(inode);
820 		fuse_update_ctime(inode);
821 	} else if (err == -EINTR) {
822 		fuse_invalidate_attr(inode);
823 	}
824 	return err;
825 }
826 
827 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
828 			  struct kstat *stat)
829 {
830 	unsigned int blkbits;
831 	struct fuse_conn *fc = get_fuse_conn(inode);
832 
833 	/* see the comment in fuse_change_attributes() */
834 	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
835 		attr->size = i_size_read(inode);
836 		attr->mtime = inode->i_mtime.tv_sec;
837 		attr->mtimensec = inode->i_mtime.tv_nsec;
838 		attr->ctime = inode->i_ctime.tv_sec;
839 		attr->ctimensec = inode->i_ctime.tv_nsec;
840 	}
841 
842 	stat->dev = inode->i_sb->s_dev;
843 	stat->ino = attr->ino;
844 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
845 	stat->nlink = attr->nlink;
846 	stat->uid = make_kuid(&init_user_ns, attr->uid);
847 	stat->gid = make_kgid(&init_user_ns, attr->gid);
848 	stat->rdev = inode->i_rdev;
849 	stat->atime.tv_sec = attr->atime;
850 	stat->atime.tv_nsec = attr->atimensec;
851 	stat->mtime.tv_sec = attr->mtime;
852 	stat->mtime.tv_nsec = attr->mtimensec;
853 	stat->ctime.tv_sec = attr->ctime;
854 	stat->ctime.tv_nsec = attr->ctimensec;
855 	stat->size = attr->size;
856 	stat->blocks = attr->blocks;
857 
858 	if (attr->blksize != 0)
859 		blkbits = ilog2(attr->blksize);
860 	else
861 		blkbits = inode->i_sb->s_blocksize_bits;
862 
863 	stat->blksize = 1 << blkbits;
864 }
865 
866 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
867 			   struct file *file)
868 {
869 	int err;
870 	struct fuse_getattr_in inarg;
871 	struct fuse_attr_out outarg;
872 	struct fuse_conn *fc = get_fuse_conn(inode);
873 	FUSE_ARGS(args);
874 	u64 attr_version;
875 
876 	attr_version = fuse_get_attr_version(fc);
877 
878 	memset(&inarg, 0, sizeof(inarg));
879 	memset(&outarg, 0, sizeof(outarg));
880 	/* Directories have separate file-handle space */
881 	if (file && S_ISREG(inode->i_mode)) {
882 		struct fuse_file *ff = file->private_data;
883 
884 		inarg.getattr_flags |= FUSE_GETATTR_FH;
885 		inarg.fh = ff->fh;
886 	}
887 	args.in.h.opcode = FUSE_GETATTR;
888 	args.in.h.nodeid = get_node_id(inode);
889 	args.in.numargs = 1;
890 	args.in.args[0].size = sizeof(inarg);
891 	args.in.args[0].value = &inarg;
892 	args.out.numargs = 1;
893 	args.out.args[0].size = sizeof(outarg);
894 	args.out.args[0].value = &outarg;
895 	err = fuse_simple_request(fc, &args);
896 	if (!err) {
897 		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
898 			make_bad_inode(inode);
899 			err = -EIO;
900 		} else {
901 			fuse_change_attributes(inode, &outarg.attr,
902 					       attr_timeout(&outarg),
903 					       attr_version);
904 			if (stat)
905 				fuse_fillattr(inode, &outarg.attr, stat);
906 		}
907 	}
908 	return err;
909 }
910 
911 int fuse_update_attributes(struct inode *inode, struct kstat *stat,
912 			   struct file *file, bool *refreshed)
913 {
914 	struct fuse_inode *fi = get_fuse_inode(inode);
915 	int err;
916 	bool r;
917 
918 	if (time_before64(fi->i_time, get_jiffies_64())) {
919 		r = true;
920 		err = fuse_do_getattr(inode, stat, file);
921 	} else {
922 		r = false;
923 		err = 0;
924 		if (stat) {
925 			generic_fillattr(inode, stat);
926 			stat->mode = fi->orig_i_mode;
927 			stat->ino = fi->orig_ino;
928 		}
929 	}
930 
931 	if (refreshed != NULL)
932 		*refreshed = r;
933 
934 	return err;
935 }
936 
937 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
938 			     u64 child_nodeid, struct qstr *name)
939 {
940 	int err = -ENOTDIR;
941 	struct inode *parent;
942 	struct dentry *dir;
943 	struct dentry *entry;
944 
945 	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
946 	if (!parent)
947 		return -ENOENT;
948 
949 	inode_lock(parent);
950 	if (!S_ISDIR(parent->i_mode))
951 		goto unlock;
952 
953 	err = -ENOENT;
954 	dir = d_find_alias(parent);
955 	if (!dir)
956 		goto unlock;
957 
958 	entry = d_lookup(dir, name);
959 	dput(dir);
960 	if (!entry)
961 		goto unlock;
962 
963 	fuse_invalidate_attr(parent);
964 	fuse_invalidate_entry(entry);
965 
966 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
967 		inode_lock(d_inode(entry));
968 		if (get_node_id(d_inode(entry)) != child_nodeid) {
969 			err = -ENOENT;
970 			goto badentry;
971 		}
972 		if (d_mountpoint(entry)) {
973 			err = -EBUSY;
974 			goto badentry;
975 		}
976 		if (d_is_dir(entry)) {
977 			shrink_dcache_parent(entry);
978 			if (!simple_empty(entry)) {
979 				err = -ENOTEMPTY;
980 				goto badentry;
981 			}
982 			d_inode(entry)->i_flags |= S_DEAD;
983 		}
984 		dont_mount(entry);
985 		clear_nlink(d_inode(entry));
986 		err = 0;
987  badentry:
988 		inode_unlock(d_inode(entry));
989 		if (!err)
990 			d_delete(entry);
991 	} else {
992 		err = 0;
993 	}
994 	dput(entry);
995 
996  unlock:
997 	inode_unlock(parent);
998 	iput(parent);
999 	return err;
1000 }
1001 
1002 /*
1003  * Calling into a user-controlled filesystem gives the filesystem
1004  * daemon ptrace-like capabilities over the current process.  This
1005  * means, that the filesystem daemon is able to record the exact
1006  * filesystem operations performed, and can also control the behavior
1007  * of the requester process in otherwise impossible ways.  For example
1008  * it can delay the operation for arbitrary length of time allowing
1009  * DoS against the requester.
1010  *
1011  * For this reason only those processes can call into the filesystem,
1012  * for which the owner of the mount has ptrace privilege.  This
1013  * excludes processes started by other users, suid or sgid processes.
1014  */
1015 int fuse_allow_current_process(struct fuse_conn *fc)
1016 {
1017 	const struct cred *cred;
1018 
1019 	if (fc->flags & FUSE_ALLOW_OTHER)
1020 		return 1;
1021 
1022 	cred = current_cred();
1023 	if (uid_eq(cred->euid, fc->user_id) &&
1024 	    uid_eq(cred->suid, fc->user_id) &&
1025 	    uid_eq(cred->uid,  fc->user_id) &&
1026 	    gid_eq(cred->egid, fc->group_id) &&
1027 	    gid_eq(cred->sgid, fc->group_id) &&
1028 	    gid_eq(cred->gid,  fc->group_id))
1029 		return 1;
1030 
1031 	return 0;
1032 }
1033 
1034 static int fuse_access(struct inode *inode, int mask)
1035 {
1036 	struct fuse_conn *fc = get_fuse_conn(inode);
1037 	FUSE_ARGS(args);
1038 	struct fuse_access_in inarg;
1039 	int err;
1040 
1041 	BUG_ON(mask & MAY_NOT_BLOCK);
1042 
1043 	if (fc->no_access)
1044 		return 0;
1045 
1046 	memset(&inarg, 0, sizeof(inarg));
1047 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1048 	args.in.h.opcode = FUSE_ACCESS;
1049 	args.in.h.nodeid = get_node_id(inode);
1050 	args.in.numargs = 1;
1051 	args.in.args[0].size = sizeof(inarg);
1052 	args.in.args[0].value = &inarg;
1053 	err = fuse_simple_request(fc, &args);
1054 	if (err == -ENOSYS) {
1055 		fc->no_access = 1;
1056 		err = 0;
1057 	}
1058 	return err;
1059 }
1060 
1061 static int fuse_perm_getattr(struct inode *inode, int mask)
1062 {
1063 	if (mask & MAY_NOT_BLOCK)
1064 		return -ECHILD;
1065 
1066 	return fuse_do_getattr(inode, NULL, NULL);
1067 }
1068 
1069 /*
1070  * Check permission.  The two basic access models of FUSE are:
1071  *
1072  * 1) Local access checking ('default_permissions' mount option) based
1073  * on file mode.  This is the plain old disk filesystem permission
1074  * modell.
1075  *
1076  * 2) "Remote" access checking, where server is responsible for
1077  * checking permission in each inode operation.  An exception to this
1078  * is if ->permission() was invoked from sys_access() in which case an
1079  * access request is sent.  Execute permission is still checked
1080  * locally based on file mode.
1081  */
1082 static int fuse_permission(struct inode *inode, int mask)
1083 {
1084 	struct fuse_conn *fc = get_fuse_conn(inode);
1085 	bool refreshed = false;
1086 	int err = 0;
1087 
1088 	if (!fuse_allow_current_process(fc))
1089 		return -EACCES;
1090 
1091 	/*
1092 	 * If attributes are needed, refresh them before proceeding
1093 	 */
1094 	if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
1095 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1096 		struct fuse_inode *fi = get_fuse_inode(inode);
1097 
1098 		if (time_before64(fi->i_time, get_jiffies_64())) {
1099 			refreshed = true;
1100 
1101 			err = fuse_perm_getattr(inode, mask);
1102 			if (err)
1103 				return err;
1104 		}
1105 	}
1106 
1107 	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1108 		err = generic_permission(inode, mask);
1109 
1110 		/* If permission is denied, try to refresh file
1111 		   attributes.  This is also needed, because the root
1112 		   node will at first have no permissions */
1113 		if (err == -EACCES && !refreshed) {
1114 			err = fuse_perm_getattr(inode, mask);
1115 			if (!err)
1116 				err = generic_permission(inode, mask);
1117 		}
1118 
1119 		/* Note: the opposite of the above test does not
1120 		   exist.  So if permissions are revoked this won't be
1121 		   noticed immediately, only after the attribute
1122 		   timeout has expired */
1123 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1124 		err = fuse_access(inode, mask);
1125 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1126 		if (!(inode->i_mode & S_IXUGO)) {
1127 			if (refreshed)
1128 				return -EACCES;
1129 
1130 			err = fuse_perm_getattr(inode, mask);
1131 			if (!err && !(inode->i_mode & S_IXUGO))
1132 				return -EACCES;
1133 		}
1134 	}
1135 	return err;
1136 }
1137 
1138 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1139 			 struct dir_context *ctx)
1140 {
1141 	while (nbytes >= FUSE_NAME_OFFSET) {
1142 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1143 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
1144 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1145 			return -EIO;
1146 		if (reclen > nbytes)
1147 			break;
1148 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1149 			return -EIO;
1150 
1151 		if (!dir_emit(ctx, dirent->name, dirent->namelen,
1152 			       dirent->ino, dirent->type))
1153 			break;
1154 
1155 		buf += reclen;
1156 		nbytes -= reclen;
1157 		ctx->pos = dirent->off;
1158 	}
1159 
1160 	return 0;
1161 }
1162 
1163 static int fuse_direntplus_link(struct file *file,
1164 				struct fuse_direntplus *direntplus,
1165 				u64 attr_version)
1166 {
1167 	struct fuse_entry_out *o = &direntplus->entry_out;
1168 	struct fuse_dirent *dirent = &direntplus->dirent;
1169 	struct dentry *parent = file->f_path.dentry;
1170 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1171 	struct dentry *dentry;
1172 	struct dentry *alias;
1173 	struct inode *dir = d_inode(parent);
1174 	struct fuse_conn *fc;
1175 	struct inode *inode;
1176 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1177 
1178 	if (!o->nodeid) {
1179 		/*
1180 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1181 		 * ENOENT. Instead, it only means the userspace filesystem did
1182 		 * not want to return attributes/handle for this entry.
1183 		 *
1184 		 * So do nothing.
1185 		 */
1186 		return 0;
1187 	}
1188 
1189 	if (name.name[0] == '.') {
1190 		/*
1191 		 * We could potentially refresh the attributes of the directory
1192 		 * and its parent?
1193 		 */
1194 		if (name.len == 1)
1195 			return 0;
1196 		if (name.name[1] == '.' && name.len == 2)
1197 			return 0;
1198 	}
1199 
1200 	if (invalid_nodeid(o->nodeid))
1201 		return -EIO;
1202 	if (!fuse_valid_type(o->attr.mode))
1203 		return -EIO;
1204 
1205 	fc = get_fuse_conn(dir);
1206 
1207 	name.hash = full_name_hash(name.name, name.len);
1208 	dentry = d_lookup(parent, &name);
1209 	if (!dentry) {
1210 retry:
1211 		dentry = d_alloc_parallel(parent, &name, &wq);
1212 		if (IS_ERR(dentry))
1213 			return PTR_ERR(dentry);
1214 	}
1215 	if (!d_in_lookup(dentry)) {
1216 		struct fuse_inode *fi;
1217 		inode = d_inode(dentry);
1218 		if (!inode ||
1219 		    get_node_id(inode) != o->nodeid ||
1220 		    ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
1221 			d_invalidate(dentry);
1222 			dput(dentry);
1223 			goto retry;
1224 		}
1225 		if (is_bad_inode(inode)) {
1226 			dput(dentry);
1227 			return -EIO;
1228 		}
1229 
1230 		fi = get_fuse_inode(inode);
1231 		spin_lock(&fc->lock);
1232 		fi->nlookup++;
1233 		spin_unlock(&fc->lock);
1234 
1235 		fuse_change_attributes(inode, &o->attr,
1236 				       entry_attr_timeout(o),
1237 				       attr_version);
1238 		/*
1239 		 * The other branch comes via fuse_iget()
1240 		 * which bumps nlookup inside
1241 		 */
1242 	} else {
1243 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1244 				  &o->attr, entry_attr_timeout(o),
1245 				  attr_version);
1246 		if (!inode)
1247 			inode = ERR_PTR(-ENOMEM);
1248 
1249 		alias = d_splice_alias(inode, dentry);
1250 		d_lookup_done(dentry);
1251 		if (alias) {
1252 			dput(dentry);
1253 			dentry = alias;
1254 		}
1255 		if (IS_ERR(dentry))
1256 			return PTR_ERR(dentry);
1257 	}
1258 	if (fc->readdirplus_auto)
1259 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
1260 	fuse_change_entry_timeout(dentry, o);
1261 
1262 	dput(dentry);
1263 	return 0;
1264 }
1265 
1266 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1267 			     struct dir_context *ctx, u64 attr_version)
1268 {
1269 	struct fuse_direntplus *direntplus;
1270 	struct fuse_dirent *dirent;
1271 	size_t reclen;
1272 	int over = 0;
1273 	int ret;
1274 
1275 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1276 		direntplus = (struct fuse_direntplus *) buf;
1277 		dirent = &direntplus->dirent;
1278 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1279 
1280 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1281 			return -EIO;
1282 		if (reclen > nbytes)
1283 			break;
1284 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1285 			return -EIO;
1286 
1287 		if (!over) {
1288 			/* We fill entries into dstbuf only as much as
1289 			   it can hold. But we still continue iterating
1290 			   over remaining entries to link them. If not,
1291 			   we need to send a FORGET for each of those
1292 			   which we did not link.
1293 			*/
1294 			over = !dir_emit(ctx, dirent->name, dirent->namelen,
1295 				       dirent->ino, dirent->type);
1296 			ctx->pos = dirent->off;
1297 		}
1298 
1299 		buf += reclen;
1300 		nbytes -= reclen;
1301 
1302 		ret = fuse_direntplus_link(file, direntplus, attr_version);
1303 		if (ret)
1304 			fuse_force_forget(file, direntplus->entry_out.nodeid);
1305 	}
1306 
1307 	return 0;
1308 }
1309 
1310 static int fuse_readdir(struct file *file, struct dir_context *ctx)
1311 {
1312 	int plus, err;
1313 	size_t nbytes;
1314 	struct page *page;
1315 	struct inode *inode = file_inode(file);
1316 	struct fuse_conn *fc = get_fuse_conn(inode);
1317 	struct fuse_req *req;
1318 	u64 attr_version = 0;
1319 
1320 	if (is_bad_inode(inode))
1321 		return -EIO;
1322 
1323 	req = fuse_get_req(fc, 1);
1324 	if (IS_ERR(req))
1325 		return PTR_ERR(req);
1326 
1327 	page = alloc_page(GFP_KERNEL);
1328 	if (!page) {
1329 		fuse_put_request(fc, req);
1330 		return -ENOMEM;
1331 	}
1332 
1333 	plus = fuse_use_readdirplus(inode, ctx);
1334 	req->out.argpages = 1;
1335 	req->num_pages = 1;
1336 	req->pages[0] = page;
1337 	req->page_descs[0].length = PAGE_SIZE;
1338 	if (plus) {
1339 		attr_version = fuse_get_attr_version(fc);
1340 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1341 			       FUSE_READDIRPLUS);
1342 	} else {
1343 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1344 			       FUSE_READDIR);
1345 	}
1346 	fuse_lock_inode(inode);
1347 	fuse_request_send(fc, req);
1348 	fuse_unlock_inode(inode);
1349 	nbytes = req->out.args[0].size;
1350 	err = req->out.h.error;
1351 	fuse_put_request(fc, req);
1352 	if (!err) {
1353 		if (plus) {
1354 			err = parse_dirplusfile(page_address(page), nbytes,
1355 						file, ctx,
1356 						attr_version);
1357 		} else {
1358 			err = parse_dirfile(page_address(page), nbytes, file,
1359 					    ctx);
1360 		}
1361 	}
1362 
1363 	__free_page(page);
1364 	fuse_invalidate_atime(inode);
1365 	return err;
1366 }
1367 
1368 static const char *fuse_get_link(struct dentry *dentry,
1369 				 struct inode *inode,
1370 				 struct delayed_call *done)
1371 {
1372 	struct fuse_conn *fc = get_fuse_conn(inode);
1373 	FUSE_ARGS(args);
1374 	char *link;
1375 	ssize_t ret;
1376 
1377 	if (!dentry)
1378 		return ERR_PTR(-ECHILD);
1379 
1380 	link = kmalloc(PAGE_SIZE, GFP_KERNEL);
1381 	if (!link)
1382 		return ERR_PTR(-ENOMEM);
1383 
1384 	args.in.h.opcode = FUSE_READLINK;
1385 	args.in.h.nodeid = get_node_id(inode);
1386 	args.out.argvar = 1;
1387 	args.out.numargs = 1;
1388 	args.out.args[0].size = PAGE_SIZE - 1;
1389 	args.out.args[0].value = link;
1390 	ret = fuse_simple_request(fc, &args);
1391 	if (ret < 0) {
1392 		kfree(link);
1393 		link = ERR_PTR(ret);
1394 	} else {
1395 		link[ret] = '\0';
1396 		set_delayed_call(done, kfree_link, link);
1397 	}
1398 	fuse_invalidate_atime(inode);
1399 	return link;
1400 }
1401 
1402 static int fuse_dir_open(struct inode *inode, struct file *file)
1403 {
1404 	return fuse_open_common(inode, file, true);
1405 }
1406 
1407 static int fuse_dir_release(struct inode *inode, struct file *file)
1408 {
1409 	fuse_release_common(file, FUSE_RELEASEDIR);
1410 
1411 	return 0;
1412 }
1413 
1414 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1415 			  int datasync)
1416 {
1417 	return fuse_fsync_common(file, start, end, datasync, 1);
1418 }
1419 
1420 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1421 			    unsigned long arg)
1422 {
1423 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1424 
1425 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1426 	if (fc->minor < 18)
1427 		return -ENOTTY;
1428 
1429 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1430 }
1431 
1432 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1433 				   unsigned long arg)
1434 {
1435 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1436 
1437 	if (fc->minor < 18)
1438 		return -ENOTTY;
1439 
1440 	return fuse_ioctl_common(file, cmd, arg,
1441 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1442 }
1443 
1444 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1445 {
1446 	/* Always update if mtime is explicitly set  */
1447 	if (ivalid & ATTR_MTIME_SET)
1448 		return true;
1449 
1450 	/* Or if kernel i_mtime is the official one */
1451 	if (trust_local_mtime)
1452 		return true;
1453 
1454 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1455 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1456 		return false;
1457 
1458 	/* In all other cases update */
1459 	return true;
1460 }
1461 
1462 static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1463 			   bool trust_local_cmtime)
1464 {
1465 	unsigned ivalid = iattr->ia_valid;
1466 
1467 	if (ivalid & ATTR_MODE)
1468 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1469 	if (ivalid & ATTR_UID)
1470 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1471 	if (ivalid & ATTR_GID)
1472 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1473 	if (ivalid & ATTR_SIZE)
1474 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1475 	if (ivalid & ATTR_ATIME) {
1476 		arg->valid |= FATTR_ATIME;
1477 		arg->atime = iattr->ia_atime.tv_sec;
1478 		arg->atimensec = iattr->ia_atime.tv_nsec;
1479 		if (!(ivalid & ATTR_ATIME_SET))
1480 			arg->valid |= FATTR_ATIME_NOW;
1481 	}
1482 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1483 		arg->valid |= FATTR_MTIME;
1484 		arg->mtime = iattr->ia_mtime.tv_sec;
1485 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1486 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1487 			arg->valid |= FATTR_MTIME_NOW;
1488 	}
1489 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1490 		arg->valid |= FATTR_CTIME;
1491 		arg->ctime = iattr->ia_ctime.tv_sec;
1492 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1493 	}
1494 }
1495 
1496 /*
1497  * Prevent concurrent writepages on inode
1498  *
1499  * This is done by adding a negative bias to the inode write counter
1500  * and waiting for all pending writes to finish.
1501  */
1502 void fuse_set_nowrite(struct inode *inode)
1503 {
1504 	struct fuse_conn *fc = get_fuse_conn(inode);
1505 	struct fuse_inode *fi = get_fuse_inode(inode);
1506 
1507 	BUG_ON(!inode_is_locked(inode));
1508 
1509 	spin_lock(&fc->lock);
1510 	BUG_ON(fi->writectr < 0);
1511 	fi->writectr += FUSE_NOWRITE;
1512 	spin_unlock(&fc->lock);
1513 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1514 }
1515 
1516 /*
1517  * Allow writepages on inode
1518  *
1519  * Remove the bias from the writecounter and send any queued
1520  * writepages.
1521  */
1522 static void __fuse_release_nowrite(struct inode *inode)
1523 {
1524 	struct fuse_inode *fi = get_fuse_inode(inode);
1525 
1526 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1527 	fi->writectr = 0;
1528 	fuse_flush_writepages(inode);
1529 }
1530 
1531 void fuse_release_nowrite(struct inode *inode)
1532 {
1533 	struct fuse_conn *fc = get_fuse_conn(inode);
1534 
1535 	spin_lock(&fc->lock);
1536 	__fuse_release_nowrite(inode);
1537 	spin_unlock(&fc->lock);
1538 }
1539 
1540 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1541 			      struct inode *inode,
1542 			      struct fuse_setattr_in *inarg_p,
1543 			      struct fuse_attr_out *outarg_p)
1544 {
1545 	args->in.h.opcode = FUSE_SETATTR;
1546 	args->in.h.nodeid = get_node_id(inode);
1547 	args->in.numargs = 1;
1548 	args->in.args[0].size = sizeof(*inarg_p);
1549 	args->in.args[0].value = inarg_p;
1550 	args->out.numargs = 1;
1551 	args->out.args[0].size = sizeof(*outarg_p);
1552 	args->out.args[0].value = outarg_p;
1553 }
1554 
1555 /*
1556  * Flush inode->i_mtime to the server
1557  */
1558 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1559 {
1560 	struct fuse_conn *fc = get_fuse_conn(inode);
1561 	FUSE_ARGS(args);
1562 	struct fuse_setattr_in inarg;
1563 	struct fuse_attr_out outarg;
1564 
1565 	memset(&inarg, 0, sizeof(inarg));
1566 	memset(&outarg, 0, sizeof(outarg));
1567 
1568 	inarg.valid = FATTR_MTIME;
1569 	inarg.mtime = inode->i_mtime.tv_sec;
1570 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1571 	if (fc->minor >= 23) {
1572 		inarg.valid |= FATTR_CTIME;
1573 		inarg.ctime = inode->i_ctime.tv_sec;
1574 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1575 	}
1576 	if (ff) {
1577 		inarg.valid |= FATTR_FH;
1578 		inarg.fh = ff->fh;
1579 	}
1580 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1581 
1582 	return fuse_simple_request(fc, &args);
1583 }
1584 
1585 /*
1586  * Set attributes, and at the same time refresh them.
1587  *
1588  * Truncation is slightly complicated, because the 'truncate' request
1589  * may fail, in which case we don't want to touch the mapping.
1590  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1591  * and the actual truncation by hand.
1592  */
1593 int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1594 		    struct file *file)
1595 {
1596 	struct fuse_conn *fc = get_fuse_conn(inode);
1597 	struct fuse_inode *fi = get_fuse_inode(inode);
1598 	FUSE_ARGS(args);
1599 	struct fuse_setattr_in inarg;
1600 	struct fuse_attr_out outarg;
1601 	bool is_truncate = false;
1602 	bool is_wb = fc->writeback_cache;
1603 	loff_t oldsize;
1604 	int err;
1605 	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1606 
1607 	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
1608 		attr->ia_valid |= ATTR_FORCE;
1609 
1610 	err = inode_change_ok(inode, attr);
1611 	if (err)
1612 		return err;
1613 
1614 	if (attr->ia_valid & ATTR_OPEN) {
1615 		if (fc->atomic_o_trunc)
1616 			return 0;
1617 		file = NULL;
1618 	}
1619 
1620 	if (attr->ia_valid & ATTR_SIZE)
1621 		is_truncate = true;
1622 
1623 	if (is_truncate) {
1624 		fuse_set_nowrite(inode);
1625 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1626 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1627 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1628 	}
1629 
1630 	memset(&inarg, 0, sizeof(inarg));
1631 	memset(&outarg, 0, sizeof(outarg));
1632 	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
1633 	if (file) {
1634 		struct fuse_file *ff = file->private_data;
1635 		inarg.valid |= FATTR_FH;
1636 		inarg.fh = ff->fh;
1637 	}
1638 	if (attr->ia_valid & ATTR_SIZE) {
1639 		/* For mandatory locking in truncate */
1640 		inarg.valid |= FATTR_LOCKOWNER;
1641 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1642 	}
1643 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1644 	err = fuse_simple_request(fc, &args);
1645 	if (err) {
1646 		if (err == -EINTR)
1647 			fuse_invalidate_attr(inode);
1648 		goto error;
1649 	}
1650 
1651 	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1652 		make_bad_inode(inode);
1653 		err = -EIO;
1654 		goto error;
1655 	}
1656 
1657 	spin_lock(&fc->lock);
1658 	/* the kernel maintains i_mtime locally */
1659 	if (trust_local_cmtime) {
1660 		if (attr->ia_valid & ATTR_MTIME)
1661 			inode->i_mtime = attr->ia_mtime;
1662 		if (attr->ia_valid & ATTR_CTIME)
1663 			inode->i_ctime = attr->ia_ctime;
1664 		/* FIXME: clear I_DIRTY_SYNC? */
1665 	}
1666 
1667 	fuse_change_attributes_common(inode, &outarg.attr,
1668 				      attr_timeout(&outarg));
1669 	oldsize = inode->i_size;
1670 	/* see the comment in fuse_change_attributes() */
1671 	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1672 		i_size_write(inode, outarg.attr.size);
1673 
1674 	if (is_truncate) {
1675 		/* NOTE: this may release/reacquire fc->lock */
1676 		__fuse_release_nowrite(inode);
1677 	}
1678 	spin_unlock(&fc->lock);
1679 
1680 	/*
1681 	 * Only call invalidate_inode_pages2() after removing
1682 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1683 	 */
1684 	if ((is_truncate || !is_wb) &&
1685 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1686 		truncate_pagecache(inode, outarg.attr.size);
1687 		invalidate_inode_pages2(inode->i_mapping);
1688 	}
1689 
1690 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1691 	return 0;
1692 
1693 error:
1694 	if (is_truncate)
1695 		fuse_release_nowrite(inode);
1696 
1697 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1698 	return err;
1699 }
1700 
1701 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1702 {
1703 	struct inode *inode = d_inode(entry);
1704 
1705 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1706 		return -EACCES;
1707 
1708 	if (attr->ia_valid & ATTR_FILE)
1709 		return fuse_do_setattr(inode, attr, attr->ia_file);
1710 	else
1711 		return fuse_do_setattr(inode, attr, NULL);
1712 }
1713 
1714 static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1715 			struct kstat *stat)
1716 {
1717 	struct inode *inode = d_inode(entry);
1718 	struct fuse_conn *fc = get_fuse_conn(inode);
1719 
1720 	if (!fuse_allow_current_process(fc))
1721 		return -EACCES;
1722 
1723 	return fuse_update_attributes(inode, stat, NULL, NULL);
1724 }
1725 
1726 static int fuse_setxattr(struct dentry *unused, struct inode *inode,
1727 			 const char *name, const void *value,
1728 			 size_t size, int flags)
1729 {
1730 	struct fuse_conn *fc = get_fuse_conn(inode);
1731 	FUSE_ARGS(args);
1732 	struct fuse_setxattr_in inarg;
1733 	int err;
1734 
1735 	if (fc->no_setxattr)
1736 		return -EOPNOTSUPP;
1737 
1738 	memset(&inarg, 0, sizeof(inarg));
1739 	inarg.size = size;
1740 	inarg.flags = flags;
1741 	args.in.h.opcode = FUSE_SETXATTR;
1742 	args.in.h.nodeid = get_node_id(inode);
1743 	args.in.numargs = 3;
1744 	args.in.args[0].size = sizeof(inarg);
1745 	args.in.args[0].value = &inarg;
1746 	args.in.args[1].size = strlen(name) + 1;
1747 	args.in.args[1].value = name;
1748 	args.in.args[2].size = size;
1749 	args.in.args[2].value = value;
1750 	err = fuse_simple_request(fc, &args);
1751 	if (err == -ENOSYS) {
1752 		fc->no_setxattr = 1;
1753 		err = -EOPNOTSUPP;
1754 	}
1755 	if (!err) {
1756 		fuse_invalidate_attr(inode);
1757 		fuse_update_ctime(inode);
1758 	}
1759 	return err;
1760 }
1761 
1762 static ssize_t fuse_getxattr(struct dentry *entry, struct inode *inode,
1763 			     const char *name, void *value, size_t size)
1764 {
1765 	struct fuse_conn *fc = get_fuse_conn(inode);
1766 	FUSE_ARGS(args);
1767 	struct fuse_getxattr_in inarg;
1768 	struct fuse_getxattr_out outarg;
1769 	ssize_t ret;
1770 
1771 	if (fc->no_getxattr)
1772 		return -EOPNOTSUPP;
1773 
1774 	memset(&inarg, 0, sizeof(inarg));
1775 	inarg.size = size;
1776 	args.in.h.opcode = FUSE_GETXATTR;
1777 	args.in.h.nodeid = get_node_id(inode);
1778 	args.in.numargs = 2;
1779 	args.in.args[0].size = sizeof(inarg);
1780 	args.in.args[0].value = &inarg;
1781 	args.in.args[1].size = strlen(name) + 1;
1782 	args.in.args[1].value = name;
1783 	/* This is really two different operations rolled into one */
1784 	args.out.numargs = 1;
1785 	if (size) {
1786 		args.out.argvar = 1;
1787 		args.out.args[0].size = size;
1788 		args.out.args[0].value = value;
1789 	} else {
1790 		args.out.args[0].size = sizeof(outarg);
1791 		args.out.args[0].value = &outarg;
1792 	}
1793 	ret = fuse_simple_request(fc, &args);
1794 	if (!ret && !size)
1795 		ret = outarg.size;
1796 	if (ret == -ENOSYS) {
1797 		fc->no_getxattr = 1;
1798 		ret = -EOPNOTSUPP;
1799 	}
1800 	return ret;
1801 }
1802 
1803 static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1804 {
1805 	struct inode *inode = d_inode(entry);
1806 	struct fuse_conn *fc = get_fuse_conn(inode);
1807 	FUSE_ARGS(args);
1808 	struct fuse_getxattr_in inarg;
1809 	struct fuse_getxattr_out outarg;
1810 	ssize_t ret;
1811 
1812 	if (!fuse_allow_current_process(fc))
1813 		return -EACCES;
1814 
1815 	if (fc->no_listxattr)
1816 		return -EOPNOTSUPP;
1817 
1818 	memset(&inarg, 0, sizeof(inarg));
1819 	inarg.size = size;
1820 	args.in.h.opcode = FUSE_LISTXATTR;
1821 	args.in.h.nodeid = get_node_id(inode);
1822 	args.in.numargs = 1;
1823 	args.in.args[0].size = sizeof(inarg);
1824 	args.in.args[0].value = &inarg;
1825 	/* This is really two different operations rolled into one */
1826 	args.out.numargs = 1;
1827 	if (size) {
1828 		args.out.argvar = 1;
1829 		args.out.args[0].size = size;
1830 		args.out.args[0].value = list;
1831 	} else {
1832 		args.out.args[0].size = sizeof(outarg);
1833 		args.out.args[0].value = &outarg;
1834 	}
1835 	ret = fuse_simple_request(fc, &args);
1836 	if (!ret && !size)
1837 		ret = outarg.size;
1838 	if (ret == -ENOSYS) {
1839 		fc->no_listxattr = 1;
1840 		ret = -EOPNOTSUPP;
1841 	}
1842 	return ret;
1843 }
1844 
1845 static int fuse_removexattr(struct dentry *entry, const char *name)
1846 {
1847 	struct inode *inode = d_inode(entry);
1848 	struct fuse_conn *fc = get_fuse_conn(inode);
1849 	FUSE_ARGS(args);
1850 	int err;
1851 
1852 	if (fc->no_removexattr)
1853 		return -EOPNOTSUPP;
1854 
1855 	args.in.h.opcode = FUSE_REMOVEXATTR;
1856 	args.in.h.nodeid = get_node_id(inode);
1857 	args.in.numargs = 1;
1858 	args.in.args[0].size = strlen(name) + 1;
1859 	args.in.args[0].value = name;
1860 	err = fuse_simple_request(fc, &args);
1861 	if (err == -ENOSYS) {
1862 		fc->no_removexattr = 1;
1863 		err = -EOPNOTSUPP;
1864 	}
1865 	if (!err) {
1866 		fuse_invalidate_attr(inode);
1867 		fuse_update_ctime(inode);
1868 	}
1869 	return err;
1870 }
1871 
1872 static const struct inode_operations fuse_dir_inode_operations = {
1873 	.lookup		= fuse_lookup,
1874 	.mkdir		= fuse_mkdir,
1875 	.symlink	= fuse_symlink,
1876 	.unlink		= fuse_unlink,
1877 	.rmdir		= fuse_rmdir,
1878 	.rename2	= fuse_rename2,
1879 	.link		= fuse_link,
1880 	.setattr	= fuse_setattr,
1881 	.create		= fuse_create,
1882 	.atomic_open	= fuse_atomic_open,
1883 	.mknod		= fuse_mknod,
1884 	.permission	= fuse_permission,
1885 	.getattr	= fuse_getattr,
1886 	.setxattr	= fuse_setxattr,
1887 	.getxattr	= fuse_getxattr,
1888 	.listxattr	= fuse_listxattr,
1889 	.removexattr	= fuse_removexattr,
1890 };
1891 
1892 static const struct file_operations fuse_dir_operations = {
1893 	.llseek		= generic_file_llseek,
1894 	.read		= generic_read_dir,
1895 	.iterate_shared	= fuse_readdir,
1896 	.open		= fuse_dir_open,
1897 	.release	= fuse_dir_release,
1898 	.fsync		= fuse_dir_fsync,
1899 	.unlocked_ioctl	= fuse_dir_ioctl,
1900 	.compat_ioctl	= fuse_dir_compat_ioctl,
1901 };
1902 
1903 static const struct inode_operations fuse_common_inode_operations = {
1904 	.setattr	= fuse_setattr,
1905 	.permission	= fuse_permission,
1906 	.getattr	= fuse_getattr,
1907 	.setxattr	= fuse_setxattr,
1908 	.getxattr	= fuse_getxattr,
1909 	.listxattr	= fuse_listxattr,
1910 	.removexattr	= fuse_removexattr,
1911 };
1912 
1913 static const struct inode_operations fuse_symlink_inode_operations = {
1914 	.setattr	= fuse_setattr,
1915 	.get_link	= fuse_get_link,
1916 	.readlink	= generic_readlink,
1917 	.getattr	= fuse_getattr,
1918 	.setxattr	= fuse_setxattr,
1919 	.getxattr	= fuse_getxattr,
1920 	.listxattr	= fuse_listxattr,
1921 	.removexattr	= fuse_removexattr,
1922 };
1923 
1924 void fuse_init_common(struct inode *inode)
1925 {
1926 	inode->i_op = &fuse_common_inode_operations;
1927 }
1928 
1929 void fuse_init_dir(struct inode *inode)
1930 {
1931 	inode->i_op = &fuse_dir_inode_operations;
1932 	inode->i_fop = &fuse_dir_operations;
1933 }
1934 
1935 void fuse_init_symlink(struct inode *inode)
1936 {
1937 	inode->i_op = &fuse_symlink_inode_operations;
1938 }
1939