xref: /openbmc/linux/fs/fuse/dir.c (revision 3d3337de)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/sched.h>
14 #include <linux/namei.h>
15 #include <linux/slab.h>
16 
17 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
18 {
19 	struct fuse_conn *fc = get_fuse_conn(dir);
20 	struct fuse_inode *fi = get_fuse_inode(dir);
21 
22 	if (!fc->do_readdirplus)
23 		return false;
24 	if (!fc->readdirplus_auto)
25 		return true;
26 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27 		return true;
28 	if (ctx->pos == 0)
29 		return true;
30 	return false;
31 }
32 
33 static void fuse_advise_use_readdirplus(struct inode *dir)
34 {
35 	struct fuse_inode *fi = get_fuse_inode(dir);
36 
37 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38 }
39 
40 #if BITS_PER_LONG >= 64
41 static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
42 {
43 	entry->d_time = time;
44 }
45 
46 static inline u64 fuse_dentry_time(struct dentry *entry)
47 {
48 	return entry->d_time;
49 }
50 #else
51 /*
52  * On 32 bit archs store the high 32 bits of time in d_fsdata
53  */
54 static void fuse_dentry_settime(struct dentry *entry, u64 time)
55 {
56 	entry->d_time = time;
57 	entry->d_fsdata = (void *) (unsigned long) (time >> 32);
58 }
59 
60 static u64 fuse_dentry_time(struct dentry *entry)
61 {
62 	return (u64) entry->d_time +
63 		((u64) (unsigned long) entry->d_fsdata << 32);
64 }
65 #endif
66 
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until which the dentry/attributes are valid is
 * stored in dentry->d_time and fuse_inode->i_time respectively.
 */
72 
73 /*
74  * Calculate the time in jiffies until a dentry/attributes are valid
75  */
76 static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
77 {
78 	if (sec || nsec) {
79 		struct timespec ts = {sec, nsec};
80 		return get_jiffies_64() + timespec_to_jiffies(&ts);
81 	} else
82 		return 0;
83 }
84 
85 /*
86  * Set dentry and possibly attribute timeouts from the lookup/mk*
87  * replies
88  */
89 static void fuse_change_entry_timeout(struct dentry *entry,
90 				      struct fuse_entry_out *o)
91 {
92 	fuse_dentry_settime(entry,
93 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
94 }
95 
96 static u64 attr_timeout(struct fuse_attr_out *o)
97 {
98 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
99 }
100 
101 static u64 entry_attr_timeout(struct fuse_entry_out *o)
102 {
103 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
104 }
105 
106 /*
107  * Mark the attributes as stale, so that at the next call to
108  * ->getattr() they will be fetched from userspace
109  */
110 void fuse_invalidate_attr(struct inode *inode)
111 {
112 	get_fuse_inode(inode)->i_time = 0;
113 }
114 
/*
 * Mark the attributes as stale due to an atime change.  The
 * invalidation is skipped when IS_RDONLY() is true for the inode.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
124 
125 /*
126  * Just mark the entry as stale, so that a next attempt to look it up
127  * will result in a new lookup call to userspace
128  *
129  * This is called when a dentry is about to become negative and the
130  * timeout is unknown (unlink, rmdir, rename and in some cases
131  * lookup)
132  */
133 void fuse_invalidate_entry_cache(struct dentry *entry)
134 {
135 	fuse_dentry_settime(entry, 0);
136 }
137 
/*
 * Invalidate the dentry through d_invalidate() and then mark its
 * cached timeout as expired, as fuse_invalidate_entry_cache() does.
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	/* same effect as fuse_invalidate_entry_cache(entry) */
	fuse_dentry_settime(entry, 0);
}
147 
148 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
149 			     u64 nodeid, struct qstr *name,
150 			     struct fuse_entry_out *outarg)
151 {
152 	memset(outarg, 0, sizeof(struct fuse_entry_out));
153 	args->in.h.opcode = FUSE_LOOKUP;
154 	args->in.h.nodeid = nodeid;
155 	args->in.numargs = 1;
156 	args->in.args[0].size = name->len + 1;
157 	args->in.args[0].value = name->name;
158 	args->out.numargs = 1;
159 	args->out.args[0].size = sizeof(struct fuse_entry_out);
160 	args->out.args[0].value = outarg;
161 }
162 
163 u64 fuse_get_attr_version(struct fuse_conn *fc)
164 {
165 	u64 curr_version;
166 
167 	/*
168 	 * The spin lock isn't actually needed on 64bit archs, but we
169 	 * don't yet care too much about such optimizations.
170 	 */
171 	spin_lock(&fc->lock);
172 	curr_version = fc->attr_version;
173 	spin_unlock(&fc->lock);
174 
175 	return curr_version;
176 }
177 
178 /*
179  * Check whether the dentry is still valid
180  *
181  * If the entry validity timeout has expired and the dentry is
182  * positive, try to redo the lookup.  If the lookup results in a
183  * different inode, then let the VFS invalidate the dentry and redo
184  * the lookup once more.  If the lookup results in the same inode,
185  * then refresh the attributes, timeouts and mark the dentry valid.
186  */
187 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
188 {
189 	struct inode *inode;
190 	struct dentry *parent;
191 	struct fuse_conn *fc;
192 	struct fuse_inode *fi;
193 	int ret;
194 
195 	inode = d_inode_rcu(entry);
196 	if (inode && is_bad_inode(inode))
197 		goto invalid;
198 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
199 		 (flags & LOOKUP_REVAL)) {
200 		struct fuse_entry_out outarg;
201 		FUSE_ARGS(args);
202 		struct fuse_forget_link *forget;
203 		u64 attr_version;
204 
205 		/* For negative dentries, always do a fresh lookup */
206 		if (!inode)
207 			goto invalid;
208 
209 		ret = -ECHILD;
210 		if (flags & LOOKUP_RCU)
211 			goto out;
212 
213 		fc = get_fuse_conn(inode);
214 
215 		forget = fuse_alloc_forget();
216 		ret = -ENOMEM;
217 		if (!forget)
218 			goto out;
219 
220 		attr_version = fuse_get_attr_version(fc);
221 
222 		parent = dget_parent(entry);
223 		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
224 				 &entry->d_name, &outarg);
225 		ret = fuse_simple_request(fc, &args);
226 		dput(parent);
227 		/* Zero nodeid is same as -ENOENT */
228 		if (!ret && !outarg.nodeid)
229 			ret = -ENOENT;
230 		if (!ret) {
231 			fi = get_fuse_inode(inode);
232 			if (outarg.nodeid != get_node_id(inode)) {
233 				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
234 				goto invalid;
235 			}
236 			spin_lock(&fc->lock);
237 			fi->nlookup++;
238 			spin_unlock(&fc->lock);
239 		}
240 		kfree(forget);
241 		if (ret == -ENOMEM)
242 			goto out;
243 		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
244 			goto invalid;
245 
246 		fuse_change_attributes(inode, &outarg.attr,
247 				       entry_attr_timeout(&outarg),
248 				       attr_version);
249 		fuse_change_entry_timeout(entry, &outarg);
250 	} else if (inode) {
251 		fi = get_fuse_inode(inode);
252 		if (flags & LOOKUP_RCU) {
253 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
254 				return -ECHILD;
255 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
256 			parent = dget_parent(entry);
257 			fuse_advise_use_readdirplus(d_inode(parent));
258 			dput(parent);
259 		}
260 	}
261 	ret = 1;
262 out:
263 	return ret;
264 
265 invalid:
266 	ret = 0;
267 	goto out;
268 }
269 
270 static int invalid_nodeid(u64 nodeid)
271 {
272 	return !nodeid || nodeid == FUSE_ROOT_ID;
273 }
274 
275 const struct dentry_operations fuse_dentry_operations = {
276 	.d_revalidate	= fuse_dentry_revalidate,
277 };
278 
/*
 * Return 1 if the file-type bits of mode @m denote a regular file,
 * directory, symlink, character device, block device, FIFO or socket;
 * 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;
	if (S_ISFIFO(m) || S_ISSOCK(m))
		return 1;

	return 0;
}
284 
285 int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
286 		     struct fuse_entry_out *outarg, struct inode **inode)
287 {
288 	struct fuse_conn *fc = get_fuse_conn_super(sb);
289 	FUSE_ARGS(args);
290 	struct fuse_forget_link *forget;
291 	u64 attr_version;
292 	int err;
293 
294 	*inode = NULL;
295 	err = -ENAMETOOLONG;
296 	if (name->len > FUSE_NAME_MAX)
297 		goto out;
298 
299 
300 	forget = fuse_alloc_forget();
301 	err = -ENOMEM;
302 	if (!forget)
303 		goto out;
304 
305 	attr_version = fuse_get_attr_version(fc);
306 
307 	fuse_lookup_init(fc, &args, nodeid, name, outarg);
308 	err = fuse_simple_request(fc, &args);
309 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
310 	if (err || !outarg->nodeid)
311 		goto out_put_forget;
312 
313 	err = -EIO;
314 	if (!outarg->nodeid)
315 		goto out_put_forget;
316 	if (!fuse_valid_type(outarg->attr.mode))
317 		goto out_put_forget;
318 
319 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
320 			   &outarg->attr, entry_attr_timeout(outarg),
321 			   attr_version);
322 	err = -ENOMEM;
323 	if (!*inode) {
324 		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
325 		goto out;
326 	}
327 	err = 0;
328 
329  out_put_forget:
330 	kfree(forget);
331  out:
332 	return err;
333 }
334 
335 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
336 				  unsigned int flags)
337 {
338 	int err;
339 	struct fuse_entry_out outarg;
340 	struct inode *inode;
341 	struct dentry *newent;
342 	bool outarg_valid = true;
343 
344 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
345 			       &outarg, &inode);
346 	if (err == -ENOENT) {
347 		outarg_valid = false;
348 		err = 0;
349 	}
350 	if (err)
351 		goto out_err;
352 
353 	err = -EIO;
354 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
355 		goto out_iput;
356 
357 	newent = d_splice_alias(inode, entry);
358 	err = PTR_ERR(newent);
359 	if (IS_ERR(newent))
360 		goto out_err;
361 
362 	entry = newent ? newent : entry;
363 	if (outarg_valid)
364 		fuse_change_entry_timeout(entry, &outarg);
365 	else
366 		fuse_invalidate_entry_cache(entry);
367 
368 	fuse_advise_use_readdirplus(dir);
369 	return newent;
370 
371  out_iput:
372 	iput(inode);
373  out_err:
374 	return ERR_PTR(err);
375 }
376 
377 /*
378  * Atomic create+open operation
379  *
380  * If the filesystem doesn't support this, then fall back to separate
381  * 'mknod' + 'open' requests.
382  */
383 static int fuse_create_open(struct inode *dir, struct dentry *entry,
384 			    struct file *file, unsigned flags,
385 			    umode_t mode, int *opened)
386 {
387 	int err;
388 	struct inode *inode;
389 	struct fuse_conn *fc = get_fuse_conn(dir);
390 	FUSE_ARGS(args);
391 	struct fuse_forget_link *forget;
392 	struct fuse_create_in inarg;
393 	struct fuse_open_out outopen;
394 	struct fuse_entry_out outentry;
395 	struct fuse_file *ff;
396 
397 	/* Userspace expects S_IFREG in create mode */
398 	BUG_ON((mode & S_IFMT) != S_IFREG);
399 
400 	forget = fuse_alloc_forget();
401 	err = -ENOMEM;
402 	if (!forget)
403 		goto out_err;
404 
405 	err = -ENOMEM;
406 	ff = fuse_file_alloc(fc);
407 	if (!ff)
408 		goto out_put_forget_req;
409 
410 	if (!fc->dont_mask)
411 		mode &= ~current_umask();
412 
413 	flags &= ~O_NOCTTY;
414 	memset(&inarg, 0, sizeof(inarg));
415 	memset(&outentry, 0, sizeof(outentry));
416 	inarg.flags = flags;
417 	inarg.mode = mode;
418 	inarg.umask = current_umask();
419 	args.in.h.opcode = FUSE_CREATE;
420 	args.in.h.nodeid = get_node_id(dir);
421 	args.in.numargs = 2;
422 	args.in.args[0].size = sizeof(inarg);
423 	args.in.args[0].value = &inarg;
424 	args.in.args[1].size = entry->d_name.len + 1;
425 	args.in.args[1].value = entry->d_name.name;
426 	args.out.numargs = 2;
427 	args.out.args[0].size = sizeof(outentry);
428 	args.out.args[0].value = &outentry;
429 	args.out.args[1].size = sizeof(outopen);
430 	args.out.args[1].value = &outopen;
431 	err = fuse_simple_request(fc, &args);
432 	if (err)
433 		goto out_free_ff;
434 
435 	err = -EIO;
436 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
437 		goto out_free_ff;
438 
439 	ff->fh = outopen.fh;
440 	ff->nodeid = outentry.nodeid;
441 	ff->open_flags = outopen.open_flags;
442 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
443 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
444 	if (!inode) {
445 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
446 		fuse_sync_release(ff, flags);
447 		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
448 		err = -ENOMEM;
449 		goto out_err;
450 	}
451 	kfree(forget);
452 	d_instantiate(entry, inode);
453 	fuse_change_entry_timeout(entry, &outentry);
454 	fuse_invalidate_attr(dir);
455 	err = finish_open(file, entry, generic_file_open, opened);
456 	if (err) {
457 		fuse_sync_release(ff, flags);
458 	} else {
459 		file->private_data = fuse_file_get(ff);
460 		fuse_finish_open(inode, file);
461 	}
462 	return err;
463 
464 out_free_ff:
465 	fuse_file_free(ff);
466 out_put_forget_req:
467 	kfree(forget);
468 out_err:
469 	return err;
470 }
471 
472 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
473 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
474 			    struct file *file, unsigned flags,
475 			    umode_t mode, int *opened)
476 {
477 	int err;
478 	struct fuse_conn *fc = get_fuse_conn(dir);
479 	struct dentry *res = NULL;
480 
481 	if (d_unhashed(entry)) {
482 		res = fuse_lookup(dir, entry, 0);
483 		if (IS_ERR(res))
484 			return PTR_ERR(res);
485 
486 		if (res)
487 			entry = res;
488 	}
489 
490 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
491 		goto no_open;
492 
493 	/* Only creates */
494 	*opened |= FILE_CREATED;
495 
496 	if (fc->no_create)
497 		goto mknod;
498 
499 	err = fuse_create_open(dir, entry, file, flags, mode, opened);
500 	if (err == -ENOSYS) {
501 		fc->no_create = 1;
502 		goto mknod;
503 	}
504 out_dput:
505 	dput(res);
506 	return err;
507 
508 mknod:
509 	err = fuse_mknod(dir, entry, mode, 0);
510 	if (err)
511 		goto out_dput;
512 no_open:
513 	return finish_no_open(file, res);
514 }
515 
516 /*
517  * Code shared between mknod, mkdir, symlink and link
518  */
519 static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
520 			    struct inode *dir, struct dentry *entry,
521 			    umode_t mode)
522 {
523 	struct fuse_entry_out outarg;
524 	struct inode *inode;
525 	int err;
526 	struct fuse_forget_link *forget;
527 
528 	forget = fuse_alloc_forget();
529 	if (!forget)
530 		return -ENOMEM;
531 
532 	memset(&outarg, 0, sizeof(outarg));
533 	args->in.h.nodeid = get_node_id(dir);
534 	args->out.numargs = 1;
535 	args->out.args[0].size = sizeof(outarg);
536 	args->out.args[0].value = &outarg;
537 	err = fuse_simple_request(fc, args);
538 	if (err)
539 		goto out_put_forget_req;
540 
541 	err = -EIO;
542 	if (invalid_nodeid(outarg.nodeid))
543 		goto out_put_forget_req;
544 
545 	if ((outarg.attr.mode ^ mode) & S_IFMT)
546 		goto out_put_forget_req;
547 
548 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
549 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
550 	if (!inode) {
551 		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
552 		return -ENOMEM;
553 	}
554 	kfree(forget);
555 
556 	err = d_instantiate_no_diralias(entry, inode);
557 	if (err)
558 		return err;
559 
560 	fuse_change_entry_timeout(entry, &outarg);
561 	fuse_invalidate_attr(dir);
562 	return 0;
563 
564  out_put_forget_req:
565 	kfree(forget);
566 	return err;
567 }
568 
569 static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
570 		      dev_t rdev)
571 {
572 	struct fuse_mknod_in inarg;
573 	struct fuse_conn *fc = get_fuse_conn(dir);
574 	FUSE_ARGS(args);
575 
576 	if (!fc->dont_mask)
577 		mode &= ~current_umask();
578 
579 	memset(&inarg, 0, sizeof(inarg));
580 	inarg.mode = mode;
581 	inarg.rdev = new_encode_dev(rdev);
582 	inarg.umask = current_umask();
583 	args.in.h.opcode = FUSE_MKNOD;
584 	args.in.numargs = 2;
585 	args.in.args[0].size = sizeof(inarg);
586 	args.in.args[0].value = &inarg;
587 	args.in.args[1].size = entry->d_name.len + 1;
588 	args.in.args[1].value = entry->d_name.name;
589 	return create_new_entry(fc, &args, dir, entry, mode);
590 }
591 
592 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
593 		       bool excl)
594 {
595 	return fuse_mknod(dir, entry, mode, 0);
596 }
597 
598 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
599 {
600 	struct fuse_mkdir_in inarg;
601 	struct fuse_conn *fc = get_fuse_conn(dir);
602 	FUSE_ARGS(args);
603 
604 	if (!fc->dont_mask)
605 		mode &= ~current_umask();
606 
607 	memset(&inarg, 0, sizeof(inarg));
608 	inarg.mode = mode;
609 	inarg.umask = current_umask();
610 	args.in.h.opcode = FUSE_MKDIR;
611 	args.in.numargs = 2;
612 	args.in.args[0].size = sizeof(inarg);
613 	args.in.args[0].value = &inarg;
614 	args.in.args[1].size = entry->d_name.len + 1;
615 	args.in.args[1].value = entry->d_name.name;
616 	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
617 }
618 
619 static int fuse_symlink(struct inode *dir, struct dentry *entry,
620 			const char *link)
621 {
622 	struct fuse_conn *fc = get_fuse_conn(dir);
623 	unsigned len = strlen(link) + 1;
624 	FUSE_ARGS(args);
625 
626 	args.in.h.opcode = FUSE_SYMLINK;
627 	args.in.numargs = 2;
628 	args.in.args[0].size = entry->d_name.len + 1;
629 	args.in.args[0].value = entry->d_name.name;
630 	args.in.args[1].size = len;
631 	args.in.args[1].value = link;
632 	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
633 }
634 
635 static inline void fuse_update_ctime(struct inode *inode)
636 {
637 	if (!IS_NOCMTIME(inode)) {
638 		inode->i_ctime = current_fs_time(inode->i_sb);
639 		mark_inode_dirty_sync(inode);
640 	}
641 }
642 
643 static int fuse_unlink(struct inode *dir, struct dentry *entry)
644 {
645 	int err;
646 	struct fuse_conn *fc = get_fuse_conn(dir);
647 	FUSE_ARGS(args);
648 
649 	args.in.h.opcode = FUSE_UNLINK;
650 	args.in.h.nodeid = get_node_id(dir);
651 	args.in.numargs = 1;
652 	args.in.args[0].size = entry->d_name.len + 1;
653 	args.in.args[0].value = entry->d_name.name;
654 	err = fuse_simple_request(fc, &args);
655 	if (!err) {
656 		struct inode *inode = d_inode(entry);
657 		struct fuse_inode *fi = get_fuse_inode(inode);
658 
659 		spin_lock(&fc->lock);
660 		fi->attr_version = ++fc->attr_version;
661 		/*
662 		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
663 		 * happen if userspace filesystem is careless.  It would be
664 		 * difficult to enforce correct nlink usage so just ignore this
665 		 * condition here
666 		 */
667 		if (inode->i_nlink > 0)
668 			drop_nlink(inode);
669 		spin_unlock(&fc->lock);
670 		fuse_invalidate_attr(inode);
671 		fuse_invalidate_attr(dir);
672 		fuse_invalidate_entry_cache(entry);
673 		fuse_update_ctime(inode);
674 	} else if (err == -EINTR)
675 		fuse_invalidate_entry(entry);
676 	return err;
677 }
678 
679 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
680 {
681 	int err;
682 	struct fuse_conn *fc = get_fuse_conn(dir);
683 	FUSE_ARGS(args);
684 
685 	args.in.h.opcode = FUSE_RMDIR;
686 	args.in.h.nodeid = get_node_id(dir);
687 	args.in.numargs = 1;
688 	args.in.args[0].size = entry->d_name.len + 1;
689 	args.in.args[0].value = entry->d_name.name;
690 	err = fuse_simple_request(fc, &args);
691 	if (!err) {
692 		clear_nlink(d_inode(entry));
693 		fuse_invalidate_attr(dir);
694 		fuse_invalidate_entry_cache(entry);
695 	} else if (err == -EINTR)
696 		fuse_invalidate_entry(entry);
697 	return err;
698 }
699 
700 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
701 			      struct inode *newdir, struct dentry *newent,
702 			      unsigned int flags, int opcode, size_t argsize)
703 {
704 	int err;
705 	struct fuse_rename2_in inarg;
706 	struct fuse_conn *fc = get_fuse_conn(olddir);
707 	FUSE_ARGS(args);
708 
709 	memset(&inarg, 0, argsize);
710 	inarg.newdir = get_node_id(newdir);
711 	inarg.flags = flags;
712 	args.in.h.opcode = opcode;
713 	args.in.h.nodeid = get_node_id(olddir);
714 	args.in.numargs = 3;
715 	args.in.args[0].size = argsize;
716 	args.in.args[0].value = &inarg;
717 	args.in.args[1].size = oldent->d_name.len + 1;
718 	args.in.args[1].value = oldent->d_name.name;
719 	args.in.args[2].size = newent->d_name.len + 1;
720 	args.in.args[2].value = newent->d_name.name;
721 	err = fuse_simple_request(fc, &args);
722 	if (!err) {
723 		/* ctime changes */
724 		fuse_invalidate_attr(d_inode(oldent));
725 		fuse_update_ctime(d_inode(oldent));
726 
727 		if (flags & RENAME_EXCHANGE) {
728 			fuse_invalidate_attr(d_inode(newent));
729 			fuse_update_ctime(d_inode(newent));
730 		}
731 
732 		fuse_invalidate_attr(olddir);
733 		if (olddir != newdir)
734 			fuse_invalidate_attr(newdir);
735 
736 		/* newent will end up negative */
737 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
738 			fuse_invalidate_attr(d_inode(newent));
739 			fuse_invalidate_entry_cache(newent);
740 			fuse_update_ctime(d_inode(newent));
741 		}
742 	} else if (err == -EINTR) {
743 		/* If request was interrupted, DEITY only knows if the
744 		   rename actually took place.  If the invalidation
745 		   fails (e.g. some process has CWD under the renamed
746 		   directory), then there can be inconsistency between
747 		   the dcache and the real filesystem.  Tough luck. */
748 		fuse_invalidate_entry(oldent);
749 		if (d_really_is_positive(newent))
750 			fuse_invalidate_entry(newent);
751 	}
752 
753 	return err;
754 }
755 
756 static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
757 			struct inode *newdir, struct dentry *newent,
758 			unsigned int flags)
759 {
760 	struct fuse_conn *fc = get_fuse_conn(olddir);
761 	int err;
762 
763 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
764 		return -EINVAL;
765 
766 	if (flags) {
767 		if (fc->no_rename2 || fc->minor < 23)
768 			return -EINVAL;
769 
770 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
771 					 FUSE_RENAME2,
772 					 sizeof(struct fuse_rename2_in));
773 		if (err == -ENOSYS) {
774 			fc->no_rename2 = 1;
775 			err = -EINVAL;
776 		}
777 	} else {
778 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
779 					 FUSE_RENAME,
780 					 sizeof(struct fuse_rename_in));
781 	}
782 
783 	return err;
784 }
785 
786 static int fuse_link(struct dentry *entry, struct inode *newdir,
787 		     struct dentry *newent)
788 {
789 	int err;
790 	struct fuse_link_in inarg;
791 	struct inode *inode = d_inode(entry);
792 	struct fuse_conn *fc = get_fuse_conn(inode);
793 	FUSE_ARGS(args);
794 
795 	memset(&inarg, 0, sizeof(inarg));
796 	inarg.oldnodeid = get_node_id(inode);
797 	args.in.h.opcode = FUSE_LINK;
798 	args.in.numargs = 2;
799 	args.in.args[0].size = sizeof(inarg);
800 	args.in.args[0].value = &inarg;
801 	args.in.args[1].size = newent->d_name.len + 1;
802 	args.in.args[1].value = newent->d_name.name;
803 	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
804 	/* Contrary to "normal" filesystems it can happen that link
805 	   makes two "logical" inodes point to the same "physical"
806 	   inode.  We invalidate the attributes of the old one, so it
807 	   will reflect changes in the backing inode (link count,
808 	   etc.)
809 	*/
810 	if (!err) {
811 		struct fuse_inode *fi = get_fuse_inode(inode);
812 
813 		spin_lock(&fc->lock);
814 		fi->attr_version = ++fc->attr_version;
815 		inc_nlink(inode);
816 		spin_unlock(&fc->lock);
817 		fuse_invalidate_attr(inode);
818 		fuse_update_ctime(inode);
819 	} else if (err == -EINTR) {
820 		fuse_invalidate_attr(inode);
821 	}
822 	return err;
823 }
824 
825 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
826 			  struct kstat *stat)
827 {
828 	unsigned int blkbits;
829 	struct fuse_conn *fc = get_fuse_conn(inode);
830 
831 	/* see the comment in fuse_change_attributes() */
832 	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
833 		attr->size = i_size_read(inode);
834 		attr->mtime = inode->i_mtime.tv_sec;
835 		attr->mtimensec = inode->i_mtime.tv_nsec;
836 		attr->ctime = inode->i_ctime.tv_sec;
837 		attr->ctimensec = inode->i_ctime.tv_nsec;
838 	}
839 
840 	stat->dev = inode->i_sb->s_dev;
841 	stat->ino = attr->ino;
842 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
843 	stat->nlink = attr->nlink;
844 	stat->uid = make_kuid(&init_user_ns, attr->uid);
845 	stat->gid = make_kgid(&init_user_ns, attr->gid);
846 	stat->rdev = inode->i_rdev;
847 	stat->atime.tv_sec = attr->atime;
848 	stat->atime.tv_nsec = attr->atimensec;
849 	stat->mtime.tv_sec = attr->mtime;
850 	stat->mtime.tv_nsec = attr->mtimensec;
851 	stat->ctime.tv_sec = attr->ctime;
852 	stat->ctime.tv_nsec = attr->ctimensec;
853 	stat->size = attr->size;
854 	stat->blocks = attr->blocks;
855 
856 	if (attr->blksize != 0)
857 		blkbits = ilog2(attr->blksize);
858 	else
859 		blkbits = inode->i_sb->s_blocksize_bits;
860 
861 	stat->blksize = 1 << blkbits;
862 }
863 
864 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
865 			   struct file *file)
866 {
867 	int err;
868 	struct fuse_getattr_in inarg;
869 	struct fuse_attr_out outarg;
870 	struct fuse_conn *fc = get_fuse_conn(inode);
871 	FUSE_ARGS(args);
872 	u64 attr_version;
873 
874 	attr_version = fuse_get_attr_version(fc);
875 
876 	memset(&inarg, 0, sizeof(inarg));
877 	memset(&outarg, 0, sizeof(outarg));
878 	/* Directories have separate file-handle space */
879 	if (file && S_ISREG(inode->i_mode)) {
880 		struct fuse_file *ff = file->private_data;
881 
882 		inarg.getattr_flags |= FUSE_GETATTR_FH;
883 		inarg.fh = ff->fh;
884 	}
885 	args.in.h.opcode = FUSE_GETATTR;
886 	args.in.h.nodeid = get_node_id(inode);
887 	args.in.numargs = 1;
888 	args.in.args[0].size = sizeof(inarg);
889 	args.in.args[0].value = &inarg;
890 	args.out.numargs = 1;
891 	args.out.args[0].size = sizeof(outarg);
892 	args.out.args[0].value = &outarg;
893 	err = fuse_simple_request(fc, &args);
894 	if (!err) {
895 		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
896 			make_bad_inode(inode);
897 			err = -EIO;
898 		} else {
899 			fuse_change_attributes(inode, &outarg.attr,
900 					       attr_timeout(&outarg),
901 					       attr_version);
902 			if (stat)
903 				fuse_fillattr(inode, &outarg.attr, stat);
904 		}
905 	}
906 	return err;
907 }
908 
909 int fuse_update_attributes(struct inode *inode, struct kstat *stat,
910 			   struct file *file, bool *refreshed)
911 {
912 	struct fuse_inode *fi = get_fuse_inode(inode);
913 	int err;
914 	bool r;
915 
916 	if (time_before64(fi->i_time, get_jiffies_64())) {
917 		r = true;
918 		err = fuse_do_getattr(inode, stat, file);
919 	} else {
920 		r = false;
921 		err = 0;
922 		if (stat) {
923 			generic_fillattr(inode, stat);
924 			stat->mode = fi->orig_i_mode;
925 			stat->ino = fi->orig_ino;
926 		}
927 	}
928 
929 	if (refreshed != NULL)
930 		*refreshed = r;
931 
932 	return err;
933 }
934 
935 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
936 			     u64 child_nodeid, struct qstr *name)
937 {
938 	int err = -ENOTDIR;
939 	struct inode *parent;
940 	struct dentry *dir;
941 	struct dentry *entry;
942 
943 	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
944 	if (!parent)
945 		return -ENOENT;
946 
947 	mutex_lock(&parent->i_mutex);
948 	if (!S_ISDIR(parent->i_mode))
949 		goto unlock;
950 
951 	err = -ENOENT;
952 	dir = d_find_alias(parent);
953 	if (!dir)
954 		goto unlock;
955 
956 	entry = d_lookup(dir, name);
957 	dput(dir);
958 	if (!entry)
959 		goto unlock;
960 
961 	fuse_invalidate_attr(parent);
962 	fuse_invalidate_entry(entry);
963 
964 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
965 		mutex_lock(&d_inode(entry)->i_mutex);
966 		if (get_node_id(d_inode(entry)) != child_nodeid) {
967 			err = -ENOENT;
968 			goto badentry;
969 		}
970 		if (d_mountpoint(entry)) {
971 			err = -EBUSY;
972 			goto badentry;
973 		}
974 		if (d_is_dir(entry)) {
975 			shrink_dcache_parent(entry);
976 			if (!simple_empty(entry)) {
977 				err = -ENOTEMPTY;
978 				goto badentry;
979 			}
980 			d_inode(entry)->i_flags |= S_DEAD;
981 		}
982 		dont_mount(entry);
983 		clear_nlink(d_inode(entry));
984 		err = 0;
985  badentry:
986 		mutex_unlock(&d_inode(entry)->i_mutex);
987 		if (!err)
988 			d_delete(entry);
989 	} else {
990 		err = 0;
991 	}
992 	dput(entry);
993 
994  unlock:
995 	mutex_unlock(&parent->i_mutex);
996 	iput(parent);
997 	return err;
998 }
999 
1000 /*
1001  * Calling into a user-controlled filesystem gives the filesystem
1002  * daemon ptrace-like capabilities over the current process.  This
1003  * means, that the filesystem daemon is able to record the exact
1004  * filesystem operations performed, and can also control the behavior
1005  * of the requester process in otherwise impossible ways.  For example
1006  * it can delay the operation for arbitrary length of time allowing
1007  * DoS against the requester.
1008  *
1009  * For this reason only those processes can call into the filesystem,
1010  * for which the owner of the mount has ptrace privilege.  This
1011  * excludes processes started by other users, suid or sgid processes.
1012  */
1013 int fuse_allow_current_process(struct fuse_conn *fc)
1014 {
1015 	const struct cred *cred;
1016 
1017 	if (fc->flags & FUSE_ALLOW_OTHER)
1018 		return 1;
1019 
1020 	cred = current_cred();
1021 	if (uid_eq(cred->euid, fc->user_id) &&
1022 	    uid_eq(cred->suid, fc->user_id) &&
1023 	    uid_eq(cred->uid,  fc->user_id) &&
1024 	    gid_eq(cred->egid, fc->group_id) &&
1025 	    gid_eq(cred->sgid, fc->group_id) &&
1026 	    gid_eq(cred->gid,  fc->group_id))
1027 		return 1;
1028 
1029 	return 0;
1030 }
1031 
1032 static int fuse_access(struct inode *inode, int mask)
1033 {
1034 	struct fuse_conn *fc = get_fuse_conn(inode);
1035 	FUSE_ARGS(args);
1036 	struct fuse_access_in inarg;
1037 	int err;
1038 
1039 	BUG_ON(mask & MAY_NOT_BLOCK);
1040 
1041 	if (fc->no_access)
1042 		return 0;
1043 
1044 	memset(&inarg, 0, sizeof(inarg));
1045 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1046 	args.in.h.opcode = FUSE_ACCESS;
1047 	args.in.h.nodeid = get_node_id(inode);
1048 	args.in.numargs = 1;
1049 	args.in.args[0].size = sizeof(inarg);
1050 	args.in.args[0].value = &inarg;
1051 	err = fuse_simple_request(fc, &args);
1052 	if (err == -ENOSYS) {
1053 		fc->no_access = 1;
1054 		err = 0;
1055 	}
1056 	return err;
1057 }
1058 
1059 static int fuse_perm_getattr(struct inode *inode, int mask)
1060 {
1061 	if (mask & MAY_NOT_BLOCK)
1062 		return -ECHILD;
1063 
1064 	return fuse_do_getattr(inode, NULL, NULL);
1065 }
1066 
1067 /*
1068  * Check permission.  The two basic access models of FUSE are:
1069  *
1070  * 1) Local access checking ('default_permissions' mount option) based
1071  * on file mode.  This is the plain old disk filesystem permission
1072  * modell.
1073  *
1074  * 2) "Remote" access checking, where server is responsible for
1075  * checking permission in each inode operation.  An exception to this
1076  * is if ->permission() was invoked from sys_access() in which case an
1077  * access request is sent.  Execute permission is still checked
1078  * locally based on file mode.
1079  */
1080 static int fuse_permission(struct inode *inode, int mask)
1081 {
1082 	struct fuse_conn *fc = get_fuse_conn(inode);
1083 	bool refreshed = false;
1084 	int err = 0;
1085 
1086 	if (!fuse_allow_current_process(fc))
1087 		return -EACCES;
1088 
1089 	/*
1090 	 * If attributes are needed, refresh them before proceeding
1091 	 */
1092 	if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
1093 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1094 		struct fuse_inode *fi = get_fuse_inode(inode);
1095 
1096 		if (time_before64(fi->i_time, get_jiffies_64())) {
1097 			refreshed = true;
1098 
1099 			err = fuse_perm_getattr(inode, mask);
1100 			if (err)
1101 				return err;
1102 		}
1103 	}
1104 
1105 	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1106 		err = generic_permission(inode, mask);
1107 
1108 		/* If permission is denied, try to refresh file
1109 		   attributes.  This is also needed, because the root
1110 		   node will at first have no permissions */
1111 		if (err == -EACCES && !refreshed) {
1112 			err = fuse_perm_getattr(inode, mask);
1113 			if (!err)
1114 				err = generic_permission(inode, mask);
1115 		}
1116 
1117 		/* Note: the opposite of the above test does not
1118 		   exist.  So if permissions are revoked this won't be
1119 		   noticed immediately, only after the attribute
1120 		   timeout has expired */
1121 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1122 		err = fuse_access(inode, mask);
1123 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1124 		if (!(inode->i_mode & S_IXUGO)) {
1125 			if (refreshed)
1126 				return -EACCES;
1127 
1128 			err = fuse_perm_getattr(inode, mask);
1129 			if (!err && !(inode->i_mode & S_IXUGO))
1130 				return -EACCES;
1131 		}
1132 	}
1133 	return err;
1134 }
1135 
1136 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1137 			 struct dir_context *ctx)
1138 {
1139 	while (nbytes >= FUSE_NAME_OFFSET) {
1140 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1141 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
1142 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1143 			return -EIO;
1144 		if (reclen > nbytes)
1145 			break;
1146 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1147 			return -EIO;
1148 
1149 		if (!dir_emit(ctx, dirent->name, dirent->namelen,
1150 			       dirent->ino, dirent->type))
1151 			break;
1152 
1153 		buf += reclen;
1154 		nbytes -= reclen;
1155 		ctx->pos = dirent->off;
1156 	}
1157 
1158 	return 0;
1159 }
1160 
1161 static int fuse_direntplus_link(struct file *file,
1162 				struct fuse_direntplus *direntplus,
1163 				u64 attr_version)
1164 {
1165 	int err;
1166 	struct fuse_entry_out *o = &direntplus->entry_out;
1167 	struct fuse_dirent *dirent = &direntplus->dirent;
1168 	struct dentry *parent = file->f_path.dentry;
1169 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1170 	struct dentry *dentry;
1171 	struct dentry *alias;
1172 	struct inode *dir = d_inode(parent);
1173 	struct fuse_conn *fc;
1174 	struct inode *inode;
1175 
1176 	if (!o->nodeid) {
1177 		/*
1178 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1179 		 * ENOENT. Instead, it only means the userspace filesystem did
1180 		 * not want to return attributes/handle for this entry.
1181 		 *
1182 		 * So do nothing.
1183 		 */
1184 		return 0;
1185 	}
1186 
1187 	if (name.name[0] == '.') {
1188 		/*
1189 		 * We could potentially refresh the attributes of the directory
1190 		 * and its parent?
1191 		 */
1192 		if (name.len == 1)
1193 			return 0;
1194 		if (name.name[1] == '.' && name.len == 2)
1195 			return 0;
1196 	}
1197 
1198 	if (invalid_nodeid(o->nodeid))
1199 		return -EIO;
1200 	if (!fuse_valid_type(o->attr.mode))
1201 		return -EIO;
1202 
1203 	fc = get_fuse_conn(dir);
1204 
1205 	name.hash = full_name_hash(name.name, name.len);
1206 	dentry = d_lookup(parent, &name);
1207 	if (dentry) {
1208 		inode = d_inode(dentry);
1209 		if (!inode) {
1210 			d_drop(dentry);
1211 		} else if (get_node_id(inode) != o->nodeid ||
1212 			   ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
1213 			d_invalidate(dentry);
1214 		} else if (is_bad_inode(inode)) {
1215 			err = -EIO;
1216 			goto out;
1217 		} else {
1218 			struct fuse_inode *fi;
1219 			fi = get_fuse_inode(inode);
1220 			spin_lock(&fc->lock);
1221 			fi->nlookup++;
1222 			spin_unlock(&fc->lock);
1223 
1224 			fuse_change_attributes(inode, &o->attr,
1225 					       entry_attr_timeout(o),
1226 					       attr_version);
1227 
1228 			/*
1229 			 * The other branch to 'found' comes via fuse_iget()
1230 			 * which bumps nlookup inside
1231 			 */
1232 			goto found;
1233 		}
1234 		dput(dentry);
1235 	}
1236 
1237 	dentry = d_alloc(parent, &name);
1238 	err = -ENOMEM;
1239 	if (!dentry)
1240 		goto out;
1241 
1242 	inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1243 			  &o->attr, entry_attr_timeout(o), attr_version);
1244 	if (!inode)
1245 		goto out;
1246 
1247 	alias = d_splice_alias(inode, dentry);
1248 	err = PTR_ERR(alias);
1249 	if (IS_ERR(alias))
1250 		goto out;
1251 
1252 	if (alias) {
1253 		dput(dentry);
1254 		dentry = alias;
1255 	}
1256 
1257 found:
1258 	if (fc->readdirplus_auto)
1259 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
1260 	fuse_change_entry_timeout(dentry, o);
1261 
1262 	err = 0;
1263 out:
1264 	dput(dentry);
1265 	return err;
1266 }
1267 
1268 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1269 			     struct dir_context *ctx, u64 attr_version)
1270 {
1271 	struct fuse_direntplus *direntplus;
1272 	struct fuse_dirent *dirent;
1273 	size_t reclen;
1274 	int over = 0;
1275 	int ret;
1276 
1277 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1278 		direntplus = (struct fuse_direntplus *) buf;
1279 		dirent = &direntplus->dirent;
1280 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1281 
1282 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1283 			return -EIO;
1284 		if (reclen > nbytes)
1285 			break;
1286 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1287 			return -EIO;
1288 
1289 		if (!over) {
1290 			/* We fill entries into dstbuf only as much as
1291 			   it can hold. But we still continue iterating
1292 			   over remaining entries to link them. If not,
1293 			   we need to send a FORGET for each of those
1294 			   which we did not link.
1295 			*/
1296 			over = !dir_emit(ctx, dirent->name, dirent->namelen,
1297 				       dirent->ino, dirent->type);
1298 			ctx->pos = dirent->off;
1299 		}
1300 
1301 		buf += reclen;
1302 		nbytes -= reclen;
1303 
1304 		ret = fuse_direntplus_link(file, direntplus, attr_version);
1305 		if (ret)
1306 			fuse_force_forget(file, direntplus->entry_out.nodeid);
1307 	}
1308 
1309 	return 0;
1310 }
1311 
1312 static int fuse_readdir(struct file *file, struct dir_context *ctx)
1313 {
1314 	int plus, err;
1315 	size_t nbytes;
1316 	struct page *page;
1317 	struct inode *inode = file_inode(file);
1318 	struct fuse_conn *fc = get_fuse_conn(inode);
1319 	struct fuse_req *req;
1320 	u64 attr_version = 0;
1321 
1322 	if (is_bad_inode(inode))
1323 		return -EIO;
1324 
1325 	req = fuse_get_req(fc, 1);
1326 	if (IS_ERR(req))
1327 		return PTR_ERR(req);
1328 
1329 	page = alloc_page(GFP_KERNEL);
1330 	if (!page) {
1331 		fuse_put_request(fc, req);
1332 		return -ENOMEM;
1333 	}
1334 
1335 	plus = fuse_use_readdirplus(inode, ctx);
1336 	req->out.argpages = 1;
1337 	req->num_pages = 1;
1338 	req->pages[0] = page;
1339 	req->page_descs[0].length = PAGE_SIZE;
1340 	if (plus) {
1341 		attr_version = fuse_get_attr_version(fc);
1342 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1343 			       FUSE_READDIRPLUS);
1344 	} else {
1345 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1346 			       FUSE_READDIR);
1347 	}
1348 	fuse_request_send(fc, req);
1349 	nbytes = req->out.args[0].size;
1350 	err = req->out.h.error;
1351 	fuse_put_request(fc, req);
1352 	if (!err) {
1353 		if (plus) {
1354 			err = parse_dirplusfile(page_address(page), nbytes,
1355 						file, ctx,
1356 						attr_version);
1357 		} else {
1358 			err = parse_dirfile(page_address(page), nbytes, file,
1359 					    ctx);
1360 		}
1361 	}
1362 
1363 	__free_page(page);
1364 	fuse_invalidate_atime(inode);
1365 	return err;
1366 }
1367 
1368 static char *read_link(struct dentry *dentry)
1369 {
1370 	struct inode *inode = d_inode(dentry);
1371 	struct fuse_conn *fc = get_fuse_conn(inode);
1372 	FUSE_ARGS(args);
1373 	char *link;
1374 	ssize_t ret;
1375 
1376 	link = (char *) __get_free_page(GFP_KERNEL);
1377 	if (!link)
1378 		return ERR_PTR(-ENOMEM);
1379 
1380 	args.in.h.opcode = FUSE_READLINK;
1381 	args.in.h.nodeid = get_node_id(inode);
1382 	args.out.argvar = 1;
1383 	args.out.numargs = 1;
1384 	args.out.args[0].size = PAGE_SIZE - 1;
1385 	args.out.args[0].value = link;
1386 	ret = fuse_simple_request(fc, &args);
1387 	if (ret < 0) {
1388 		free_page((unsigned long) link);
1389 		link = ERR_PTR(ret);
1390 	} else {
1391 		link[ret] = '\0';
1392 	}
1393 	fuse_invalidate_atime(inode);
1394 	return link;
1395 }
1396 
/*
 * Release a link buffer returned by read_link().  An ERR_PTR() value
 * carries no page and is ignored.
 */
static void free_link(char *link)
{
	if (IS_ERR(link))
		return;

	free_page((unsigned long) link);
}
1402 
1403 static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
1404 {
1405 	nd_set_link(nd, read_link(dentry));
1406 	return NULL;
1407 }
1408 
/*
 * ->put_link(): free the link buffer that fuse_follow_link() stored
 * in @nd.
 */
static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
{
	char *target = nd_get_link(nd);

	free_link(target);
}
1413 
1414 static int fuse_dir_open(struct inode *inode, struct file *file)
1415 {
1416 	return fuse_open_common(inode, file, true);
1417 }
1418 
1419 static int fuse_dir_release(struct inode *inode, struct file *file)
1420 {
1421 	fuse_release_common(file, FUSE_RELEASEDIR);
1422 
1423 	return 0;
1424 }
1425 
1426 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1427 			  int datasync)
1428 {
1429 	return fuse_fsync_common(file, start, end, datasync, 1);
1430 }
1431 
1432 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1433 			    unsigned long arg)
1434 {
1435 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1436 
1437 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1438 	if (fc->minor < 18)
1439 		return -ENOTTY;
1440 
1441 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1442 }
1443 
1444 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1445 				   unsigned long arg)
1446 {
1447 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1448 
1449 	if (fc->minor < 18)
1450 		return -ENOTTY;
1451 
1452 	return fuse_ioctl_common(file, cmd, arg,
1453 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1454 }
1455 
1456 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1457 {
1458 	/* Always update if mtime is explicitly set  */
1459 	if (ivalid & ATTR_MTIME_SET)
1460 		return true;
1461 
1462 	/* Or if kernel i_mtime is the official one */
1463 	if (trust_local_mtime)
1464 		return true;
1465 
1466 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1467 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1468 		return false;
1469 
1470 	/* In all other cases update */
1471 	return true;
1472 }
1473 
1474 static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1475 			   bool trust_local_cmtime)
1476 {
1477 	unsigned ivalid = iattr->ia_valid;
1478 
1479 	if (ivalid & ATTR_MODE)
1480 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1481 	if (ivalid & ATTR_UID)
1482 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1483 	if (ivalid & ATTR_GID)
1484 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1485 	if (ivalid & ATTR_SIZE)
1486 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1487 	if (ivalid & ATTR_ATIME) {
1488 		arg->valid |= FATTR_ATIME;
1489 		arg->atime = iattr->ia_atime.tv_sec;
1490 		arg->atimensec = iattr->ia_atime.tv_nsec;
1491 		if (!(ivalid & ATTR_ATIME_SET))
1492 			arg->valid |= FATTR_ATIME_NOW;
1493 	}
1494 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1495 		arg->valid |= FATTR_MTIME;
1496 		arg->mtime = iattr->ia_mtime.tv_sec;
1497 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1498 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1499 			arg->valid |= FATTR_MTIME_NOW;
1500 	}
1501 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1502 		arg->valid |= FATTR_CTIME;
1503 		arg->ctime = iattr->ia_ctime.tv_sec;
1504 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1505 	}
1506 }
1507 
1508 /*
1509  * Prevent concurrent writepages on inode
1510  *
1511  * This is done by adding a negative bias to the inode write counter
1512  * and waiting for all pending writes to finish.
1513  */
1514 void fuse_set_nowrite(struct inode *inode)
1515 {
1516 	struct fuse_conn *fc = get_fuse_conn(inode);
1517 	struct fuse_inode *fi = get_fuse_inode(inode);
1518 
1519 	BUG_ON(!mutex_is_locked(&inode->i_mutex));
1520 
1521 	spin_lock(&fc->lock);
1522 	BUG_ON(fi->writectr < 0);
1523 	fi->writectr += FUSE_NOWRITE;
1524 	spin_unlock(&fc->lock);
1525 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1526 }
1527 
1528 /*
1529  * Allow writepages on inode
1530  *
1531  * Remove the bias from the writecounter and send any queued
1532  * writepages.
1533  */
1534 static void __fuse_release_nowrite(struct inode *inode)
1535 {
1536 	struct fuse_inode *fi = get_fuse_inode(inode);
1537 
1538 	BUG_ON(fi->writectr != FUSE_NOWRITE);
1539 	fi->writectr = 0;
1540 	fuse_flush_writepages(inode);
1541 }
1542 
1543 void fuse_release_nowrite(struct inode *inode)
1544 {
1545 	struct fuse_conn *fc = get_fuse_conn(inode);
1546 
1547 	spin_lock(&fc->lock);
1548 	__fuse_release_nowrite(inode);
1549 	spin_unlock(&fc->lock);
1550 }
1551 
1552 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1553 			      struct inode *inode,
1554 			      struct fuse_setattr_in *inarg_p,
1555 			      struct fuse_attr_out *outarg_p)
1556 {
1557 	args->in.h.opcode = FUSE_SETATTR;
1558 	args->in.h.nodeid = get_node_id(inode);
1559 	args->in.numargs = 1;
1560 	args->in.args[0].size = sizeof(*inarg_p);
1561 	args->in.args[0].value = inarg_p;
1562 	args->out.numargs = 1;
1563 	args->out.args[0].size = sizeof(*outarg_p);
1564 	args->out.args[0].value = outarg_p;
1565 }
1566 
1567 /*
1568  * Flush inode->i_mtime to the server
1569  */
1570 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1571 {
1572 	struct fuse_conn *fc = get_fuse_conn(inode);
1573 	FUSE_ARGS(args);
1574 	struct fuse_setattr_in inarg;
1575 	struct fuse_attr_out outarg;
1576 
1577 	memset(&inarg, 0, sizeof(inarg));
1578 	memset(&outarg, 0, sizeof(outarg));
1579 
1580 	inarg.valid = FATTR_MTIME;
1581 	inarg.mtime = inode->i_mtime.tv_sec;
1582 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1583 	if (fc->minor >= 23) {
1584 		inarg.valid |= FATTR_CTIME;
1585 		inarg.ctime = inode->i_ctime.tv_sec;
1586 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1587 	}
1588 	if (ff) {
1589 		inarg.valid |= FATTR_FH;
1590 		inarg.fh = ff->fh;
1591 	}
1592 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1593 
1594 	return fuse_simple_request(fc, &args);
1595 }
1596 
1597 /*
1598  * Set attributes, and at the same time refresh them.
1599  *
1600  * Truncation is slightly complicated, because the 'truncate' request
1601  * may fail, in which case we don't want to touch the mapping.
1602  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1603  * and the actual truncation by hand.
1604  */
1605 int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1606 		    struct file *file)
1607 {
1608 	struct fuse_conn *fc = get_fuse_conn(inode);
1609 	struct fuse_inode *fi = get_fuse_inode(inode);
1610 	FUSE_ARGS(args);
1611 	struct fuse_setattr_in inarg;
1612 	struct fuse_attr_out outarg;
1613 	bool is_truncate = false;
1614 	bool is_wb = fc->writeback_cache;
1615 	loff_t oldsize;
1616 	int err;
1617 	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1618 
1619 	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
1620 		attr->ia_valid |= ATTR_FORCE;
1621 
1622 	err = inode_change_ok(inode, attr);
1623 	if (err)
1624 		return err;
1625 
1626 	if (attr->ia_valid & ATTR_OPEN) {
1627 		if (fc->atomic_o_trunc)
1628 			return 0;
1629 		file = NULL;
1630 	}
1631 
1632 	if (attr->ia_valid & ATTR_SIZE)
1633 		is_truncate = true;
1634 
1635 	if (is_truncate) {
1636 		fuse_set_nowrite(inode);
1637 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1638 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1639 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1640 	}
1641 
1642 	memset(&inarg, 0, sizeof(inarg));
1643 	memset(&outarg, 0, sizeof(outarg));
1644 	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
1645 	if (file) {
1646 		struct fuse_file *ff = file->private_data;
1647 		inarg.valid |= FATTR_FH;
1648 		inarg.fh = ff->fh;
1649 	}
1650 	if (attr->ia_valid & ATTR_SIZE) {
1651 		/* For mandatory locking in truncate */
1652 		inarg.valid |= FATTR_LOCKOWNER;
1653 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1654 	}
1655 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1656 	err = fuse_simple_request(fc, &args);
1657 	if (err) {
1658 		if (err == -EINTR)
1659 			fuse_invalidate_attr(inode);
1660 		goto error;
1661 	}
1662 
1663 	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1664 		make_bad_inode(inode);
1665 		err = -EIO;
1666 		goto error;
1667 	}
1668 
1669 	spin_lock(&fc->lock);
1670 	/* the kernel maintains i_mtime locally */
1671 	if (trust_local_cmtime) {
1672 		if (attr->ia_valid & ATTR_MTIME)
1673 			inode->i_mtime = attr->ia_mtime;
1674 		if (attr->ia_valid & ATTR_CTIME)
1675 			inode->i_ctime = attr->ia_ctime;
1676 		/* FIXME: clear I_DIRTY_SYNC? */
1677 	}
1678 
1679 	fuse_change_attributes_common(inode, &outarg.attr,
1680 				      attr_timeout(&outarg));
1681 	oldsize = inode->i_size;
1682 	/* see the comment in fuse_change_attributes() */
1683 	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1684 		i_size_write(inode, outarg.attr.size);
1685 
1686 	if (is_truncate) {
1687 		/* NOTE: this may release/reacquire fc->lock */
1688 		__fuse_release_nowrite(inode);
1689 	}
1690 	spin_unlock(&fc->lock);
1691 
1692 	/*
1693 	 * Only call invalidate_inode_pages2() after removing
1694 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1695 	 */
1696 	if ((is_truncate || !is_wb) &&
1697 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1698 		truncate_pagecache(inode, outarg.attr.size);
1699 		invalidate_inode_pages2(inode->i_mapping);
1700 	}
1701 
1702 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1703 	return 0;
1704 
1705 error:
1706 	if (is_truncate)
1707 		fuse_release_nowrite(inode);
1708 
1709 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1710 	return err;
1711 }
1712 
1713 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1714 {
1715 	struct inode *inode = d_inode(entry);
1716 
1717 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1718 		return -EACCES;
1719 
1720 	if (attr->ia_valid & ATTR_FILE)
1721 		return fuse_do_setattr(inode, attr, attr->ia_file);
1722 	else
1723 		return fuse_do_setattr(inode, attr, NULL);
1724 }
1725 
1726 static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1727 			struct kstat *stat)
1728 {
1729 	struct inode *inode = d_inode(entry);
1730 	struct fuse_conn *fc = get_fuse_conn(inode);
1731 
1732 	if (!fuse_allow_current_process(fc))
1733 		return -EACCES;
1734 
1735 	return fuse_update_attributes(inode, stat, NULL, NULL);
1736 }
1737 
1738 static int fuse_setxattr(struct dentry *entry, const char *name,
1739 			 const void *value, size_t size, int flags)
1740 {
1741 	struct inode *inode = d_inode(entry);
1742 	struct fuse_conn *fc = get_fuse_conn(inode);
1743 	FUSE_ARGS(args);
1744 	struct fuse_setxattr_in inarg;
1745 	int err;
1746 
1747 	if (fc->no_setxattr)
1748 		return -EOPNOTSUPP;
1749 
1750 	memset(&inarg, 0, sizeof(inarg));
1751 	inarg.size = size;
1752 	inarg.flags = flags;
1753 	args.in.h.opcode = FUSE_SETXATTR;
1754 	args.in.h.nodeid = get_node_id(inode);
1755 	args.in.numargs = 3;
1756 	args.in.args[0].size = sizeof(inarg);
1757 	args.in.args[0].value = &inarg;
1758 	args.in.args[1].size = strlen(name) + 1;
1759 	args.in.args[1].value = name;
1760 	args.in.args[2].size = size;
1761 	args.in.args[2].value = value;
1762 	err = fuse_simple_request(fc, &args);
1763 	if (err == -ENOSYS) {
1764 		fc->no_setxattr = 1;
1765 		err = -EOPNOTSUPP;
1766 	}
1767 	if (!err) {
1768 		fuse_invalidate_attr(inode);
1769 		fuse_update_ctime(inode);
1770 	}
1771 	return err;
1772 }
1773 
1774 static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1775 			     void *value, size_t size)
1776 {
1777 	struct inode *inode = d_inode(entry);
1778 	struct fuse_conn *fc = get_fuse_conn(inode);
1779 	FUSE_ARGS(args);
1780 	struct fuse_getxattr_in inarg;
1781 	struct fuse_getxattr_out outarg;
1782 	ssize_t ret;
1783 
1784 	if (fc->no_getxattr)
1785 		return -EOPNOTSUPP;
1786 
1787 	memset(&inarg, 0, sizeof(inarg));
1788 	inarg.size = size;
1789 	args.in.h.opcode = FUSE_GETXATTR;
1790 	args.in.h.nodeid = get_node_id(inode);
1791 	args.in.numargs = 2;
1792 	args.in.args[0].size = sizeof(inarg);
1793 	args.in.args[0].value = &inarg;
1794 	args.in.args[1].size = strlen(name) + 1;
1795 	args.in.args[1].value = name;
1796 	/* This is really two different operations rolled into one */
1797 	args.out.numargs = 1;
1798 	if (size) {
1799 		args.out.argvar = 1;
1800 		args.out.args[0].size = size;
1801 		args.out.args[0].value = value;
1802 	} else {
1803 		args.out.args[0].size = sizeof(outarg);
1804 		args.out.args[0].value = &outarg;
1805 	}
1806 	ret = fuse_simple_request(fc, &args);
1807 	if (!ret && !size)
1808 		ret = outarg.size;
1809 	if (ret == -ENOSYS) {
1810 		fc->no_getxattr = 1;
1811 		ret = -EOPNOTSUPP;
1812 	}
1813 	return ret;
1814 }
1815 
1816 static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1817 {
1818 	struct inode *inode = d_inode(entry);
1819 	struct fuse_conn *fc = get_fuse_conn(inode);
1820 	FUSE_ARGS(args);
1821 	struct fuse_getxattr_in inarg;
1822 	struct fuse_getxattr_out outarg;
1823 	ssize_t ret;
1824 
1825 	if (!fuse_allow_current_process(fc))
1826 		return -EACCES;
1827 
1828 	if (fc->no_listxattr)
1829 		return -EOPNOTSUPP;
1830 
1831 	memset(&inarg, 0, sizeof(inarg));
1832 	inarg.size = size;
1833 	args.in.h.opcode = FUSE_LISTXATTR;
1834 	args.in.h.nodeid = get_node_id(inode);
1835 	args.in.numargs = 1;
1836 	args.in.args[0].size = sizeof(inarg);
1837 	args.in.args[0].value = &inarg;
1838 	/* This is really two different operations rolled into one */
1839 	args.out.numargs = 1;
1840 	if (size) {
1841 		args.out.argvar = 1;
1842 		args.out.args[0].size = size;
1843 		args.out.args[0].value = list;
1844 	} else {
1845 		args.out.args[0].size = sizeof(outarg);
1846 		args.out.args[0].value = &outarg;
1847 	}
1848 	ret = fuse_simple_request(fc, &args);
1849 	if (!ret && !size)
1850 		ret = outarg.size;
1851 	if (ret == -ENOSYS) {
1852 		fc->no_listxattr = 1;
1853 		ret = -EOPNOTSUPP;
1854 	}
1855 	return ret;
1856 }
1857 
1858 static int fuse_removexattr(struct dentry *entry, const char *name)
1859 {
1860 	struct inode *inode = d_inode(entry);
1861 	struct fuse_conn *fc = get_fuse_conn(inode);
1862 	FUSE_ARGS(args);
1863 	int err;
1864 
1865 	if (fc->no_removexattr)
1866 		return -EOPNOTSUPP;
1867 
1868 	args.in.h.opcode = FUSE_REMOVEXATTR;
1869 	args.in.h.nodeid = get_node_id(inode);
1870 	args.in.numargs = 1;
1871 	args.in.args[0].size = strlen(name) + 1;
1872 	args.in.args[0].value = name;
1873 	err = fuse_simple_request(fc, &args);
1874 	if (err == -ENOSYS) {
1875 		fc->no_removexattr = 1;
1876 		err = -EOPNOTSUPP;
1877 	}
1878 	if (!err) {
1879 		fuse_invalidate_attr(inode);
1880 		fuse_update_ctime(inode);
1881 	}
1882 	return err;
1883 }
1884 
1885 static const struct inode_operations fuse_dir_inode_operations = {
1886 	.lookup		= fuse_lookup,
1887 	.mkdir		= fuse_mkdir,
1888 	.symlink	= fuse_symlink,
1889 	.unlink		= fuse_unlink,
1890 	.rmdir		= fuse_rmdir,
1891 	.rename2	= fuse_rename2,
1892 	.link		= fuse_link,
1893 	.setattr	= fuse_setattr,
1894 	.create		= fuse_create,
1895 	.atomic_open	= fuse_atomic_open,
1896 	.mknod		= fuse_mknod,
1897 	.permission	= fuse_permission,
1898 	.getattr	= fuse_getattr,
1899 	.setxattr	= fuse_setxattr,
1900 	.getxattr	= fuse_getxattr,
1901 	.listxattr	= fuse_listxattr,
1902 	.removexattr	= fuse_removexattr,
1903 };
1904 
1905 static const struct file_operations fuse_dir_operations = {
1906 	.llseek		= generic_file_llseek,
1907 	.read		= generic_read_dir,
1908 	.iterate	= fuse_readdir,
1909 	.open		= fuse_dir_open,
1910 	.release	= fuse_dir_release,
1911 	.fsync		= fuse_dir_fsync,
1912 	.unlocked_ioctl	= fuse_dir_ioctl,
1913 	.compat_ioctl	= fuse_dir_compat_ioctl,
1914 };
1915 
1916 static const struct inode_operations fuse_common_inode_operations = {
1917 	.setattr	= fuse_setattr,
1918 	.permission	= fuse_permission,
1919 	.getattr	= fuse_getattr,
1920 	.setxattr	= fuse_setxattr,
1921 	.getxattr	= fuse_getxattr,
1922 	.listxattr	= fuse_listxattr,
1923 	.removexattr	= fuse_removexattr,
1924 };
1925 
1926 static const struct inode_operations fuse_symlink_inode_operations = {
1927 	.setattr	= fuse_setattr,
1928 	.follow_link	= fuse_follow_link,
1929 	.put_link	= fuse_put_link,
1930 	.readlink	= generic_readlink,
1931 	.getattr	= fuse_getattr,
1932 	.setxattr	= fuse_setxattr,
1933 	.getxattr	= fuse_getxattr,
1934 	.listxattr	= fuse_listxattr,
1935 	.removexattr	= fuse_removexattr,
1936 };
1937 
1938 void fuse_init_common(struct inode *inode)
1939 {
1940 	inode->i_op = &fuse_common_inode_operations;
1941 }
1942 
1943 void fuse_init_dir(struct inode *inode)
1944 {
1945 	inode->i_op = &fuse_dir_inode_operations;
1946 	inode->i_fop = &fuse_dir_operations;
1947 }
1948 
1949 void fuse_init_symlink(struct inode *inode)
1950 {
1951 	inode->i_op = &fuse_symlink_inode_operations;
1952 }
1953