xref: /openbmc/linux/fs/fuse/dir.c (revision a06c488d)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/sched.h>
14 #include <linux/namei.h>
15 #include <linux/slab.h>
16 
17 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
18 {
19 	struct fuse_conn *fc = get_fuse_conn(dir);
20 	struct fuse_inode *fi = get_fuse_inode(dir);
21 
22 	if (!fc->do_readdirplus)
23 		return false;
24 	if (!fc->readdirplus_auto)
25 		return true;
26 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27 		return true;
28 	if (ctx->pos == 0)
29 		return true;
30 	return false;
31 }
32 
33 static void fuse_advise_use_readdirplus(struct inode *dir)
34 {
35 	struct fuse_inode *fi = get_fuse_inode(dir);
36 
37 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38 }
39 
40 #if BITS_PER_LONG >= 64
41 static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
42 {
43 	entry->d_time = time;
44 }
45 
46 static inline u64 fuse_dentry_time(struct dentry *entry)
47 {
48 	return entry->d_time;
49 }
50 #else
51 /*
52  * On 32 bit archs store the high 32 bits of time in d_fsdata
53  */
54 static void fuse_dentry_settime(struct dentry *entry, u64 time)
55 {
56 	entry->d_time = time;
57 	entry->d_fsdata = (void *) (unsigned long) (time >> 32);
58 }
59 
60 static u64 fuse_dentry_time(struct dentry *entry)
61 {
62 	return (u64) entry->d_time +
63 		((u64) (unsigned long) entry->d_fsdata << 32);
64 }
65 #endif
66 
67 /*
68  * FUSE caches dentries and attributes with separate timeout.  The
69  * time in jiffies until the dentry/attributes are valid is stored in
70  * dentry->d_time and fuse_inode->i_time respectively.
71  */
72 
73 /*
74  * Calculate the time in jiffies until a dentry/attributes are valid
75  */
76 static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
77 {
78 	if (sec || nsec) {
79 		struct timespec ts = {sec, nsec};
80 		return get_jiffies_64() + timespec_to_jiffies(&ts);
81 	} else
82 		return 0;
83 }
84 
85 /*
86  * Set dentry and possibly attribute timeouts from the lookup/mk*
87  * replies
88  */
89 static void fuse_change_entry_timeout(struct dentry *entry,
90 				      struct fuse_entry_out *o)
91 {
92 	fuse_dentry_settime(entry,
93 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
94 }
95 
96 static u64 attr_timeout(struct fuse_attr_out *o)
97 {
98 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
99 }
100 
101 static u64 entry_attr_timeout(struct fuse_entry_out *o)
102 {
103 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
104 }
105 
106 /*
107  * Mark the attributes as stale, so that at the next call to
108  * ->getattr() they will be fetched from userspace
109  */
110 void fuse_invalidate_attr(struct inode *inode)
111 {
112 	get_fuse_inode(inode)->i_time = 0;
113 }
114 
115 /**
116  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
117  * atime is not used.
118  */
void fuse_invalidate_atime(struct inode *inode)
{
	/* Nothing to invalidate when IS_RDONLY() holds for the inode */
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
124 
125 /*
126  * Just mark the entry as stale, so that a next attempt to look it up
127  * will result in a new lookup call to userspace
128  *
129  * This is called when a dentry is about to become negative and the
130  * timeout is unknown (unlink, rmdir, rename and in some cases
131  * lookup)
132  */
133 void fuse_invalidate_entry_cache(struct dentry *entry)
134 {
135 	fuse_dentry_settime(entry, 0);
136 }
137 
138 /*
139  * Same as fuse_invalidate_entry_cache(), but also try to remove the
140  * dentry from the hash
141  */
static void fuse_invalidate_entry(struct dentry *entry)
{
	/* First ask the dcache to invalidate the dentry ... */
	d_invalidate(entry);

	/* ... then expire its timeout (same as fuse_invalidate_entry_cache()) */
	fuse_dentry_settime(entry, 0);
}
147 
148 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
149 			     u64 nodeid, struct qstr *name,
150 			     struct fuse_entry_out *outarg)
151 {
152 	memset(outarg, 0, sizeof(struct fuse_entry_out));
153 	args->in.h.opcode = FUSE_LOOKUP;
154 	args->in.h.nodeid = nodeid;
155 	args->in.numargs = 1;
156 	args->in.args[0].size = name->len + 1;
157 	args->in.args[0].value = name->name;
158 	args->out.numargs = 1;
159 	args->out.args[0].size = sizeof(struct fuse_entry_out);
160 	args->out.args[0].value = outarg;
161 }
162 
163 u64 fuse_get_attr_version(struct fuse_conn *fc)
164 {
165 	u64 curr_version;
166 
167 	/*
168 	 * The spin lock isn't actually needed on 64bit archs, but we
169 	 * don't yet care too much about such optimizations.
170 	 */
171 	spin_lock(&fc->lock);
172 	curr_version = fc->attr_version;
173 	spin_unlock(&fc->lock);
174 
175 	return curr_version;
176 }
177 
178 /*
179  * Check whether the dentry is still valid
180  *
181  * If the entry validity timeout has expired and the dentry is
182  * positive, try to redo the lookup.  If the lookup results in a
183  * different inode, then let the VFS invalidate the dentry and redo
184  * the lookup once more.  If the lookup results in the same inode,
185  * then refresh the attributes, timeouts and mark the dentry valid.
186  */
187 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
188 {
189 	struct inode *inode;
190 	struct dentry *parent;
191 	struct fuse_conn *fc;
192 	struct fuse_inode *fi;
193 	int ret;
194 
195 	inode = d_inode_rcu(entry);
196 	if (inode && is_bad_inode(inode))
197 		goto invalid;
198 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
199 		 (flags & LOOKUP_REVAL)) {
200 		struct fuse_entry_out outarg;
201 		FUSE_ARGS(args);
202 		struct fuse_forget_link *forget;
203 		u64 attr_version;
204 
205 		/* For negative dentries, always do a fresh lookup */
206 		if (!inode)
207 			goto invalid;
208 
209 		ret = -ECHILD;
210 		if (flags & LOOKUP_RCU)
211 			goto out;
212 
213 		fc = get_fuse_conn(inode);
214 
215 		forget = fuse_alloc_forget();
216 		ret = -ENOMEM;
217 		if (!forget)
218 			goto out;
219 
220 		attr_version = fuse_get_attr_version(fc);
221 
222 		parent = dget_parent(entry);
223 		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
224 				 &entry->d_name, &outarg);
225 		ret = fuse_simple_request(fc, &args);
226 		dput(parent);
227 		/* Zero nodeid is same as -ENOENT */
228 		if (!ret && !outarg.nodeid)
229 			ret = -ENOENT;
230 		if (!ret) {
231 			fi = get_fuse_inode(inode);
232 			if (outarg.nodeid != get_node_id(inode)) {
233 				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
234 				goto invalid;
235 			}
236 			spin_lock(&fc->lock);
237 			fi->nlookup++;
238 			spin_unlock(&fc->lock);
239 		}
240 		kfree(forget);
241 		if (ret == -ENOMEM)
242 			goto out;
243 		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
244 			goto invalid;
245 
246 		fuse_change_attributes(inode, &outarg.attr,
247 				       entry_attr_timeout(&outarg),
248 				       attr_version);
249 		fuse_change_entry_timeout(entry, &outarg);
250 	} else if (inode) {
251 		fi = get_fuse_inode(inode);
252 		if (flags & LOOKUP_RCU) {
253 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
254 				return -ECHILD;
255 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
256 			parent = dget_parent(entry);
257 			fuse_advise_use_readdirplus(d_inode(parent));
258 			dput(parent);
259 		}
260 	}
261 	ret = 1;
262 out:
263 	return ret;
264 
265 invalid:
266 	ret = 0;
267 	goto out;
268 }
269 
270 static int invalid_nodeid(u64 nodeid)
271 {
272 	return !nodeid || nodeid == FUSE_ROOT_ID;
273 }
274 
275 const struct dentry_operations fuse_dentry_operations = {
276 	.d_revalidate	= fuse_dentry_revalidate,
277 };
278 
279 int fuse_valid_type(int m)
280 {
281 	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
282 		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
283 }
284 
285 int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
286 		     struct fuse_entry_out *outarg, struct inode **inode)
287 {
288 	struct fuse_conn *fc = get_fuse_conn_super(sb);
289 	FUSE_ARGS(args);
290 	struct fuse_forget_link *forget;
291 	u64 attr_version;
292 	int err;
293 
294 	*inode = NULL;
295 	err = -ENAMETOOLONG;
296 	if (name->len > FUSE_NAME_MAX)
297 		goto out;
298 
299 
300 	forget = fuse_alloc_forget();
301 	err = -ENOMEM;
302 	if (!forget)
303 		goto out;
304 
305 	attr_version = fuse_get_attr_version(fc);
306 
307 	fuse_lookup_init(fc, &args, nodeid, name, outarg);
308 	err = fuse_simple_request(fc, &args);
309 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
310 	if (err || !outarg->nodeid)
311 		goto out_put_forget;
312 
313 	err = -EIO;
314 	if (!outarg->nodeid)
315 		goto out_put_forget;
316 	if (!fuse_valid_type(outarg->attr.mode))
317 		goto out_put_forget;
318 
319 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
320 			   &outarg->attr, entry_attr_timeout(outarg),
321 			   attr_version);
322 	err = -ENOMEM;
323 	if (!*inode) {
324 		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
325 		goto out;
326 	}
327 	err = 0;
328 
329  out_put_forget:
330 	kfree(forget);
331  out:
332 	return err;
333 }
334 
335 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
336 				  unsigned int flags)
337 {
338 	int err;
339 	struct fuse_entry_out outarg;
340 	struct inode *inode;
341 	struct dentry *newent;
342 	bool outarg_valid = true;
343 
344 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
345 			       &outarg, &inode);
346 	if (err == -ENOENT) {
347 		outarg_valid = false;
348 		err = 0;
349 	}
350 	if (err)
351 		goto out_err;
352 
353 	err = -EIO;
354 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
355 		goto out_iput;
356 
357 	newent = d_splice_alias(inode, entry);
358 	err = PTR_ERR(newent);
359 	if (IS_ERR(newent))
360 		goto out_err;
361 
362 	entry = newent ? newent : entry;
363 	if (outarg_valid)
364 		fuse_change_entry_timeout(entry, &outarg);
365 	else
366 		fuse_invalidate_entry_cache(entry);
367 
368 	fuse_advise_use_readdirplus(dir);
369 	return newent;
370 
371  out_iput:
372 	iput(inode);
373  out_err:
374 	return ERR_PTR(err);
375 }
376 
377 /*
378  * Atomic create+open operation
379  *
380  * If the filesystem doesn't support this, then fall back to separate
381  * 'mknod' + 'open' requests.
382  */
383 static int fuse_create_open(struct inode *dir, struct dentry *entry,
384 			    struct file *file, unsigned flags,
385 			    umode_t mode, int *opened)
386 {
387 	int err;
388 	struct inode *inode;
389 	struct fuse_conn *fc = get_fuse_conn(dir);
390 	FUSE_ARGS(args);
391 	struct fuse_forget_link *forget;
392 	struct fuse_create_in inarg;
393 	struct fuse_open_out outopen;
394 	struct fuse_entry_out outentry;
395 	struct fuse_file *ff;
396 
397 	/* Userspace expects S_IFREG in create mode */
398 	BUG_ON((mode & S_IFMT) != S_IFREG);
399 
400 	forget = fuse_alloc_forget();
401 	err = -ENOMEM;
402 	if (!forget)
403 		goto out_err;
404 
405 	err = -ENOMEM;
406 	ff = fuse_file_alloc(fc);
407 	if (!ff)
408 		goto out_put_forget_req;
409 
410 	if (!fc->dont_mask)
411 		mode &= ~current_umask();
412 
413 	flags &= ~O_NOCTTY;
414 	memset(&inarg, 0, sizeof(inarg));
415 	memset(&outentry, 0, sizeof(outentry));
416 	inarg.flags = flags;
417 	inarg.mode = mode;
418 	inarg.umask = current_umask();
419 	args.in.h.opcode = FUSE_CREATE;
420 	args.in.h.nodeid = get_node_id(dir);
421 	args.in.numargs = 2;
422 	args.in.args[0].size = sizeof(inarg);
423 	args.in.args[0].value = &inarg;
424 	args.in.args[1].size = entry->d_name.len + 1;
425 	args.in.args[1].value = entry->d_name.name;
426 	args.out.numargs = 2;
427 	args.out.args[0].size = sizeof(outentry);
428 	args.out.args[0].value = &outentry;
429 	args.out.args[1].size = sizeof(outopen);
430 	args.out.args[1].value = &outopen;
431 	err = fuse_simple_request(fc, &args);
432 	if (err)
433 		goto out_free_ff;
434 
435 	err = -EIO;
436 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
437 		goto out_free_ff;
438 
439 	ff->fh = outopen.fh;
440 	ff->nodeid = outentry.nodeid;
441 	ff->open_flags = outopen.open_flags;
442 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
443 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
444 	if (!inode) {
445 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
446 		fuse_sync_release(ff, flags);
447 		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
448 		err = -ENOMEM;
449 		goto out_err;
450 	}
451 	kfree(forget);
452 	d_instantiate(entry, inode);
453 	fuse_change_entry_timeout(entry, &outentry);
454 	fuse_invalidate_attr(dir);
455 	err = finish_open(file, entry, generic_file_open, opened);
456 	if (err) {
457 		fuse_sync_release(ff, flags);
458 	} else {
459 		file->private_data = fuse_file_get(ff);
460 		fuse_finish_open(inode, file);
461 	}
462 	return err;
463 
464 out_free_ff:
465 	fuse_file_free(ff);
466 out_put_forget_req:
467 	kfree(forget);
468 out_err:
469 	return err;
470 }
471 
472 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
473 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
474 			    struct file *file, unsigned flags,
475 			    umode_t mode, int *opened)
476 {
477 	int err;
478 	struct fuse_conn *fc = get_fuse_conn(dir);
479 	struct dentry *res = NULL;
480 
481 	if (d_unhashed(entry)) {
482 		res = fuse_lookup(dir, entry, 0);
483 		if (IS_ERR(res))
484 			return PTR_ERR(res);
485 
486 		if (res)
487 			entry = res;
488 	}
489 
490 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
491 		goto no_open;
492 
493 	/* Only creates */
494 	*opened |= FILE_CREATED;
495 
496 	if (fc->no_create)
497 		goto mknod;
498 
499 	err = fuse_create_open(dir, entry, file, flags, mode, opened);
500 	if (err == -ENOSYS) {
501 		fc->no_create = 1;
502 		goto mknod;
503 	}
504 out_dput:
505 	dput(res);
506 	return err;
507 
508 mknod:
509 	err = fuse_mknod(dir, entry, mode, 0);
510 	if (err)
511 		goto out_dput;
512 no_open:
513 	return finish_no_open(file, res);
514 }
515 
516 /*
517  * Code shared between mknod, mkdir, symlink and link
518  */
519 static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
520 			    struct inode *dir, struct dentry *entry,
521 			    umode_t mode)
522 {
523 	struct fuse_entry_out outarg;
524 	struct inode *inode;
525 	int err;
526 	struct fuse_forget_link *forget;
527 
528 	forget = fuse_alloc_forget();
529 	if (!forget)
530 		return -ENOMEM;
531 
532 	memset(&outarg, 0, sizeof(outarg));
533 	args->in.h.nodeid = get_node_id(dir);
534 	args->out.numargs = 1;
535 	args->out.args[0].size = sizeof(outarg);
536 	args->out.args[0].value = &outarg;
537 	err = fuse_simple_request(fc, args);
538 	if (err)
539 		goto out_put_forget_req;
540 
541 	err = -EIO;
542 	if (invalid_nodeid(outarg.nodeid))
543 		goto out_put_forget_req;
544 
545 	if ((outarg.attr.mode ^ mode) & S_IFMT)
546 		goto out_put_forget_req;
547 
548 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
549 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
550 	if (!inode) {
551 		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
552 		return -ENOMEM;
553 	}
554 	kfree(forget);
555 
556 	err = d_instantiate_no_diralias(entry, inode);
557 	if (err)
558 		return err;
559 
560 	fuse_change_entry_timeout(entry, &outarg);
561 	fuse_invalidate_attr(dir);
562 	return 0;
563 
564  out_put_forget_req:
565 	kfree(forget);
566 	return err;
567 }
568 
569 static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
570 		      dev_t rdev)
571 {
572 	struct fuse_mknod_in inarg;
573 	struct fuse_conn *fc = get_fuse_conn(dir);
574 	FUSE_ARGS(args);
575 
576 	if (!fc->dont_mask)
577 		mode &= ~current_umask();
578 
579 	memset(&inarg, 0, sizeof(inarg));
580 	inarg.mode = mode;
581 	inarg.rdev = new_encode_dev(rdev);
582 	inarg.umask = current_umask();
583 	args.in.h.opcode = FUSE_MKNOD;
584 	args.in.numargs = 2;
585 	args.in.args[0].size = sizeof(inarg);
586 	args.in.args[0].value = &inarg;
587 	args.in.args[1].size = entry->d_name.len + 1;
588 	args.in.args[1].value = entry->d_name.name;
589 	return create_new_entry(fc, &args, dir, entry, mode);
590 }
591 
592 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
593 		       bool excl)
594 {
595 	return fuse_mknod(dir, entry, mode, 0);
596 }
597 
598 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
599 {
600 	struct fuse_mkdir_in inarg;
601 	struct fuse_conn *fc = get_fuse_conn(dir);
602 	FUSE_ARGS(args);
603 
604 	if (!fc->dont_mask)
605 		mode &= ~current_umask();
606 
607 	memset(&inarg, 0, sizeof(inarg));
608 	inarg.mode = mode;
609 	inarg.umask = current_umask();
610 	args.in.h.opcode = FUSE_MKDIR;
611 	args.in.numargs = 2;
612 	args.in.args[0].size = sizeof(inarg);
613 	args.in.args[0].value = &inarg;
614 	args.in.args[1].size = entry->d_name.len + 1;
615 	args.in.args[1].value = entry->d_name.name;
616 	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
617 }
618 
619 static int fuse_symlink(struct inode *dir, struct dentry *entry,
620 			const char *link)
621 {
622 	struct fuse_conn *fc = get_fuse_conn(dir);
623 	unsigned len = strlen(link) + 1;
624 	FUSE_ARGS(args);
625 
626 	args.in.h.opcode = FUSE_SYMLINK;
627 	args.in.numargs = 2;
628 	args.in.args[0].size = entry->d_name.len + 1;
629 	args.in.args[0].value = entry->d_name.name;
630 	args.in.args[1].size = len;
631 	args.in.args[1].value = link;
632 	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
633 }
634 
635 static inline void fuse_update_ctime(struct inode *inode)
636 {
637 	if (!IS_NOCMTIME(inode)) {
638 		inode->i_ctime = current_fs_time(inode->i_sb);
639 		mark_inode_dirty_sync(inode);
640 	}
641 }
642 
643 static int fuse_unlink(struct inode *dir, struct dentry *entry)
644 {
645 	int err;
646 	struct fuse_conn *fc = get_fuse_conn(dir);
647 	FUSE_ARGS(args);
648 
649 	args.in.h.opcode = FUSE_UNLINK;
650 	args.in.h.nodeid = get_node_id(dir);
651 	args.in.numargs = 1;
652 	args.in.args[0].size = entry->d_name.len + 1;
653 	args.in.args[0].value = entry->d_name.name;
654 	err = fuse_simple_request(fc, &args);
655 	if (!err) {
656 		struct inode *inode = d_inode(entry);
657 		struct fuse_inode *fi = get_fuse_inode(inode);
658 
659 		spin_lock(&fc->lock);
660 		fi->attr_version = ++fc->attr_version;
661 		/*
662 		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
663 		 * happen if userspace filesystem is careless.  It would be
664 		 * difficult to enforce correct nlink usage so just ignore this
665 		 * condition here
666 		 */
667 		if (inode->i_nlink > 0)
668 			drop_nlink(inode);
669 		spin_unlock(&fc->lock);
670 		fuse_invalidate_attr(inode);
671 		fuse_invalidate_attr(dir);
672 		fuse_invalidate_entry_cache(entry);
673 		fuse_update_ctime(inode);
674 	} else if (err == -EINTR)
675 		fuse_invalidate_entry(entry);
676 	return err;
677 }
678 
679 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
680 {
681 	int err;
682 	struct fuse_conn *fc = get_fuse_conn(dir);
683 	FUSE_ARGS(args);
684 
685 	args.in.h.opcode = FUSE_RMDIR;
686 	args.in.h.nodeid = get_node_id(dir);
687 	args.in.numargs = 1;
688 	args.in.args[0].size = entry->d_name.len + 1;
689 	args.in.args[0].value = entry->d_name.name;
690 	err = fuse_simple_request(fc, &args);
691 	if (!err) {
692 		clear_nlink(d_inode(entry));
693 		fuse_invalidate_attr(dir);
694 		fuse_invalidate_entry_cache(entry);
695 	} else if (err == -EINTR)
696 		fuse_invalidate_entry(entry);
697 	return err;
698 }
699 
700 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
701 			      struct inode *newdir, struct dentry *newent,
702 			      unsigned int flags, int opcode, size_t argsize)
703 {
704 	int err;
705 	struct fuse_rename2_in inarg;
706 	struct fuse_conn *fc = get_fuse_conn(olddir);
707 	FUSE_ARGS(args);
708 
709 	memset(&inarg, 0, argsize);
710 	inarg.newdir = get_node_id(newdir);
711 	inarg.flags = flags;
712 	args.in.h.opcode = opcode;
713 	args.in.h.nodeid = get_node_id(olddir);
714 	args.in.numargs = 3;
715 	args.in.args[0].size = argsize;
716 	args.in.args[0].value = &inarg;
717 	args.in.args[1].size = oldent->d_name.len + 1;
718 	args.in.args[1].value = oldent->d_name.name;
719 	args.in.args[2].size = newent->d_name.len + 1;
720 	args.in.args[2].value = newent->d_name.name;
721 	err = fuse_simple_request(fc, &args);
722 	if (!err) {
723 		/* ctime changes */
724 		fuse_invalidate_attr(d_inode(oldent));
725 		fuse_update_ctime(d_inode(oldent));
726 
727 		if (flags & RENAME_EXCHANGE) {
728 			fuse_invalidate_attr(d_inode(newent));
729 			fuse_update_ctime(d_inode(newent));
730 		}
731 
732 		fuse_invalidate_attr(olddir);
733 		if (olddir != newdir)
734 			fuse_invalidate_attr(newdir);
735 
736 		/* newent will end up negative */
737 		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
738 			fuse_invalidate_attr(d_inode(newent));
739 			fuse_invalidate_entry_cache(newent);
740 			fuse_update_ctime(d_inode(newent));
741 		}
742 	} else if (err == -EINTR) {
743 		/* If request was interrupted, DEITY only knows if the
744 		   rename actually took place.  If the invalidation
745 		   fails (e.g. some process has CWD under the renamed
746 		   directory), then there can be inconsistency between
747 		   the dcache and the real filesystem.  Tough luck. */
748 		fuse_invalidate_entry(oldent);
749 		if (d_really_is_positive(newent))
750 			fuse_invalidate_entry(newent);
751 	}
752 
753 	return err;
754 }
755 
756 static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
757 			struct inode *newdir, struct dentry *newent,
758 			unsigned int flags)
759 {
760 	struct fuse_conn *fc = get_fuse_conn(olddir);
761 	int err;
762 
763 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
764 		return -EINVAL;
765 
766 	if (flags) {
767 		if (fc->no_rename2 || fc->minor < 23)
768 			return -EINVAL;
769 
770 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
771 					 FUSE_RENAME2,
772 					 sizeof(struct fuse_rename2_in));
773 		if (err == -ENOSYS) {
774 			fc->no_rename2 = 1;
775 			err = -EINVAL;
776 		}
777 	} else {
778 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
779 					 FUSE_RENAME,
780 					 sizeof(struct fuse_rename_in));
781 	}
782 
783 	return err;
784 }
785 
786 static int fuse_link(struct dentry *entry, struct inode *newdir,
787 		     struct dentry *newent)
788 {
789 	int err;
790 	struct fuse_link_in inarg;
791 	struct inode *inode = d_inode(entry);
792 	struct fuse_conn *fc = get_fuse_conn(inode);
793 	FUSE_ARGS(args);
794 
795 	memset(&inarg, 0, sizeof(inarg));
796 	inarg.oldnodeid = get_node_id(inode);
797 	args.in.h.opcode = FUSE_LINK;
798 	args.in.numargs = 2;
799 	args.in.args[0].size = sizeof(inarg);
800 	args.in.args[0].value = &inarg;
801 	args.in.args[1].size = newent->d_name.len + 1;
802 	args.in.args[1].value = newent->d_name.name;
803 	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
804 	/* Contrary to "normal" filesystems it can happen that link
805 	   makes two "logical" inodes point to the same "physical"
806 	   inode.  We invalidate the attributes of the old one, so it
807 	   will reflect changes in the backing inode (link count,
808 	   etc.)
809 	*/
810 	if (!err) {
811 		struct fuse_inode *fi = get_fuse_inode(inode);
812 
813 		spin_lock(&fc->lock);
814 		fi->attr_version = ++fc->attr_version;
815 		inc_nlink(inode);
816 		spin_unlock(&fc->lock);
817 		fuse_invalidate_attr(inode);
818 		fuse_update_ctime(inode);
819 	} else if (err == -EINTR) {
820 		fuse_invalidate_attr(inode);
821 	}
822 	return err;
823 }
824 
825 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
826 			  struct kstat *stat)
827 {
828 	unsigned int blkbits;
829 	struct fuse_conn *fc = get_fuse_conn(inode);
830 
831 	/* see the comment in fuse_change_attributes() */
832 	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
833 		attr->size = i_size_read(inode);
834 		attr->mtime = inode->i_mtime.tv_sec;
835 		attr->mtimensec = inode->i_mtime.tv_nsec;
836 		attr->ctime = inode->i_ctime.tv_sec;
837 		attr->ctimensec = inode->i_ctime.tv_nsec;
838 	}
839 
840 	stat->dev = inode->i_sb->s_dev;
841 	stat->ino = attr->ino;
842 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
843 	stat->nlink = attr->nlink;
844 	stat->uid = make_kuid(&init_user_ns, attr->uid);
845 	stat->gid = make_kgid(&init_user_ns, attr->gid);
846 	stat->rdev = inode->i_rdev;
847 	stat->atime.tv_sec = attr->atime;
848 	stat->atime.tv_nsec = attr->atimensec;
849 	stat->mtime.tv_sec = attr->mtime;
850 	stat->mtime.tv_nsec = attr->mtimensec;
851 	stat->ctime.tv_sec = attr->ctime;
852 	stat->ctime.tv_nsec = attr->ctimensec;
853 	stat->size = attr->size;
854 	stat->blocks = attr->blocks;
855 
856 	if (attr->blksize != 0)
857 		blkbits = ilog2(attr->blksize);
858 	else
859 		blkbits = inode->i_sb->s_blocksize_bits;
860 
861 	stat->blksize = 1 << blkbits;
862 }
863 
864 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
865 			   struct file *file)
866 {
867 	int err;
868 	struct fuse_getattr_in inarg;
869 	struct fuse_attr_out outarg;
870 	struct fuse_conn *fc = get_fuse_conn(inode);
871 	FUSE_ARGS(args);
872 	u64 attr_version;
873 
874 	attr_version = fuse_get_attr_version(fc);
875 
876 	memset(&inarg, 0, sizeof(inarg));
877 	memset(&outarg, 0, sizeof(outarg));
878 	/* Directories have separate file-handle space */
879 	if (file && S_ISREG(inode->i_mode)) {
880 		struct fuse_file *ff = file->private_data;
881 
882 		inarg.getattr_flags |= FUSE_GETATTR_FH;
883 		inarg.fh = ff->fh;
884 	}
885 	args.in.h.opcode = FUSE_GETATTR;
886 	args.in.h.nodeid = get_node_id(inode);
887 	args.in.numargs = 1;
888 	args.in.args[0].size = sizeof(inarg);
889 	args.in.args[0].value = &inarg;
890 	args.out.numargs = 1;
891 	args.out.args[0].size = sizeof(outarg);
892 	args.out.args[0].value = &outarg;
893 	err = fuse_simple_request(fc, &args);
894 	if (!err) {
895 		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
896 			make_bad_inode(inode);
897 			err = -EIO;
898 		} else {
899 			fuse_change_attributes(inode, &outarg.attr,
900 					       attr_timeout(&outarg),
901 					       attr_version);
902 			if (stat)
903 				fuse_fillattr(inode, &outarg.attr, stat);
904 		}
905 	}
906 	return err;
907 }
908 
909 int fuse_update_attributes(struct inode *inode, struct kstat *stat,
910 			   struct file *file, bool *refreshed)
911 {
912 	struct fuse_inode *fi = get_fuse_inode(inode);
913 	int err;
914 	bool r;
915 
916 	if (time_before64(fi->i_time, get_jiffies_64())) {
917 		r = true;
918 		err = fuse_do_getattr(inode, stat, file);
919 	} else {
920 		r = false;
921 		err = 0;
922 		if (stat) {
923 			generic_fillattr(inode, stat);
924 			stat->mode = fi->orig_i_mode;
925 			stat->ino = fi->orig_ino;
926 		}
927 	}
928 
929 	if (refreshed != NULL)
930 		*refreshed = r;
931 
932 	return err;
933 }
934 
935 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
936 			     u64 child_nodeid, struct qstr *name)
937 {
938 	int err = -ENOTDIR;
939 	struct inode *parent;
940 	struct dentry *dir;
941 	struct dentry *entry;
942 
943 	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
944 	if (!parent)
945 		return -ENOENT;
946 
947 	inode_lock(parent);
948 	if (!S_ISDIR(parent->i_mode))
949 		goto unlock;
950 
951 	err = -ENOENT;
952 	dir = d_find_alias(parent);
953 	if (!dir)
954 		goto unlock;
955 
956 	entry = d_lookup(dir, name);
957 	dput(dir);
958 	if (!entry)
959 		goto unlock;
960 
961 	fuse_invalidate_attr(parent);
962 	fuse_invalidate_entry(entry);
963 
964 	if (child_nodeid != 0 && d_really_is_positive(entry)) {
965 		inode_lock(d_inode(entry));
966 		if (get_node_id(d_inode(entry)) != child_nodeid) {
967 			err = -ENOENT;
968 			goto badentry;
969 		}
970 		if (d_mountpoint(entry)) {
971 			err = -EBUSY;
972 			goto badentry;
973 		}
974 		if (d_is_dir(entry)) {
975 			shrink_dcache_parent(entry);
976 			if (!simple_empty(entry)) {
977 				err = -ENOTEMPTY;
978 				goto badentry;
979 			}
980 			d_inode(entry)->i_flags |= S_DEAD;
981 		}
982 		dont_mount(entry);
983 		clear_nlink(d_inode(entry));
984 		err = 0;
985  badentry:
986 		inode_unlock(d_inode(entry));
987 		if (!err)
988 			d_delete(entry);
989 	} else {
990 		err = 0;
991 	}
992 	dput(entry);
993 
994  unlock:
995 	inode_unlock(parent);
996 	iput(parent);
997 	return err;
998 }
999 
1000 /*
1001  * Calling into a user-controlled filesystem gives the filesystem
1002  * daemon ptrace-like capabilities over the current process.  This
1003  * means, that the filesystem daemon is able to record the exact
1004  * filesystem operations performed, and can also control the behavior
1005  * of the requester process in otherwise impossible ways.  For example
1006  * it can delay the operation for arbitrary length of time allowing
1007  * DoS against the requester.
1008  *
1009  * For this reason only those processes can call into the filesystem,
1010  * for which the owner of the mount has ptrace privilege.  This
1011  * excludes processes started by other users, suid or sgid processes.
1012  */
1013 int fuse_allow_current_process(struct fuse_conn *fc)
1014 {
1015 	const struct cred *cred;
1016 
1017 	if (fc->flags & FUSE_ALLOW_OTHER)
1018 		return 1;
1019 
1020 	cred = current_cred();
1021 	if (uid_eq(cred->euid, fc->user_id) &&
1022 	    uid_eq(cred->suid, fc->user_id) &&
1023 	    uid_eq(cred->uid,  fc->user_id) &&
1024 	    gid_eq(cred->egid, fc->group_id) &&
1025 	    gid_eq(cred->sgid, fc->group_id) &&
1026 	    gid_eq(cred->gid,  fc->group_id))
1027 		return 1;
1028 
1029 	return 0;
1030 }
1031 
1032 static int fuse_access(struct inode *inode, int mask)
1033 {
1034 	struct fuse_conn *fc = get_fuse_conn(inode);
1035 	FUSE_ARGS(args);
1036 	struct fuse_access_in inarg;
1037 	int err;
1038 
1039 	BUG_ON(mask & MAY_NOT_BLOCK);
1040 
1041 	if (fc->no_access)
1042 		return 0;
1043 
1044 	memset(&inarg, 0, sizeof(inarg));
1045 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1046 	args.in.h.opcode = FUSE_ACCESS;
1047 	args.in.h.nodeid = get_node_id(inode);
1048 	args.in.numargs = 1;
1049 	args.in.args[0].size = sizeof(inarg);
1050 	args.in.args[0].value = &inarg;
1051 	err = fuse_simple_request(fc, &args);
1052 	if (err == -ENOSYS) {
1053 		fc->no_access = 1;
1054 		err = 0;
1055 	}
1056 	return err;
1057 }
1058 
1059 static int fuse_perm_getattr(struct inode *inode, int mask)
1060 {
1061 	if (mask & MAY_NOT_BLOCK)
1062 		return -ECHILD;
1063 
1064 	return fuse_do_getattr(inode, NULL, NULL);
1065 }
1066 
1067 /*
1068  * Check permission.  The two basic access models of FUSE are:
1069  *
1070  * 1) Local access checking ('default_permissions' mount option) based
1071  * on file mode.  This is the plain old disk filesystem permission
1072  * model.
1073  *
1074  * 2) "Remote" access checking, where server is responsible for
1075  * checking permission in each inode operation.  An exception to this
1076  * is if ->permission() was invoked from sys_access() in which case an
1077  * access request is sent.  Execute permission is still checked
1078  * locally based on file mode.
1079  */
1080 static int fuse_permission(struct inode *inode, int mask)
1081 {
1082 	struct fuse_conn *fc = get_fuse_conn(inode);
1083 	bool refreshed = false;
1084 	int err = 0;
1085 
1086 	if (!fuse_allow_current_process(fc))
1087 		return -EACCES;
1088 
1089 	/*
1090 	 * If attributes are needed, refresh them before proceeding
1091 	 */
1092 	if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
1093 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1094 		struct fuse_inode *fi = get_fuse_inode(inode);
1095 
1096 		if (time_before64(fi->i_time, get_jiffies_64())) {
1097 			refreshed = true;
1098 
1099 			err = fuse_perm_getattr(inode, mask);
1100 			if (err)
1101 				return err;
1102 		}
1103 	}
1104 
1105 	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1106 		err = generic_permission(inode, mask);
1107 
1108 		/* If permission is denied, try to refresh file
1109 		   attributes.  This is also needed, because the root
1110 		   node will at first have no permissions */
1111 		if (err == -EACCES && !refreshed) {
1112 			err = fuse_perm_getattr(inode, mask);
1113 			if (!err)
1114 				err = generic_permission(inode, mask);
1115 		}
1116 
1117 		/* Note: the opposite of the above test does not
1118 		   exist.  So if permissions are revoked this won't be
1119 		   noticed immediately, only after the attribute
1120 		   timeout has expired */
1121 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1122 		err = fuse_access(inode, mask);
1123 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1124 		if (!(inode->i_mode & S_IXUGO)) {
1125 			if (refreshed)
1126 				return -EACCES;
1127 
1128 			err = fuse_perm_getattr(inode, mask);
1129 			if (!err && !(inode->i_mode & S_IXUGO))
1130 				return -EACCES;
1131 		}
1132 	}
1133 	return err;
1134 }
1135 
1136 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1137 			 struct dir_context *ctx)
1138 {
1139 	while (nbytes >= FUSE_NAME_OFFSET) {
1140 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1141 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
1142 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1143 			return -EIO;
1144 		if (reclen > nbytes)
1145 			break;
1146 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1147 			return -EIO;
1148 
1149 		if (!dir_emit(ctx, dirent->name, dirent->namelen,
1150 			       dirent->ino, dirent->type))
1151 			break;
1152 
1153 		buf += reclen;
1154 		nbytes -= reclen;
1155 		ctx->pos = dirent->off;
1156 	}
1157 
1158 	return 0;
1159 }
1160 
/*
 * Use the entry_out part of one READDIRPLUS record to populate or
 * refresh the dentry named by the record under the directory @file.
 *
 * @file:         the open directory; its dentry is used as the parent
 * @direntplus:   the record (entry_out + dirent) being processed
 * @attr_version: attribute version passed on to
 *                fuse_change_attributes() / fuse_iget()
 *
 * Returns 0 on success and also when the record is deliberately skipped
 * (zero nodeid, "." or ".."), -EIO for an invalid nodeid or file type
 * or a bad existing inode, -ENOMEM if d_alloc() or fuse_iget() fails,
 * or the error returned by d_splice_alias().
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version)
{
	int err;
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (!fuse_valid_type(o->attr.mode))
		return -EIO;

	fc = get_fuse_conn(dir);

	name.hash = full_name_hash(name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (dentry) {
		inode = d_inode(dentry);
		if (!inode) {
			/* Negative dentry: unhash it and allocate a new one */
			d_drop(dentry);
		} else if (get_node_id(inode) != o->nodeid ||
			   ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
			/* Cached entry has a different nodeid or file type */
			d_invalidate(dentry);
		} else if (is_bad_inode(inode)) {
			err = -EIO;
			goto out;
		} else {
			/* Same node: account the lookup and refresh attributes */
			struct fuse_inode *fi;
			fi = get_fuse_inode(inode);
			spin_lock(&fc->lock);
			fi->nlookup++;
			spin_unlock(&fc->lock);

			fuse_change_attributes(inode, &o->attr,
					       entry_attr_timeout(o),
					       attr_version);

			/*
			 * The other branch to 'found' comes via fuse_iget()
			 * which bumps nlookup inside
			 */
			goto found;
		}
		/* Release the d_lookup() reference on the unusable dentry */
		dput(dentry);
	}

	dentry = d_alloc(parent, &name);
	/* err stays -ENOMEM for both the d_alloc() and fuse_iget() failures */
	err = -ENOMEM;
	if (!dentry)
		goto out;

	inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
			  &o->attr, entry_attr_timeout(o), attr_version);
	if (!inode)
		goto out;

	alias = d_splice_alias(inode, dentry);
	err = PTR_ERR(alias);
	if (IS_ERR(alias))
		goto out;

	if (alias) {
		/* d_splice_alias() returned another dentry; continue with it */
		dput(dentry);
		dentry = alias;
	}

found:
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	fuse_change_entry_timeout(dentry, o);

	err = 0;
out:
	/* dentry is NULL here only when d_alloc() failed */
	dput(dentry);
	return err;
}
1267 
1268 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1269 			     struct dir_context *ctx, u64 attr_version)
1270 {
1271 	struct fuse_direntplus *direntplus;
1272 	struct fuse_dirent *dirent;
1273 	size_t reclen;
1274 	int over = 0;
1275 	int ret;
1276 
1277 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1278 		direntplus = (struct fuse_direntplus *) buf;
1279 		dirent = &direntplus->dirent;
1280 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1281 
1282 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1283 			return -EIO;
1284 		if (reclen > nbytes)
1285 			break;
1286 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1287 			return -EIO;
1288 
1289 		if (!over) {
1290 			/* We fill entries into dstbuf only as much as
1291 			   it can hold. But we still continue iterating
1292 			   over remaining entries to link them. If not,
1293 			   we need to send a FORGET for each of those
1294 			   which we did not link.
1295 			*/
1296 			over = !dir_emit(ctx, dirent->name, dirent->namelen,
1297 				       dirent->ino, dirent->type);
1298 			ctx->pos = dirent->off;
1299 		}
1300 
1301 		buf += reclen;
1302 		nbytes -= reclen;
1303 
1304 		ret = fuse_direntplus_link(file, direntplus, attr_version);
1305 		if (ret)
1306 			fuse_force_forget(file, direntplus->entry_out.nodeid);
1307 	}
1308 
1309 	return 0;
1310 }
1311 
1312 static int fuse_readdir(struct file *file, struct dir_context *ctx)
1313 {
1314 	int plus, err;
1315 	size_t nbytes;
1316 	struct page *page;
1317 	struct inode *inode = file_inode(file);
1318 	struct fuse_conn *fc = get_fuse_conn(inode);
1319 	struct fuse_req *req;
1320 	u64 attr_version = 0;
1321 
1322 	if (is_bad_inode(inode))
1323 		return -EIO;
1324 
1325 	req = fuse_get_req(fc, 1);
1326 	if (IS_ERR(req))
1327 		return PTR_ERR(req);
1328 
1329 	page = alloc_page(GFP_KERNEL);
1330 	if (!page) {
1331 		fuse_put_request(fc, req);
1332 		return -ENOMEM;
1333 	}
1334 
1335 	plus = fuse_use_readdirplus(inode, ctx);
1336 	req->out.argpages = 1;
1337 	req->num_pages = 1;
1338 	req->pages[0] = page;
1339 	req->page_descs[0].length = PAGE_SIZE;
1340 	if (plus) {
1341 		attr_version = fuse_get_attr_version(fc);
1342 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1343 			       FUSE_READDIRPLUS);
1344 	} else {
1345 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1346 			       FUSE_READDIR);
1347 	}
1348 	fuse_request_send(fc, req);
1349 	nbytes = req->out.args[0].size;
1350 	err = req->out.h.error;
1351 	fuse_put_request(fc, req);
1352 	if (!err) {
1353 		if (plus) {
1354 			err = parse_dirplusfile(page_address(page), nbytes,
1355 						file, ctx,
1356 						attr_version);
1357 		} else {
1358 			err = parse_dirfile(page_address(page), nbytes, file,
1359 					    ctx);
1360 		}
1361 	}
1362 
1363 	__free_page(page);
1364 	fuse_invalidate_atime(inode);
1365 	return err;
1366 }
1367 
1368 static const char *fuse_get_link(struct dentry *dentry,
1369 				 struct inode *inode,
1370 				 struct delayed_call *done)
1371 {
1372 	struct fuse_conn *fc = get_fuse_conn(inode);
1373 	FUSE_ARGS(args);
1374 	char *link;
1375 	ssize_t ret;
1376 
1377 	if (!dentry)
1378 		return ERR_PTR(-ECHILD);
1379 
1380 	link = kmalloc(PAGE_SIZE, GFP_KERNEL);
1381 	if (!link)
1382 		return ERR_PTR(-ENOMEM);
1383 
1384 	args.in.h.opcode = FUSE_READLINK;
1385 	args.in.h.nodeid = get_node_id(inode);
1386 	args.out.argvar = 1;
1387 	args.out.numargs = 1;
1388 	args.out.args[0].size = PAGE_SIZE - 1;
1389 	args.out.args[0].value = link;
1390 	ret = fuse_simple_request(fc, &args);
1391 	if (ret < 0) {
1392 		kfree(link);
1393 		link = ERR_PTR(ret);
1394 	} else {
1395 		link[ret] = '\0';
1396 		set_delayed_call(done, kfree_link, link);
1397 	}
1398 	fuse_invalidate_atime(inode);
1399 	return link;
1400 }
1401 
/*
 * ->open() for directories: delegates to fuse_open_common() with its
 * third argument set to true.
 */
static int fuse_dir_open(struct inode *inode, struct file *file)
{
	return fuse_open_common(inode, file, true);
}
1406 
/*
 * ->release() for directories: hands the file to fuse_release_common()
 * with the FUSE_RELEASEDIR opcode.  Always returns 0.
 */
static int fuse_dir_release(struct inode *inode, struct file *file)
{
	fuse_release_common(file, FUSE_RELEASEDIR);

	return 0;
}
1413 
/*
 * ->fsync() for directories: forwards to fuse_fsync_common() with its
 * last argument set to 1.
 */
static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	return fuse_fsync_common(file, start, end, datasync, 1);
}
1419 
1420 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1421 			    unsigned long arg)
1422 {
1423 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1424 
1425 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1426 	if (fc->minor < 18)
1427 		return -ENOTTY;
1428 
1429 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1430 }
1431 
1432 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1433 				   unsigned long arg)
1434 {
1435 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1436 
1437 	if (fc->minor < 18)
1438 		return -ENOTTY;
1439 
1440 	return fuse_ioctl_common(file, cmd, arg,
1441 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1442 }
1443 
1444 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1445 {
1446 	/* Always update if mtime is explicitly set  */
1447 	if (ivalid & ATTR_MTIME_SET)
1448 		return true;
1449 
1450 	/* Or if kernel i_mtime is the official one */
1451 	if (trust_local_mtime)
1452 		return true;
1453 
1454 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1455 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1456 		return false;
1457 
1458 	/* In all other cases update */
1459 	return true;
1460 }
1461 
1462 static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1463 			   bool trust_local_cmtime)
1464 {
1465 	unsigned ivalid = iattr->ia_valid;
1466 
1467 	if (ivalid & ATTR_MODE)
1468 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1469 	if (ivalid & ATTR_UID)
1470 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1471 	if (ivalid & ATTR_GID)
1472 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1473 	if (ivalid & ATTR_SIZE)
1474 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1475 	if (ivalid & ATTR_ATIME) {
1476 		arg->valid |= FATTR_ATIME;
1477 		arg->atime = iattr->ia_atime.tv_sec;
1478 		arg->atimensec = iattr->ia_atime.tv_nsec;
1479 		if (!(ivalid & ATTR_ATIME_SET))
1480 			arg->valid |= FATTR_ATIME_NOW;
1481 	}
1482 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1483 		arg->valid |= FATTR_MTIME;
1484 		arg->mtime = iattr->ia_mtime.tv_sec;
1485 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1486 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1487 			arg->valid |= FATTR_MTIME_NOW;
1488 	}
1489 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1490 		arg->valid |= FATTR_CTIME;
1491 		arg->ctime = iattr->ia_ctime.tv_sec;
1492 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1493 	}
1494 }
1495 
/*
 * Prevent concurrent writepages on inode
 *
 * This is done by adding a negative bias to the inode write counter
 * and waiting for all pending writes to finish.
 *
 * The caller must hold the inode lock (checked with BUG_ON), and the
 * counter must not already be negative when this is called.  The bias
 * is added under fc->lock; the function then sleeps on fi->page_waitq
 * until the counter equals exactly FUSE_NOWRITE.
 */
void fuse_set_nowrite(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(!inode_is_locked(inode));

	spin_lock(&fc->lock);
	BUG_ON(fi->writectr < 0);
	fi->writectr += FUSE_NOWRITE;
	spin_unlock(&fc->lock);
	/* Only the bias is left once every pending write has completed */
	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
}
1515 
/*
 * Allow writepages on inode
 *
 * Remove the bias from the writecounter and send any queued
 * writepages.
 *
 * The counter must equal exactly FUSE_NOWRITE on entry (checked with
 * BUG_ON).  Both callers visible in this file invoke this with
 * fc->lock held.
 */
static void __fuse_release_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(fi->writectr != FUSE_NOWRITE);
	fi->writectr = 0;
	fuse_flush_writepages(inode);
}
1530 
/*
 * Locked wrapper around __fuse_release_nowrite(): takes fc->lock,
 * removes the write bias from @inode and drops the lock again.
 */
void fuse_release_nowrite(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	spin_lock(&fc->lock);
	__fuse_release_nowrite(inode);
	spin_unlock(&fc->lock);
}
1539 
1540 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1541 			      struct inode *inode,
1542 			      struct fuse_setattr_in *inarg_p,
1543 			      struct fuse_attr_out *outarg_p)
1544 {
1545 	args->in.h.opcode = FUSE_SETATTR;
1546 	args->in.h.nodeid = get_node_id(inode);
1547 	args->in.numargs = 1;
1548 	args->in.args[0].size = sizeof(*inarg_p);
1549 	args->in.args[0].value = inarg_p;
1550 	args->out.numargs = 1;
1551 	args->out.args[0].size = sizeof(*outarg_p);
1552 	args->out.args[0].value = outarg_p;
1553 }
1554 
1555 /*
1556  * Flush inode->i_mtime to the server
1557  */
1558 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1559 {
1560 	struct fuse_conn *fc = get_fuse_conn(inode);
1561 	FUSE_ARGS(args);
1562 	struct fuse_setattr_in inarg;
1563 	struct fuse_attr_out outarg;
1564 
1565 	memset(&inarg, 0, sizeof(inarg));
1566 	memset(&outarg, 0, sizeof(outarg));
1567 
1568 	inarg.valid = FATTR_MTIME;
1569 	inarg.mtime = inode->i_mtime.tv_sec;
1570 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1571 	if (fc->minor >= 23) {
1572 		inarg.valid |= FATTR_CTIME;
1573 		inarg.ctime = inode->i_ctime.tv_sec;
1574 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1575 	}
1576 	if (ff) {
1577 		inarg.valid |= FATTR_FH;
1578 		inarg.fh = ff->fh;
1579 	}
1580 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1581 
1582 	return fuse_simple_request(fc, &args);
1583 }
1584 
/*
 * Set attributes, and at the same time refresh them.
 *
 * Truncation is slightly complicated, because the 'truncate' request
 * may fail, in which case we don't want to touch the mapping.
 * vmtruncate() doesn't allow for this case, so do the rlimit checking
 * and the actual truncation by hand.
 *
 * @inode: inode whose attributes are changed
 * @attr:  requested change; ia_valid may be modified here (ATTR_FORCE,
 *         ATTR_MTIME, ATTR_CTIME can be added)
 * @file:  open file whose handle is sent along, or NULL
 *
 * Returns 0 on success, the error from inode_change_ok() or from the
 * request, or -EIO if the reply reports a different file type (the
 * inode is then marked bad).
 */
int fuse_do_setattr(struct inode *inode, struct iattr *attr,
		    struct file *file)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	FUSE_ARGS(args);
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;
	bool is_truncate = false;
	bool is_wb = fc->writeback_cache;
	loff_t oldsize;
	int err;
	/* With writeback cache, mtime/ctime of regular files are kept locally */
	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);

	/* Without default_permissions, ATTR_FORCE is set before the local check */
	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
		attr->ia_valid |= ATTR_FORCE;

	err = inode_change_ok(inode, attr);
	if (err)
		return err;

	if (attr->ia_valid & ATTR_OPEN) {
		/* With atomic_o_trunc set no SETATTR request is sent for this case */
		if (fc->atomic_o_trunc)
			return 0;
		file = NULL;
	}

	if (attr->ia_valid & ATTR_SIZE)
		is_truncate = true;

	if (is_truncate) {
		/* Block writepages for the duration of the size change */
		fuse_set_nowrite(inode);
		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
		if (trust_local_cmtime && attr->ia_size != inode->i_size)
			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
	}

	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));
	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
	if (file) {
		struct fuse_file *ff = file->private_data;
		inarg.valid |= FATTR_FH;
		inarg.fh = ff->fh;
	}
	if (attr->ia_valid & ATTR_SIZE) {
		/* For mandatory locking in truncate */
		inarg.valid |= FATTR_LOCKOWNER;
		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
	}
	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
	err = fuse_simple_request(fc, &args);
	if (err) {
		/* An interrupted request invalidates the cached attributes */
		if (err == -EINTR)
			fuse_invalidate_attr(inode);
		goto error;
	}

	/* The reply must not report a different file type for this inode */
	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
		make_bad_inode(inode);
		err = -EIO;
		goto error;
	}

	spin_lock(&fc->lock);
	/* the kernel maintains i_mtime locally */
	if (trust_local_cmtime) {
		if (attr->ia_valid & ATTR_MTIME)
			inode->i_mtime = attr->ia_mtime;
		if (attr->ia_valid & ATTR_CTIME)
			inode->i_ctime = attr->ia_ctime;
		/* FIXME: clear I_DIRTY_SYNC? */
	}

	fuse_change_attributes_common(inode, &outarg.attr,
				      attr_timeout(&outarg));
	/* Remember the size before it is overwritten; compared below */
	oldsize = inode->i_size;
	/* see the comment in fuse_change_attributes() */
	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
		i_size_write(inode, outarg.attr.size);

	if (is_truncate) {
		/* NOTE: this may release/reacquire fc->lock */
		__fuse_release_nowrite(inode);
	}
	spin_unlock(&fc->lock);

	/*
	 * Only call invalidate_inode_pages2() after removing
	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
	 */
	if ((is_truncate || !is_wb) &&
	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
		truncate_pagecache(inode, outarg.attr.size);
		invalidate_inode_pages2(inode->i_mapping);
	}

	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
	return 0;

error:
	/* Undo the write bias taken above before reporting the failure */
	if (is_truncate)
		fuse_release_nowrite(inode);

	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
	return err;
}
1700 
1701 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1702 {
1703 	struct inode *inode = d_inode(entry);
1704 
1705 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1706 		return -EACCES;
1707 
1708 	if (attr->ia_valid & ATTR_FILE)
1709 		return fuse_do_setattr(inode, attr, attr->ia_file);
1710 	else
1711 		return fuse_do_setattr(inode, attr, NULL);
1712 }
1713 
1714 static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1715 			struct kstat *stat)
1716 {
1717 	struct inode *inode = d_inode(entry);
1718 	struct fuse_conn *fc = get_fuse_conn(inode);
1719 
1720 	if (!fuse_allow_current_process(fc))
1721 		return -EACCES;
1722 
1723 	return fuse_update_attributes(inode, stat, NULL, NULL);
1724 }
1725 
1726 static int fuse_setxattr(struct dentry *entry, const char *name,
1727 			 const void *value, size_t size, int flags)
1728 {
1729 	struct inode *inode = d_inode(entry);
1730 	struct fuse_conn *fc = get_fuse_conn(inode);
1731 	FUSE_ARGS(args);
1732 	struct fuse_setxattr_in inarg;
1733 	int err;
1734 
1735 	if (fc->no_setxattr)
1736 		return -EOPNOTSUPP;
1737 
1738 	memset(&inarg, 0, sizeof(inarg));
1739 	inarg.size = size;
1740 	inarg.flags = flags;
1741 	args.in.h.opcode = FUSE_SETXATTR;
1742 	args.in.h.nodeid = get_node_id(inode);
1743 	args.in.numargs = 3;
1744 	args.in.args[0].size = sizeof(inarg);
1745 	args.in.args[0].value = &inarg;
1746 	args.in.args[1].size = strlen(name) + 1;
1747 	args.in.args[1].value = name;
1748 	args.in.args[2].size = size;
1749 	args.in.args[2].value = value;
1750 	err = fuse_simple_request(fc, &args);
1751 	if (err == -ENOSYS) {
1752 		fc->no_setxattr = 1;
1753 		err = -EOPNOTSUPP;
1754 	}
1755 	if (!err) {
1756 		fuse_invalidate_attr(inode);
1757 		fuse_update_ctime(inode);
1758 	}
1759 	return err;
1760 }
1761 
1762 static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1763 			     void *value, size_t size)
1764 {
1765 	struct inode *inode = d_inode(entry);
1766 	struct fuse_conn *fc = get_fuse_conn(inode);
1767 	FUSE_ARGS(args);
1768 	struct fuse_getxattr_in inarg;
1769 	struct fuse_getxattr_out outarg;
1770 	ssize_t ret;
1771 
1772 	if (fc->no_getxattr)
1773 		return -EOPNOTSUPP;
1774 
1775 	memset(&inarg, 0, sizeof(inarg));
1776 	inarg.size = size;
1777 	args.in.h.opcode = FUSE_GETXATTR;
1778 	args.in.h.nodeid = get_node_id(inode);
1779 	args.in.numargs = 2;
1780 	args.in.args[0].size = sizeof(inarg);
1781 	args.in.args[0].value = &inarg;
1782 	args.in.args[1].size = strlen(name) + 1;
1783 	args.in.args[1].value = name;
1784 	/* This is really two different operations rolled into one */
1785 	args.out.numargs = 1;
1786 	if (size) {
1787 		args.out.argvar = 1;
1788 		args.out.args[0].size = size;
1789 		args.out.args[0].value = value;
1790 	} else {
1791 		args.out.args[0].size = sizeof(outarg);
1792 		args.out.args[0].value = &outarg;
1793 	}
1794 	ret = fuse_simple_request(fc, &args);
1795 	if (!ret && !size)
1796 		ret = outarg.size;
1797 	if (ret == -ENOSYS) {
1798 		fc->no_getxattr = 1;
1799 		ret = -EOPNOTSUPP;
1800 	}
1801 	return ret;
1802 }
1803 
1804 static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1805 {
1806 	struct inode *inode = d_inode(entry);
1807 	struct fuse_conn *fc = get_fuse_conn(inode);
1808 	FUSE_ARGS(args);
1809 	struct fuse_getxattr_in inarg;
1810 	struct fuse_getxattr_out outarg;
1811 	ssize_t ret;
1812 
1813 	if (!fuse_allow_current_process(fc))
1814 		return -EACCES;
1815 
1816 	if (fc->no_listxattr)
1817 		return -EOPNOTSUPP;
1818 
1819 	memset(&inarg, 0, sizeof(inarg));
1820 	inarg.size = size;
1821 	args.in.h.opcode = FUSE_LISTXATTR;
1822 	args.in.h.nodeid = get_node_id(inode);
1823 	args.in.numargs = 1;
1824 	args.in.args[0].size = sizeof(inarg);
1825 	args.in.args[0].value = &inarg;
1826 	/* This is really two different operations rolled into one */
1827 	args.out.numargs = 1;
1828 	if (size) {
1829 		args.out.argvar = 1;
1830 		args.out.args[0].size = size;
1831 		args.out.args[0].value = list;
1832 	} else {
1833 		args.out.args[0].size = sizeof(outarg);
1834 		args.out.args[0].value = &outarg;
1835 	}
1836 	ret = fuse_simple_request(fc, &args);
1837 	if (!ret && !size)
1838 		ret = outarg.size;
1839 	if (ret == -ENOSYS) {
1840 		fc->no_listxattr = 1;
1841 		ret = -EOPNOTSUPP;
1842 	}
1843 	return ret;
1844 }
1845 
1846 static int fuse_removexattr(struct dentry *entry, const char *name)
1847 {
1848 	struct inode *inode = d_inode(entry);
1849 	struct fuse_conn *fc = get_fuse_conn(inode);
1850 	FUSE_ARGS(args);
1851 	int err;
1852 
1853 	if (fc->no_removexattr)
1854 		return -EOPNOTSUPP;
1855 
1856 	args.in.h.opcode = FUSE_REMOVEXATTR;
1857 	args.in.h.nodeid = get_node_id(inode);
1858 	args.in.numargs = 1;
1859 	args.in.args[0].size = strlen(name) + 1;
1860 	args.in.args[0].value = name;
1861 	err = fuse_simple_request(fc, &args);
1862 	if (err == -ENOSYS) {
1863 		fc->no_removexattr = 1;
1864 		err = -EOPNOTSUPP;
1865 	}
1866 	if (!err) {
1867 		fuse_invalidate_attr(inode);
1868 		fuse_update_ctime(inode);
1869 	}
1870 	return err;
1871 }
1872 
/*
 * Inode operations for directories; installed as inode->i_op by
 * fuse_init_dir().
 */
static const struct inode_operations fuse_dir_inode_operations = {
	.lookup		= fuse_lookup,
	.mkdir		= fuse_mkdir,
	.symlink	= fuse_symlink,
	.unlink		= fuse_unlink,
	.rmdir		= fuse_rmdir,
	.rename2	= fuse_rename2,
	.link		= fuse_link,
	.setattr	= fuse_setattr,
	.create		= fuse_create,
	.atomic_open	= fuse_atomic_open,
	.mknod		= fuse_mknod,
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
	.setxattr	= fuse_setxattr,
	.getxattr	= fuse_getxattr,
	.listxattr	= fuse_listxattr,
	.removexattr	= fuse_removexattr,
};
1892 
/*
 * File operations for open directories; installed as inode->i_fop by
 * fuse_init_dir().
 */
static const struct file_operations fuse_dir_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate	= fuse_readdir,
	.open		= fuse_dir_open,
	.release	= fuse_dir_release,
	.fsync		= fuse_dir_fsync,
	.unlocked_ioctl	= fuse_dir_ioctl,
	.compat_ioctl	= fuse_dir_compat_ioctl,
};
1903 
/*
 * Inode operations installed as inode->i_op by fuse_init_common():
 * attribute, permission and xattr handling only.
 */
static const struct inode_operations fuse_common_inode_operations = {
	.setattr	= fuse_setattr,
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
	.setxattr	= fuse_setxattr,
	.getxattr	= fuse_getxattr,
	.listxattr	= fuse_listxattr,
	.removexattr	= fuse_removexattr,
};
1913 
/*
 * Inode operations for symlinks; installed as inode->i_op by
 * fuse_init_symlink().  Note that no ->permission hook is set here.
 */
static const struct inode_operations fuse_symlink_inode_operations = {
	.setattr	= fuse_setattr,
	.get_link	= fuse_get_link,
	.readlink	= generic_readlink,
	.getattr	= fuse_getattr,
	.setxattr	= fuse_setxattr,
	.getxattr	= fuse_getxattr,
	.listxattr	= fuse_listxattr,
	.removexattr	= fuse_removexattr,
};
1924 
/* Install fuse_common_inode_operations as the inode operations of @inode. */
void fuse_init_common(struct inode *inode)
{
	inode->i_op = &fuse_common_inode_operations;
}
1929 
/*
 * Install the directory inode operations and directory file operations
 * on @inode.
 */
void fuse_init_dir(struct inode *inode)
{
	inode->i_op = &fuse_dir_inode_operations;
	inode->i_fop = &fuse_dir_operations;
}
1935 
/* Install fuse_symlink_inode_operations as the inode operations of @inode. */
void fuse_init_symlink(struct inode *inode)
{
	inode->i_op = &fuse_symlink_inode_operations;
}
1940