1  /*
2    FUSE: Filesystem in Userspace
3    Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4  
5    This program can be distributed under the terms of the GNU GPL.
6    See the file COPYING.
7  */
8  
9  #include "fuse_i.h"
10  
11  #include <linux/pagemap.h>
12  #include <linux/slab.h>
13  #include <linux/kernel.h>
14  #include <linux/sched.h>
15  #include <linux/sched/signal.h>
16  #include <linux/module.h>
17  #include <linux/swap.h>
18  #include <linux/falloc.h>
19  #include <linux/uio.h>
20  #include <linux/fs.h>
21  #include <linux/filelock.h>
22  #include <linux/file.h>
23  
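/*
 * Send FUSE_OPEN or FUSE_OPENDIR for the given nodeid and collect the
 * server's reply (file handle and open flags) in *outargp.  Creation
 * related flags are masked out, and O_TRUNC is only passed through when
 * the connection negotiated atomic_o_trunc.
 */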
24  static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
25  			  unsigned int open_flags, int opcode,
26  			  struct fuse_open_out *outargp)
27  {
28  	struct fuse_open_in inarg;
29  	FUSE_ARGS(args);
30  
31  	memset(&inarg, 0, sizeof(inarg));
32  	inarg.flags = open_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
33  	if (!fm->fc->atomic_o_trunc)
34  		inarg.flags &= ~O_TRUNC;
35  
36  	if (fm->fc->handle_killpriv_v2 &&
37  	    (inarg.flags & O_TRUNC) && !capable(CAP_FSETID)) {
38  		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
39  	}
40  
41  	args.opcode = opcode;
42  	args.nodeid = nodeid;
43  	args.in_numargs = 1;
44  	args.in_args[0].size = sizeof(inarg);
45  	args.in_args[0].value = &inarg;
46  	args.out_numargs = 1;
47  	args.out_args[0].size = sizeof(*outargp);
48  	args.out_args[0].value = outargp;
49  
50  	return fuse_simple_request(fm, &args);
51  }
52  
53  struct fuse_release_args {
54  	struct fuse_args args;
55  	struct fuse_release_in inarg;
56  	struct inode *inode;
57  };
58  
59  struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
60  {
61  	struct fuse_file *ff;
62  
63  	ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL_ACCOUNT);
64  	if (unlikely(!ff))
65  		return NULL;
66  
67  	ff->fm = fm;
68  	ff->release_args = kzalloc(sizeof(*ff->release_args),
69  				   GFP_KERNEL_ACCOUNT);
70  	if (!ff->release_args) {
71  		kfree(ff);
72  		return NULL;
73  	}
74  
75  	INIT_LIST_HEAD(&ff->write_entry);
76  	mutex_init(&ff->readdir.lock);
77  	refcount_set(&ff->count, 1);
78  	RB_CLEAR_NODE(&ff->polled_node);
79  	init_waitqueue_head(&ff->poll_wait);
80  
81  	ff->kh = atomic64_inc_return(&fm->fc->khctr);
82  
83  	return ff;
84  }
85  
86  void fuse_file_free(struct fuse_file *ff)
87  {
88  	kfree(ff->release_args);
89  	mutex_destroy(&ff->readdir.lock);
90  	kfree(ff);
91  }
92  
93  static struct fuse_file *fuse_file_get(struct fuse_file *ff)
94  {
95  	refcount_inc(&ff->count);
96  	return ff;
97  }
98  
99  static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
100  			     int error)
101  {
102  	struct fuse_release_args *ra = container_of(args, typeof(*ra), args);
103  
104  	iput(ra->inode);
105  	kfree(ra);
106  }
107  
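/*
 * Drop a reference to a fuse_file.  On the final put the prepared
 * RELEASE/RELEASEDIR request is sent, synchronously if @sync is set and
 * as a background request otherwise; it is skipped entirely when the
 * server never implemented open/opendir.
 */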
108  static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
109  {
110  	if (refcount_dec_and_test(&ff->count)) {
111  		struct fuse_args *args = &ff->release_args->args;
112  
113  		if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
114  			/* Do nothing when client does not implement 'open' */
115  			fuse_release_end(ff->fm, args, 0);
116  		} else if (sync) {
117  			fuse_simple_request(ff->fm, args);
118  			fuse_release_end(ff->fm, args, 0);
119  		} else {
120  			args->end = fuse_release_end;
121  			if (fuse_simple_background(ff->fm, args,
122  						   GFP_KERNEL | __GFP_NOFAIL))
123  				fuse_release_end(ff->fm, args, -ENOTCONN);
124  		}
125  		kfree(ff);
126  	}
127  }
128  
129  struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
130  				 unsigned int open_flags, bool isdir)
131  {
132  	struct fuse_conn *fc = fm->fc;
133  	struct fuse_file *ff;
134  	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
135  
136  	ff = fuse_file_alloc(fm);
137  	if (!ff)
138  		return ERR_PTR(-ENOMEM);
139  
140  	ff->fh = 0;
141  	/* Default for no-open */
142  	ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
143  	if (isdir ? !fc->no_opendir : !fc->no_open) {
144  		struct fuse_open_out outarg;
145  		int err;
146  
147  		err = fuse_send_open(fm, nodeid, open_flags, opcode, &outarg);
148  		if (!err) {
149  			ff->fh = outarg.fh;
150  			ff->open_flags = outarg.open_flags;
151  
152  		} else if (err != -ENOSYS) {
153  			fuse_file_free(ff);
154  			return ERR_PTR(err);
155  		} else {
156  			if (isdir)
157  				fc->no_opendir = 1;
158  			else
159  				fc->no_open = 1;
160  		}
161  	}
162  
163  	if (isdir)
164  		ff->open_flags &= ~FOPEN_DIRECT_IO;
165  
166  	ff->nodeid = nodeid;
167  
168  	return ff;
169  }
170  
171  int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
172  		 bool isdir)
173  {
174  	struct fuse_file *ff = fuse_file_open(fm, nodeid, file->f_flags, isdir);
175  
176  	if (!IS_ERR(ff))
177  		file->private_data = ff;
178  
179  	return PTR_ERR_OR_ZERO(ff);
180  }
181  EXPORT_SYMBOL_GPL(fuse_do_open);
182  
183  static void fuse_link_write_file(struct file *file)
184  {
185  	struct inode *inode = file_inode(file);
186  	struct fuse_inode *fi = get_fuse_inode(inode);
187  	struct fuse_file *ff = file->private_data;
188  	/*
189  	 * file may be written through mmap, so chain it onto the
190  	 * inode's write_file list
191  	 */
192  	spin_lock(&fi->lock);
193  	if (list_empty(&ff->write_entry))
194  		list_add(&ff->write_entry, &fi->write_files);
195  	spin_unlock(&fi->lock);
196  }
197  
198  void fuse_finish_open(struct inode *inode, struct file *file)
199  {
200  	struct fuse_file *ff = file->private_data;
201  	struct fuse_conn *fc = get_fuse_conn(inode);
202  
203  	if (ff->open_flags & FOPEN_STREAM)
204  		stream_open(inode, file);
205  	else if (ff->open_flags & FOPEN_NONSEEKABLE)
206  		nonseekable_open(inode, file);
207  
208  	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
209  		struct fuse_inode *fi = get_fuse_inode(inode);
210  
211  		spin_lock(&fi->lock);
212  		fi->attr_version = atomic64_inc_return(&fc->attr_version);
213  		i_size_write(inode, 0);
214  		spin_unlock(&fi->lock);
215  		file_update_time(file);
216  		fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
217  	}
218  	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
219  		fuse_link_write_file(file);
220  }
221  
222  int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
223  {
224  	struct fuse_mount *fm = get_fuse_mount(inode);
225  	struct fuse_conn *fc = fm->fc;
226  	int err;
227  	bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
228  			  fc->atomic_o_trunc &&
229  			  fc->writeback_cache;
230  	bool dax_truncate = (file->f_flags & O_TRUNC) &&
231  			  fc->atomic_o_trunc && FUSE_IS_DAX(inode);
232  
233  	if (fuse_is_bad(inode))
234  		return -EIO;
235  
236  	err = generic_file_open(inode, file);
237  	if (err)
238  		return err;
239  
240  	if (is_wb_truncate || dax_truncate)
241  		inode_lock(inode);
242  
243  	if (dax_truncate) {
244  		filemap_invalidate_lock(inode->i_mapping);
245  		err = fuse_dax_break_layouts(inode, 0, 0);
246  		if (err)
247  			goto out_inode_unlock;
248  	}
249  
250  	if (is_wb_truncate || dax_truncate)
251  		fuse_set_nowrite(inode);
252  
253  	err = fuse_do_open(fm, get_node_id(inode), file, isdir);
254  	if (!err)
255  		fuse_finish_open(inode, file);
256  
257  	if (is_wb_truncate || dax_truncate)
258  		fuse_release_nowrite(inode);
259  	if (!err) {
260  		struct fuse_file *ff = file->private_data;
261  
262  		if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
263  			truncate_pagecache(inode, 0);
264  		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
265  			invalidate_inode_pages2(inode->i_mapping);
266  	}
267  	if (dax_truncate)
268  		filemap_invalidate_unlock(inode->i_mapping);
269  out_inode_unlock:
270  	if (is_wb_truncate || dax_truncate)
271  		inode_unlock(inode);
272  
273  	return err;
274  }
275  
276  static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
277  				 unsigned int flags, int opcode)
278  {
279  	struct fuse_conn *fc = ff->fm->fc;
280  	struct fuse_release_args *ra = ff->release_args;
281  
282  	/* Inode is NULL on error path of fuse_create_open() */
283  	if (likely(fi)) {
284  		spin_lock(&fi->lock);
285  		list_del(&ff->write_entry);
286  		spin_unlock(&fi->lock);
287  	}
288  	spin_lock(&fc->lock);
289  	if (!RB_EMPTY_NODE(&ff->polled_node))
290  		rb_erase(&ff->polled_node, &fc->polled_files);
291  	spin_unlock(&fc->lock);
292  
293  	wake_up_interruptible_all(&ff->poll_wait);
294  
295  	ra->inarg.fh = ff->fh;
296  	ra->inarg.flags = flags;
297  	ra->args.in_numargs = 1;
298  	ra->args.in_args[0].size = sizeof(struct fuse_release_in);
299  	ra->args.in_args[0].value = &ra->inarg;
300  	ra->args.opcode = opcode;
301  	ra->args.nodeid = ff->nodeid;
302  	ra->args.force = true;
303  	ra->args.nocreds = true;
304  }
305  
306  void fuse_file_release(struct inode *inode, struct fuse_file *ff,
307  		       unsigned int open_flags, fl_owner_t id, bool isdir)
308  {
309  	struct fuse_inode *fi = get_fuse_inode(inode);
310  	struct fuse_release_args *ra = ff->release_args;
311  	int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
312  
313  	fuse_prepare_release(fi, ff, open_flags, opcode);
314  
315  	if (ff->flock) {
316  		ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
317  		ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, id);
318  	}
319  	/* Hold inode until release is finished */
320  	ra->inode = igrab(inode);
321  
322  	/*
323  	 * Normally this will send the RELEASE request; however, if
324  	 * some asynchronous READ or WRITE requests are outstanding,
325  	 * the sending will be delayed.
326  	 *
327  	 * Make the release synchronous if this is a fuseblk mount;
328  	 * synchronous RELEASE is allowed (and desirable) in this case
329  	 * because the server can be trusted not to screw up.
330  	 */
331  	fuse_file_put(ff, ff->fm->fc->destroy, isdir);
332  }
333  
334  void fuse_release_common(struct file *file, bool isdir)
335  {
336  	fuse_file_release(file_inode(file), file->private_data, file->f_flags,
337  			  (fl_owner_t) file, isdir);
338  }
339  
340  static int fuse_open(struct inode *inode, struct file *file)
341  {
342  	return fuse_open_common(inode, file, false);
343  }
344  
345  static int fuse_release(struct inode *inode, struct file *file)
346  {
347  	struct fuse_conn *fc = get_fuse_conn(inode);
348  
349  	/*
350  	 * Dirty pages might remain despite write_inode_now() call from
351  	 * fuse_flush() due to writes racing with the close.
352  	 */
353  	if (fc->writeback_cache)
354  		write_inode_now(inode, 1);
355  
356  	fuse_release_common(file, false);
357  
358  	/* return value is ignored by VFS */
359  	return 0;
360  }
361  
362  void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
363  		       unsigned int flags)
364  {
365  	WARN_ON(refcount_read(&ff->count) > 1);
366  	fuse_prepare_release(fi, ff, flags, FUSE_RELEASE);
367  	/*
368  	 * iput(NULL) is a no-op and since the refcount is 1 and everything's
369  	 * synchronous, we are fine with not doing igrab() here
370  	 */
371  	fuse_file_put(ff, true, false);
372  }
373  EXPORT_SYMBOL_GPL(fuse_sync_release);
374  
375  /*
376   * Scramble the ID space with XTEA, so that the value of the files_struct
377   * pointer is not exposed to userspace.
378   */
379  u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
380  {
381  	u32 *k = fc->scramble_key;
382  	u64 v = (unsigned long) id;
383  	u32 v0 = v;
384  	u32 v1 = v >> 32;
385  	u32 sum = 0;
386  	int i;
387  
388  	for (i = 0; i < 32; i++) {
389  		v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
390  		sum += 0x9E3779B9;
391  		v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
392  	}
393  
394  	return (u64) v0 + ((u64) v1 << 32);
395  }
396  
397  struct fuse_writepage_args {
398  	struct fuse_io_args ia;
399  	struct rb_node writepages_entry;
400  	struct list_head queue_entry;
401  	struct fuse_writepage_args *next;
402  	struct inode *inode;
403  	struct fuse_sync_bucket *bucket;
404  };
405  
406  static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
407  					    pgoff_t idx_from, pgoff_t idx_to)
408  {
409  	struct rb_node *n;
410  
411  	n = fi->writepages.rb_node;
412  
413  	while (n) {
414  		struct fuse_writepage_args *wpa;
415  		pgoff_t curr_index;
416  
417  		wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
418  		WARN_ON(get_fuse_inode(wpa->inode) != fi);
419  		curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
420  		if (idx_from >= curr_index + wpa->ia.ap.num_pages)
421  			n = n->rb_right;
422  		else if (idx_to < curr_index)
423  			n = n->rb_left;
424  		else
425  			return wpa;
426  	}
427  	return NULL;
428  }
429  
430  /*
431   * Check if any page in a range is under writeback
432   *
433   * This is currently done by walking the list of writepage requests
434   * for the inode, which can be pretty inefficient.
435   */
436  static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
437  				   pgoff_t idx_to)
438  {
439  	struct fuse_inode *fi = get_fuse_inode(inode);
440  	bool found;
441  
442  	spin_lock(&fi->lock);
443  	found = fuse_find_writeback(fi, idx_from, idx_to);
444  	spin_unlock(&fi->lock);
445  
446  	return found;
447  }
448  
449  static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
450  {
451  	return fuse_range_is_writeback(inode, index, index);
452  }
453  
454  /*
455   * Wait for page writeback to be completed.
456   *
457   * Since fuse doesn't rely on the VM writeback tracking, this has to
458   * use some other means.
459   */
460  static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
461  {
462  	struct fuse_inode *fi = get_fuse_inode(inode);
463  
464  	wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
465  }
466  
467  /*
468   * Wait for all pending writepages on the inode to finish.
469   *
470   * This is currently done by blocking further writes with FUSE_NOWRITE
471   * and waiting for all sent writes to complete.
472   *
473   * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
474   * could conflict with truncation.
475   */
476  static void fuse_sync_writes(struct inode *inode)
477  {
478  	fuse_set_nowrite(inode);
479  	fuse_release_nowrite(inode);
480  }
481  
482  struct fuse_flush_args {
483  	struct fuse_args args;
484  	struct fuse_flush_in inarg;
485  	struct work_struct work;
486  	struct file *file;
487  };
488  
489  static int fuse_do_flush(struct fuse_flush_args *fa)
490  {
491  	int err;
492  	struct inode *inode = file_inode(fa->file);
493  	struct fuse_mount *fm = get_fuse_mount(inode);
494  
495  	err = write_inode_now(inode, 1);
496  	if (err)
497  		goto out;
498  
499  	inode_lock(inode);
500  	fuse_sync_writes(inode);
501  	inode_unlock(inode);
502  
503  	err = filemap_check_errors(fa->file->f_mapping);
504  	if (err)
505  		goto out;
506  
507  	err = 0;
508  	if (fm->fc->no_flush)
509  		goto inval_attr_out;
510  
511  	err = fuse_simple_request(fm, &fa->args);
512  	if (err == -ENOSYS) {
513  		fm->fc->no_flush = 1;
514  		err = 0;
515  	}
516  
517  inval_attr_out:
518  	/*
519  	 * The in-memory i_blocks is not maintained by fuse; if writeback cache
520  	 * is enabled, i_blocks from the cached attr may not be accurate.
521  	 */
522  	if (!err && fm->fc->writeback_cache)
523  		fuse_invalidate_attr_mask(inode, STATX_BLOCKS);
524  
525  out:
526  	fput(fa->file);
527  	kfree(fa);
528  	return err;
529  }
530  
531  static void fuse_flush_async(struct work_struct *work)
532  {
533  	struct fuse_flush_args *fa = container_of(work, typeof(*fa), work);
534  
535  	fuse_do_flush(fa);
536  }
537  
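/*
 * FUSE_FLUSH is sent for each close() of a file descriptor, tagged with
 * the POSIX lock owner, giving the server a chance to report deferred
 * write errors before close() returns.
 */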
538  static int fuse_flush(struct file *file, fl_owner_t id)
539  {
540  	struct fuse_flush_args *fa;
541  	struct inode *inode = file_inode(file);
542  	struct fuse_mount *fm = get_fuse_mount(inode);
543  	struct fuse_file *ff = file->private_data;
544  
545  	if (fuse_is_bad(inode))
546  		return -EIO;
547  
548  	if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache)
549  		return 0;
550  
551  	fa = kzalloc(sizeof(*fa), GFP_KERNEL);
552  	if (!fa)
553  		return -ENOMEM;
554  
555  	fa->inarg.fh = ff->fh;
556  	fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
557  	fa->args.opcode = FUSE_FLUSH;
558  	fa->args.nodeid = get_node_id(inode);
559  	fa->args.in_numargs = 1;
560  	fa->args.in_args[0].size = sizeof(fa->inarg);
561  	fa->args.in_args[0].value = &fa->inarg;
562  	fa->args.force = true;
563  	fa->file = get_file(file);
564  
565  	/* Don't wait if the task is exiting */
566  	if (current->flags & PF_EXITING) {
567  		INIT_WORK(&fa->work, fuse_flush_async);
568  		schedule_work(&fa->work);
569  		return 0;
570  	}
571  
572  	return fuse_do_flush(fa);
573  }
574  
575  int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
576  		      int datasync, int opcode)
577  {
578  	struct inode *inode = file->f_mapping->host;
579  	struct fuse_mount *fm = get_fuse_mount(inode);
580  	struct fuse_file *ff = file->private_data;
581  	FUSE_ARGS(args);
582  	struct fuse_fsync_in inarg;
583  
584  	memset(&inarg, 0, sizeof(inarg));
585  	inarg.fh = ff->fh;
586  	inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0;
587  	args.opcode = opcode;
588  	args.nodeid = get_node_id(inode);
589  	args.in_numargs = 1;
590  	args.in_args[0].size = sizeof(inarg);
591  	args.in_args[0].value = &inarg;
592  	return fuse_simple_request(fm, &args);
593  }
594  
595  static int fuse_fsync(struct file *file, loff_t start, loff_t end,
596  		      int datasync)
597  {
598  	struct inode *inode = file->f_mapping->host;
599  	struct fuse_conn *fc = get_fuse_conn(inode);
600  	int err;
601  
602  	if (fuse_is_bad(inode))
603  		return -EIO;
604  
605  	inode_lock(inode);
606  
607  	/*
608  	 * Start writeback against all dirty pages of the inode, then
609  	 * wait for all outstanding writes, before sending the FSYNC
610  	 * request.
611  	 */
612  	err = file_write_and_wait_range(file, start, end);
613  	if (err)
614  		goto out;
615  
616  	fuse_sync_writes(inode);
617  
618  	/*
619  	 * Due to the implementation of fuse writeback,
620  	 * file_write_and_wait_range() does not catch errors.
621  	 * We have to do this directly after fuse_sync_writes().
622  	 */
623  	err = file_check_and_advance_wb_err(file);
624  	if (err)
625  		goto out;
626  
627  	err = sync_inode_metadata(inode, 1);
628  	if (err)
629  		goto out;
630  
631  	if (fc->no_fsync)
632  		goto out;
633  
634  	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNC);
635  	if (err == -ENOSYS) {
636  		fc->no_fsync = 1;
637  		err = 0;
638  	}
639  out:
640  	inode_unlock(inode);
641  
642  	return err;
643  }
644  
645  void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
646  			 size_t count, int opcode)
647  {
648  	struct fuse_file *ff = file->private_data;
649  	struct fuse_args *args = &ia->ap.args;
650  
651  	ia->read.in.fh = ff->fh;
652  	ia->read.in.offset = pos;
653  	ia->read.in.size = count;
654  	ia->read.in.flags = file->f_flags;
655  	args->opcode = opcode;
656  	args->nodeid = ff->nodeid;
657  	args->in_numargs = 1;
658  	args->in_args[0].size = sizeof(ia->read.in);
659  	args->in_args[0].value = &ia->read.in;
660  	args->out_argvar = true;
661  	args->out_numargs = 1;
662  	args->out_args[0].size = count;
663  }
664  
665  static void fuse_release_user_pages(struct fuse_args_pages *ap,
666  				    bool should_dirty)
667  {
668  	unsigned int i;
669  
670  	for (i = 0; i < ap->num_pages; i++) {
671  		if (should_dirty)
672  			set_page_dirty_lock(ap->pages[i]);
673  		put_page(ap->pages[i]);
674  	}
675  }
676  
677  static void fuse_io_release(struct kref *kref)
678  {
679  	kfree(container_of(kref, struct fuse_io_priv, refcnt));
680  }
681  
682  static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
683  {
684  	if (io->err)
685  		return io->err;
686  
687  	if (io->bytes >= 0 && io->write)
688  		return -EIO;
689  
690  	return io->bytes < 0 ? io->size : io->bytes;
691  }
692  
693  /*
694   * In case of short read, the caller sets 'pos' to the position of
695   * actual end of fuse request in IO request. Otherwise, if bytes_requested
696   * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1.
697   *
698   * An example:
699   * User requested DIO read of 64K. It was split into two 32K fuse requests,
700   * both submitted asynchronously. The first of them was ACKed by userspace as
701   * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The
702   * second request was ACKed as short, e.g. only 1K was read, resulting in
703   * pos == 33K.
704   *
705   * Thus, when all fuse requests are completed, the minimal non-negative 'pos'
706   * will be equal to the length of the longest contiguous fragment of
707   * transferred data starting from the beginning of IO request.
708   */
709  static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
710  {
711  	int left;
712  
713  	spin_lock(&io->lock);
714  	if (err)
715  		io->err = io->err ? : err;
716  	else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes))
717  		io->bytes = pos;
718  
719  	left = --io->reqs;
720  	if (!left && io->blocking)
721  		complete(io->done);
722  	spin_unlock(&io->lock);
723  
724  	if (!left && !io->blocking) {
725  		ssize_t res = fuse_get_res_by_io(io);
726  
727  		if (res >= 0) {
728  			struct inode *inode = file_inode(io->iocb->ki_filp);
729  			struct fuse_conn *fc = get_fuse_conn(inode);
730  			struct fuse_inode *fi = get_fuse_inode(inode);
731  
732  			spin_lock(&fi->lock);
733  			fi->attr_version = atomic64_inc_return(&fc->attr_version);
734  			spin_unlock(&fi->lock);
735  		}
736  
737  		io->iocb->ki_complete(io->iocb, res);
738  	}
739  
740  	kref_put(&io->refcnt, fuse_io_release);
741  }
742  
743  static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
744  					  unsigned int npages)
745  {
746  	struct fuse_io_args *ia;
747  
748  	ia = kzalloc(sizeof(*ia), GFP_KERNEL);
749  	if (ia) {
750  		ia->io = io;
751  		ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
752  						&ia->ap.descs);
753  		if (!ia->ap.pages) {
754  			kfree(ia);
755  			ia = NULL;
756  		}
757  	}
758  	return ia;
759  }
760  
761  static void fuse_io_free(struct fuse_io_args *ia)
762  {
763  	kfree(ia->ap.pages);
764  	kfree(ia);
765  }
766  
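/*
 * Completion callback for asynchronous direct I/O requests: release the
 * pinned user pages, work out how far this particular request got (for
 * short reads and writes) and fold the result into the parent io.
 */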
767  static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
768  				  int err)
769  {
770  	struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
771  	struct fuse_io_priv *io = ia->io;
772  	ssize_t pos = -1;
773  
774  	fuse_release_user_pages(&ia->ap, io->should_dirty);
775  
776  	if (err) {
777  		/* Nothing */
778  	} else if (io->write) {
779  		if (ia->write.out.size > ia->write.in.size) {
780  			err = -EIO;
781  		} else if (ia->write.in.size != ia->write.out.size) {
782  			pos = ia->write.in.offset - io->offset +
783  				ia->write.out.size;
784  		}
785  	} else {
786  		u32 outsize = args->out_args[0].size;
787  
788  		if (ia->read.in.size != outsize)
789  			pos = ia->read.in.offset - io->offset + outsize;
790  	}
791  
792  	fuse_aio_complete(io, err, pos);
793  	fuse_io_free(ia);
794  }
795  
796  static ssize_t fuse_async_req_send(struct fuse_mount *fm,
797  				   struct fuse_io_args *ia, size_t num_bytes)
798  {
799  	ssize_t err;
800  	struct fuse_io_priv *io = ia->io;
801  
802  	spin_lock(&io->lock);
803  	kref_get(&io->refcnt);
804  	io->size += num_bytes;
805  	io->reqs++;
806  	spin_unlock(&io->lock);
807  
808  	ia->ap.args.end = fuse_aio_complete_req;
809  	ia->ap.args.may_block = io->should_dirty;
810  	err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL);
811  	if (err)
812  		fuse_aio_complete_req(fm, &ia->ap.args, err);
813  
814  	return num_bytes;
815  }
816  
817  static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
818  			      fl_owner_t owner)
819  {
820  	struct file *file = ia->io->iocb->ki_filp;
821  	struct fuse_file *ff = file->private_data;
822  	struct fuse_mount *fm = ff->fm;
823  
824  	fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
825  	if (owner != NULL) {
826  		ia->read.in.read_flags |= FUSE_READ_LOCKOWNER;
827  		ia->read.in.lock_owner = fuse_lock_owner_id(fm->fc, owner);
828  	}
829  
830  	if (ia->io->async)
831  		return fuse_async_req_send(fm, ia, count);
832  
833  	return fuse_simple_request(fm, &ia->ap.args);
834  }
835  
836  static void fuse_read_update_size(struct inode *inode, loff_t size,
837  				  u64 attr_ver)
838  {
839  	struct fuse_conn *fc = get_fuse_conn(inode);
840  	struct fuse_inode *fi = get_fuse_inode(inode);
841  
842  	spin_lock(&fi->lock);
843  	if (attr_ver >= fi->attr_version && size < inode->i_size &&
844  	    !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
845  		fi->attr_version = atomic64_inc_return(&fc->attr_version);
846  		i_size_write(inode, size);
847  	}
848  	spin_unlock(&fi->lock);
849  }
850  
851  static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
852  			    struct fuse_args_pages *ap)
853  {
854  	struct fuse_conn *fc = get_fuse_conn(inode);
855  
856  	/*
857  	 * If writeback_cache is enabled, a short read means there's a hole in
858  	 * the file.  Some data after the hole is in page cache, but has not
859  	 * reached the client fs yet.  So the hole is not present there.
860  	 */
861  	if (!fc->writeback_cache) {
862  		loff_t pos = page_offset(ap->pages[0]) + num_read;
863  		fuse_read_update_size(inode, pos, attr_ver);
864  	}
865  }
866  
867  static int fuse_do_readpage(struct file *file, struct page *page)
868  {
869  	struct inode *inode = page->mapping->host;
870  	struct fuse_mount *fm = get_fuse_mount(inode);
871  	loff_t pos = page_offset(page);
872  	struct fuse_page_desc desc = { .length = PAGE_SIZE };
873  	struct fuse_io_args ia = {
874  		.ap.args.page_zeroing = true,
875  		.ap.args.out_pages = true,
876  		.ap.num_pages = 1,
877  		.ap.pages = &page,
878  		.ap.descs = &desc,
879  	};
880  	ssize_t res;
881  	u64 attr_ver;
882  
883  	/*
884  	 * Page writeback can extend beyond the lifetime of the
885  	 * page-cache page, so make sure we read a properly synced
886  	 * page.
887  	 */
888  	fuse_wait_on_page_writeback(inode, page->index);
889  
890  	attr_ver = fuse_get_attr_version(fm->fc);
891  
892  	/* Don't overflow end offset */
893  	if (pos + (desc.length - 1) == LLONG_MAX)
894  		desc.length--;
895  
896  	fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
897  	res = fuse_simple_request(fm, &ia.ap.args);
898  	if (res < 0)
899  		return res;
900  	/*
901  	 * Short read means EOF.  If file size is larger, truncate it
902  	 */
903  	if (res < desc.length)
904  		fuse_short_read(inode, attr_ver, res, &ia.ap);
905  
906  	SetPageUptodate(page);
907  
908  	return 0;
909  }
910  
911  static int fuse_read_folio(struct file *file, struct folio *folio)
912  {
913  	struct page *page = &folio->page;
914  	struct inode *inode = page->mapping->host;
915  	int err;
916  
917  	err = -EIO;
918  	if (fuse_is_bad(inode))
919  		goto out;
920  
921  	err = fuse_do_readpage(file, page);
922  	fuse_invalidate_atime(inode);
923   out:
924  	unlock_page(page);
925  	return err;
926  }
927  
928  static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
929  			       int err)
930  {
931  	int i;
932  	struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
933  	struct fuse_args_pages *ap = &ia->ap;
934  	size_t count = ia->read.in.size;
935  	size_t num_read = args->out_args[0].size;
936  	struct address_space *mapping = NULL;
937  
938  	for (i = 0; mapping == NULL && i < ap->num_pages; i++)
939  		mapping = ap->pages[i]->mapping;
940  
941  	if (mapping) {
942  		struct inode *inode = mapping->host;
943  
944  		/*
945  		 * Short read means EOF. If file size is larger, truncate it
946  		 */
947  		if (!err && num_read < count)
948  			fuse_short_read(inode, ia->read.attr_ver, num_read, ap);
949  
950  		fuse_invalidate_atime(inode);
951  	}
952  
953  	for (i = 0; i < ap->num_pages; i++) {
954  		struct page *page = ap->pages[i];
955  
956  		if (!err)
957  			SetPageUptodate(page);
958  		else
959  			SetPageError(page);
960  		unlock_page(page);
961  		put_page(page);
962  	}
963  	if (ia->ff)
964  		fuse_file_put(ia->ff, false, false);
965  
966  	fuse_io_free(ia);
967  }
968  
969  static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
970  {
971  	struct fuse_file *ff = file->private_data;
972  	struct fuse_mount *fm = ff->fm;
973  	struct fuse_args_pages *ap = &ia->ap;
974  	loff_t pos = page_offset(ap->pages[0]);
975  	size_t count = ap->num_pages << PAGE_SHIFT;
976  	ssize_t res;
977  	int err;
978  
979  	ap->args.out_pages = true;
980  	ap->args.page_zeroing = true;
981  	ap->args.page_replace = true;
982  
983  	/* Don't overflow end offset */
984  	if (pos + (count - 1) == LLONG_MAX) {
985  		count--;
986  		ap->descs[ap->num_pages - 1].length--;
987  	}
988  	WARN_ON((loff_t) (pos + count) < 0);
989  
990  	fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
991  	ia->read.attr_ver = fuse_get_attr_version(fm->fc);
992  	if (fm->fc->async_read) {
993  		ia->ff = fuse_file_get(ff);
994  		ap->args.end = fuse_readpages_end;
995  		err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
996  		if (!err)
997  			return;
998  	} else {
999  		res = fuse_simple_request(fm, &ap->args);
1000  		err = res < 0 ? res : 0;
1001  	}
1002  	fuse_readpages_end(fm, &ap->args, err);
1003  }
1004  
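/*
 * Fill the readahead window: batch up to max_pages (and max_read bytes)
 * worth of locked pages per FUSE_READ request and hand each batch to
 * fuse_send_readpages(), stopping early if the connection is congested
 * and only speculative readahead pages remain.
 */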
1005  static void fuse_readahead(struct readahead_control *rac)
1006  {
1007  	struct inode *inode = rac->mapping->host;
1008  	struct fuse_conn *fc = get_fuse_conn(inode);
1009  	unsigned int i, max_pages, nr_pages = 0;
1010  
1011  	if (fuse_is_bad(inode))
1012  		return;
1013  
1014  	max_pages = min_t(unsigned int, fc->max_pages,
1015  			fc->max_read / PAGE_SIZE);
1016  
1017  	for (;;) {
1018  		struct fuse_io_args *ia;
1019  		struct fuse_args_pages *ap;
1020  
1021  		if (fc->num_background >= fc->congestion_threshold &&
1022  		    rac->ra->async_size >= readahead_count(rac))
1023  			/*
1024  			 * Congested and only async pages left, so skip the
1025  			 * rest.
1026  			 */
1027  			break;
1028  
1029  		nr_pages = readahead_count(rac) - nr_pages;
1030  		if (nr_pages > max_pages)
1031  			nr_pages = max_pages;
1032  		if (nr_pages == 0)
1033  			break;
1034  		ia = fuse_io_alloc(NULL, nr_pages);
1035  		if (!ia)
1036  			return;
1037  		ap = &ia->ap;
1038  		nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
1039  		for (i = 0; i < nr_pages; i++) {
1040  			fuse_wait_on_page_writeback(inode,
1041  						    readahead_index(rac) + i);
1042  			ap->descs[i].length = PAGE_SIZE;
1043  		}
1044  		ap->num_pages = nr_pages;
1045  		fuse_send_readpages(ia, rac->file);
1046  	}
1047  }
1048  
1049  static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
1050  {
1051  	struct inode *inode = iocb->ki_filp->f_mapping->host;
1052  	struct fuse_conn *fc = get_fuse_conn(inode);
1053  
1054  	/*
1055  	 * In auto invalidate mode, always update attributes on read.
1056  	 * Otherwise, only update if we attempt to read past EOF (to ensure
1057  	 * i_size is up to date).
1058  	 */
1059  	if (fc->auto_inval_data ||
1060  	    (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) {
1061  		int err;
1062  		err = fuse_update_attributes(inode, iocb->ki_filp, STATX_SIZE);
1063  		if (err)
1064  			return err;
1065  	}
1066  
1067  	return generic_file_read_iter(iocb, to);
1068  }
1069  
1070  static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
1071  				 loff_t pos, size_t count)
1072  {
1073  	struct fuse_args *args = &ia->ap.args;
1074  
1075  	ia->write.in.fh = ff->fh;
1076  	ia->write.in.offset = pos;
1077  	ia->write.in.size = count;
1078  	args->opcode = FUSE_WRITE;
1079  	args->nodeid = ff->nodeid;
1080  	args->in_numargs = 2;
1081  	if (ff->fm->fc->minor < 9)
1082  		args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
1083  	else
1084  		args->in_args[0].size = sizeof(ia->write.in);
1085  	args->in_args[0].value = &ia->write.in;
1086  	args->in_args[1].size = count;
1087  	args->out_numargs = 1;
1088  	args->out_args[0].size = sizeof(ia->write.out);
1089  	args->out_args[0].value = &ia->write.out;
1090  }
1091  
1092  static unsigned int fuse_write_flags(struct kiocb *iocb)
1093  {
1094  	unsigned int flags = iocb->ki_filp->f_flags;
1095  
1096  	if (iocb_is_dsync(iocb))
1097  		flags |= O_DSYNC;
1098  	if (iocb->ki_flags & IOCB_SYNC)
1099  		flags |= O_SYNC;
1100  
1101  	return flags;
1102  }
1103  
1104  static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
1105  			       size_t count, fl_owner_t owner)
1106  {
1107  	struct kiocb *iocb = ia->io->iocb;
1108  	struct file *file = iocb->ki_filp;
1109  	struct fuse_file *ff = file->private_data;
1110  	struct fuse_mount *fm = ff->fm;
1111  	struct fuse_write_in *inarg = &ia->write.in;
1112  	ssize_t err;
1113  
1114  	fuse_write_args_fill(ia, ff, pos, count);
1115  	inarg->flags = fuse_write_flags(iocb);
1116  	if (owner != NULL) {
1117  		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
1118  		inarg->lock_owner = fuse_lock_owner_id(fm->fc, owner);
1119  	}
1120  
1121  	if (ia->io->async)
1122  		return fuse_async_req_send(fm, ia, count);
1123  
1124  	err = fuse_simple_request(fm, &ia->ap.args);
1125  	if (!err && ia->write.out.size > count)
1126  		err = -EIO;
1127  
1128  	return err ?: ia->write.out.size;
1129  }
1130  
1131  bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written)
1132  {
1133  	struct fuse_conn *fc = get_fuse_conn(inode);
1134  	struct fuse_inode *fi = get_fuse_inode(inode);
1135  	bool ret = false;
1136  
1137  	spin_lock(&fi->lock);
1138  	fi->attr_version = atomic64_inc_return(&fc->attr_version);
1139  	if (written > 0 && pos > inode->i_size) {
1140  		i_size_write(inode, pos);
1141  		ret = true;
1142  	}
1143  	spin_unlock(&fi->lock);
1144  
1145  	fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
1146  
1147  	return ret;
1148  }
1149  
1150  static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
1151  				     struct kiocb *iocb, struct inode *inode,
1152  				     loff_t pos, size_t count)
1153  {
1154  	struct fuse_args_pages *ap = &ia->ap;
1155  	struct file *file = iocb->ki_filp;
1156  	struct fuse_file *ff = file->private_data;
1157  	struct fuse_mount *fm = ff->fm;
1158  	unsigned int offset, i;
1159  	bool short_write;
1160  	int err;
1161  
1162  	for (i = 0; i < ap->num_pages; i++)
1163  		fuse_wait_on_page_writeback(inode, ap->pages[i]->index);
1164  
1165  	fuse_write_args_fill(ia, ff, pos, count);
1166  	ia->write.in.flags = fuse_write_flags(iocb);
1167  	if (fm->fc->handle_killpriv_v2 && !capable(CAP_FSETID))
1168  		ia->write.in.write_flags |= FUSE_WRITE_KILL_SUIDGID;
1169  
1170  	err = fuse_simple_request(fm, &ap->args);
1171  	if (!err && ia->write.out.size > count)
1172  		err = -EIO;
1173  
1174  	short_write = ia->write.out.size < count;
1175  	offset = ap->descs[0].offset;
1176  	count = ia->write.out.size;
1177  	for (i = 0; i < ap->num_pages; i++) {
1178  		struct page *page = ap->pages[i];
1179  
1180  		if (err) {
1181  			ClearPageUptodate(page);
1182  		} else {
1183  			if (count >= PAGE_SIZE - offset)
1184  				count -= PAGE_SIZE - offset;
1185  			else {
1186  				if (short_write)
1187  					ClearPageUptodate(page);
1188  				count = 0;
1189  			}
1190  			offset = 0;
1191  		}
1192  		if (ia->write.page_locked && (i == ap->num_pages - 1))
1193  			unlock_page(page);
1194  		put_page(page);
1195  	}
1196  
1197  	return err;
1198  }
1199  
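/*
 * Copy data from the iterator into freshly grabbed page-cache pages,
 * building the page array for a single WRITE request.  Copying stops at
 * max_write bytes, max_pages pages, a page that is not fully uptodate
 * after the copy, or (without big_writes) after the first page.
 */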
1200  static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
1201  				     struct address_space *mapping,
1202  				     struct iov_iter *ii, loff_t pos,
1203  				     unsigned int max_pages)
1204  {
1205  	struct fuse_args_pages *ap = &ia->ap;
1206  	struct fuse_conn *fc = get_fuse_conn(mapping->host);
1207  	unsigned offset = pos & (PAGE_SIZE - 1);
1208  	size_t count = 0;
1209  	int err;
1210  
1211  	ap->args.in_pages = true;
1212  	ap->descs[0].offset = offset;
1213  
1214  	do {
1215  		size_t tmp;
1216  		struct page *page;
1217  		pgoff_t index = pos >> PAGE_SHIFT;
1218  		size_t bytes = min_t(size_t, PAGE_SIZE - offset,
1219  				     iov_iter_count(ii));
1220  
1221  		bytes = min_t(size_t, bytes, fc->max_write - count);
1222  
1223   again:
1224  		err = -EFAULT;
1225  		if (fault_in_iov_iter_readable(ii, bytes))
1226  			break;
1227  
1228  		err = -ENOMEM;
1229  		page = grab_cache_page_write_begin(mapping, index);
1230  		if (!page)
1231  			break;
1232  
1233  		if (mapping_writably_mapped(mapping))
1234  			flush_dcache_page(page);
1235  
1236  		tmp = copy_page_from_iter_atomic(page, offset, bytes, ii);
1237  		flush_dcache_page(page);
1238  
1239  		if (!tmp) {
1240  			unlock_page(page);
1241  			put_page(page);
1242  			goto again;
1243  		}
1244  
1245  		err = 0;
1246  		ap->pages[ap->num_pages] = page;
1247  		ap->descs[ap->num_pages].length = tmp;
1248  		ap->num_pages++;
1249  
1250  		count += tmp;
1251  		pos += tmp;
1252  		offset += tmp;
1253  		if (offset == PAGE_SIZE)
1254  			offset = 0;
1255  
1256  		/* If we copied full page, mark it uptodate */
1257  		if (tmp == PAGE_SIZE)
1258  			SetPageUptodate(page);
1259  
1260  		if (PageUptodate(page)) {
1261  			unlock_page(page);
1262  		} else {
1263  			ia->write.page_locked = true;
1264  			break;
1265  		}
1266  		if (!fc->big_writes)
1267  			break;
1268  	} while (iov_iter_count(ii) && count < fc->max_write &&
1269  		 ap->num_pages < max_pages && offset == 0);
1270  
1271  	return count > 0 ? count : err;
1272  }
1273  
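/*
 * Number of page-cache pages touched by a write of @len bytes at @pos,
 * clamped to @max_pages.  For example, with 4K pages a 6000 byte write
 * starting at offset 3000 touches pages 0, 1 and 2, i.e. three pages.
 */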
1274  static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
1275  				     unsigned int max_pages)
1276  {
1277  	return min_t(unsigned int,
1278  		     ((pos + len - 1) >> PAGE_SHIFT) -
1279  		     (pos >> PAGE_SHIFT) + 1,
1280  		     max_pages);
1281  }
1282  
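/*
 * Write-through path: repeatedly fill a batch of page-cache pages from
 * the iterator and send a synchronous WRITE request for it, advancing
 * the position until the iterator is drained, an error occurs or the
 * server reports a short write.
 */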
1283  static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
1284  {
1285  	struct address_space *mapping = iocb->ki_filp->f_mapping;
1286  	struct inode *inode = mapping->host;
1287  	struct fuse_conn *fc = get_fuse_conn(inode);
1288  	struct fuse_inode *fi = get_fuse_inode(inode);
1289  	loff_t pos = iocb->ki_pos;
1290  	int err = 0;
1291  	ssize_t res = 0;
1292  
1293  	if (inode->i_size < pos + iov_iter_count(ii))
1294  		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1295  
1296  	do {
1297  		ssize_t count;
1298  		struct fuse_io_args ia = {};
1299  		struct fuse_args_pages *ap = &ia.ap;
1300  		unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
1301  						      fc->max_pages);
1302  
1303  		ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
1304  		if (!ap->pages) {
1305  			err = -ENOMEM;
1306  			break;
1307  		}
1308  
1309  		count = fuse_fill_write_pages(&ia, mapping, ii, pos, nr_pages);
1310  		if (count <= 0) {
1311  			err = count;
1312  		} else {
1313  			err = fuse_send_write_pages(&ia, iocb, inode,
1314  						    pos, count);
1315  			if (!err) {
1316  				size_t num_written = ia.write.out.size;
1317  
1318  				res += num_written;
1319  				pos += num_written;
1320  
1321  				/* break out of the loop on short write */
1322  				if (num_written != count)
1323  					err = -EIO;
1324  			}
1325  		}
1326  		kfree(ap->pages);
1327  	} while (!err && iov_iter_count(ii));
1328  
1329  	fuse_write_update_attr(inode, pos, res);
1330  	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1331  
1332  	if (!res)
1333  		return err;
1334  	iocb->ki_pos += res;
1335  	return res;
1336  }
1337  
1338  static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
1339  {
1340  	struct file *file = iocb->ki_filp;
1341  	struct address_space *mapping = file->f_mapping;
1342  	ssize_t written = 0;
1343  	struct inode *inode = mapping->host;
1344  	ssize_t err;
1345  	struct fuse_conn *fc = get_fuse_conn(inode);
1346  
1347  	if (fc->writeback_cache) {
1348  		/* Update size (EOF optimization) and mode (SUID clearing) */
1349  		err = fuse_update_attributes(mapping->host, file,
1350  					     STATX_SIZE | STATX_MODE);
1351  		if (err)
1352  			return err;
1353  
1354  		if (fc->handle_killpriv_v2 &&
1355  		    setattr_should_drop_suidgid(&nop_mnt_idmap,
1356  						file_inode(file))) {
1357  			goto writethrough;
1358  		}
1359  
1360  		return generic_file_write_iter(iocb, from);
1361  	}
1362  
1363  writethrough:
1364  	inode_lock(inode);
1365  
1366  	err = generic_write_checks(iocb, from);
1367  	if (err <= 0)
1368  		goto out;
1369  
1370  	err = file_remove_privs(file);
1371  	if (err)
1372  		goto out;
1373  
1374  	err = file_update_time(file);
1375  	if (err)
1376  		goto out;
1377  
1378  	if (iocb->ki_flags & IOCB_DIRECT) {
1379  		written = generic_file_direct_write(iocb, from);
1380  		if (written < 0 || !iov_iter_count(from))
1381  			goto out;
1382  		written = direct_write_fallback(iocb, from, written,
1383  				fuse_perform_write(iocb, from));
1384  	} else {
1385  		written = fuse_perform_write(iocb, from);
1386  	}
1387  out:
1388  	inode_unlock(inode);
1389  	if (written > 0)
1390  		written = generic_write_sync(iocb, written);
1391  
1392  	return written ? written : err;
1393  }
1394  
1395  static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
1396  {
1397  	return (unsigned long)iter_iov(ii)->iov_base + ii->iov_offset;
1398  }
1399  
1400  static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
1401  					size_t max_size)
1402  {
1403  	return min(iov_iter_single_seg_count(ii), max_size);
1404  }
1405  
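/*
 * Pack the memory described by the iterator into a single request: kvec
 * iterators are passed through by address, while user memory is pinned
 * page by page into ap->pages.  On return *nbytesp holds the number of
 * bytes actually packed.
 */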
1406  static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
1407  			       size_t *nbytesp, int write,
1408  			       unsigned int max_pages)
1409  {
1410  	size_t nbytes = 0;  /* # bytes already packed in req */
1411  	ssize_t ret = 0;
1412  
1413  	/* Special case for kernel I/O: can copy directly into the buffer */
1414  	if (iov_iter_is_kvec(ii)) {
1415  		unsigned long user_addr = fuse_get_user_addr(ii);
1416  		size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
1417  
1418  		if (write)
1419  			ap->args.in_args[1].value = (void *) user_addr;
1420  		else
1421  			ap->args.out_args[0].value = (void *) user_addr;
1422  
1423  		iov_iter_advance(ii, frag_size);
1424  		*nbytesp = frag_size;
1425  		return 0;
1426  	}
1427  
1428  	while (nbytes < *nbytesp && ap->num_pages < max_pages) {
1429  		unsigned npages;
1430  		size_t start;
1431  		ret = iov_iter_get_pages2(ii, &ap->pages[ap->num_pages],
1432  					*nbytesp - nbytes,
1433  					max_pages - ap->num_pages,
1434  					&start);
1435  		if (ret < 0)
1436  			break;
1437  
1438  		nbytes += ret;
1439  
1440  		ret += start;
1441  		npages = DIV_ROUND_UP(ret, PAGE_SIZE);
1442  
1443  		ap->descs[ap->num_pages].offset = start;
1444  		fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);
1445  
1446  		ap->num_pages += npages;
1447  		ap->descs[ap->num_pages - 1].length -=
1448  			(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
1449  	}
1450  
1451  	ap->args.user_pages = true;
1452  	if (write)
1453  		ap->args.in_pages = true;
1454  	else
1455  		ap->args.out_pages = true;
1456  
1457  	*nbytesp = nbytes;
1458  
1459  	return ret < 0 ? ret : 0;
1460  }
1461  
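/*
 * Common direct I/O loop for FOPEN_DIRECT_IO and CUSE: split the iterator
 * into chunks of at most max_read/max_write bytes, pin the pages for each
 * chunk and send it either synchronously or, when io->async is set, as a
 * background request completed by fuse_aio_complete_req().
 */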
1462  ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
1463  		       loff_t *ppos, int flags)
1464  {
1465  	int write = flags & FUSE_DIO_WRITE;
1466  	int cuse = flags & FUSE_DIO_CUSE;
1467  	struct file *file = io->iocb->ki_filp;
1468  	struct inode *inode = file->f_mapping->host;
1469  	struct fuse_file *ff = file->private_data;
1470  	struct fuse_conn *fc = ff->fm->fc;
1471  	size_t nmax = write ? fc->max_write : fc->max_read;
1472  	loff_t pos = *ppos;
1473  	size_t count = iov_iter_count(iter);
1474  	pgoff_t idx_from = pos >> PAGE_SHIFT;
1475  	pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT;
1476  	ssize_t res = 0;
1477  	int err = 0;
1478  	struct fuse_io_args *ia;
1479  	unsigned int max_pages;
1480  
1481  	max_pages = iov_iter_npages(iter, fc->max_pages);
1482  	ia = fuse_io_alloc(io, max_pages);
1483  	if (!ia)
1484  		return -ENOMEM;
1485  
1486  	if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
1487  		if (!write)
1488  			inode_lock(inode);
1489  		fuse_sync_writes(inode);
1490  		if (!write)
1491  			inode_unlock(inode);
1492  	}
1493  
1494  	io->should_dirty = !write && user_backed_iter(iter);
1495  	while (count) {
1496  		ssize_t nres;
1497  		fl_owner_t owner = current->files;
1498  		size_t nbytes = min(count, nmax);
1499  
1500  		err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
1501  					  max_pages);
1502  		if (err && !nbytes)
1503  			break;
1504  
1505  		if (write) {
1506  			if (!capable(CAP_FSETID))
1507  				ia->write.in.write_flags |= FUSE_WRITE_KILL_SUIDGID;
1508  
1509  			nres = fuse_send_write(ia, pos, nbytes, owner);
1510  		} else {
1511  			nres = fuse_send_read(ia, pos, nbytes, owner);
1512  		}
1513  
1514  		if (!io->async || nres < 0) {
1515  			fuse_release_user_pages(&ia->ap, io->should_dirty);
1516  			fuse_io_free(ia);
1517  		}
1518  		ia = NULL;
1519  		if (nres < 0) {
1520  			iov_iter_revert(iter, nbytes);
1521  			err = nres;
1522  			break;
1523  		}
1524  		WARN_ON(nres > nbytes);
1525  
1526  		count -= nres;
1527  		res += nres;
1528  		pos += nres;
1529  		if (nres != nbytes) {
1530  			iov_iter_revert(iter, nbytes - nres);
1531  			break;
1532  		}
1533  		if (count) {
1534  			max_pages = iov_iter_npages(iter, fc->max_pages);
1535  			ia = fuse_io_alloc(io, max_pages);
1536  			if (!ia)
1537  				break;
1538  		}
1539  	}
1540  	if (ia)
1541  		fuse_io_free(ia);
1542  	if (res > 0)
1543  		*ppos = pos;
1544  
1545  	return res > 0 ? res : err;
1546  }
1547  EXPORT_SYMBOL_GPL(fuse_direct_io);
1548  
1549  static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
1550  				  struct iov_iter *iter,
1551  				  loff_t *ppos)
1552  {
1553  	ssize_t res;
1554  	struct inode *inode = file_inode(io->iocb->ki_filp);
1555  
1556  	res = fuse_direct_io(io, iter, ppos, 0);
1557  
1558  	fuse_invalidate_atime(inode);
1559  
1560  	return res;
1561  }
1562  
1563  static ssize_t fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
1564  
1565  static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
1566  {
1567  	ssize_t res;
1568  
1569  	if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
1570  		res = fuse_direct_IO(iocb, to);
1571  	} else {
1572  		struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
1573  
1574  		res = __fuse_direct_read(&io, to, &iocb->ki_pos);
1575  	}
1576  
1577  	return res;
1578  }
1579  
1580  static bool fuse_direct_write_extending_i_size(struct kiocb *iocb,
1581  					       struct iov_iter *iter)
1582  {
1583  	struct inode *inode = file_inode(iocb->ki_filp);
1584  
1585  	return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode);
1586  }
1587  
1588  static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
1589  {
1590  	struct inode *inode = file_inode(iocb->ki_filp);
1591  	struct file *file = iocb->ki_filp;
1592  	struct fuse_file *ff = file->private_data;
1593  	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
1594  	ssize_t res;
1595  	bool exclusive_lock =
1596  		!(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) ||
1597  		iocb->ki_flags & IOCB_APPEND ||
1598  		fuse_direct_write_extending_i_size(iocb, from);
1599  
1600  	/*
1601  	 * Take exclusive lock if
1602  	 * - Parallel direct writes are disabled - a user space decision
1603  	 * - Parallel direct writes are enabled and i_size is being extended.
1604  	 *   This might not be needed at all, but needs further investigation.
1605  	 */
1606  	if (exclusive_lock)
1607  		inode_lock(inode);
1608  	else {
1609  		inode_lock_shared(inode);
1610  
1611  		/* A race with truncate might have come up as the decision for
1612  		 * the lock type was done without holding the lock, so check again.
1613  		 */
1614  		if (fuse_direct_write_extending_i_size(iocb, from)) {
1615  			inode_unlock_shared(inode);
1616  			inode_lock(inode);
1617  			exclusive_lock = true;
1618  		}
1619  	}
1620  
1621  	res = generic_write_checks(iocb, from);
1622  	if (res > 0) {
1623  		if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
1624  			res = fuse_direct_IO(iocb, from);
1625  		} else {
1626  			res = fuse_direct_io(&io, from, &iocb->ki_pos,
1627  					     FUSE_DIO_WRITE);
1628  			fuse_write_update_attr(inode, iocb->ki_pos, res);
1629  		}
1630  	}
1631  	if (exclusive_lock)
1632  		inode_unlock(inode);
1633  	else
1634  		inode_unlock_shared(inode);
1635  
1636  	return res;
1637  }
1638  
1639  static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1640  {
1641  	struct file *file = iocb->ki_filp;
1642  	struct fuse_file *ff = file->private_data;
1643  	struct inode *inode = file_inode(file);
1644  
1645  	if (fuse_is_bad(inode))
1646  		return -EIO;
1647  
1648  	if (FUSE_IS_DAX(inode))
1649  		return fuse_dax_read_iter(iocb, to);
1650  
1651  	if (!(ff->open_flags & FOPEN_DIRECT_IO))
1652  		return fuse_cache_read_iter(iocb, to);
1653  	else
1654  		return fuse_direct_read_iter(iocb, to);
1655  }
1656  
1657  static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1658  {
1659  	struct file *file = iocb->ki_filp;
1660  	struct fuse_file *ff = file->private_data;
1661  	struct inode *inode = file_inode(file);
1662  
1663  	if (fuse_is_bad(inode))
1664  		return -EIO;
1665  
1666  	if (FUSE_IS_DAX(inode))
1667  		return fuse_dax_write_iter(iocb, from);
1668  
1669  	if (!(ff->open_flags & FOPEN_DIRECT_IO))
1670  		return fuse_cache_write_iter(iocb, from);
1671  	else
1672  		return fuse_direct_write_iter(iocb, from);
1673  }
1674  
1675  static void fuse_writepage_free(struct fuse_writepage_args *wpa)
1676  {
1677  	struct fuse_args_pages *ap = &wpa->ia.ap;
1678  	int i;
1679  
1680  	if (wpa->bucket)
1681  		fuse_sync_bucket_dec(wpa->bucket);
1682  
1683  	for (i = 0; i < ap->num_pages; i++)
1684  		__free_page(ap->pages[i]);
1685  
1686  	if (wpa->ia.ff)
1687  		fuse_file_put(wpa->ia.ff, false, false);
1688  
1689  	kfree(ap->pages);
1690  	kfree(wpa);
1691  }
1692  
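/*
 * Account the end of writeback for all pages of a request: drop the
 * per-bdi WB_WRITEBACK and NR_WRITEBACK_TEMP counters for the temporary
 * pages and wake anyone waiting in fuse_wait_on_page_writeback().
 */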
1693  static void fuse_writepage_finish(struct fuse_mount *fm,
1694  				  struct fuse_writepage_args *wpa)
1695  {
1696  	struct fuse_args_pages *ap = &wpa->ia.ap;
1697  	struct inode *inode = wpa->inode;
1698  	struct fuse_inode *fi = get_fuse_inode(inode);
1699  	struct backing_dev_info *bdi = inode_to_bdi(inode);
1700  	int i;
1701  
1702  	for (i = 0; i < ap->num_pages; i++) {
1703  		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
1704  		dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
1705  		wb_writeout_inc(&bdi->wb);
1706  	}
1707  	wake_up(&fi->page_waitq);
1708  }
1709  
1710  /* Called under fi->lock, may release and reacquire it */
1711  static void fuse_send_writepage(struct fuse_mount *fm,
1712  				struct fuse_writepage_args *wpa, loff_t size)
1713  __releases(fi->lock)
1714  __acquires(fi->lock)
1715  {
1716  	struct fuse_writepage_args *aux, *next;
1717  	struct fuse_inode *fi = get_fuse_inode(wpa->inode);
1718  	struct fuse_write_in *inarg = &wpa->ia.write.in;
1719  	struct fuse_args *args = &wpa->ia.ap.args;
1720  	__u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
1721  	int err;
1722  
1723  	fi->writectr++;
1724  	if (inarg->offset + data_size <= size) {
1725  		inarg->size = data_size;
1726  	} else if (inarg->offset < size) {
1727  		inarg->size = size - inarg->offset;
1728  	} else {
1729  		/* Got truncated off completely */
1730  		goto out_free;
1731  	}
1732  
1733  	args->in_args[1].size = inarg->size;
1734  	args->force = true;
1735  	args->nocreds = true;
1736  
1737  	err = fuse_simple_background(fm, args, GFP_ATOMIC);
1738  	if (err == -ENOMEM) {
1739  		spin_unlock(&fi->lock);
1740  		err = fuse_simple_background(fm, args, GFP_NOFS | __GFP_NOFAIL);
1741  		spin_lock(&fi->lock);
1742  	}
1743  
1744  	/* Fails on broken connection only */
1745  	if (unlikely(err))
1746  		goto out_free;
1747  
1748  	return;
1749  
1750   out_free:
1751  	fi->writectr--;
1752  	rb_erase(&wpa->writepages_entry, &fi->writepages);
1753  	fuse_writepage_finish(fm, wpa);
1754  	spin_unlock(&fi->lock);
1755  
1756  	/* After fuse_writepage_finish() the aux request list is private */
1757  	for (aux = wpa->next; aux; aux = next) {
1758  		next = aux->next;
1759  		aux->next = NULL;
1760  		fuse_writepage_free(aux);
1761  	}
1762  
1763  	fuse_writepage_free(wpa);
1764  	spin_lock(&fi->lock);
1765  }
1766  
1767  /*
1768   * If fi->writectr is positive (no truncate or fsync going on) send
1769   * all queued writepage requests.
1770   *
1771   * Called with fi->lock held
1772   */
1773  void fuse_flush_writepages(struct inode *inode)
1774  __releases(fi->lock)
1775  __acquires(fi->lock)
1776  {
1777  	struct fuse_mount *fm = get_fuse_mount(inode);
1778  	struct fuse_inode *fi = get_fuse_inode(inode);
1779  	loff_t crop = i_size_read(inode);
1780  	struct fuse_writepage_args *wpa;
1781  
1782  	while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
1783  		wpa = list_entry(fi->queued_writes.next,
1784  				 struct fuse_writepage_args, queue_entry);
1785  		list_del_init(&wpa->queue_entry);
1786  		fuse_send_writepage(fm, wpa, crop);
1787  	}
1788  }
1789  
1790  static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
1791  						struct fuse_writepage_args *wpa)
1792  {
1793  	pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
1794  	pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
1795  	struct rb_node **p = &root->rb_node;
1796  	struct rb_node  *parent = NULL;
1797  
1798  	WARN_ON(!wpa->ia.ap.num_pages);
1799  	while (*p) {
1800  		struct fuse_writepage_args *curr;
1801  		pgoff_t curr_index;
1802  
1803  		parent = *p;
1804  		curr = rb_entry(parent, struct fuse_writepage_args,
1805  				writepages_entry);
1806  		WARN_ON(curr->inode != wpa->inode);
1807  		curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
1808  
1809  		if (idx_from >= curr_index + curr->ia.ap.num_pages)
1810  			p = &(*p)->rb_right;
1811  		else if (idx_to < curr_index)
1812  			p = &(*p)->rb_left;
1813  		else
1814  			return curr;
1815  	}
1816  
1817  	rb_link_node(&wpa->writepages_entry, parent, p);
1818  	rb_insert_color(&wpa->writepages_entry, root);
1819  	return NULL;
1820  }
1821  
1822  static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
1823  {
1824  	WARN_ON(fuse_insert_writeback(root, wpa));
1825  }
1826  
1827  static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
1828  			       int error)
1829  {
1830  	struct fuse_writepage_args *wpa =
1831  		container_of(args, typeof(*wpa), ia.ap.args);
1832  	struct inode *inode = wpa->inode;
1833  	struct fuse_inode *fi = get_fuse_inode(inode);
1834  	struct fuse_conn *fc = get_fuse_conn(inode);
1835  
1836  	mapping_set_error(inode->i_mapping, error);
1837  	/*
1838  	 * A writeback finished and this might have updated mtime/ctime on
1839  	 * the server, making local mtime/ctime stale.  Hence invalidate attrs.
1840  	 * Do this only if writeback_cache is not enabled.  If writeback_cache
1841  	 * is enabled, we trust local ctime/mtime.
1842  	 */
1843  	if (!fc->writeback_cache)
1844  		fuse_invalidate_attr_mask(inode, FUSE_STATX_MODIFY);
1845  	spin_lock(&fi->lock);
1846  	rb_erase(&wpa->writepages_entry, &fi->writepages);
1847  	while (wpa->next) {
1848  		struct fuse_mount *fm = get_fuse_mount(inode);
1849  		struct fuse_write_in *inarg = &wpa->ia.write.in;
1850  		struct fuse_writepage_args *next = wpa->next;
1851  
1852  		wpa->next = next->next;
1853  		next->next = NULL;
1854  		next->ia.ff = fuse_file_get(wpa->ia.ff);
1855  		tree_insert(&fi->writepages, next);
1856  
1857  		/*
1858  		 * Skip fuse_flush_writepages() to make it easy to crop requests
1859  		 * based on primary request size.
1860  		 *
1861  		 * 1st case (trivial): there are no concurrent activities using
1862  		 * fuse_set/release_nowrite.  Then we're on safe side because
1863  		 * fuse_flush_writepages() would call fuse_send_writepage()
1864  		 * anyway.
1865  		 *
1866  		 * 2nd case: someone called fuse_set_nowrite and it is waiting
1867  		 * now for completion of all in-flight requests.  This happens
1868  		 * rarely and no more than once per page, so this should be
1869  		 * okay.
1870  		 *
1871  		 * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
1872  		 * of fuse_set_nowrite..fuse_release_nowrite section.  The fact
1873  		 * that fuse_set_nowrite returned implies that all in-flight
1874  		 * requests were completed along with all of their secondary
1875  		 * requests.  Further primary requests are blocked by negative
1876  		 * writectr.  Hence there cannot be any in-flight requests and
1877  		 * no invocations of fuse_writepage_end() while we're in
1878  		 * fuse_set_nowrite..fuse_release_nowrite section.
1879  		 */
1880  		fuse_send_writepage(fm, next, inarg->offset + inarg->size);
1881  	}
1882  	fi->writectr--;
1883  	fuse_writepage_finish(fm, wpa);
1884  	spin_unlock(&fi->lock);
1885  	fuse_writepage_free(wpa);
1886  }
1887  
1888  static struct fuse_file *__fuse_write_file_get(struct fuse_inode *fi)
1889  {
1890  	struct fuse_file *ff;
1891  
1892  	spin_lock(&fi->lock);
1893  	ff = list_first_entry_or_null(&fi->write_files, struct fuse_file,
1894  				      write_entry);
1895  	if (ff)
1896  		fuse_file_get(ff);
1897  	spin_unlock(&fi->lock);
1898  
1899  	return ff;
1900  }
1901  
1902  static struct fuse_file *fuse_write_file_get(struct fuse_inode *fi)
1903  {
1904  	struct fuse_file *ff = __fuse_write_file_get(fi);
1905  	WARN_ON(!ff);
1906  	return ff;
1907  }
1908  
1909  int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
1910  {
1911  	struct fuse_inode *fi = get_fuse_inode(inode);
1912  	struct fuse_file *ff;
1913  	int err;
1914  
1915  	/*
1916  	 * Inode is always written before the last reference is dropped and
1917  	 * hence this should not be reached from reclaim.
1918  	 *
1919  	 * Writing back the inode from reclaim can deadlock if the request
1920  	 * processing itself needs an allocation.  Allocations triggering
1921  	 * reclaim while serving a request can't be prevented, because it can
1922  	 * involve any number of unrelated userspace processes.
1923  	 */
1924  	WARN_ON(wbc->for_reclaim);
1925  
1926  	ff = __fuse_write_file_get(fi);
1927  	err = fuse_flush_times(inode, ff);
1928  	if (ff)
1929  		fuse_file_put(ff, false, false);
1930  
1931  	return err;
1932  }
1933  
1934  static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
1935  {
1936  	struct fuse_writepage_args *wpa;
1937  	struct fuse_args_pages *ap;
1938  
1939  	wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
1940  	if (wpa) {
1941  		ap = &wpa->ia.ap;
1942  		ap->num_pages = 0;
1943  		ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
1944  		if (!ap->pages) {
1945  			kfree(wpa);
1946  			wpa = NULL;
1947  		}
1948  	}
1949  	return wpa;
1950  
1951  }
1952  
1953  static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
1954  					 struct fuse_writepage_args *wpa)
1955  {
1956  	if (!fc->sync_fs)
1957  		return;
1958  
1959  	rcu_read_lock();
1960  	/* Prevent resurrection of dead bucket in unlikely race with syncfs */
1961  	do {
1962  		wpa->bucket = rcu_dereference(fc->curr_bucket);
1963  	} while (unlikely(!atomic_inc_not_zero(&wpa->bucket->count)));
1964  	rcu_read_unlock();
1965  }
1966  
1967  static int fuse_writepage_locked(struct page *page)
1968  {
1969  	struct address_space *mapping = page->mapping;
1970  	struct inode *inode = mapping->host;
1971  	struct fuse_conn *fc = get_fuse_conn(inode);
1972  	struct fuse_inode *fi = get_fuse_inode(inode);
1973  	struct fuse_writepage_args *wpa;
1974  	struct fuse_args_pages *ap;
1975  	struct page *tmp_page;
1976  	int error = -ENOMEM;
1977  
1978  	set_page_writeback(page);
1979  
1980  	wpa = fuse_writepage_args_alloc();
1981  	if (!wpa)
1982  		goto err;
1983  	ap = &wpa->ia.ap;
1984  
1985  	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1986  	if (!tmp_page)
1987  		goto err_free;
1988  
1989  	error = -EIO;
1990  	wpa->ia.ff = fuse_write_file_get(fi);
1991  	if (!wpa->ia.ff)
1992  		goto err_nofile;
1993  
1994  	fuse_writepage_add_to_bucket(fc, wpa);
1995  	fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
1996  
1997  	copy_highpage(tmp_page, page);
1998  	wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
1999  	wpa->next = NULL;
2000  	ap->args.in_pages = true;
2001  	ap->num_pages = 1;
2002  	ap->pages[0] = tmp_page;
2003  	ap->descs[0].offset = 0;
2004  	ap->descs[0].length = PAGE_SIZE;
2005  	ap->args.end = fuse_writepage_end;
2006  	wpa->inode = inode;
2007  
2008  	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
2009  	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
2010  
2011  	spin_lock(&fi->lock);
2012  	tree_insert(&fi->writepages, wpa);
2013  	list_add_tail(&wpa->queue_entry, &fi->queued_writes);
2014  	fuse_flush_writepages(inode);
2015  	spin_unlock(&fi->lock);
2016  
2017  	end_page_writeback(page);
2018  
2019  	return 0;
2020  
2021  err_nofile:
2022  	__free_page(tmp_page);
2023  err_free:
2024  	kfree(wpa);
2025  err:
2026  	mapping_set_error(page->mapping, error);
2027  	end_page_writeback(page);
2028  	return error;
2029  }
2030  
2031  static int fuse_writepage(struct page *page, struct writeback_control *wbc)
2032  {
2033  	struct fuse_conn *fc = get_fuse_conn(page->mapping->host);
2034  	int err;
2035  
2036  	if (fuse_page_is_writeback(page->mapping->host, page->index)) {
2037  		/*
2038  		 * ->writepages() should be called for sync() and friends.  We
2039  		 * should only get here on direct reclaim and then we are
2040  		 * allowed to skip a page which is already in flight
2041  		 */
2042  		WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
2043  
2044  		redirty_page_for_writepage(wbc, page);
2045  		unlock_page(page);
2046  		return 0;
2047  	}
2048  
2049  	if (wbc->sync_mode == WB_SYNC_NONE &&
2050  	    fc->num_background >= fc->congestion_threshold)
2051  		return AOP_WRITEPAGE_ACTIVATE;
2052  
2053  	err = fuse_writepage_locked(page);
2054  	unlock_page(page);
2055  
2056  	return err;
2057  }
2058  
2059  struct fuse_fill_wb_data {
2060  	struct fuse_writepage_args *wpa;
2061  	struct fuse_file *ff;
2062  	struct inode *inode;
2063  	struct page **orig_pages;
2064  	unsigned int max_pages;
2065  };
2066  
2067  static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
2068  {
2069  	struct fuse_args_pages *ap = &data->wpa->ia.ap;
2070  	struct fuse_conn *fc = get_fuse_conn(data->inode);
2071  	struct page **pages;
2072  	struct fuse_page_desc *descs;
2073  	unsigned int npages = min_t(unsigned int,
2074  				    max_t(unsigned int, data->max_pages * 2,
2075  					  FUSE_DEFAULT_MAX_PAGES_PER_REQ),
2076  				    fc->max_pages);
2077  	WARN_ON(npages <= data->max_pages);
2078  
2079  	pages = fuse_pages_alloc(npages, GFP_NOFS, &descs);
2080  	if (!pages)
2081  		return false;
2082  
2083  	memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages);
2084  	memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages);
2085  	kfree(ap->pages);
2086  	ap->pages = pages;
2087  	ap->descs = descs;
2088  	data->max_pages = npages;
2089  
2090  	return true;
2091  }
2092  
2093  static void fuse_writepages_send(struct fuse_fill_wb_data *data)
2094  {
2095  	struct fuse_writepage_args *wpa = data->wpa;
2096  	struct inode *inode = data->inode;
2097  	struct fuse_inode *fi = get_fuse_inode(inode);
2098  	int num_pages = wpa->ia.ap.num_pages;
2099  	int i;
2100  
2101  	wpa->ia.ff = fuse_file_get(data->ff);
2102  	spin_lock(&fi->lock);
2103  	list_add_tail(&wpa->queue_entry, &fi->queued_writes);
2104  	fuse_flush_writepages(inode);
2105  	spin_unlock(&fi->lock);
2106  
2107  	for (i = 0; i < num_pages; i++)
2108  		end_page_writeback(data->orig_pages[i]);
2109  }
2110  
2111  /*
2112   * Check under fi->lock if the page is under writeback, and insert it onto the
2113   * rb_tree if not.  Otherwise iterate the auxiliary write requests to see if there's
2114   * one already added for a page at this offset.  If there's none, then insert
2115   * this new request onto the auxiliary list, otherwise reuse the existing one by
2116   * swapping the new temp page with the old one.
2117   */
2118  static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
2119  			       struct page *page)
2120  {
2121  	struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
2122  	struct fuse_writepage_args *tmp;
2123  	struct fuse_writepage_args *old_wpa;
2124  	struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
2125  
2126  	WARN_ON(new_ap->num_pages != 0);
2127  	new_ap->num_pages = 1;
2128  
2129  	spin_lock(&fi->lock);
2130  	old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
2131  	if (!old_wpa) {
2132  		spin_unlock(&fi->lock);
2133  		return true;
2134  	}
2135  
2136  	for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
2137  		pgoff_t curr_index;
2138  
2139  		WARN_ON(tmp->inode != new_wpa->inode);
2140  		curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
2141  		if (curr_index == page->index) {
2142  			WARN_ON(tmp->ia.ap.num_pages != 1);
2143  			swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
2144  			break;
2145  		}
2146  	}
2147  
2148  	if (!tmp) {
2149  		new_wpa->next = old_wpa->next;
2150  		old_wpa->next = new_wpa;
2151  	}
2152  
2153  	spin_unlock(&fi->lock);
2154  
2155  	if (tmp) {
2156  		struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);
2157  
2158  		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
2159  		dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
2160  		wb_writeout_inc(&bdi->wb);
2161  		fuse_writepage_free(new_wpa);
2162  	}
2163  
2164  	return false;
2165  }
2166  
2167  static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
2168  				     struct fuse_args_pages *ap,
2169  				     struct fuse_fill_wb_data *data)
2170  {
2171  	WARN_ON(!ap->num_pages);
2172  
2173  	/*
2174  	 * Being under writeback is unlikely but possible.  For example, a direct
2175  	 * read to an mmapped fuse file will set the page dirty twice: once when
2176  	 * the pages are faulted with get_user_pages(), and again after the read
2177  	 * has completed.
2178  	 */
2179  	if (fuse_page_is_writeback(data->inode, page->index))
2180  		return true;
2181  
2182  	/* Reached max pages */
2183  	if (ap->num_pages == fc->max_pages)
2184  		return true;
2185  
2186  	/* Reached max write bytes */
2187  	if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write)
2188  		return true;
2189  
2190  	/* Discontinuity */
2191  	if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)
2192  		return true;
2193  
2194  	/* Need to grow the pages array?  If so, did the expansion fail? */
2195  	if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data))
2196  		return true;
2197  
2198  	return false;
2199  }
2200  
2201  static int fuse_writepages_fill(struct folio *folio,
2202  		struct writeback_control *wbc, void *_data)
2203  {
2204  	struct fuse_fill_wb_data *data = _data;
2205  	struct fuse_writepage_args *wpa = data->wpa;
2206  	struct fuse_args_pages *ap = &wpa->ia.ap;
2207  	struct inode *inode = data->inode;
2208  	struct fuse_inode *fi = get_fuse_inode(inode);
2209  	struct fuse_conn *fc = get_fuse_conn(inode);
2210  	struct page *tmp_page;
2211  	int err;
2212  
2213  	if (!data->ff) {
2214  		err = -EIO;
2215  		data->ff = fuse_write_file_get(fi);
2216  		if (!data->ff)
2217  			goto out_unlock;
2218  	}
2219  
2220  	if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) {
2221  		fuse_writepages_send(data);
2222  		data->wpa = NULL;
2223  	}
2224  
2225  	err = -ENOMEM;
2226  	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
2227  	if (!tmp_page)
2228  		goto out_unlock;
2229  
2230  	/*
2231  	 * The page must not be redirtied until the writeout is completed
2232  	 * (i.e. userspace has sent a reply to the write request).  Otherwise
2233  	 * there could be more than one temporary page instance for each real
2234  	 * page.
2235  	 *
2236  	 * This is ensured by holding the page lock in page_mkwrite() while
2237  	 * checking fuse_page_is_writeback().  We have been holding the page lock
2238  	 * since clear_page_dirty_for_io() and keep it held until we add the
2239  	 * request to the fi->writepages list and increment ap->num_pages.
2240  	 * After this fuse_page_is_writeback() will indicate that the page is
2241  	 * under writeback, so we can release the page lock.
2242  	 */
2243  	if (data->wpa == NULL) {
2244  		err = -ENOMEM;
2245  		wpa = fuse_writepage_args_alloc();
2246  		if (!wpa) {
2247  			__free_page(tmp_page);
2248  			goto out_unlock;
2249  		}
2250  		fuse_writepage_add_to_bucket(fc, wpa);
2251  
2252  		data->max_pages = 1;
2253  
2254  		ap = &wpa->ia.ap;
2255  		fuse_write_args_fill(&wpa->ia, data->ff, folio_pos(folio), 0);
2256  		wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
2257  		wpa->next = NULL;
2258  		ap->args.in_pages = true;
2259  		ap->args.end = fuse_writepage_end;
2260  		ap->num_pages = 0;
2261  		wpa->inode = inode;
2262  	}
2263  	folio_start_writeback(folio);
2264  
2265  	copy_highpage(tmp_page, &folio->page);
2266  	ap->pages[ap->num_pages] = tmp_page;
2267  	ap->descs[ap->num_pages].offset = 0;
2268  	ap->descs[ap->num_pages].length = PAGE_SIZE;
2269  	data->orig_pages[ap->num_pages] = &folio->page;
2270  
2271  	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
2272  	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
2273  
2274  	err = 0;
2275  	if (data->wpa) {
2276  		/*
2277  		 * Protected by fi->lock against concurrent access by
2278  		 * fuse_page_is_writeback().
2279  		 */
2280  		spin_lock(&fi->lock);
2281  		ap->num_pages++;
2282  		spin_unlock(&fi->lock);
2283  	} else if (fuse_writepage_add(wpa, &folio->page)) {
2284  		data->wpa = wpa;
2285  	} else {
2286  		folio_end_writeback(folio);
2287  	}
2288  out_unlock:
2289  	folio_unlock(folio);
2290  
2291  	return err;
2292  }
2293  
2294  static int fuse_writepages(struct address_space *mapping,
2295  			   struct writeback_control *wbc)
2296  {
2297  	struct inode *inode = mapping->host;
2298  	struct fuse_conn *fc = get_fuse_conn(inode);
2299  	struct fuse_fill_wb_data data;
2300  	int err;
2301  
2302  	err = -EIO;
2303  	if (fuse_is_bad(inode))
2304  		goto out;
2305  
2306  	if (wbc->sync_mode == WB_SYNC_NONE &&
2307  	    fc->num_background >= fc->congestion_threshold)
2308  		return 0;
2309  
2310  	data.inode = inode;
2311  	data.wpa = NULL;
2312  	data.ff = NULL;
2313  
2314  	err = -ENOMEM;
2315  	data.orig_pages = kcalloc(fc->max_pages,
2316  				  sizeof(struct page *),
2317  				  GFP_NOFS);
2318  	if (!data.orig_pages)
2319  		goto out;
2320  
2321  	err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
2322  	if (data.wpa) {
2323  		WARN_ON(!data.wpa->ia.ap.num_pages);
2324  		fuse_writepages_send(&data);
2325  	}
2326  	if (data.ff)
2327  		fuse_file_put(data.ff, false, false);
2328  
2329  	kfree(data.orig_pages);
2330  out:
2331  	return err;
2332  }
2333  
2334  /*
2335   * It would be worthwhile to make sure that space is reserved on disk for the
2336   * write, but how to implement that without killing performance needs more thought.
2337   */
2338  static int fuse_write_begin(struct file *file, struct address_space *mapping,
2339  		loff_t pos, unsigned len, struct page **pagep, void **fsdata)
2340  {
2341  	pgoff_t index = pos >> PAGE_SHIFT;
2342  	struct fuse_conn *fc = get_fuse_conn(file_inode(file));
2343  	struct page *page;
2344  	loff_t fsize;
2345  	int err = -ENOMEM;
2346  
2347  	WARN_ON(!fc->writeback_cache);
2348  
2349  	page = grab_cache_page_write_begin(mapping, index);
2350  	if (!page)
2351  		goto error;
2352  
2353  	fuse_wait_on_page_writeback(mapping->host, page->index);
2354  
2355  	if (PageUptodate(page) || len == PAGE_SIZE)
2356  		goto success;
2357  	/*
2358  	 * Check if the start of this page comes after the end of the file, in
2359  	 * which case the readpage can be optimized away.
2360  	 */
2361  	fsize = i_size_read(mapping->host);
2362  	if (fsize <= (pos & PAGE_MASK)) {
2363  		size_t off = pos & ~PAGE_MASK;
2364  		if (off)
2365  			zero_user_segment(page, 0, off);
2366  		goto success;
2367  	}
2368  	err = fuse_do_readpage(file, page);
2369  	if (err)
2370  		goto cleanup;
2371  success:
2372  	*pagep = page;
2373  	return 0;
2374  
2375  cleanup:
2376  	unlock_page(page);
2377  	put_page(page);
2378  error:
2379  	return err;
2380  }
2381  
2382  static int fuse_write_end(struct file *file, struct address_space *mapping,
2383  		loff_t pos, unsigned len, unsigned copied,
2384  		struct page *page, void *fsdata)
2385  {
2386  	struct inode *inode = page->mapping->host;
2387  
2388  	/* Haven't copied anything?  Skip zeroing, size extending, dirtying. */
2389  	if (!copied)
2390  		goto unlock;
2391  
2392  	pos += copied;
2393  	if (!PageUptodate(page)) {
2394  		/* Zero any unwritten bytes at the end of the page */
2395  		size_t endoff = pos & ~PAGE_MASK;
2396  		if (endoff)
2397  			zero_user_segment(page, endoff, PAGE_SIZE);
2398  		SetPageUptodate(page);
2399  	}
2400  
2401  	if (pos > inode->i_size)
2402  		i_size_write(inode, pos);
2403  
2404  	set_page_dirty(page);
2405  
2406  unlock:
2407  	unlock_page(page);
2408  	put_page(page);
2409  
2410  	return copied;
2411  }
2412  
2413  static int fuse_launder_folio(struct folio *folio)
2414  {
2415  	int err = 0;
2416  	if (folio_clear_dirty_for_io(folio)) {
2417  		struct inode *inode = folio->mapping->host;
2418  
2419  		/* Serialize with pending writeback for the same page */
2420  		fuse_wait_on_page_writeback(inode, folio->index);
2421  		err = fuse_writepage_locked(&folio->page);
2422  		if (!err)
2423  			fuse_wait_on_page_writeback(inode, folio->index);
2424  	}
2425  	return err;
2426  }
2427  
2428  /*
2429   * Write back dirty data/metadata now (there may not be any suitable
2430   * open files later for data)
2431   */
2432  static void fuse_vma_close(struct vm_area_struct *vma)
2433  {
2434  	int err;
2435  
2436  	err = write_inode_now(vma->vm_file->f_mapping->host, 1);
2437  	mapping_set_error(vma->vm_file->f_mapping, err);
2438  }
2439  
2440  /*
2441   * Wait for writeback against this page to complete before allowing it
2442   * to be marked dirty again, and hence written back again, possibly
2443   * before the previous writepage has completed.
2444   *
2445   * Block here, instead of in ->writepage(), so that the userspace fs
2446   * can only block processes actually operating on the filesystem.
2447   *
2448   * Otherwise unprivileged userspace fs would be able to block
2449   * unrelated:
2450   *
2451   * - page migration
2452   * - sync(2)
2453   * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
2454   */
2455  static vm_fault_t fuse_page_mkwrite(struct vm_fault *vmf)
2456  {
2457  	struct page *page = vmf->page;
2458  	struct inode *inode = file_inode(vmf->vma->vm_file);
2459  
2460  	file_update_time(vmf->vma->vm_file);
2461  	lock_page(page);
2462  	if (page->mapping != inode->i_mapping) {
2463  		unlock_page(page);
2464  		return VM_FAULT_NOPAGE;
2465  	}
2466  
2467  	fuse_wait_on_page_writeback(inode, page->index);
2468  	return VM_FAULT_LOCKED;
2469  }
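
/*
 * Illustrative userspace sketch (not part of fs/fuse/file.c): dirtying a
 * MAP_SHARED mapping of a FUSE file is what reaches fuse_page_mkwrite()
 * above; the handler waits for any writeback of the page before letting it
 * be dirtied again.  The path name and the 4096-byte length are assumptions
 * for the example only.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	static int dirty_shared_mapping(void)
 *	{
 *		int fd = open("/mnt/fuse/file", O_RDWR);
 *		char *p;
 *
 *		if (fd < 0)
 *			return -1;
 *		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *		if (p == MAP_FAILED) {
 *			close(fd);
 *			return -1;
 *		}
 *		memcpy(p, "hello", 5);   // first store faults into fuse_page_mkwrite()
 *		msync(p, 4096, MS_SYNC); // forces writeback of the dirtied page
 *		munmap(p, 4096);
 *		return close(fd);
 *	}
 */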
2470  
2471  static const struct vm_operations_struct fuse_file_vm_ops = {
2472  	.close		= fuse_vma_close,
2473  	.fault		= filemap_fault,
2474  	.map_pages	= filemap_map_pages,
2475  	.page_mkwrite	= fuse_page_mkwrite,
2476  };
2477  
2478  static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
2479  {
2480  	struct fuse_file *ff = file->private_data;
2481  
2482  	/* DAX mmap is superior to direct_io mmap */
2483  	if (FUSE_IS_DAX(file_inode(file)))
2484  		return fuse_dax_mmap(file, vma);
2485  
2486  	if (ff->open_flags & FOPEN_DIRECT_IO) {
2487  		/* Can't provide the coherency needed for MAP_SHARED */
2488  		if (vma->vm_flags & VM_MAYSHARE)
2489  			return -ENODEV;
2490  
2491  		invalidate_inode_pages2(file->f_mapping);
2492  
2493  		return generic_file_mmap(file, vma);
2494  	}
2495  
2496  	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
2497  		fuse_link_write_file(file);
2498  
2499  	file_accessed(file);
2500  	vma->vm_ops = &fuse_file_vm_ops;
2501  	return 0;
2502  }
2503  
2504  static int convert_fuse_file_lock(struct fuse_conn *fc,
2505  				  const struct fuse_file_lock *ffl,
2506  				  struct file_lock *fl)
2507  {
2508  	switch (ffl->type) {
2509  	case F_UNLCK:
2510  		break;
2511  
2512  	case F_RDLCK:
2513  	case F_WRLCK:
2514  		if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX ||
2515  		    ffl->end < ffl->start)
2516  			return -EIO;
2517  
2518  		fl->fl_start = ffl->start;
2519  		fl->fl_end = ffl->end;
2520  
2521  		/*
2522  		 * Convert pid into init's pid namespace.  The locks API will
2523  		 * translate it into the caller's pid namespace.
2524  		 */
2525  		rcu_read_lock();
2526  		fl->fl_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns);
2527  		rcu_read_unlock();
2528  		break;
2529  
2530  	default:
2531  		return -EIO;
2532  	}
2533  	fl->fl_type = ffl->type;
2534  	return 0;
2535  }
2536  
2537  static void fuse_lk_fill(struct fuse_args *args, struct file *file,
2538  			 const struct file_lock *fl, int opcode, pid_t pid,
2539  			 int flock, struct fuse_lk_in *inarg)
2540  {
2541  	struct inode *inode = file_inode(file);
2542  	struct fuse_conn *fc = get_fuse_conn(inode);
2543  	struct fuse_file *ff = file->private_data;
2544  
2545  	memset(inarg, 0, sizeof(*inarg));
2546  	inarg->fh = ff->fh;
2547  	inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
2548  	inarg->lk.start = fl->fl_start;
2549  	inarg->lk.end = fl->fl_end;
2550  	inarg->lk.type = fl->fl_type;
2551  	inarg->lk.pid = pid;
2552  	if (flock)
2553  		inarg->lk_flags |= FUSE_LK_FLOCK;
2554  	args->opcode = opcode;
2555  	args->nodeid = get_node_id(inode);
2556  	args->in_numargs = 1;
2557  	args->in_args[0].size = sizeof(*inarg);
2558  	args->in_args[0].value = inarg;
2559  }
2560  
2561  static int fuse_getlk(struct file *file, struct file_lock *fl)
2562  {
2563  	struct inode *inode = file_inode(file);
2564  	struct fuse_mount *fm = get_fuse_mount(inode);
2565  	FUSE_ARGS(args);
2566  	struct fuse_lk_in inarg;
2567  	struct fuse_lk_out outarg;
2568  	int err;
2569  
2570  	fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
2571  	args.out_numargs = 1;
2572  	args.out_args[0].size = sizeof(outarg);
2573  	args.out_args[0].value = &outarg;
2574  	err = fuse_simple_request(fm, &args);
2575  	if (!err)
2576  		err = convert_fuse_file_lock(fm->fc, &outarg.lk, fl);
2577  
2578  	return err;
2579  }
2580  
2581  static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
2582  {
2583  	struct inode *inode = file_inode(file);
2584  	struct fuse_mount *fm = get_fuse_mount(inode);
2585  	FUSE_ARGS(args);
2586  	struct fuse_lk_in inarg;
2587  	int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
2588  	struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
2589  	pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns);
2590  	int err;
2591  
2592  	if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
2593  		/* NLM needs asynchronous locks, which we don't support yet */
2594  		return -ENOLCK;
2595  	}
2596  
2597  	/* Unlock on close is handled by the flush method */
2598  	if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
2599  		return 0;
2600  
2601  	fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
2602  	err = fuse_simple_request(fm, &args);
2603  
2604  	/* locking is restartable */
2605  	if (err == -EINTR)
2606  		err = -ERESTARTSYS;
2607  
2608  	return err;
2609  }
2610  
2611  static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
2612  {
2613  	struct inode *inode = file_inode(file);
2614  	struct fuse_conn *fc = get_fuse_conn(inode);
2615  	int err;
2616  
2617  	if (cmd == F_CANCELLK) {
2618  		err = 0;
2619  	} else if (cmd == F_GETLK) {
2620  		if (fc->no_lock) {
2621  			posix_test_lock(file, fl);
2622  			err = 0;
2623  		} else
2624  			err = fuse_getlk(file, fl);
2625  	} else {
2626  		if (fc->no_lock)
2627  			err = posix_lock_file(file, fl, NULL);
2628  		else
2629  			err = fuse_setlk(file, fl, 0);
2630  	}
2631  	return err;
2632  }
2633  
2634  static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
2635  {
2636  	struct inode *inode = file_inode(file);
2637  	struct fuse_conn *fc = get_fuse_conn(inode);
2638  	int err;
2639  
2640  	if (fc->no_flock) {
2641  		err = locks_lock_file_wait(file, fl);
2642  	} else {
2643  		struct fuse_file *ff = file->private_data;
2644  
2645  		/* emulate flock with POSIX locks */
2646  		ff->flock = true;
2647  		err = fuse_setlk(file, fl, 1);
2648  	}
2649  
2650  	return err;
2651  }
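
/*
 * Illustrative userspace sketch (not part of fs/fuse/file.c): the two lock
 * flavours handled above, as seen from an application.  A POSIX lock goes
 * through fuse_file_lock() and a BSD lock through fuse_file_flock(); whether
 * either is forwarded to the server (FUSE_SETLK/FUSE_SETLKW, with
 * FUSE_LK_FLOCK for flock) or handled locally depends on the no_lock and
 * no_flock connection flags.
 *
 *	#include <fcntl.h>
 *	#include <sys/file.h>
 *	#include <unistd.h>
 *
 *	static int lock_both_ways(int fd)
 *	{
 *		struct flock fl = {
 *			.l_type   = F_WRLCK,
 *			.l_whence = SEEK_SET,
 *			.l_start  = 0,
 *			.l_len    = 0,			// 0 means "to EOF"
 *		};
 *
 *		if (fcntl(fd, F_SETLKW, &fl) == -1)	// POSIX record lock
 *			return -1;
 *		return flock(fd, LOCK_EX);		// BSD whole-file lock
 *	}
 */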
2652  
2653  static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
2654  {
2655  	struct inode *inode = mapping->host;
2656  	struct fuse_mount *fm = get_fuse_mount(inode);
2657  	FUSE_ARGS(args);
2658  	struct fuse_bmap_in inarg;
2659  	struct fuse_bmap_out outarg;
2660  	int err;
2661  
2662  	if (!inode->i_sb->s_bdev || fm->fc->no_bmap)
2663  		return 0;
2664  
2665  	memset(&inarg, 0, sizeof(inarg));
2666  	inarg.block = block;
2667  	inarg.blocksize = inode->i_sb->s_blocksize;
2668  	args.opcode = FUSE_BMAP;
2669  	args.nodeid = get_node_id(inode);
2670  	args.in_numargs = 1;
2671  	args.in_args[0].size = sizeof(inarg);
2672  	args.in_args[0].value = &inarg;
2673  	args.out_numargs = 1;
2674  	args.out_args[0].size = sizeof(outarg);
2675  	args.out_args[0].value = &outarg;
2676  	err = fuse_simple_request(fm, &args);
2677  	if (err == -ENOSYS)
2678  		fm->fc->no_bmap = 1;
2679  
2680  	return err ? 0 : outarg.block;
2681  }
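
/*
 * Illustrative userspace sketch (not part of fs/fuse/file.c): FIBMAP is the
 * common way to reach ->bmap().  It is only meaningful on a block-device
 * backed ("fuseblk") mount whose server implements FUSE_BMAP; in every other
 * case fuse_bmap() above returns 0.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>		// FIBMAP
 *
 *	static int logical_to_physical(int fd, int logical_block)
 *	{
 *		int blk = logical_block;
 *
 *		if (ioctl(fd, FIBMAP, &blk) == -1)	// typically needs CAP_SYS_RAWIO
 *			return -1;
 *		return blk;				// physical block, 0 if unmapped
 *	}
 */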
2682  
2683  static loff_t fuse_lseek(struct file *file, loff_t offset, int whence)
2684  {
2685  	struct inode *inode = file->f_mapping->host;
2686  	struct fuse_mount *fm = get_fuse_mount(inode);
2687  	struct fuse_file *ff = file->private_data;
2688  	FUSE_ARGS(args);
2689  	struct fuse_lseek_in inarg = {
2690  		.fh = ff->fh,
2691  		.offset = offset,
2692  		.whence = whence
2693  	};
2694  	struct fuse_lseek_out outarg;
2695  	int err;
2696  
2697  	if (fm->fc->no_lseek)
2698  		goto fallback;
2699  
2700  	args.opcode = FUSE_LSEEK;
2701  	args.nodeid = ff->nodeid;
2702  	args.in_numargs = 1;
2703  	args.in_args[0].size = sizeof(inarg);
2704  	args.in_args[0].value = &inarg;
2705  	args.out_numargs = 1;
2706  	args.out_args[0].size = sizeof(outarg);
2707  	args.out_args[0].value = &outarg;
2708  	err = fuse_simple_request(fm, &args);
2709  	if (err) {
2710  		if (err == -ENOSYS) {
2711  			fm->fc->no_lseek = 1;
2712  			goto fallback;
2713  		}
2714  		return err;
2715  	}
2716  
2717  	return vfs_setpos(file, outarg.offset, inode->i_sb->s_maxbytes);
2718  
2719  fallback:
2720  	err = fuse_update_attributes(inode, file, STATX_SIZE);
2721  	if (!err)
2722  		return generic_file_llseek(file, offset, whence);
2723  	else
2724  		return err;
2725  }
2726  
2727  static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
2728  {
2729  	loff_t retval;
2730  	struct inode *inode = file_inode(file);
2731  
2732  	switch (whence) {
2733  	case SEEK_SET:
2734  	case SEEK_CUR:
2735  		 /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
2736  		retval = generic_file_llseek(file, offset, whence);
2737  		break;
2738  	case SEEK_END:
2739  		inode_lock(inode);
2740  		retval = fuse_update_attributes(inode, file, STATX_SIZE);
2741  		if (!retval)
2742  			retval = generic_file_llseek(file, offset, whence);
2743  		inode_unlock(inode);
2744  		break;
2745  	case SEEK_HOLE:
2746  	case SEEK_DATA:
2747  		inode_lock(inode);
2748  		retval = fuse_lseek(file, offset, whence);
2749  		inode_unlock(inode);
2750  		break;
2751  	default:
2752  		retval = -EINVAL;
2753  	}
2754  
2755  	return retval;
2756  }
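
/*
 * Illustrative userspace sketch (not part of fs/fuse/file.c): probing for
 * data and holes with lseek(2), which ends up in fuse_lseek() above for
 * SEEK_DATA/SEEK_HOLE.  If the server answers FUSE_LSEEK with ENOSYS, the
 * kernel refreshes the size attribute and falls back to
 * generic_file_llseek().
 *
 *	#define _GNU_SOURCE		// for SEEK_DATA and SEEK_HOLE
 *	#include <unistd.h>
 *
 *	static off_t first_data_offset(int fd)
 *	{
 *		// returns -1 with errno == ENXIO when there is no data past offset 0
 *		return lseek(fd, 0, SEEK_DATA);
 *	}
 */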
2757  
2758  /*
2759   * All files which have been polled are linked to the RB tree
2760   * fuse_conn->polled_files, which is indexed by kh.  Walk the tree and
2761   * find the matching one.
2762   */
2763  static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh,
2764  					      struct rb_node **parent_out)
2765  {
2766  	struct rb_node **link = &fc->polled_files.rb_node;
2767  	struct rb_node *last = NULL;
2768  
2769  	while (*link) {
2770  		struct fuse_file *ff;
2771  
2772  		last = *link;
2773  		ff = rb_entry(last, struct fuse_file, polled_node);
2774  
2775  		if (kh < ff->kh)
2776  			link = &last->rb_left;
2777  		else if (kh > ff->kh)
2778  			link = &last->rb_right;
2779  		else
2780  			return link;
2781  	}
2782  
2783  	if (parent_out)
2784  		*parent_out = last;
2785  	return link;
2786  }
2787  
2788  /*
2789   * The file is about to be polled.  Make sure it's on the polled_files
2790   * RB tree.  Note that files once added to the polled_files tree are
2791   * not removed before the file is released.  This is because a file
2792   * polled once is likely to be polled again.
2793   */
2794  static void fuse_register_polled_file(struct fuse_conn *fc,
2795  				      struct fuse_file *ff)
2796  {
2797  	spin_lock(&fc->lock);
2798  	if (RB_EMPTY_NODE(&ff->polled_node)) {
2799  		struct rb_node **link, *parent;
2800  
2801  		link = fuse_find_polled_node(fc, ff->kh, &parent);
2802  		BUG_ON(*link);
2803  		rb_link_node(&ff->polled_node, parent, link);
2804  		rb_insert_color(&ff->polled_node, &fc->polled_files);
2805  	}
2806  	spin_unlock(&fc->lock);
2807  }
2808  
2809  __poll_t fuse_file_poll(struct file *file, poll_table *wait)
2810  {
2811  	struct fuse_file *ff = file->private_data;
2812  	struct fuse_mount *fm = ff->fm;
2813  	struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
2814  	struct fuse_poll_out outarg;
2815  	FUSE_ARGS(args);
2816  	int err;
2817  
2818  	if (fm->fc->no_poll)
2819  		return DEFAULT_POLLMASK;
2820  
2821  	poll_wait(file, &ff->poll_wait, wait);
2822  	inarg.events = mangle_poll(poll_requested_events(wait));
2823  
2824  	/*
2825  	 * Ask for notification iff there's someone waiting for it.
2826  	 * The client may ignore the flag and always notify.
2827  	 */
2828  	if (waitqueue_active(&ff->poll_wait)) {
2829  		inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
2830  		fuse_register_polled_file(fm->fc, ff);
2831  	}
2832  
2833  	args.opcode = FUSE_POLL;
2834  	args.nodeid = ff->nodeid;
2835  	args.in_numargs = 1;
2836  	args.in_args[0].size = sizeof(inarg);
2837  	args.in_args[0].value = &inarg;
2838  	args.out_numargs = 1;
2839  	args.out_args[0].size = sizeof(outarg);
2840  	args.out_args[0].value = &outarg;
2841  	err = fuse_simple_request(fm, &args);
2842  
2843  	if (!err)
2844  		return demangle_poll(outarg.revents);
2845  	if (err == -ENOSYS) {
2846  		fm->fc->no_poll = 1;
2847  		return DEFAULT_POLLMASK;
2848  	}
2849  	return EPOLLERR;
2850  }
2851  EXPORT_SYMBOL_GPL(fuse_file_poll);
2852  
2853  /*
2854   * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
2855   * wakes up the poll waiters.
2856   */
2857  int fuse_notify_poll_wakeup(struct fuse_conn *fc,
2858  			    struct fuse_notify_poll_wakeup_out *outarg)
2859  {
2860  	u64 kh = outarg->kh;
2861  	struct rb_node **link;
2862  
2863  	spin_lock(&fc->lock);
2864  
2865  	link = fuse_find_polled_node(fc, kh, NULL);
2866  	if (*link) {
2867  		struct fuse_file *ff;
2868  
2869  		ff = rb_entry(*link, struct fuse_file, polled_node);
2870  		wake_up_interruptible_sync(&ff->poll_wait);
2871  	}
2872  
2873  	spin_unlock(&fc->lock);
2874  	return 0;
2875  }
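
/*
 * Illustrative userspace sketch (not part of fs/fuse/file.c): an application
 * blocking in poll(2) on a FUSE file.  Each poll sends FUSE_POLL to the
 * server; if someone is actually waiting, the file is also registered in the
 * polled_files tree so that a later FUSE_NOTIFY_POLL notification carrying
 * the same kh can wake the waiter via fuse_notify_poll_wakeup() above.
 *
 *	#include <poll.h>
 *
 *	static int wait_until_readable(int fd)
 *	{
 *		struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *		if (poll(&pfd, 1, -1) == -1)	// blocks until the wakeup above
 *			return -1;
 *		return pfd.revents & POLLIN;
 *	}
 */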
2876  
2877  static void fuse_do_truncate(struct file *file)
2878  {
2879  	struct inode *inode = file->f_mapping->host;
2880  	struct iattr attr;
2881  
2882  	attr.ia_valid = ATTR_SIZE;
2883  	attr.ia_size = i_size_read(inode);
2884  
2885  	attr.ia_file = file;
2886  	attr.ia_valid |= ATTR_FILE;
2887  
2888  	fuse_do_setattr(file_dentry(file), &attr, file);
2889  }
2890  
2891  static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
2892  {
2893  	return round_up(off, fc->max_pages << PAGE_SHIFT);
2894  }
2895  
2896  static ssize_t
2897  fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
2898  {
2899  	DECLARE_COMPLETION_ONSTACK(wait);
2900  	ssize_t ret = 0;
2901  	struct file *file = iocb->ki_filp;
2902  	struct fuse_file *ff = file->private_data;
2903  	loff_t pos = 0;
2904  	struct inode *inode;
2905  	loff_t i_size;
2906  	size_t count = iov_iter_count(iter), shortened = 0;
2907  	loff_t offset = iocb->ki_pos;
2908  	struct fuse_io_priv *io;
2909  
2910  	pos = offset;
2911  	inode = file->f_mapping->host;
2912  	i_size = i_size_read(inode);
2913  
2914  	if ((iov_iter_rw(iter) == READ) && (offset >= i_size))
2915  		return 0;
2916  
2917  	io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
2918  	if (!io)
2919  		return -ENOMEM;
2920  	spin_lock_init(&io->lock);
2921  	kref_init(&io->refcnt);
2922  	io->reqs = 1;
2923  	io->bytes = -1;
2924  	io->size = 0;
2925  	io->offset = offset;
2926  	io->write = (iov_iter_rw(iter) == WRITE);
2927  	io->err = 0;
2928  	/*
2929  	 * By default, we want to optimize all I/Os with async request
2930  	 * submission to the client filesystem if supported.
2931  	 */
2932  	io->async = ff->fm->fc->async_dio;
2933  	io->iocb = iocb;
2934  	io->blocking = is_sync_kiocb(iocb);
2935  
2936  	/* optimization for short read */
2937  	if (io->async && !io->write && offset + count > i_size) {
2938  		iov_iter_truncate(iter, fuse_round_up(ff->fm->fc, i_size - offset));
2939  		shortened = count - iov_iter_count(iter);
2940  		count -= shortened;
2941  	}
2942  
2943  	/*
2944  	 * We cannot asynchronously extend the size of a file.
2945  	 * In that case the aio will behave exactly like sync io.
2946  	 */
2947  	if ((offset + count > i_size) && io->write)
2948  		io->blocking = true;
2949  
2950  	if (io->async && io->blocking) {
2951  		/*
2952  		 * Additional reference to keep io around after
2953  		 * calling fuse_aio_complete()
2954  		 */
2955  		kref_get(&io->refcnt);
2956  		io->done = &wait;
2957  	}
2958  
2959  	if (iov_iter_rw(iter) == WRITE) {
2960  		ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE);
2961  		fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
2962  	} else {
2963  		ret = __fuse_direct_read(io, iter, &pos);
2964  	}
2965  	iov_iter_reexpand(iter, iov_iter_count(iter) + shortened);
2966  
2967  	if (io->async) {
2968  		bool blocking = io->blocking;
2969  
2970  		fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
2971  
2972  		/* we have a non-extending, async request, so return */
2973  		if (!blocking)
2974  			return -EIOCBQUEUED;
2975  
2976  		wait_for_completion(&wait);
2977  		ret = fuse_get_res_by_io(io);
2978  	}
2979  
2980  	kref_put(&io->refcnt, fuse_io_release);
2981  
2982  	if (iov_iter_rw(iter) == WRITE) {
2983  		fuse_write_update_attr(inode, pos, ret);
2984  		/* For extending writes we already hold exclusive lock */
2985  		if (ret < 0 && offset + count > i_size)
2986  			fuse_do_truncate(file);
2987  	}
2988  
2989  	return ret;
2990  }
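
/*
 * Illustrative userspace sketch (not part of fs/fuse/file.c): an O_DIRECT
 * read, which is what routes I/O through fuse_direct_IO() above.  The
 * 4096-byte alignment is an assumption for the example; the real requirement
 * depends on the filesystem and device.
 *
 *	#define _GNU_SOURCE		// for O_DIRECT
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	static ssize_t direct_read(const char *path)
 *	{
 *		void *buf;
 *		ssize_t n = -1;
 *		int fd = open(path, O_RDONLY | O_DIRECT);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (posix_memalign(&buf, 4096, 4096) == 0) {
 *			// may be served by async FUSE requests when async_dio is set
 *			n = pread(fd, buf, 4096, 0);
 *			free(buf);
 *		}
 *		close(fd);
 *		return n;
 *	}
 */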
2991  
2992  static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
2993  {
2994  	int err = filemap_write_and_wait_range(inode->i_mapping, start, LLONG_MAX);
2995  
2996  	if (!err)
2997  		fuse_sync_writes(inode);
2998  
2999  	return err;
3000  }
3001  
3002  static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
3003  				loff_t length)
3004  {
3005  	struct fuse_file *ff = file->private_data;
3006  	struct inode *inode = file_inode(file);
3007  	struct fuse_inode *fi = get_fuse_inode(inode);
3008  	struct fuse_mount *fm = ff->fm;
3009  	FUSE_ARGS(args);
3010  	struct fuse_fallocate_in inarg = {
3011  		.fh = ff->fh,
3012  		.offset = offset,
3013  		.length = length,
3014  		.mode = mode
3015  	};
3016  	int err;
3017  	bool block_faults = FUSE_IS_DAX(inode) &&
3018  		(!(mode & FALLOC_FL_KEEP_SIZE) ||
3019  		 (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)));
3020  
3021  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
3022  		     FALLOC_FL_ZERO_RANGE))
3023  		return -EOPNOTSUPP;
3024  
3025  	if (fm->fc->no_fallocate)
3026  		return -EOPNOTSUPP;
3027  
3028  	inode_lock(inode);
3029  	if (block_faults) {
3030  		filemap_invalidate_lock(inode->i_mapping);
3031  		err = fuse_dax_break_layouts(inode, 0, 0);
3032  		if (err)
3033  			goto out;
3034  	}
3035  
3036  	if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
3037  		loff_t endbyte = offset + length - 1;
3038  
3039  		err = fuse_writeback_range(inode, offset, endbyte);
3040  		if (err)
3041  			goto out;
3042  	}
3043  
3044  	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
3045  	    offset + length > i_size_read(inode)) {
3046  		err = inode_newsize_ok(inode, offset + length);
3047  		if (err)
3048  			goto out;
3049  	}
3050  
3051  	err = file_modified(file);
3052  	if (err)
3053  		goto out;
3054  
3055  	if (!(mode & FALLOC_FL_KEEP_SIZE))
3056  		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
3057  
3058  	args.opcode = FUSE_FALLOCATE;
3059  	args.nodeid = ff->nodeid;
3060  	args.in_numargs = 1;
3061  	args.in_args[0].size = sizeof(inarg);
3062  	args.in_args[0].value = &inarg;
3063  	err = fuse_simple_request(fm, &args);
3064  	if (err == -ENOSYS) {
3065  		fm->fc->no_fallocate = 1;
3066  		err = -EOPNOTSUPP;
3067  	}
3068  	if (err)
3069  		goto out;
3070  
3071  	/* we could have extended the file */
3072  	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
3073  		if (fuse_write_update_attr(inode, offset + length, length))
3074  			file_update_time(file);
3075  	}
3076  
3077  	if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
3078  		truncate_pagecache_range(inode, offset, offset + length - 1);
3079  
3080  	fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
3081  
3082  out:
3083  	if (!(mode & FALLOC_FL_KEEP_SIZE))
3084  		clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
3085  
3086  	if (block_faults)
3087  		filemap_invalidate_unlock(inode->i_mapping);
3088  
3089  	inode_unlock(inode);
3090  
3091  	fuse_flush_time_update(inode);
3092  
3093  	return err;
3094  }
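
/*
 * Illustrative userspace sketch (not part of fs/fuse/file.c): punching a
 * hole with fallocate(2).  The modes accepted above are combinations of
 * KEEP_SIZE, PUNCH_HOLE and ZERO_RANGE; anything else, or a server that
 * answers FUSE_FALLOCATE with ENOSYS, results in EOPNOTSUPP.
 *
 *	#define _GNU_SOURCE		// for fallocate() and FALLOC_FL_*
 *	#include <fcntl.h>
 *
 *	static int punch_hole(int fd, off_t off, off_t len)
 *	{
 *		// PUNCH_HOLE must be combined with KEEP_SIZE
 *		return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *				 off, len);
 *	}
 */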
3095  
3096  static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
3097  				      struct file *file_out, loff_t pos_out,
3098  				      size_t len, unsigned int flags)
3099  {
3100  	struct fuse_file *ff_in = file_in->private_data;
3101  	struct fuse_file *ff_out = file_out->private_data;
3102  	struct inode *inode_in = file_inode(file_in);
3103  	struct inode *inode_out = file_inode(file_out);
3104  	struct fuse_inode *fi_out = get_fuse_inode(inode_out);
3105  	struct fuse_mount *fm = ff_in->fm;
3106  	struct fuse_conn *fc = fm->fc;
3107  	FUSE_ARGS(args);
3108  	struct fuse_copy_file_range_in inarg = {
3109  		.fh_in = ff_in->fh,
3110  		.off_in = pos_in,
3111  		.nodeid_out = ff_out->nodeid,
3112  		.fh_out = ff_out->fh,
3113  		.off_out = pos_out,
3114  		.len = len,
3115  		.flags = flags
3116  	};
3117  	struct fuse_write_out outarg;
3118  	ssize_t err;
3119  	/* mark unstable when write-back is not used, and file_out gets
3120  	 * extended */
3121  	bool is_unstable = (!fc->writeback_cache) &&
3122  			   ((pos_out + len) > inode_out->i_size);
3123  
3124  	if (fc->no_copy_file_range)
3125  		return -EOPNOTSUPP;
3126  
3127  	if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
3128  		return -EXDEV;
3129  
3130  	inode_lock(inode_in);
3131  	err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1);
3132  	inode_unlock(inode_in);
3133  	if (err)
3134  		return err;
3135  
3136  	inode_lock(inode_out);
3137  
3138  	err = file_modified(file_out);
3139  	if (err)
3140  		goto out;
3141  
3142  	/*
3143  	 * Write out dirty pages in the destination file before sending the COPY
3144  	 * request to userspace.  After the request is completed, truncate off
3145  	 * pages (including partial ones) from the cache that have been copied,
3146  	 * since these contain stale data at that point.
3147  	 *
3148  	 * This should be mostly correct, but if the COPY writes to partial
3149  	 * pages (at the start or end) and the parts not covered by the COPY are
3150  	 * written through a memory map after calling fuse_writeback_range(),
3151  	 * then these partial page modifications will be lost on truncation.
3152  	 *
3153  	 * It is unlikely that someone would rely on such mixed style
3154  	 * modifications.  Yet this does give less guarantees than if the
3155  	 * copying was performed with write(2).
3156  	 *
3157  	 * To fix this a mapping->invalidate_lock could be used to prevent new
3158  	 * faults while the copy is ongoing.
3159  	 */
3160  	err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
3161  	if (err)
3162  		goto out;
3163  
3164  	if (is_unstable)
3165  		set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
3166  
3167  	args.opcode = FUSE_COPY_FILE_RANGE;
3168  	args.nodeid = ff_in->nodeid;
3169  	args.in_numargs = 1;
3170  	args.in_args[0].size = sizeof(inarg);
3171  	args.in_args[0].value = &inarg;
3172  	args.out_numargs = 1;
3173  	args.out_args[0].size = sizeof(outarg);
3174  	args.out_args[0].value = &outarg;
3175  	err = fuse_simple_request(fm, &args);
3176  	if (err == -ENOSYS) {
3177  		fc->no_copy_file_range = 1;
3178  		err = -EOPNOTSUPP;
3179  	}
3180  	if (err)
3181  		goto out;
3182  
3183  	truncate_inode_pages_range(inode_out->i_mapping,
3184  				   ALIGN_DOWN(pos_out, PAGE_SIZE),
3185  				   ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
3186  
3187  	file_update_time(file_out);
3188  	fuse_write_update_attr(inode_out, pos_out + outarg.size, outarg.size);
3189  
3190  	err = outarg.size;
3191  out:
3192  	if (is_unstable)
3193  		clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
3194  
3195  	inode_unlock(inode_out);
3196  	file_accessed(file_in);
3197  
3198  	fuse_flush_time_update(inode_out);
3199  
3200  	return err;
3201  }
3202  
3203  static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
3204  				    struct file *dst_file, loff_t dst_off,
3205  				    size_t len, unsigned int flags)
3206  {
3207  	ssize_t ret;
3208  
3209  	ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off,
3210  				     len, flags);
3211  
3212  	if (ret == -EOPNOTSUPP || ret == -EXDEV)
3213  		ret = generic_copy_file_range(src_file, src_off, dst_file,
3214  					      dst_off, len, flags);
3215  	return ret;
3216  }
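
/*
 * Illustrative userspace sketch (not part of fs/fuse/file.c): copying a
 * range between two files with copy_file_range(2).  If the server does not
 * implement FUSE_COPY_FILE_RANGE, or the two files are on different
 * superblocks, the wrapper above falls back to generic_copy_file_range().
 *
 *	#define _GNU_SOURCE		// for copy_file_range()
 *	#include <unistd.h>
 *
 *	static ssize_t copy_chunk(int fd_in, int fd_out, size_t len)
 *	{
 *		// NULL offsets: use and advance both files' current offsets
 *		return copy_file_range(fd_in, NULL, fd_out, NULL, len, 0);
 *	}
 */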
3217  
3218  static const struct file_operations fuse_file_operations = {
3219  	.llseek		= fuse_file_llseek,
3220  	.read_iter	= fuse_file_read_iter,
3221  	.write_iter	= fuse_file_write_iter,
3222  	.mmap		= fuse_file_mmap,
3223  	.open		= fuse_open,
3224  	.flush		= fuse_flush,
3225  	.release	= fuse_release,
3226  	.fsync		= fuse_fsync,
3227  	.lock		= fuse_file_lock,
3228  	.get_unmapped_area = thp_get_unmapped_area,
3229  	.flock		= fuse_file_flock,
3230  	.splice_read	= filemap_splice_read,
3231  	.splice_write	= iter_file_splice_write,
3232  	.unlocked_ioctl	= fuse_file_ioctl,
3233  	.compat_ioctl	= fuse_file_compat_ioctl,
3234  	.poll		= fuse_file_poll,
3235  	.fallocate	= fuse_file_fallocate,
3236  	.copy_file_range = fuse_copy_file_range,
3237  };
3238  
3239  static const struct address_space_operations fuse_file_aops  = {
3240  	.read_folio	= fuse_read_folio,
3241  	.readahead	= fuse_readahead,
3242  	.writepage	= fuse_writepage,
3243  	.writepages	= fuse_writepages,
3244  	.launder_folio	= fuse_launder_folio,
3245  	.dirty_folio	= filemap_dirty_folio,
3246  	.bmap		= fuse_bmap,
3247  	.direct_IO	= fuse_direct_IO,
3248  	.write_begin	= fuse_write_begin,
3249  	.write_end	= fuse_write_end,
3250  };
3251  
3252  void fuse_init_file_inode(struct inode *inode, unsigned int flags)
3253  {
3254  	struct fuse_inode *fi = get_fuse_inode(inode);
3255  
3256  	inode->i_fop = &fuse_file_operations;
3257  	inode->i_data.a_ops = &fuse_file_aops;
3258  
3259  	INIT_LIST_HEAD(&fi->write_files);
3260  	INIT_LIST_HEAD(&fi->queued_writes);
3261  	fi->writectr = 0;
3262  	init_waitqueue_head(&fi->page_waitq);
3263  	fi->writepages = RB_ROOT;
3264  
3265  	if (IS_ENABLED(CONFIG_FUSE_DAX))
3266  		fuse_dax_inode_init(inode, flags);
3267  }
3268