/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/swap.h>
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/fs.h>

static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
				      struct fuse_page_desc **desc)
{
	struct page **pages;

	pages = kzalloc(npages * (sizeof(struct page *) +
				  sizeof(struct fuse_page_desc)), flags);
	*desc = (void *) (pages + npages);

	return pages;
}

static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
			  int opcode, struct fuse_open_out *outargp)
{
	struct fuse_open_in inarg;
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
	if (!fm->fc->atomic_o_trunc)
		inarg.flags &= ~O_TRUNC;

	if (fm->fc->handle_killpriv_v2 &&
	    (inarg.flags & O_TRUNC) && !capable(CAP_FSETID)) {
		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
	}

	args.opcode = opcode;
	args.nodeid = nodeid;
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(*outargp);
	args.out_args[0].value = outargp;

	return fuse_simple_request(fm, &args);
}

struct fuse_release_args {
	struct fuse_args args;
	struct fuse_release_in inarg;
	struct inode *inode;
};

struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
{
	struct fuse_file *ff;

	ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL_ACCOUNT);
	if (unlikely(!ff))
		return NULL;

	ff->fm = fm;
	ff->release_args = kzalloc(sizeof(*ff->release_args),
				   GFP_KERNEL_ACCOUNT);
	if (!ff->release_args) {
		kfree(ff);
		return NULL;
	}

	INIT_LIST_HEAD(&ff->write_entry);
	mutex_init(&ff->readdir.lock);
	refcount_set(&ff->count, 1);
	RB_CLEAR_NODE(&ff->polled_node);
	init_waitqueue_head(&ff->poll_wait);

	ff->kh = atomic64_inc_return(&fm->fc->khctr);

	return ff;
}

void fuse_file_free(struct fuse_file *ff)
{
	kfree(ff->release_args);
	mutex_destroy(&ff->readdir.lock);
	kfree(ff);
}

static struct fuse_file *fuse_file_get(struct fuse_file *ff)
{
	refcount_inc(&ff->count);
	return ff;
}

static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
			     int error)
{
	struct fuse_release_args *ra = container_of(args, typeof(*ra), args);

	iput(ra->inode);
	kfree(ra);
}

static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
{
	if (refcount_dec_and_test(&ff->count)) {
		struct fuse_args *args = &ff->release_args->args;

		if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
			/* Do nothing when client does not implement 'open' */
			fuse_release_end(ff->fm, args, 0);
		} else if (sync) {
			fuse_simple_request(ff->fm, args);
			fuse_release_end(ff->fm, args, 0);
		} else {
			args->end = fuse_release_end;
			if (fuse_simple_background(ff->fm, args,
						   GFP_KERNEL | __GFP_NOFAIL))
				fuse_release_end(ff->fm, args, -ENOTCONN);
		}
		kfree(ff);
	}
}

int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
		 bool isdir)
{
	struct fuse_conn *fc = fm->fc;
	struct fuse_file *ff;
	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;

	ff = fuse_file_alloc(fm);
	if (!ff)
		return -ENOMEM;

	ff->fh = 0;
	/* Default for no-open */
	ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
	if (isdir ? !fc->no_opendir : !fc->no_open) {
		struct fuse_open_out outarg;
		int err;

		err = fuse_send_open(fm, nodeid, file, opcode, &outarg);
		if (!err) {
			ff->fh = outarg.fh;
			ff->open_flags = outarg.open_flags;

		} else if (err != -ENOSYS) {
			fuse_file_free(ff);
			return err;
		} else {
			if (isdir)
				fc->no_opendir = 1;
			else
				fc->no_open = 1;
		}
	}

	if (isdir)
		ff->open_flags &= ~FOPEN_DIRECT_IO;

	ff->nodeid = nodeid;
	file->private_data = ff;

	return 0;
}
EXPORT_SYMBOL_GPL(fuse_do_open);

static void fuse_link_write_file(struct file *file)
{
	struct inode *inode = file_inode(file);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_file *ff = file->private_data;
	/*
	 * file may be written through mmap, so chain it onto the
	 * inode's write_files list
	 */
	spin_lock(&fi->lock);
	if (list_empty(&ff->write_entry))
		list_add(&ff->write_entry, &fi->write_files);
	spin_unlock(&fi->lock);
}

void fuse_finish_open(struct inode *inode, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
		invalidate_inode_pages2(inode->i_mapping);
	if (ff->open_flags & FOPEN_STREAM)
		stream_open(inode, file);
	else if (ff->open_flags & FOPEN_NONSEEKABLE)
		nonseekable_open(inode, file);
	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		spin_lock(&fi->lock);
		fi->attr_version = atomic64_inc_return(&fc->attr_version);
		i_size_write(inode, 0);
		spin_unlock(&fi->lock);
		fuse_invalidate_attr(inode);
		if (fc->writeback_cache)
			file_update_time(file);
	}
	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
		fuse_link_write_file(file);
}

int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_conn *fc = fm->fc;
	int err;
	bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
			      fc->atomic_o_trunc &&
			      fc->writeback_cache;
	bool dax_truncate = (file->f_flags & O_TRUNC) &&
			    fc->atomic_o_trunc && FUSE_IS_DAX(inode);

	if (fuse_is_bad(inode))
		return -EIO;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	if (is_wb_truncate || dax_truncate) {
		inode_lock(inode);
		fuse_set_nowrite(inode);
	}

	if (dax_truncate) {
		down_write(&get_fuse_inode(inode)->i_mmap_sem);
		err = fuse_dax_break_layouts(inode, 0, 0);
		if (err)
			goto out;
	}

	err = fuse_do_open(fm, get_node_id(inode), file, isdir);
	if (!err)
		fuse_finish_open(inode, file);

out:
	if (dax_truncate)
		up_write(&get_fuse_inode(inode)->i_mmap_sem);

	if (is_wb_truncate | dax_truncate) {
		fuse_release_nowrite(inode);
		inode_unlock(inode);
	}

	return err;
}

static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
				 int flags, int opcode)
{
	struct fuse_conn *fc = ff->fm->fc;
	struct fuse_release_args *ra = ff->release_args;

	/* Inode is NULL on error path of fuse_create_open() */
	if (likely(fi)) {
		spin_lock(&fi->lock);
		list_del(&ff->write_entry);
		spin_unlock(&fi->lock);
	}
	spin_lock(&fc->lock);
	if (!RB_EMPTY_NODE(&ff->polled_node))
		rb_erase(&ff->polled_node, &fc->polled_files);
	spin_unlock(&fc->lock);

	wake_up_interruptible_all(&ff->poll_wait);

	ra->inarg.fh = ff->fh;
	ra->inarg.flags = flags;
	ra->args.in_numargs = 1;
	ra->args.in_args[0].size = sizeof(struct fuse_release_in);
	ra->args.in_args[0].value = &ra->inarg;
	ra->args.opcode = opcode;
	ra->args.nodeid = ff->nodeid;
	ra->args.force = true;
	ra->args.nocreds = true;
}

void fuse_release_common(struct file *file, bool isdir)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	struct fuse_file *ff = file->private_data;
	struct fuse_release_args *ra = ff->release_args;
	int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;

	fuse_prepare_release(fi, ff, file->f_flags, opcode);

	if (ff->flock) {
		ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
		ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc,
							  (fl_owner_t) file);
	}
	/* Hold inode until release is finished */
	ra->inode = igrab(file_inode(file));

	/*
	 * Normally this will send the RELEASE request, however if
	 * some asynchronous READ or WRITE requests are outstanding,
	 * the sending will be delayed.
	 *
	 * Make the release synchronous if this is a fuseblk mount,
	 * synchronous RELEASE is allowed (and desirable) in this case
	 * because the server can be trusted not to screw up.
	 */
	fuse_file_put(ff, ff->fm->fc->destroy, isdir);
}

static int fuse_open(struct inode *inode, struct file *file)
{
	return fuse_open_common(inode, file, false);
}

static int fuse_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	/* see fuse_vma_close() for !writeback_cache case */
	if (fc->writeback_cache)
		write_inode_now(inode, 1);

	fuse_release_common(file, false);

	/* return value is ignored by VFS */
	return 0;
}

void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, int flags)
{
	WARN_ON(refcount_read(&ff->count) > 1);
	fuse_prepare_release(fi, ff, flags, FUSE_RELEASE);
	/*
	 * iput(NULL) is a no-op and since the refcount is 1 and everything's
	 * synchronous, we are fine with not doing igrab() here
	 */
	fuse_file_put(ff, true, false);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);

/*
 * Scramble the ID space with XTEA, so that the value of the files_struct
 * pointer is not exposed to userspace.
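 *
 * (Added note, grounded in the loop below: the 64-bit owner value is run
 * through 32 rounds of the XTEA block cipher keyed with the per-connection
 * fc->scramble_key, so the mapping is deterministic for a given connection.)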
 */
u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
{
	u32 *k = fc->scramble_key;
	u64 v = (unsigned long) id;
	u32 v0 = v;
	u32 v1 = v >> 32;
	u32 sum = 0;
	int i;

	for (i = 0; i < 32; i++) {
		v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
		sum += 0x9E3779B9;
		v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
	}

	return (u64) v0 + ((u64) v1 << 32);
}

struct fuse_writepage_args {
	struct fuse_io_args ia;
	struct rb_node writepages_entry;
	struct list_head queue_entry;
	struct fuse_writepage_args *next;
	struct inode *inode;
};

static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
						pgoff_t idx_from, pgoff_t idx_to)
{
	struct rb_node *n;

	n = fi->writepages.rb_node;

	while (n) {
		struct fuse_writepage_args *wpa;
		pgoff_t curr_index;

		wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
		WARN_ON(get_fuse_inode(wpa->inode) != fi);
		curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
		if (idx_from >= curr_index + wpa->ia.ap.num_pages)
			n = n->rb_right;
		else if (idx_to < curr_index)
			n = n->rb_left;
		else
			return wpa;
	}
	return NULL;
}

/*
 * Check if any page in a range is under writeback
 *
 * This is currently done by walking the list of writepage requests
 * for the inode, which can be pretty inefficient.
 */
static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
				    pgoff_t idx_to)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool found;

	spin_lock(&fi->lock);
	found = fuse_find_writeback(fi, idx_from, idx_to);
	spin_unlock(&fi->lock);

	return found;
}

static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
{
	return fuse_range_is_writeback(inode, index, index);
}

/*
 * Wait for page writeback to be completed.
 *
 * Since fuse doesn't rely on the VM writeback tracking, this has to
 * use some other means.
 */
static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
}

/*
 * Wait for all pending writepages on the inode to finish.
 *
 * This is currently done by blocking further writes with FUSE_NOWRITE
 * and waiting for all sent writes to complete.
 *
 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 * could conflict with truncation.
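 *
 * (Added note: fuse_set_nowrite() drives fi->writectr negative and sleeps
 * until all in-flight writepage requests have completed, while
 * fuse_release_nowrite() restores the counter and flushes any writes that
 * were queued in the meantime.)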
 */
static void fuse_sync_writes(struct inode *inode)
{
	fuse_set_nowrite(inode);
	fuse_release_nowrite(inode);
}

static int fuse_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_flush_in inarg;
	FUSE_ARGS(args);
	int err;

	if (fuse_is_bad(inode))
		return -EIO;

	err = write_inode_now(inode, 1);
	if (err)
		return err;

	inode_lock(inode);
	fuse_sync_writes(inode);
	inode_unlock(inode);

	err = filemap_check_errors(file->f_mapping);
	if (err)
		return err;

	err = 0;
	if (fm->fc->no_flush)
		goto inval_attr_out;

	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
	args.opcode = FUSE_FLUSH;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.force = true;

	err = fuse_simple_request(fm, &args);
	if (err == -ENOSYS) {
		fm->fc->no_flush = 1;
		err = 0;
	}

inval_attr_out:
	/*
	 * In memory i_blocks is not maintained by fuse, if writeback cache is
	 * enabled, i_blocks from cached attr may not be accurate.
	 */
	if (!err && fm->fc->writeback_cache)
		fuse_invalidate_attr(inode);
	return err;
}

int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
		      int datasync, int opcode)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_file *ff = file->private_data;
	FUSE_ARGS(args);
	struct fuse_fsync_in inarg;

	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0;
	args.opcode = opcode;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	return fuse_simple_request(fm, &args);
}

static int fuse_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;

	if (fuse_is_bad(inode))
		return -EIO;

	inode_lock(inode);

	/*
	 * Start writeback against all dirty pages of the inode, then
	 * wait for all outstanding writes, before sending the FSYNC
	 * request.
	 */
	err = file_write_and_wait_range(file, start, end);
	if (err)
		goto out;

	fuse_sync_writes(inode);

	/*
	 * Due to implementation of fuse writeback
	 * file_write_and_wait_range() does not catch errors.
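	 * (Added note: page writeback is completed on temporary pages before
	 * the server replies, so an error can only show up in the mapping's
	 * wb_err afterwards.)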
	 * We have to do this directly after fuse_sync_writes()
	 */
	err = file_check_and_advance_wb_err(file);
	if (err)
		goto out;

	err = sync_inode_metadata(inode, 1);
	if (err)
		goto out;

	if (fc->no_fsync)
		goto out;

	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNC);
	if (err == -ENOSYS) {
		fc->no_fsync = 1;
		err = 0;
	}
out:
	inode_unlock(inode);

	return err;
}

void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
			 size_t count, int opcode)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_args *args = &ia->ap.args;

	ia->read.in.fh = ff->fh;
	ia->read.in.offset = pos;
	ia->read.in.size = count;
	ia->read.in.flags = file->f_flags;
	args->opcode = opcode;
	args->nodeid = ff->nodeid;
	args->in_numargs = 1;
	args->in_args[0].size = sizeof(ia->read.in);
	args->in_args[0].value = &ia->read.in;
	args->out_argvar = true;
	args->out_numargs = 1;
	args->out_args[0].size = count;
}

static void fuse_release_user_pages(struct fuse_args_pages *ap,
				    bool should_dirty)
{
	unsigned int i;

	for (i = 0; i < ap->num_pages; i++) {
		if (should_dirty)
			set_page_dirty_lock(ap->pages[i]);
		put_page(ap->pages[i]);
	}
}

static void fuse_io_release(struct kref *kref)
{
	kfree(container_of(kref, struct fuse_io_priv, refcnt));
}

static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
{
	if (io->err)
		return io->err;

	if (io->bytes >= 0 && io->write)
		return -EIO;

	return io->bytes < 0 ? io->size : io->bytes;
}

/**
 * In case of short read, the caller sets 'pos' to the position of
 * actual end of fuse request in IO request. Otherwise, if bytes_requested
 * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1.
 *
 * An example:
 * User requested DIO read of 64K. It was split into two 32K fuse requests,
 * both submitted asynchronously. The first of them was ACKed by userspace as
 * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The
 * second request was ACKed as short, e.g. only 1K was read, resulting in
 * pos == 33K.
 *
 * Thus, when all fuse requests are completed, the minimal non-negative 'pos'
 * will be equal to the length of the longest contiguous fragment of
 * transferred data starting from the beginning of IO request.
 */
static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
{
	int left;

	spin_lock(&io->lock);
	if (err)
		io->err = io->err ?: err;
	else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes))
		io->bytes = pos;

	left = --io->reqs;
	if (!left && io->blocking)
		complete(io->done);
	spin_unlock(&io->lock);

	if (!left && !io->blocking) {
		ssize_t res = fuse_get_res_by_io(io);

		if (res >= 0) {
			struct inode *inode = file_inode(io->iocb->ki_filp);
			struct fuse_conn *fc = get_fuse_conn(inode);
			struct fuse_inode *fi = get_fuse_inode(inode);

			spin_lock(&fi->lock);
			fi->attr_version = atomic64_inc_return(&fc->attr_version);
			spin_unlock(&fi->lock);
		}

		io->iocb->ki_complete(io->iocb, res, 0);
	}

	kref_put(&io->refcnt, fuse_io_release);
}

static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
					  unsigned int npages)
{
	struct fuse_io_args *ia;

	ia = kzalloc(sizeof(*ia), GFP_KERNEL);
	if (ia) {
		ia->io = io;
		ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
						&ia->ap.descs);
		if (!ia->ap.pages) {
			kfree(ia);
			ia = NULL;
		}
	}
	return ia;
}

static void fuse_io_free(struct fuse_io_args *ia)
{
	kfree(ia->ap.pages);
	kfree(ia);
}

static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
				  int err)
{
	struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
	struct fuse_io_priv *io = ia->io;
	ssize_t pos = -1;

	fuse_release_user_pages(&ia->ap, io->should_dirty);

	if (err) {
		/* Nothing */
	} else if (io->write) {
		if (ia->write.out.size > ia->write.in.size) {
			err = -EIO;
		} else if (ia->write.in.size != ia->write.out.size) {
			pos = ia->write.in.offset - io->offset +
			      ia->write.out.size;
		}
	} else {
		u32 outsize = args->out_args[0].size;

		if (ia->read.in.size != outsize)
			pos = ia->read.in.offset - io->offset + outsize;
	}

	fuse_aio_complete(io, err, pos);
	fuse_io_free(ia);
}

static ssize_t fuse_async_req_send(struct fuse_mount *fm,
				   struct fuse_io_args *ia, size_t num_bytes)
{
	ssize_t err;
	struct fuse_io_priv *io = ia->io;

	spin_lock(&io->lock);
	kref_get(&io->refcnt);
	io->size += num_bytes;
	io->reqs++;
	spin_unlock(&io->lock);

	ia->ap.args.end = fuse_aio_complete_req;
	ia->ap.args.may_block = io->should_dirty;
	err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL);
	if (err)
		fuse_aio_complete_req(fm, &ia->ap.args, err);

	return num_bytes;
}

static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
			      fl_owner_t owner)
{
	struct file *file = ia->io->iocb->ki_filp;
	struct fuse_file *ff = file->private_data;
	struct fuse_mount *fm = ff->fm;

	fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
	if (owner != NULL) {
		ia->read.in.read_flags |= FUSE_READ_LOCKOWNER;
		ia->read.in.lock_owner = fuse_lock_owner_id(fm->fc, owner);
	}

	if (ia->io->async)
		return fuse_async_req_send(fm, ia, count);

	return fuse_simple_request(fm, &ia->ap.args);
}

static void fuse_read_update_size(struct inode *inode, loff_t size,
				  u64 attr_ver)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fi->lock);
	if (attr_ver == fi->attr_version && size < inode->i_size &&
	    !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		fi->attr_version = atomic64_inc_return(&fc->attr_version);
		i_size_write(inode, size);
	}
	spin_unlock(&fi->lock);
}

static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
			    struct fuse_args_pages *ap)
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (fc->writeback_cache) {
		/*
		 * A hole in a file. Some data after the hole are in page cache,
		 * but have not reached the client fs yet. So, the hole is not
		 * present there.
		 */
		int i;
		int start_idx = num_read >> PAGE_SHIFT;
		size_t off = num_read & (PAGE_SIZE - 1);

		for (i = start_idx; i < ap->num_pages; i++) {
			zero_user_segment(ap->pages[i], off, PAGE_SIZE);
			off = 0;
		}
	} else {
		loff_t pos = page_offset(ap->pages[0]) + num_read;
		fuse_read_update_size(inode, pos, attr_ver);
	}
}

static int fuse_do_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct fuse_mount *fm = get_fuse_mount(inode);
	loff_t pos = page_offset(page);
	struct fuse_page_desc desc = { .length = PAGE_SIZE };
	struct fuse_io_args ia = {
		.ap.args.page_zeroing = true,
		.ap.args.out_pages = true,
		.ap.num_pages = 1,
		.ap.pages = &page,
		.ap.descs = &desc,
	};
	ssize_t res;
	u64 attr_ver;

	/*
	 * Page writeback can extend beyond the lifetime of the
	 * page-cache page, so make sure we read a properly synced
	 * page.
	 */
	fuse_wait_on_page_writeback(inode, page->index);

	attr_ver = fuse_get_attr_version(fm->fc);

	/* Don't overflow end offset */
	if (pos + (desc.length - 1) == LLONG_MAX)
		desc.length--;

	fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
	res = fuse_simple_request(fm, &ia.ap.args);
	if (res < 0)
		return res;
	/*
	 * Short read means EOF. If file size is larger, truncate it
	 */
	if (res < desc.length)
		fuse_short_read(inode, attr_ver, res, &ia.ap);

	SetPageUptodate(page);

	return 0;
}

static int fuse_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int err;

	err = -EIO;
	if (fuse_is_bad(inode))
		goto out;

	err = fuse_do_readpage(file, page);
	fuse_invalidate_atime(inode);
out:
	unlock_page(page);
	return err;
}

static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
			       int err)
{
	int i;
	struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
	struct fuse_args_pages *ap = &ia->ap;
	size_t count = ia->read.in.size;
	size_t num_read = args->out_args[0].size;
	struct address_space *mapping = NULL;

	for (i = 0; mapping == NULL && i < ap->num_pages; i++)
		mapping = ap->pages[i]->mapping;

	if (mapping) {
		struct inode *inode = mapping->host;

		/*
		 * Short read means EOF. If file size is larger, truncate it
		 */
		if (!err && num_read < count)
			fuse_short_read(inode, ia->read.attr_ver, num_read, ap);

		fuse_invalidate_atime(inode);
	}

	for (i = 0; i < ap->num_pages; i++) {
		struct page *page = ap->pages[i];

		if (!err)
			SetPageUptodate(page);
		else
			SetPageError(page);
		unlock_page(page);
		put_page(page);
	}
	if (ia->ff)
		fuse_file_put(ia->ff, false, false);

	fuse_io_free(ia);
}

static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_mount *fm = ff->fm;
	struct fuse_args_pages *ap = &ia->ap;
	loff_t pos = page_offset(ap->pages[0]);
	size_t count = ap->num_pages << PAGE_SHIFT;
	ssize_t res;
	int err;

	ap->args.out_pages = true;
	ap->args.page_zeroing = true;
	ap->args.page_replace = true;

	/* Don't overflow end offset */
	if (pos + (count - 1) == LLONG_MAX) {
		count--;
		ap->descs[ap->num_pages - 1].length--;
	}
	WARN_ON((loff_t) (pos + count) < 0);

	fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
	ia->read.attr_ver = fuse_get_attr_version(fm->fc);
	if (fm->fc->async_read) {
		ia->ff = fuse_file_get(ff);
		ap->args.end = fuse_readpages_end;
		err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
		if (!err)
			return;
	} else {
		res = fuse_simple_request(fm, &ap->args);
		err = res < 0 ? res : 0;
	}
	fuse_readpages_end(fm, &ap->args, err);
}

static void fuse_readahead(struct readahead_control *rac)
{
	struct inode *inode = rac->mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	unsigned int i, max_pages, nr_pages = 0;

	if (fuse_is_bad(inode))
		return;

	max_pages = min_t(unsigned int, fc->max_pages,
			  fc->max_read / PAGE_SIZE);

	for (;;) {
		struct fuse_io_args *ia;
		struct fuse_args_pages *ap;

		nr_pages = readahead_count(rac) - nr_pages;
		if (nr_pages > max_pages)
			nr_pages = max_pages;
		if (nr_pages == 0)
			break;
		ia = fuse_io_alloc(NULL, nr_pages);
		if (!ia)
			return;
		ap = &ia->ap;
		nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
		for (i = 0; i < nr_pages; i++) {
			fuse_wait_on_page_writeback(inode,
						    readahead_index(rac) + i);
			ap->descs[i].length = PAGE_SIZE;
		}
		ap->num_pages = nr_pages;
		fuse_send_readpages(ia, rac->file);
	}
}

static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);

	/*
	 * In auto invalidate mode, always update attributes on read.
	 * Otherwise, only update if we attempt to read past EOF (to ensure
	 * i_size is up to date).
	 */
	if (fc->auto_inval_data ||
	    (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) {
		int err;
		err = fuse_update_attributes(inode, iocb->ki_filp);
		if (err)
			return err;
	}

	return generic_file_read_iter(iocb, to);
}

static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
				 loff_t pos, size_t count)
{
	struct fuse_args *args = &ia->ap.args;

	ia->write.in.fh = ff->fh;
	ia->write.in.offset = pos;
	ia->write.in.size = count;
	args->opcode = FUSE_WRITE;
	args->nodeid = ff->nodeid;
	args->in_numargs = 2;
	if (ff->fm->fc->minor < 9)
		args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
	else
		args->in_args[0].size = sizeof(ia->write.in);
	args->in_args[0].value = &ia->write.in;
	args->in_args[1].size = count;
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(ia->write.out);
	args->out_args[0].value = &ia->write.out;
}

static unsigned int fuse_write_flags(struct kiocb *iocb)
{
	unsigned int flags = iocb->ki_filp->f_flags;

	if (iocb->ki_flags & IOCB_DSYNC)
		flags |= O_DSYNC;
	if (iocb->ki_flags & IOCB_SYNC)
		flags |= O_SYNC;

	return flags;
}

static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
			       size_t count, fl_owner_t owner)
{
	struct kiocb *iocb = ia->io->iocb;
	struct file *file = iocb->ki_filp;
	struct fuse_file *ff = file->private_data;
	struct fuse_mount *fm = ff->fm;
	struct fuse_write_in *inarg = &ia->write.in;
	ssize_t err;

	fuse_write_args_fill(ia, ff, pos, count);
	inarg->flags = fuse_write_flags(iocb);
	if (owner != NULL) {
		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fm->fc, owner);
	}

	if (ia->io->async)
		return fuse_async_req_send(fm, ia, count);

	err = fuse_simple_request(fm, &ia->ap.args);
	if (!err && ia->write.out.size > count)
		err = -EIO;

	return err ?: ia->write.out.size;
}

bool fuse_write_update_size(struct inode *inode, loff_t pos)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool ret = false;

	spin_lock(&fi->lock);
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	if (pos > inode->i_size) {
		i_size_write(inode, pos);
		ret = true;
	}
	spin_unlock(&fi->lock);

	return ret;
}

static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
				     struct kiocb *iocb, struct inode *inode,
				     loff_t pos, size_t count)
{
	struct fuse_args_pages *ap = &ia->ap;
	struct file *file = iocb->ki_filp;
	struct fuse_file *ff = file->private_data;
	struct fuse_mount *fm = ff->fm;
	unsigned int offset, i;
	int err;

	for (i = 0; i < ap->num_pages; i++)
		fuse_wait_on_page_writeback(inode, ap->pages[i]->index);

	fuse_write_args_fill(ia, ff, pos, count);
	ia->write.in.flags = fuse_write_flags(iocb);
	if (fm->fc->handle_killpriv_v2 && !capable(CAP_FSETID))
		ia->write.in.write_flags |= FUSE_WRITE_KILL_SUIDGID;

	err = fuse_simple_request(fm, &ap->args);
	if (!err && ia->write.out.size > count)
		err = -EIO;

	offset = ap->descs[0].offset;
	count = ia->write.out.size;
	for (i = 0; i < ap->num_pages; i++) {
		struct page *page = ap->pages[i];

		if (!err && !offset && count >= PAGE_SIZE)
			SetPageUptodate(page);

		if (count > PAGE_SIZE - offset)
			count -= PAGE_SIZE - offset;
		else
			count = 0;
		offset = 0;

		unlock_page(page);
		put_page(page);
	}

	return err;
}

static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap,
				     struct address_space *mapping,
				     struct iov_iter *ii, loff_t pos,
				     unsigned int max_pages)
{
	struct fuse_conn *fc = get_fuse_conn(mapping->host);
	unsigned offset = pos & (PAGE_SIZE - 1);
	size_t count = 0;
	int err;

	ap->args.in_pages = true;
	ap->descs[0].offset = offset;

	do {
		size_t tmp;
		struct page *page;
		pgoff_t index = pos >> PAGE_SHIFT;
		size_t bytes = min_t(size_t, PAGE_SIZE - offset,
				     iov_iter_count(ii));

		bytes = min_t(size_t, bytes, fc->max_write - count);

 again:
		err = -EFAULT;
		if (iov_iter_fault_in_readable(ii, bytes))
			break;

		err = -ENOMEM;
		page = grab_cache_page_write_begin(mapping, index, 0);
		if (!page)
			break;

		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
		flush_dcache_page(page);

		iov_iter_advance(ii, tmp);
		if (!tmp) {
			unlock_page(page);
			put_page(page);
			bytes = min(bytes, iov_iter_single_seg_count(ii));
			goto again;
		}

		err = 0;
		ap->pages[ap->num_pages] = page;
		ap->descs[ap->num_pages].length = tmp;
		ap->num_pages++;

		count += tmp;
		pos += tmp;
		offset += tmp;
		if (offset == PAGE_SIZE)
			offset = 0;

		if (!fc->big_writes)
			break;
	} while (iov_iter_count(ii) && count < fc->max_write &&
		 ap->num_pages < max_pages && offset == 0);

	return count > 0 ? count : err;
}

static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
					 unsigned int max_pages)
{
	return min_t(unsigned int,
		     ((pos + len - 1) >> PAGE_SHIFT) -
		     (pos >> PAGE_SHIFT) + 1,
		     max_pages);
}

static ssize_t fuse_perform_write(struct kiocb *iocb,
				  struct address_space *mapping,
				  struct iov_iter *ii, loff_t pos)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	int err = 0;
	ssize_t res = 0;

	if (inode->i_size < pos + iov_iter_count(ii))
		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);

	do {
		ssize_t count;
		struct fuse_io_args ia = {};
		struct fuse_args_pages *ap = &ia.ap;
		unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
						      fc->max_pages);

		ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
		if (!ap->pages) {
			err = -ENOMEM;
			break;
		}

		count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages);
		if (count <= 0) {
			err = count;
		} else {
			err = fuse_send_write_pages(&ia, iocb, inode,
						    pos, count);
			if (!err) {
				size_t num_written = ia.write.out.size;

				res += num_written;
				pos += num_written;

				/* break out of the loop on short write */
				if (num_written != count)
					err = -EIO;
			}
		}
		kfree(ap->pages);
	} while (!err && iov_iter_count(ii));

	if (res > 0)
		fuse_write_update_size(inode, pos);

	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
	fuse_invalidate_attr(inode);

	return res > 0 ? res : err;
}

static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	ssize_t written = 0;
	ssize_t written_buffered = 0;
	struct inode *inode = mapping->host;
	ssize_t err;
	struct fuse_conn *fc = get_fuse_conn(inode);
	loff_t endbyte = 0;

	if (fc->writeback_cache) {
		/* Update size (EOF optimization) and mode (SUID clearing) */
		err = fuse_update_attributes(mapping->host, file);
		if (err)
			return err;

		if (fc->handle_killpriv_v2 &&
		    should_remove_suid(file_dentry(file))) {
			goto writethrough;
		}

		return generic_file_write_iter(iocb, from);
	}

writethrough:
	inode_lock(inode);

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);

	err = generic_write_checks(iocb, from);
	if (err <= 0)
		goto out;

	err = file_remove_privs(file);
	if (err)
		goto out;

	err = file_update_time(file);
	if (err)
		goto out;

	if (iocb->ki_flags & IOCB_DIRECT) {
		loff_t pos = iocb->ki_pos;
		written = generic_file_direct_write(iocb, from);
		if (written < 0 || !iov_iter_count(from))
			goto out;

		pos += written;

		written_buffered = fuse_perform_write(iocb, mapping, from, pos);
		if (written_buffered < 0) {
			err = written_buffered;
			goto out;
		}
		endbyte = pos + written_buffered - 1;

		err = filemap_write_and_wait_range(file->f_mapping, pos,
						   endbyte);
		if (err)
			goto out;

		invalidate_mapping_pages(file->f_mapping,
					 pos >> PAGE_SHIFT,
					 endbyte >> PAGE_SHIFT);

		written += written_buffered;
		iocb->ki_pos = pos + written_buffered;
	} else {
		written = fuse_perform_write(iocb, mapping, from, iocb->ki_pos);
		if (written >= 0)
			iocb->ki_pos += written;
	}
out:
	current->backing_dev_info = NULL;
	inode_unlock(inode);
	if (written > 0)
		written = generic_write_sync(iocb, written);

	return written ? written : err;
}

static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
					       unsigned int index,
					       unsigned int nr_pages)
{
	int i;

	for (i = index; i < index + nr_pages; i++)
		descs[i].length = PAGE_SIZE - descs[i].offset;
}

static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
{
	return (unsigned long)ii->iov->iov_base + ii->iov_offset;
}

static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
					size_t max_size)
{
	return min(iov_iter_single_seg_count(ii), max_size);
}

static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
			       size_t *nbytesp, int write,
			       unsigned int max_pages)
{
	size_t nbytes = 0; /* # bytes already packed in req */
	ssize_t ret = 0;

	/* Special case for kernel I/O: can copy directly into the buffer */
	if (iov_iter_is_kvec(ii)) {
		unsigned long user_addr = fuse_get_user_addr(ii);
		size_t frag_size = fuse_get_frag_size(ii, *nbytesp);

		if (write)
			ap->args.in_args[1].value = (void *) user_addr;
		else
			ap->args.out_args[0].value = (void *) user_addr;

		iov_iter_advance(ii, frag_size);
		*nbytesp = frag_size;
		return 0;
	}

	while (nbytes < *nbytesp && ap->num_pages < max_pages) {
		unsigned npages;
		size_t start;
		ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages],
					 *nbytesp - nbytes,
					 max_pages - ap->num_pages,
					 &start);
		if (ret < 0)
			break;

		iov_iter_advance(ii, ret);
		nbytes += ret;

		ret += start;
		npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;

		ap->descs[ap->num_pages].offset = start;
		fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);

		ap->num_pages += npages;
		ap->descs[ap->num_pages - 1].length -=
			(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
	}

	if (write)
		ap->args.in_pages = true;
	else
		ap->args.out_pages = true;

	*nbytesp = nbytes;

	return ret < 0 ? ret : 0;
}

ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
		       loff_t *ppos, int flags)
{
	int write = flags & FUSE_DIO_WRITE;
	int cuse = flags & FUSE_DIO_CUSE;
	struct file *file = io->iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fm->fc;
	size_t nmax = write ? fc->max_write : fc->max_read;
	loff_t pos = *ppos;
	size_t count = iov_iter_count(iter);
	pgoff_t idx_from = pos >> PAGE_SHIFT;
	pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT;
	ssize_t res = 0;
	int err = 0;
	struct fuse_io_args *ia;
	unsigned int max_pages;

	max_pages = iov_iter_npages(iter, fc->max_pages);
	ia = fuse_io_alloc(io, max_pages);
	if (!ia)
		return -ENOMEM;

	ia->io = io;
	if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
		if (!write)
			inode_lock(inode);
		fuse_sync_writes(inode);
		if (!write)
			inode_unlock(inode);
	}

	io->should_dirty = !write && iter_is_iovec(iter);
	while (count) {
		ssize_t nres;
		fl_owner_t owner = current->files;
		size_t nbytes = min(count, nmax);

		err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
					  max_pages);
		if (err && !nbytes)
			break;

		if (write) {
			if (!capable(CAP_FSETID))
				ia->write.in.write_flags |= FUSE_WRITE_KILL_SUIDGID;

			nres = fuse_send_write(ia, pos, nbytes, owner);
		} else {
			nres = fuse_send_read(ia, pos, nbytes, owner);
		}

		if (!io->async || nres < 0) {
			fuse_release_user_pages(&ia->ap, io->should_dirty);
			fuse_io_free(ia);
		}
		ia = NULL;
		if (nres < 0) {
			iov_iter_revert(iter, nbytes);
			err = nres;
			break;
		}
		WARN_ON(nres > nbytes);

		count -= nres;
		res += nres;
		pos += nres;
		if (nres != nbytes) {
			iov_iter_revert(iter, nbytes - nres);
			break;
		}
		if (count) {
			max_pages = iov_iter_npages(iter, fc->max_pages);
			ia = fuse_io_alloc(io, max_pages);
			if (!ia)
				break;
		}
	}
	if (ia)
		fuse_io_free(ia);
	if (res > 0)
		*ppos = pos;

	return res > 0 ? res : err;
}
EXPORT_SYMBOL_GPL(fuse_direct_io);

static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
				  struct iov_iter *iter,
				  loff_t *ppos)
{
	ssize_t res;
	struct inode *inode = file_inode(io->iocb->ki_filp);

	res = fuse_direct_io(io, iter, ppos, 0);

	fuse_invalidate_atime(inode);

	return res;
}

static ssize_t fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter);

static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	ssize_t res;

	if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
		res = fuse_direct_IO(iocb, to);
	} else {
		struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);

		res = __fuse_direct_read(&io, to, &iocb->ki_pos);
	}

	return res;
}

static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
	ssize_t res;

	/* Don't allow parallel writes to the same file */
	inode_lock(inode);
	res = generic_write_checks(iocb, from);
	if (res > 0) {
		if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
			res = fuse_direct_IO(iocb, from);
		} else {
			res = fuse_direct_io(&io, from, &iocb->ki_pos,
					     FUSE_DIO_WRITE);
		}
	}
	fuse_invalidate_attr(inode);
	if (res > 0)
		fuse_write_update_size(inode, iocb->ki_pos);
	inode_unlock(inode);

	return res;
}

static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);

	if (fuse_is_bad(inode))
		return -EIO;

	if (FUSE_IS_DAX(inode))
		return fuse_dax_read_iter(iocb, to);

	if (!(ff->open_flags & FOPEN_DIRECT_IO))
		return fuse_cache_read_iter(iocb, to);
	else
		return fuse_direct_read_iter(iocb, to);
}

static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);

	if (fuse_is_bad(inode))
		return -EIO;

	if (FUSE_IS_DAX(inode))
		return fuse_dax_write_iter(iocb, from);

	if (!(ff->open_flags & FOPEN_DIRECT_IO))
		return fuse_cache_write_iter(iocb, from);
	else
		return fuse_direct_write_iter(iocb, from);
}

static void fuse_writepage_free(struct fuse_writepage_args *wpa)
{
	struct fuse_args_pages *ap = &wpa->ia.ap;
	int i;

	for (i = 0; i < ap->num_pages; i++)
		__free_page(ap->pages[i]);

	if (wpa->ia.ff)
		fuse_file_put(wpa->ia.ff, false, false);

	kfree(ap->pages);
	kfree(wpa);
}

static void fuse_writepage_finish(struct fuse_mount *fm,
				  struct fuse_writepage_args *wpa)
{
	struct fuse_args_pages *ap = &wpa->ia.ap;
	struct inode *inode = wpa->inode;
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	int i;

	for (i = 0; i < ap->num_pages; i++) {
		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
		dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
		wb_writeout_inc(&bdi->wb);
	}
	wake_up(&fi->page_waitq);
}

/* Called under fi->lock, may release and reacquire it */
static void fuse_send_writepage(struct fuse_mount *fm,
				struct fuse_writepage_args *wpa, loff_t size)
__releases(fi->lock)
__acquires(fi->lock)
{
	struct fuse_writepage_args *aux, *next;
	struct fuse_inode *fi = get_fuse_inode(wpa->inode);
	struct fuse_write_in *inarg = &wpa->ia.write.in;
	struct fuse_args *args = &wpa->ia.ap.args;
	__u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
	int err;

	fi->writectr++;
	if (inarg->offset + data_size <= size) {
		inarg->size = data_size;
	} else if (inarg->offset < size) {
		inarg->size = size - inarg->offset;
	} else {
		/* Got truncated off completely */
		goto out_free;
	}

	args->in_args[1].size = inarg->size;
	args->force = true;
	args->nocreds = true;

	err = fuse_simple_background(fm, args, GFP_ATOMIC);
	if (err == -ENOMEM) {
		spin_unlock(&fi->lock);
		err = fuse_simple_background(fm, args, GFP_NOFS | __GFP_NOFAIL);
		spin_lock(&fi->lock);
	}

	/* Fails on broken connection only */
	if (unlikely(err))
		goto out_free;

	return;

 out_free:
	fi->writectr--;
	rb_erase(&wpa->writepages_entry, &fi->writepages);
	fuse_writepage_finish(fm, wpa);
	spin_unlock(&fi->lock);

	/* After fuse_writepage_finish() aux request list is private */
	for (aux = wpa->next; aux; aux = next) {
		next = aux->next;
		aux->next = NULL;
		fuse_writepage_free(aux);
	}

	fuse_writepage_free(wpa);
	spin_lock(&fi->lock);
}

/*
 * If fi->writectr is positive (no truncate or fsync going on) send
 * all queued writepage requests.
 *
 * Called with fi->lock
 */
void fuse_flush_writepages(struct inode *inode)
__releases(fi->lock)
__acquires(fi->lock)
{
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	loff_t crop = i_size_read(inode);
	struct fuse_writepage_args *wpa;

	while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
		wpa = list_entry(fi->queued_writes.next,
				 struct fuse_writepage_args, queue_entry);
		list_del_init(&wpa->queue_entry);
		fuse_send_writepage(fm, wpa, crop);
	}
}

static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
						struct fuse_writepage_args *wpa)
{
	pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
	pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;

	WARN_ON(!wpa->ia.ap.num_pages);
	while (*p) {
		struct fuse_writepage_args *curr;
		pgoff_t curr_index;

		parent = *p;
		curr = rb_entry(parent, struct fuse_writepage_args,
				writepages_entry);
		WARN_ON(curr->inode != wpa->inode);
		curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;

		if (idx_from >= curr_index + curr->ia.ap.num_pages)
			p = &(*p)->rb_right;
		else if (idx_to < curr_index)
			p = &(*p)->rb_left;
		else
			return curr;
	}

	rb_link_node(&wpa->writepages_entry, parent, p);
	rb_insert_color(&wpa->writepages_entry, root);
	return NULL;
}

static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
{
	WARN_ON(fuse_insert_writeback(root, wpa));
}

static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
			       int error)
{
	struct fuse_writepage_args *wpa =
		container_of(args, typeof(*wpa), ia.ap.args);
	struct inode *inode = wpa->inode;
	struct fuse_inode *fi = get_fuse_inode(inode);

	mapping_set_error(inode->i_mapping, error);
	spin_lock(&fi->lock);
	rb_erase(&wpa->writepages_entry, &fi->writepages);
	while (wpa->next) {
		struct fuse_mount *fm = get_fuse_mount(inode);
		struct fuse_write_in *inarg = &wpa->ia.write.in;
		struct fuse_writepage_args *next = wpa->next;

		wpa->next = next->next;
		next->next = NULL;
		next->ia.ff = fuse_file_get(wpa->ia.ff);
		tree_insert(&fi->writepages, next);

		/*
		 * Skip fuse_flush_writepages() to make it easy to crop requests
		 * based on primary request size.
		 *
		 * 1st case (trivial): there are no concurrent activities using
		 * fuse_set/release_nowrite. Then we're on safe side because
		 * fuse_flush_writepages() would call fuse_send_writepage()
		 * anyway.
		 *
		 * 2nd case: someone called fuse_set_nowrite and it is waiting
		 * now for completion of all in-flight requests. This happens
		 * rarely and no more than once per page, so this should be
		 * okay.
		 *
		 * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
		 * of fuse_set_nowrite..fuse_release_nowrite section. The fact
		 * that fuse_set_nowrite returned implies that all in-flight
		 * requests were completed along with all of their secondary
		 * requests. Further primary requests are blocked by negative
		 * writectr. Hence there cannot be any in-flight requests and
		 * no invocations of fuse_writepage_end() while we're in
		 * fuse_set_nowrite..fuse_release_nowrite section.
		 */
		fuse_send_writepage(fm, next, inarg->offset + inarg->size);
	}
	fi->writectr--;
	fuse_writepage_finish(fm, wpa);
	spin_unlock(&fi->lock);
	fuse_writepage_free(wpa);
}

static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
					       struct fuse_inode *fi)
{
	struct fuse_file *ff = NULL;

	spin_lock(&fi->lock);
	if (!list_empty(&fi->write_files)) {
		ff = list_entry(fi->write_files.next, struct fuse_file,
				write_entry);
		fuse_file_get(ff);
	}
	spin_unlock(&fi->lock);

	return ff;
}

static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
					     struct fuse_inode *fi)
{
	struct fuse_file *ff = __fuse_write_file_get(fc, fi);
	WARN_ON(!ff);
	return ff;
}

int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_file *ff;
	int err;

	ff = __fuse_write_file_get(fc, fi);
	err = fuse_flush_times(inode, ff);
	if (ff)
		fuse_file_put(ff, false, false);

	return err;
}

static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
{
	struct fuse_writepage_args *wpa;
	struct fuse_args_pages *ap;

	wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
	if (wpa) {
		ap = &wpa->ia.ap;
		ap->num_pages = 0;
		ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
		if (!ap->pages) {
			kfree(wpa);
			wpa = NULL;
		}
	}
	return wpa;
}

static int fuse_writepage_locked(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_writepage_args *wpa;
	struct fuse_args_pages *ap;
	struct page *tmp_page;
	int error = -ENOMEM;

	set_page_writeback(page);

	wpa = fuse_writepage_args_alloc();
	if (!wpa)
		goto err;
	ap = &wpa->ia.ap;

	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	if (!tmp_page)
		goto err_free;

	error = -EIO;
	wpa->ia.ff = fuse_write_file_get(fc, fi);
	if (!wpa->ia.ff)
		goto err_nofile;

	fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);

	copy_highpage(tmp_page, page);
	wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
	wpa->next = NULL;
	ap->args.in_pages = true;
	ap->num_pages = 1;
	ap->pages[0] = tmp_page;
	ap->descs[0].offset = 0;
	ap->descs[0].length = PAGE_SIZE;
	ap->args.end = fuse_writepage_end;
	wpa->inode = inode;

	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);

	spin_lock(&fi->lock);
	tree_insert(&fi->writepages, wpa);
	list_add_tail(&wpa->queue_entry, &fi->queued_writes);
	fuse_flush_writepages(inode);
	spin_unlock(&fi->lock);

	end_page_writeback(page);

	return 0;

err_nofile:
	__free_page(tmp_page);
err_free:
	kfree(wpa);
err:
	mapping_set_error(page->mapping, error);
	end_page_writeback(page);
	return error;
}

static int fuse_writepage(struct page *page, struct writeback_control *wbc)
{
	int err;

	if (fuse_page_is_writeback(page->mapping->host, page->index)) {
		/*
		 * ->writepages() should be called for sync() and friends. We
		 * should only get here on direct reclaim and then we are
		 * allowed to skip a page which is already in flight
		 */
		WARN_ON(wbc->sync_mode == WB_SYNC_ALL);

		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}

	err = fuse_writepage_locked(page);
	unlock_page(page);

	return err;
}

struct fuse_fill_wb_data {
	struct fuse_writepage_args *wpa;
	struct fuse_file *ff;
	struct inode *inode;
	struct page **orig_pages;
	unsigned int max_pages;
};

static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
{
	struct fuse_args_pages *ap = &data->wpa->ia.ap;
	struct fuse_conn *fc = get_fuse_conn(data->inode);
	struct page **pages;
	struct fuse_page_desc *descs;
	unsigned int npages = min_t(unsigned int,
				    max_t(unsigned int, data->max_pages * 2,
					  FUSE_DEFAULT_MAX_PAGES_PER_REQ),
				    fc->max_pages);
	WARN_ON(npages <= data->max_pages);

	pages = fuse_pages_alloc(npages, GFP_NOFS, &descs);
	if (!pages)
		return false;

	memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages);
	memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages);
	kfree(ap->pages);
	ap->pages = pages;
	ap->descs = descs;
	data->max_pages = npages;

	return true;
}

static void fuse_writepages_send(struct fuse_fill_wb_data *data)
{
	struct fuse_writepage_args *wpa = data->wpa;
	struct inode *inode = data->inode;
	struct fuse_inode *fi = get_fuse_inode(inode);
	int num_pages = wpa->ia.ap.num_pages;
	int i;

	wpa->ia.ff = fuse_file_get(data->ff);
	spin_lock(&fi->lock);
	list_add_tail(&wpa->queue_entry, &fi->queued_writes);
	fuse_flush_writepages(inode);
	spin_unlock(&fi->lock);

	for (i = 0; i < num_pages; i++)
		end_page_writeback(data->orig_pages[i]);
}

/*
 * Check under fi->lock if the page is under writeback, and insert it onto the
 * rb_tree if not. Otherwise iterate auxiliary write requests, to see if there's
 * one already added for a page at this offset. If there's none, then insert
 * this new request onto the auxiliary list, otherwise reuse the existing one by
 * swapping the new temp page with the old one.
 */
static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
			       struct page *page)
{
	struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
	struct fuse_writepage_args *tmp;
	struct fuse_writepage_args *old_wpa;
	struct fuse_args_pages *new_ap = &new_wpa->ia.ap;

	WARN_ON(new_ap->num_pages != 0);
	new_ap->num_pages = 1;

	spin_lock(&fi->lock);
	old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
	if (!old_wpa) {
		spin_unlock(&fi->lock);
		return true;
	}

	for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
		pgoff_t curr_index;

		WARN_ON(tmp->inode != new_wpa->inode);
		curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
		if (curr_index == page->index) {
			WARN_ON(tmp->ia.ap.num_pages != 1);
			swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
			break;
		}
	}

	if (!tmp) {
		new_wpa->next = old_wpa->next;
		old_wpa->next = new_wpa;
	}

	spin_unlock(&fi->lock);

	if (tmp) {
		struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);

		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
		dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
		wb_writeout_inc(&bdi->wb);
		fuse_writepage_free(new_wpa);
	}

	return false;
}

static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
				     struct fuse_args_pages *ap,
				     struct fuse_fill_wb_data *data)
{
	WARN_ON(!ap->num_pages);

	/*
	 * Being under writeback is unlikely but possible. For example direct
	 * read to an mmaped fuse file will set the page dirty twice; once when
	 * the pages are faulted with get_user_pages(), and then after the read
	 * completed.
	 */
	if (fuse_page_is_writeback(data->inode, page->index))
		return true;

	/* Reached max pages */
	if (ap->num_pages == fc->max_pages)
		return true;

	/* Reached max write bytes */
	if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write)
		return true;

	/* Discontinuity */
	if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)
		return true;

	/* Need to grow the pages array? If so, did the expansion fail? */
*/ 2087 if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data)) 2088 return true; 2089 2090 return false; 2091 } 2092 2093 static int fuse_writepages_fill(struct page *page, 2094 struct writeback_control *wbc, void *_data) 2095 { 2096 struct fuse_fill_wb_data *data = _data; 2097 struct fuse_writepage_args *wpa = data->wpa; 2098 struct fuse_args_pages *ap = &wpa->ia.ap; 2099 struct inode *inode = data->inode; 2100 struct fuse_inode *fi = get_fuse_inode(inode); 2101 struct fuse_conn *fc = get_fuse_conn(inode); 2102 struct page *tmp_page; 2103 int err; 2104 2105 if (!data->ff) { 2106 err = -EIO; 2107 data->ff = fuse_write_file_get(fc, fi); 2108 if (!data->ff) 2109 goto out_unlock; 2110 } 2111 2112 if (wpa && fuse_writepage_need_send(fc, page, ap, data)) { 2113 fuse_writepages_send(data); 2114 data->wpa = NULL; 2115 } 2116 2117 err = -ENOMEM; 2118 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 2119 if (!tmp_page) 2120 goto out_unlock; 2121 2122 /* 2123 * The page must not be redirtied until the writeout is completed 2124 * (i.e. userspace has sent a reply to the write request). Otherwise 2125 * there could be more than one temporary page instance for each real 2126 * page. 2127 * 2128 * This is ensured by holding the page lock in page_mkwrite() while 2129 * checking fuse_page_is_writeback(). We already hold the page lock 2130 * since clear_page_dirty_for_io() and keep it held until we add the 2131 * request to the fi->writepages list and increment ap->num_pages. 2132 * After this fuse_page_is_writeback() will indicate that the page is 2133 * under writeback, so we can release the page lock. 2134 */ 2135 if (data->wpa == NULL) { 2136 err = -ENOMEM; 2137 wpa = fuse_writepage_args_alloc(); 2138 if (!wpa) { 2139 __free_page(tmp_page); 2140 goto out_unlock; 2141 } 2142 data->max_pages = 1; 2143 2144 ap = &wpa->ia.ap; 2145 fuse_write_args_fill(&wpa->ia, data->ff, page_offset(page), 0); 2146 wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; 2147 wpa->next = NULL; 2148 ap->args.in_pages = true; 2149 ap->args.end = fuse_writepage_end; 2150 ap->num_pages = 0; 2151 wpa->inode = inode; 2152 } 2153 set_page_writeback(page); 2154 2155 copy_highpage(tmp_page, page); 2156 ap->pages[ap->num_pages] = tmp_page; 2157 ap->descs[ap->num_pages].offset = 0; 2158 ap->descs[ap->num_pages].length = PAGE_SIZE; 2159 data->orig_pages[ap->num_pages] = page; 2160 2161 inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); 2162 inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); 2163 2164 err = 0; 2165 if (data->wpa) { 2166 /* 2167 * Protected by fi->lock against concurrent access by 2168 * fuse_page_is_writeback(). 
2169 */ 2170 spin_lock(&fi->lock); 2171 ap->num_pages++; 2172 spin_unlock(&fi->lock); 2173 } else if (fuse_writepage_add(wpa, page)) { 2174 data->wpa = wpa; 2175 } else { 2176 end_page_writeback(page); 2177 } 2178 out_unlock: 2179 unlock_page(page); 2180 2181 return err; 2182 } 2183 2184 static int fuse_writepages(struct address_space *mapping, 2185 struct writeback_control *wbc) 2186 { 2187 struct inode *inode = mapping->host; 2188 struct fuse_conn *fc = get_fuse_conn(inode); 2189 struct fuse_fill_wb_data data; 2190 int err; 2191 2192 err = -EIO; 2193 if (fuse_is_bad(inode)) 2194 goto out; 2195 2196 data.inode = inode; 2197 data.wpa = NULL; 2198 data.ff = NULL; 2199 2200 err = -ENOMEM; 2201 data.orig_pages = kcalloc(fc->max_pages, 2202 sizeof(struct page *), 2203 GFP_NOFS); 2204 if (!data.orig_pages) 2205 goto out; 2206 2207 err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); 2208 if (data.wpa) { 2209 WARN_ON(!data.wpa->ia.ap.num_pages); 2210 fuse_writepages_send(&data); 2211 } 2212 if (data.ff) 2213 fuse_file_put(data.ff, false, false); 2214 2215 kfree(data.orig_pages); 2216 out: 2217 return err; 2218 } 2219 2220 /* 2221 * It's worthy to make sure that space is reserved on disk for the write, 2222 * but how to implement it without killing performance need more thinking. 2223 */ 2224 static int fuse_write_begin(struct file *file, struct address_space *mapping, 2225 loff_t pos, unsigned len, unsigned flags, 2226 struct page **pagep, void **fsdata) 2227 { 2228 pgoff_t index = pos >> PAGE_SHIFT; 2229 struct fuse_conn *fc = get_fuse_conn(file_inode(file)); 2230 struct page *page; 2231 loff_t fsize; 2232 int err = -ENOMEM; 2233 2234 WARN_ON(!fc->writeback_cache); 2235 2236 page = grab_cache_page_write_begin(mapping, index, flags); 2237 if (!page) 2238 goto error; 2239 2240 fuse_wait_on_page_writeback(mapping->host, page->index); 2241 2242 if (PageUptodate(page) || len == PAGE_SIZE) 2243 goto success; 2244 /* 2245 * Check if the start this page comes after the end of file, in which 2246 * case the readpage can be optimized away. 2247 */ 2248 fsize = i_size_read(mapping->host); 2249 if (fsize <= (pos & PAGE_MASK)) { 2250 size_t off = pos & ~PAGE_MASK; 2251 if (off) 2252 zero_user_segment(page, 0, off); 2253 goto success; 2254 } 2255 err = fuse_do_readpage(file, page); 2256 if (err) 2257 goto cleanup; 2258 success: 2259 *pagep = page; 2260 return 0; 2261 2262 cleanup: 2263 unlock_page(page); 2264 put_page(page); 2265 error: 2266 return err; 2267 } 2268 2269 static int fuse_write_end(struct file *file, struct address_space *mapping, 2270 loff_t pos, unsigned len, unsigned copied, 2271 struct page *page, void *fsdata) 2272 { 2273 struct inode *inode = page->mapping->host; 2274 2275 /* Haven't copied anything? Skip zeroing, size extending, dirtying. 
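 * The generic write path retries a short copy after faulting the user
 * buffer back in, so simply unlocking and returning 0 here is safe.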
*/ 2276 if (!copied) 2277 goto unlock; 2278 2279 if (!PageUptodate(page)) { 2280 /* Zero any unwritten bytes at the end of the page */ 2281 size_t endoff = (pos + copied) & ~PAGE_MASK; 2282 if (endoff) 2283 zero_user_segment(page, endoff, PAGE_SIZE); 2284 SetPageUptodate(page); 2285 } 2286 2287 fuse_write_update_size(inode, pos + copied); 2288 set_page_dirty(page); 2289 2290 unlock: 2291 unlock_page(page); 2292 put_page(page); 2293 2294 return copied; 2295 } 2296 2297 static int fuse_launder_page(struct page *page) 2298 { 2299 int err = 0; 2300 if (clear_page_dirty_for_io(page)) { 2301 struct inode *inode = page->mapping->host; 2302 2303 /* Serialize with pending writeback for the same page */ 2304 fuse_wait_on_page_writeback(inode, page->index); 2305 err = fuse_writepage_locked(page); 2306 if (!err) 2307 fuse_wait_on_page_writeback(inode, page->index); 2308 } 2309 return err; 2310 } 2311 2312 /* 2313 * Write back dirty pages now, because there may not be any suitable 2314 * open files later 2315 */ 2316 static void fuse_vma_close(struct vm_area_struct *vma) 2317 { 2318 filemap_write_and_wait(vma->vm_file->f_mapping); 2319 } 2320 2321 /* 2322 * Wait for writeback against this page to complete before allowing it 2323 * to be marked dirty again, and hence written back again, possibly 2324 * before the previous writepage completed. 2325 * 2326 * Block here, instead of in ->writepage(), so that the userspace fs 2327 * can only block processes actually operating on the filesystem. 2328 * 2329 * Otherwise unprivileged userspace fs would be able to block 2330 * unrelated: 2331 * 2332 * - page migration 2333 * - sync(2) 2334 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER 2335 */ 2336 static vm_fault_t fuse_page_mkwrite(struct vm_fault *vmf) 2337 { 2338 struct page *page = vmf->page; 2339 struct inode *inode = file_inode(vmf->vma->vm_file); 2340 2341 file_update_time(vmf->vma->vm_file); 2342 lock_page(page); 2343 if (page->mapping != inode->i_mapping) { 2344 unlock_page(page); 2345 return VM_FAULT_NOPAGE; 2346 } 2347 2348 fuse_wait_on_page_writeback(inode, page->index); 2349 return VM_FAULT_LOCKED; 2350 } 2351 2352 static const struct vm_operations_struct fuse_file_vm_ops = { 2353 .close = fuse_vma_close, 2354 .fault = filemap_fault, 2355 .map_pages = filemap_map_pages, 2356 .page_mkwrite = fuse_page_mkwrite, 2357 }; 2358 2359 static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 2360 { 2361 struct fuse_file *ff = file->private_data; 2362 2363 /* DAX mmap is superior to direct_io mmap */ 2364 if (FUSE_IS_DAX(file_inode(file))) 2365 return fuse_dax_mmap(file, vma); 2366 2367 if (ff->open_flags & FOPEN_DIRECT_IO) { 2368 /* Can't provide the coherency needed for MAP_SHARED */ 2369 if (vma->vm_flags & VM_MAYSHARE) 2370 return -ENODEV; 2371 2372 invalidate_inode_pages2(file->f_mapping); 2373 2374 return generic_file_mmap(file, vma); 2375 } 2376 2377 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) 2378 fuse_link_write_file(file); 2379 2380 file_accessed(file); 2381 vma->vm_ops = &fuse_file_vm_ops; 2382 return 0; 2383 } 2384 2385 static int convert_fuse_file_lock(struct fuse_conn *fc, 2386 const struct fuse_file_lock *ffl, 2387 struct file_lock *fl) 2388 { 2389 switch (ffl->type) { 2390 case F_UNLCK: 2391 break; 2392 2393 case F_RDLCK: 2394 case F_WRLCK: 2395 if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX || 2396 ffl->end < ffl->start) 2397 return -EIO; 2398 2399 fl->fl_start = ffl->start; 2400 fl->fl_end = ffl->end; 2401 2402 /* 2403 * Convert pid 
into init's pid namespace. The locks API will 2404 * translate it into the caller's pid namespace. 2405 */ 2406 rcu_read_lock(); 2407 fl->fl_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns); 2408 rcu_read_unlock(); 2409 break; 2410 2411 default: 2412 return -EIO; 2413 } 2414 fl->fl_type = ffl->type; 2415 return 0; 2416 } 2417 2418 static void fuse_lk_fill(struct fuse_args *args, struct file *file, 2419 const struct file_lock *fl, int opcode, pid_t pid, 2420 int flock, struct fuse_lk_in *inarg) 2421 { 2422 struct inode *inode = file_inode(file); 2423 struct fuse_conn *fc = get_fuse_conn(inode); 2424 struct fuse_file *ff = file->private_data; 2425 2426 memset(inarg, 0, sizeof(*inarg)); 2427 inarg->fh = ff->fh; 2428 inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner); 2429 inarg->lk.start = fl->fl_start; 2430 inarg->lk.end = fl->fl_end; 2431 inarg->lk.type = fl->fl_type; 2432 inarg->lk.pid = pid; 2433 if (flock) 2434 inarg->lk_flags |= FUSE_LK_FLOCK; 2435 args->opcode = opcode; 2436 args->nodeid = get_node_id(inode); 2437 args->in_numargs = 1; 2438 args->in_args[0].size = sizeof(*inarg); 2439 args->in_args[0].value = inarg; 2440 } 2441 2442 static int fuse_getlk(struct file *file, struct file_lock *fl) 2443 { 2444 struct inode *inode = file_inode(file); 2445 struct fuse_mount *fm = get_fuse_mount(inode); 2446 FUSE_ARGS(args); 2447 struct fuse_lk_in inarg; 2448 struct fuse_lk_out outarg; 2449 int err; 2450 2451 fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg); 2452 args.out_numargs = 1; 2453 args.out_args[0].size = sizeof(outarg); 2454 args.out_args[0].value = &outarg; 2455 err = fuse_simple_request(fm, &args); 2456 if (!err) 2457 err = convert_fuse_file_lock(fm->fc, &outarg.lk, fl); 2458 2459 return err; 2460 } 2461 2462 static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) 2463 { 2464 struct inode *inode = file_inode(file); 2465 struct fuse_mount *fm = get_fuse_mount(inode); 2466 FUSE_ARGS(args); 2467 struct fuse_lk_in inarg; 2468 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; 2469 struct pid *pid = fl->fl_type != F_UNLCK ? 
task_tgid(current) : NULL; 2470 pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns); 2471 int err; 2472 2473 if (fl->fl_lmops && fl->fl_lmops->lm_grant) { 2474 /* NLM needs asynchronous locks, which we don't support yet */ 2475 return -ENOLCK; 2476 } 2477 2478 /* Unlock on close is handled by the flush method */ 2479 if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX) 2480 return 0; 2481 2482 fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg); 2483 err = fuse_simple_request(fm, &args); 2484 2485 /* locking is restartable */ 2486 if (err == -EINTR) 2487 err = -ERESTARTSYS; 2488 2489 return err; 2490 } 2491 2492 static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) 2493 { 2494 struct inode *inode = file_inode(file); 2495 struct fuse_conn *fc = get_fuse_conn(inode); 2496 int err; 2497 2498 if (cmd == F_CANCELLK) { 2499 err = 0; 2500 } else if (cmd == F_GETLK) { 2501 if (fc->no_lock) { 2502 posix_test_lock(file, fl); 2503 err = 0; 2504 } else 2505 err = fuse_getlk(file, fl); 2506 } else { 2507 if (fc->no_lock) 2508 err = posix_lock_file(file, fl, NULL); 2509 else 2510 err = fuse_setlk(file, fl, 0); 2511 } 2512 return err; 2513 } 2514 2515 static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) 2516 { 2517 struct inode *inode = file_inode(file); 2518 struct fuse_conn *fc = get_fuse_conn(inode); 2519 int err; 2520 2521 if (fc->no_flock) { 2522 err = locks_lock_file_wait(file, fl); 2523 } else { 2524 struct fuse_file *ff = file->private_data; 2525 2526 /* emulate flock with POSIX locks */ 2527 ff->flock = true; 2528 err = fuse_setlk(file, fl, 1); 2529 } 2530 2531 return err; 2532 } 2533 2534 static sector_t fuse_bmap(struct address_space *mapping, sector_t block) 2535 { 2536 struct inode *inode = mapping->host; 2537 struct fuse_mount *fm = get_fuse_mount(inode); 2538 FUSE_ARGS(args); 2539 struct fuse_bmap_in inarg; 2540 struct fuse_bmap_out outarg; 2541 int err; 2542 2543 if (!inode->i_sb->s_bdev || fm->fc->no_bmap) 2544 return 0; 2545 2546 memset(&inarg, 0, sizeof(inarg)); 2547 inarg.block = block; 2548 inarg.blocksize = inode->i_sb->s_blocksize; 2549 args.opcode = FUSE_BMAP; 2550 args.nodeid = get_node_id(inode); 2551 args.in_numargs = 1; 2552 args.in_args[0].size = sizeof(inarg); 2553 args.in_args[0].value = &inarg; 2554 args.out_numargs = 1; 2555 args.out_args[0].size = sizeof(outarg); 2556 args.out_args[0].value = &outarg; 2557 err = fuse_simple_request(fm, &args); 2558 if (err == -ENOSYS) 2559 fm->fc->no_bmap = 1; 2560 2561 return err ? 
0 : outarg.block; 2562 } 2563 2564 static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) 2565 { 2566 struct inode *inode = file->f_mapping->host; 2567 struct fuse_mount *fm = get_fuse_mount(inode); 2568 struct fuse_file *ff = file->private_data; 2569 FUSE_ARGS(args); 2570 struct fuse_lseek_in inarg = { 2571 .fh = ff->fh, 2572 .offset = offset, 2573 .whence = whence 2574 }; 2575 struct fuse_lseek_out outarg; 2576 int err; 2577 2578 if (fm->fc->no_lseek) 2579 goto fallback; 2580 2581 args.opcode = FUSE_LSEEK; 2582 args.nodeid = ff->nodeid; 2583 args.in_numargs = 1; 2584 args.in_args[0].size = sizeof(inarg); 2585 args.in_args[0].value = &inarg; 2586 args.out_numargs = 1; 2587 args.out_args[0].size = sizeof(outarg); 2588 args.out_args[0].value = &outarg; 2589 err = fuse_simple_request(fm, &args); 2590 if (err) { 2591 if (err == -ENOSYS) { 2592 fm->fc->no_lseek = 1; 2593 goto fallback; 2594 } 2595 return err; 2596 } 2597 2598 return vfs_setpos(file, outarg.offset, inode->i_sb->s_maxbytes); 2599 2600 fallback: 2601 err = fuse_update_attributes(inode, file); 2602 if (!err) 2603 return generic_file_llseek(file, offset, whence); 2604 else 2605 return err; 2606 } 2607 2608 static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) 2609 { 2610 loff_t retval; 2611 struct inode *inode = file_inode(file); 2612 2613 switch (whence) { 2614 case SEEK_SET: 2615 case SEEK_CUR: 2616 /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */ 2617 retval = generic_file_llseek(file, offset, whence); 2618 break; 2619 case SEEK_END: 2620 inode_lock(inode); 2621 retval = fuse_update_attributes(inode, file); 2622 if (!retval) 2623 retval = generic_file_llseek(file, offset, whence); 2624 inode_unlock(inode); 2625 break; 2626 case SEEK_HOLE: 2627 case SEEK_DATA: 2628 inode_lock(inode); 2629 retval = fuse_lseek(file, offset, whence); 2630 inode_unlock(inode); 2631 break; 2632 default: 2633 retval = -EINVAL; 2634 } 2635 2636 return retval; 2637 } 2638 2639 /* 2640 * CUSE servers compiled on 32bit broke on 64bit kernels because the 2641 * ABI was defined to be 'struct iovec' which is different on 32bit 2642 * and 64bit. Fortunately we can determine which structure the server 2643 * used from the size of the reply. 2644 */ 2645 static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, 2646 size_t transferred, unsigned count, 2647 bool is_compat) 2648 { 2649 #ifdef CONFIG_COMPAT 2650 if (count * sizeof(struct compat_iovec) == transferred) { 2651 struct compat_iovec *ciov = src; 2652 unsigned i; 2653 2654 /* 2655 * With this interface a 32bit server cannot support 2656 * non-compat (i.e. 
ones coming from 64bit apps) ioctl 2657 * requests 2658 */ 2659 if (!is_compat) 2660 return -EINVAL; 2661 2662 for (i = 0; i < count; i++) { 2663 dst[i].iov_base = compat_ptr(ciov[i].iov_base); 2664 dst[i].iov_len = ciov[i].iov_len; 2665 } 2666 return 0; 2667 } 2668 #endif 2669 2670 if (count * sizeof(struct iovec) != transferred) 2671 return -EIO; 2672 2673 memcpy(dst, src, transferred); 2674 return 0; 2675 } 2676 2677 /* Make sure iov_length() won't overflow */ 2678 static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov, 2679 size_t count) 2680 { 2681 size_t n; 2682 u32 max = fc->max_pages << PAGE_SHIFT; 2683 2684 for (n = 0; n < count; n++, iov++) { 2685 if (iov->iov_len > (size_t) max) 2686 return -ENOMEM; 2687 max -= iov->iov_len; 2688 } 2689 return 0; 2690 } 2691 2692 static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst, 2693 void *src, size_t transferred, unsigned count, 2694 bool is_compat) 2695 { 2696 unsigned i; 2697 struct fuse_ioctl_iovec *fiov = src; 2698 2699 if (fc->minor < 16) { 2700 return fuse_copy_ioctl_iovec_old(dst, src, transferred, 2701 count, is_compat); 2702 } 2703 2704 if (count * sizeof(struct fuse_ioctl_iovec) != transferred) 2705 return -EIO; 2706 2707 for (i = 0; i < count; i++) { 2708 /* Did the server supply an inappropriate value? */ 2709 if (fiov[i].base != (unsigned long) fiov[i].base || 2710 fiov[i].len != (unsigned long) fiov[i].len) 2711 return -EIO; 2712 2713 dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base; 2714 dst[i].iov_len = (size_t) fiov[i].len; 2715 2716 #ifdef CONFIG_COMPAT 2717 if (is_compat && 2718 (ptr_to_compat(dst[i].iov_base) != fiov[i].base || 2719 (compat_size_t) dst[i].iov_len != fiov[i].len)) 2720 return -EIO; 2721 #endif 2722 } 2723 2724 return 0; 2725 } 2726 2727 2728 /* 2729 * For ioctls, there is no generic way to determine how much memory 2730 * needs to be read and/or written. Furthermore, ioctls are allowed 2731 * to dereference the passed pointer, so the parameter requires deep 2732 * copying but FUSE has no idea whatsoever about what to copy in or 2733 * out. 2734 * 2735 * This is solved by allowing FUSE server to retry ioctl with 2736 * necessary in/out iovecs. Let's assume the ioctl implementation 2737 * needs to read in the following structure. 2738 * 2739 * struct a { 2740 * char *buf; 2741 * size_t buflen; 2742 * } 2743 * 2744 * On the first callout to FUSE server, inarg->in_size and 2745 * inarg->out_size will be NULL; then, the server completes the ioctl 2746 * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and 2747 * the actual iov array to 2748 * 2749 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } } 2750 * 2751 * which tells FUSE to copy in the requested area and retry the ioctl. 2752 * On the second round, the server has access to the structure and 2753 * from that it can tell what to look for next, so on the invocation, 2754 * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to 2755 * 2756 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) }, 2757 * { .iov_base = a.buf, .iov_len = a.buflen } } 2758 * 2759 * FUSE will copy both struct a and the pointed buffer from the 2760 * process doing the ioctl and retry ioctl with both struct a and the 2761 * buffer. 2762 * 2763 * This time, FUSE server has everything it needs and completes ioctl 2764 * without FUSE_IOCTL_RETRY which finishes the ioctl call. 2765 * 2766 * Copying data out works the same way. 
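 *
 * On the wire, the second-round retry reply above looks roughly like this
 * (a sketch; names are the ABI structures from <uapi/linux/fuse.h>,
 * protocol minor >= 16):
 *
 *   struct fuse_ioctl_out { .result = 0, .flags = FUSE_IOCTL_RETRY,
 *                           .in_iovs = 2, .out_iovs = 0 }
 *
 *   followed in the out data by two struct fuse_ioctl_iovec entries:
 *
 *   { .base = (u64) inarg.arg, .len = sizeof(struct a) }
 *   { .base = (u64) a.buf,     .len = a.buflen }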
2767 * 2768 * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel 2769 * automatically initializes in and out iovs by decoding @cmd with 2770 * _IOC_* macros and the server is not allowed to request RETRY. This 2771 * limits ioctl data transfers to well-formed ioctls and is the forced 2772 * behavior for all FUSE servers. 2773 */ 2774 long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 2775 unsigned int flags) 2776 { 2777 struct fuse_file *ff = file->private_data; 2778 struct fuse_mount *fm = ff->fm; 2779 struct fuse_ioctl_in inarg = { 2780 .fh = ff->fh, 2781 .cmd = cmd, 2782 .arg = arg, 2783 .flags = flags 2784 }; 2785 struct fuse_ioctl_out outarg; 2786 struct iovec *iov_page = NULL; 2787 struct iovec *in_iov = NULL, *out_iov = NULL; 2788 unsigned int in_iovs = 0, out_iovs = 0, max_pages; 2789 size_t in_size, out_size, c; 2790 ssize_t transferred; 2791 int err, i; 2792 struct iov_iter ii; 2793 struct fuse_args_pages ap = {}; 2794 2795 #if BITS_PER_LONG == 32 2796 inarg.flags |= FUSE_IOCTL_32BIT; 2797 #else 2798 if (flags & FUSE_IOCTL_COMPAT) { 2799 inarg.flags |= FUSE_IOCTL_32BIT; 2800 #ifdef CONFIG_X86_X32 2801 if (in_x32_syscall()) 2802 inarg.flags |= FUSE_IOCTL_COMPAT_X32; 2803 #endif 2804 } 2805 #endif 2806 2807 /* assume all the iovs returned by client always fits in a page */ 2808 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 2809 2810 err = -ENOMEM; 2811 ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs); 2812 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); 2813 if (!ap.pages || !iov_page) 2814 goto out; 2815 2816 fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages); 2817 2818 /* 2819 * If restricted, initialize IO parameters as encoded in @cmd. 2820 * RETRY from server is not allowed. 2821 */ 2822 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { 2823 struct iovec *iov = iov_page; 2824 2825 iov->iov_base = (void __user *)arg; 2826 2827 switch (cmd) { 2828 case FS_IOC_GETFLAGS: 2829 case FS_IOC_SETFLAGS: 2830 iov->iov_len = sizeof(int); 2831 break; 2832 default: 2833 iov->iov_len = _IOC_SIZE(cmd); 2834 break; 2835 } 2836 2837 if (_IOC_DIR(cmd) & _IOC_WRITE) { 2838 in_iov = iov; 2839 in_iovs = 1; 2840 } 2841 2842 if (_IOC_DIR(cmd) & _IOC_READ) { 2843 out_iov = iov; 2844 out_iovs = 1; 2845 } 2846 } 2847 2848 retry: 2849 inarg.in_size = in_size = iov_length(in_iov, in_iovs); 2850 inarg.out_size = out_size = iov_length(out_iov, out_iovs); 2851 2852 /* 2853 * Out data can be used either for actual out data or iovs, 2854 * make sure there always is at least one page. 
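 * (on a FUSE_IOCTL_RETRY reply the iovec array is read back out of
 *  page 0 below, so the out buffer can never be empty)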
2855 */ 2856 out_size = max_t(size_t, out_size, PAGE_SIZE); 2857 max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE); 2858 2859 /* make sure there are enough buffer pages and init request with them */ 2860 err = -ENOMEM; 2861 if (max_pages > fm->fc->max_pages) 2862 goto out; 2863 while (ap.num_pages < max_pages) { 2864 ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 2865 if (!ap.pages[ap.num_pages]) 2866 goto out; 2867 ap.num_pages++; 2868 } 2869 2870 2871 /* okay, let's send it to the client */ 2872 ap.args.opcode = FUSE_IOCTL; 2873 ap.args.nodeid = ff->nodeid; 2874 ap.args.in_numargs = 1; 2875 ap.args.in_args[0].size = sizeof(inarg); 2876 ap.args.in_args[0].value = &inarg; 2877 if (in_size) { 2878 ap.args.in_numargs++; 2879 ap.args.in_args[1].size = in_size; 2880 ap.args.in_pages = true; 2881 2882 err = -EFAULT; 2883 iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size); 2884 for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { 2885 c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii); 2886 if (c != PAGE_SIZE && iov_iter_count(&ii)) 2887 goto out; 2888 } 2889 } 2890 2891 ap.args.out_numargs = 2; 2892 ap.args.out_args[0].size = sizeof(outarg); 2893 ap.args.out_args[0].value = &outarg; 2894 ap.args.out_args[1].size = out_size; 2895 ap.args.out_pages = true; 2896 ap.args.out_argvar = true; 2897 2898 transferred = fuse_simple_request(fm, &ap.args); 2899 err = transferred; 2900 if (transferred < 0) 2901 goto out; 2902 2903 /* did it ask for retry? */ 2904 if (outarg.flags & FUSE_IOCTL_RETRY) { 2905 void *vaddr; 2906 2907 /* no retry if in restricted mode */ 2908 err = -EIO; 2909 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) 2910 goto out; 2911 2912 in_iovs = outarg.in_iovs; 2913 out_iovs = outarg.out_iovs; 2914 2915 /* 2916 * Make sure things are in boundary, separate checks 2917 * are to protect against overflow. 2918 */ 2919 err = -ENOMEM; 2920 if (in_iovs > FUSE_IOCTL_MAX_IOV || 2921 out_iovs > FUSE_IOCTL_MAX_IOV || 2922 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) 2923 goto out; 2924 2925 vaddr = kmap_atomic(ap.pages[0]); 2926 err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, 2927 transferred, in_iovs + out_iovs, 2928 (flags & FUSE_IOCTL_COMPAT) != 0); 2929 kunmap_atomic(vaddr); 2930 if (err) 2931 goto out; 2932 2933 in_iov = iov_page; 2934 out_iov = in_iov + in_iovs; 2935 2936 err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs); 2937 if (err) 2938 goto out; 2939 2940 err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs); 2941 if (err) 2942 goto out; 2943 2944 goto retry; 2945 } 2946 2947 err = -EIO; 2948 if (transferred > inarg.out_size) 2949 goto out; 2950 2951 err = -EFAULT; 2952 iov_iter_init(&ii, READ, out_iov, out_iovs, transferred); 2953 for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { 2954 c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii); 2955 if (c != PAGE_SIZE && iov_iter_count(&ii)) 2956 goto out; 2957 } 2958 err = 0; 2959 out: 2960 free_page((unsigned long) iov_page); 2961 while (ap.num_pages) 2962 __free_page(ap.pages[--ap.num_pages]); 2963 kfree(ap.pages); 2964 2965 return err ? 
err : outarg.result; 2966 } 2967 EXPORT_SYMBOL_GPL(fuse_do_ioctl); 2968 2969 long fuse_ioctl_common(struct file *file, unsigned int cmd, 2970 unsigned long arg, unsigned int flags) 2971 { 2972 struct inode *inode = file_inode(file); 2973 struct fuse_conn *fc = get_fuse_conn(inode); 2974 2975 if (!fuse_allow_current_process(fc)) 2976 return -EACCES; 2977 2978 if (fuse_is_bad(inode)) 2979 return -EIO; 2980 2981 return fuse_do_ioctl(file, cmd, arg, flags); 2982 } 2983 2984 static long fuse_file_ioctl(struct file *file, unsigned int cmd, 2985 unsigned long arg) 2986 { 2987 return fuse_ioctl_common(file, cmd, arg, 0); 2988 } 2989 2990 static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, 2991 unsigned long arg) 2992 { 2993 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); 2994 } 2995 2996 /* 2997 * All files which have been polled are linked to RB tree 2998 * fuse_conn->polled_files which is indexed by kh. Walk the tree and 2999 * find the matching one. 3000 */ 3001 static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh, 3002 struct rb_node **parent_out) 3003 { 3004 struct rb_node **link = &fc->polled_files.rb_node; 3005 struct rb_node *last = NULL; 3006 3007 while (*link) { 3008 struct fuse_file *ff; 3009 3010 last = *link; 3011 ff = rb_entry(last, struct fuse_file, polled_node); 3012 3013 if (kh < ff->kh) 3014 link = &last->rb_left; 3015 else if (kh > ff->kh) 3016 link = &last->rb_right; 3017 else 3018 return link; 3019 } 3020 3021 if (parent_out) 3022 *parent_out = last; 3023 return link; 3024 } 3025 3026 /* 3027 * The file is about to be polled. Make sure it's on the polled_files 3028 * RB tree. Note that files once added to the polled_files tree are 3029 * not removed before the file is released. This is because a file 3030 * polled once is likely to be polled again. 3031 */ 3032 static void fuse_register_polled_file(struct fuse_conn *fc, 3033 struct fuse_file *ff) 3034 { 3035 spin_lock(&fc->lock); 3036 if (RB_EMPTY_NODE(&ff->polled_node)) { 3037 struct rb_node **link, *parent; 3038 3039 link = fuse_find_polled_node(fc, ff->kh, &parent); 3040 BUG_ON(*link); 3041 rb_link_node(&ff->polled_node, parent, link); 3042 rb_insert_color(&ff->polled_node, &fc->polled_files); 3043 } 3044 spin_unlock(&fc->lock); 3045 } 3046 3047 __poll_t fuse_file_poll(struct file *file, poll_table *wait) 3048 { 3049 struct fuse_file *ff = file->private_data; 3050 struct fuse_mount *fm = ff->fm; 3051 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; 3052 struct fuse_poll_out outarg; 3053 FUSE_ARGS(args); 3054 int err; 3055 3056 if (fm->fc->no_poll) 3057 return DEFAULT_POLLMASK; 3058 3059 poll_wait(file, &ff->poll_wait, wait); 3060 inarg.events = mangle_poll(poll_requested_events(wait)); 3061 3062 /* 3063 * Ask for notification iff there's someone waiting for it. 3064 * The client may ignore the flag and always notify. 
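 * If the server does schedule a notification, it later sends
 * FUSE_NOTIFY_POLL with this kh; fuse_handle_notify() routes that to
 * fuse_notify_poll_wakeup() below, which wakes ff->poll_wait.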
3065 */ 3066 if (waitqueue_active(&ff->poll_wait)) { 3067 inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; 3068 fuse_register_polled_file(fm->fc, ff); 3069 } 3070 3071 args.opcode = FUSE_POLL; 3072 args.nodeid = ff->nodeid; 3073 args.in_numargs = 1; 3074 args.in_args[0].size = sizeof(inarg); 3075 args.in_args[0].value = &inarg; 3076 args.out_numargs = 1; 3077 args.out_args[0].size = sizeof(outarg); 3078 args.out_args[0].value = &outarg; 3079 err = fuse_simple_request(fm, &args); 3080 3081 if (!err) 3082 return demangle_poll(outarg.revents); 3083 if (err == -ENOSYS) { 3084 fm->fc->no_poll = 1; 3085 return DEFAULT_POLLMASK; 3086 } 3087 return EPOLLERR; 3088 } 3089 EXPORT_SYMBOL_GPL(fuse_file_poll); 3090 3091 /* 3092 * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and 3093 * wakes up the poll waiters. 3094 */ 3095 int fuse_notify_poll_wakeup(struct fuse_conn *fc, 3096 struct fuse_notify_poll_wakeup_out *outarg) 3097 { 3098 u64 kh = outarg->kh; 3099 struct rb_node **link; 3100 3101 spin_lock(&fc->lock); 3102 3103 link = fuse_find_polled_node(fc, kh, NULL); 3104 if (*link) { 3105 struct fuse_file *ff; 3106 3107 ff = rb_entry(*link, struct fuse_file, polled_node); 3108 wake_up_interruptible_sync(&ff->poll_wait); 3109 } 3110 3111 spin_unlock(&fc->lock); 3112 return 0; 3113 } 3114 3115 static void fuse_do_truncate(struct file *file) 3116 { 3117 struct inode *inode = file->f_mapping->host; 3118 struct iattr attr; 3119 3120 attr.ia_valid = ATTR_SIZE; 3121 attr.ia_size = i_size_read(inode); 3122 3123 attr.ia_file = file; 3124 attr.ia_valid |= ATTR_FILE; 3125 3126 fuse_do_setattr(file_dentry(file), &attr, file); 3127 } 3128 3129 static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off) 3130 { 3131 return round_up(off, fc->max_pages << PAGE_SHIFT); 3132 } 3133 3134 static ssize_t 3135 fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 3136 { 3137 DECLARE_COMPLETION_ONSTACK(wait); 3138 ssize_t ret = 0; 3139 struct file *file = iocb->ki_filp; 3140 struct fuse_file *ff = file->private_data; 3141 loff_t pos = 0; 3142 struct inode *inode; 3143 loff_t i_size; 3144 size_t count = iov_iter_count(iter), shortened = 0; 3145 loff_t offset = iocb->ki_pos; 3146 struct fuse_io_priv *io; 3147 3148 pos = offset; 3149 inode = file->f_mapping->host; 3150 i_size = i_size_read(inode); 3151 3152 if ((iov_iter_rw(iter) == READ) && (offset >= i_size)) 3153 return 0; 3154 3155 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); 3156 if (!io) 3157 return -ENOMEM; 3158 spin_lock_init(&io->lock); 3159 kref_init(&io->refcnt); 3160 io->reqs = 1; 3161 io->bytes = -1; 3162 io->size = 0; 3163 io->offset = offset; 3164 io->write = (iov_iter_rw(iter) == WRITE); 3165 io->err = 0; 3166 /* 3167 * By default, we want to optimize all I/Os with async request 3168 * submission to the client filesystem if supported. 3169 */ 3170 io->async = ff->fm->fc->async_dio; 3171 io->iocb = iocb; 3172 io->blocking = is_sync_kiocb(iocb); 3173 3174 /* optimization for short read */ 3175 if (io->async && !io->write && offset + count > i_size) { 3176 iov_iter_truncate(iter, fuse_round_up(ff->fm->fc, i_size - offset)); 3177 shortened = count - iov_iter_count(iter); 3178 count -= shortened; 3179 } 3180 3181 /* 3182 * We cannot asynchronously extend the size of a file. 3183 * In such case the aio will behave exactly like sync io. 
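 * (fuse_write_update_size()/fuse_do_truncate() below must run with the
 *  final result before we return, so the extending case waits)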
3184 */ 3185 if ((offset + count > i_size) && io->write) 3186 io->blocking = true; 3187 3188 if (io->async && io->blocking) { 3189 /* 3190 * Additional reference to keep io around after 3191 * calling fuse_aio_complete() 3192 */ 3193 kref_get(&io->refcnt); 3194 io->done = &wait; 3195 } 3196 3197 if (iov_iter_rw(iter) == WRITE) { 3198 ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); 3199 fuse_invalidate_attr(inode); 3200 } else { 3201 ret = __fuse_direct_read(io, iter, &pos); 3202 } 3203 iov_iter_reexpand(iter, iov_iter_count(iter) + shortened); 3204 3205 if (io->async) { 3206 bool blocking = io->blocking; 3207 3208 fuse_aio_complete(io, ret < 0 ? ret : 0, -1); 3209 3210 /* we have a non-extending, async request, so return */ 3211 if (!blocking) 3212 return -EIOCBQUEUED; 3213 3214 wait_for_completion(&wait); 3215 ret = fuse_get_res_by_io(io); 3216 } 3217 3218 kref_put(&io->refcnt, fuse_io_release); 3219 3220 if (iov_iter_rw(iter) == WRITE) { 3221 if (ret > 0) 3222 fuse_write_update_size(inode, pos); 3223 else if (ret < 0 && offset + count > i_size) 3224 fuse_do_truncate(file); 3225 } 3226 3227 return ret; 3228 } 3229 3230 static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end) 3231 { 3232 int err = filemap_write_and_wait_range(inode->i_mapping, start, end); 3233 3234 if (!err) 3235 fuse_sync_writes(inode); 3236 3237 return err; 3238 } 3239 3240 static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, 3241 loff_t length) 3242 { 3243 struct fuse_file *ff = file->private_data; 3244 struct inode *inode = file_inode(file); 3245 struct fuse_inode *fi = get_fuse_inode(inode); 3246 struct fuse_mount *fm = ff->fm; 3247 FUSE_ARGS(args); 3248 struct fuse_fallocate_in inarg = { 3249 .fh = ff->fh, 3250 .offset = offset, 3251 .length = length, 3252 .mode = mode 3253 }; 3254 int err; 3255 bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || 3256 (mode & FALLOC_FL_PUNCH_HOLE); 3257 3258 bool block_faults = FUSE_IS_DAX(inode) && lock_inode; 3259 3260 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 3261 return -EOPNOTSUPP; 3262 3263 if (fm->fc->no_fallocate) 3264 return -EOPNOTSUPP; 3265 3266 if (lock_inode) { 3267 inode_lock(inode); 3268 if (block_faults) { 3269 down_write(&fi->i_mmap_sem); 3270 err = fuse_dax_break_layouts(inode, 0, 0); 3271 if (err) 3272 goto out; 3273 } 3274 3275 if (mode & FALLOC_FL_PUNCH_HOLE) { 3276 loff_t endbyte = offset + length - 1; 3277 3278 err = fuse_writeback_range(inode, offset, endbyte); 3279 if (err) 3280 goto out; 3281 } 3282 } 3283 3284 if (!(mode & FALLOC_FL_KEEP_SIZE) && 3285 offset + length > i_size_read(inode)) { 3286 err = inode_newsize_ok(inode, offset + length); 3287 if (err) 3288 goto out; 3289 } 3290 3291 if (!(mode & FALLOC_FL_KEEP_SIZE)) 3292 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 3293 3294 args.opcode = FUSE_FALLOCATE; 3295 args.nodeid = ff->nodeid; 3296 args.in_numargs = 1; 3297 args.in_args[0].size = sizeof(inarg); 3298 args.in_args[0].value = &inarg; 3299 err = fuse_simple_request(fm, &args); 3300 if (err == -ENOSYS) { 3301 fm->fc->no_fallocate = 1; 3302 err = -EOPNOTSUPP; 3303 } 3304 if (err) 3305 goto out; 3306 3307 /* we could have extended the file */ 3308 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 3309 bool changed = fuse_write_update_size(inode, offset + length); 3310 3311 if (changed && fm->fc->writeback_cache) 3312 file_update_time(file); 3313 } 3314 3315 if (mode & FALLOC_FL_PUNCH_HOLE) 3316 truncate_pagecache_range(inode, offset, offset + length - 1); 3317 3318 fuse_invalidate_attr(inode); 3319 
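	/*
	 * Common exit: clear the size-unstable flag and release the fault
	 * blocking and inode locks taken above, in reverse order.
	 */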
3320 out: 3321 if (!(mode & FALLOC_FL_KEEP_SIZE)) 3322 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 3323 3324 if (block_faults) 3325 up_write(&fi->i_mmap_sem); 3326 3327 if (lock_inode) 3328 inode_unlock(inode); 3329 3330 return err; 3331 } 3332 3333 static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, 3334 struct file *file_out, loff_t pos_out, 3335 size_t len, unsigned int flags) 3336 { 3337 struct fuse_file *ff_in = file_in->private_data; 3338 struct fuse_file *ff_out = file_out->private_data; 3339 struct inode *inode_in = file_inode(file_in); 3340 struct inode *inode_out = file_inode(file_out); 3341 struct fuse_inode *fi_out = get_fuse_inode(inode_out); 3342 struct fuse_mount *fm = ff_in->fm; 3343 struct fuse_conn *fc = fm->fc; 3344 FUSE_ARGS(args); 3345 struct fuse_copy_file_range_in inarg = { 3346 .fh_in = ff_in->fh, 3347 .off_in = pos_in, 3348 .nodeid_out = ff_out->nodeid, 3349 .fh_out = ff_out->fh, 3350 .off_out = pos_out, 3351 .len = len, 3352 .flags = flags 3353 }; 3354 struct fuse_write_out outarg; 3355 ssize_t err; 3356 /* mark unstable when write-back is not used, and file_out gets 3357 * extended */ 3358 bool is_unstable = (!fc->writeback_cache) && 3359 ((pos_out + len) > inode_out->i_size); 3360 3361 if (fc->no_copy_file_range) 3362 return -EOPNOTSUPP; 3363 3364 if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) 3365 return -EXDEV; 3366 3367 inode_lock(inode_in); 3368 err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1); 3369 inode_unlock(inode_in); 3370 if (err) 3371 return err; 3372 3373 inode_lock(inode_out); 3374 3375 err = file_modified(file_out); 3376 if (err) 3377 goto out; 3378 3379 /* 3380 * Write out dirty pages in the destination file before sending the COPY 3381 * request to userspace. After the request is completed, truncate off 3382 * pages (including partial ones) from the cache that have been copied, 3383 * since these contain stale data at that point. 3384 * 3385 * This should be mostly correct, but if the COPY writes to partial 3386 * pages (at the start or end) and the parts not covered by the COPY are 3387 * written through a memory map after calling fuse_writeback_range(), 3388 * then these partial page modifications will be lost on truncation. 3389 * 3390 * It is unlikely that someone would rely on such mixed style 3391 * modifications. Yet this does give less guarantees than if the 3392 * copying was performed with write(2). 3393 * 3394 * To fix this a i_mmap_sem style lock could be used to prevent new 3395 * faults while the copy is ongoing. 
3396 */ 3397 err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1); 3398 if (err) 3399 goto out; 3400 3401 if (is_unstable) 3402 set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); 3403 3404 args.opcode = FUSE_COPY_FILE_RANGE; 3405 args.nodeid = ff_in->nodeid; 3406 args.in_numargs = 1; 3407 args.in_args[0].size = sizeof(inarg); 3408 args.in_args[0].value = &inarg; 3409 args.out_numargs = 1; 3410 args.out_args[0].size = sizeof(outarg); 3411 args.out_args[0].value = &outarg; 3412 err = fuse_simple_request(fm, &args); 3413 if (err == -ENOSYS) { 3414 fc->no_copy_file_range = 1; 3415 err = -EOPNOTSUPP; 3416 } 3417 if (err) 3418 goto out; 3419 3420 truncate_inode_pages_range(inode_out->i_mapping, 3421 ALIGN_DOWN(pos_out, PAGE_SIZE), 3422 ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); 3423 3424 if (fc->writeback_cache) { 3425 fuse_write_update_size(inode_out, pos_out + outarg.size); 3426 file_update_time(file_out); 3427 } 3428 3429 fuse_invalidate_attr(inode_out); 3430 3431 err = outarg.size; 3432 out: 3433 if (is_unstable) 3434 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); 3435 3436 inode_unlock(inode_out); 3437 file_accessed(file_in); 3438 3439 return err; 3440 } 3441 3442 static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, 3443 struct file *dst_file, loff_t dst_off, 3444 size_t len, unsigned int flags) 3445 { 3446 ssize_t ret; 3447 3448 ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, 3449 len, flags); 3450 3451 if (ret == -EOPNOTSUPP || ret == -EXDEV) 3452 ret = generic_copy_file_range(src_file, src_off, dst_file, 3453 dst_off, len, flags); 3454 return ret; 3455 } 3456 3457 static const struct file_operations fuse_file_operations = { 3458 .llseek = fuse_file_llseek, 3459 .read_iter = fuse_file_read_iter, 3460 .write_iter = fuse_file_write_iter, 3461 .mmap = fuse_file_mmap, 3462 .open = fuse_open, 3463 .flush = fuse_flush, 3464 .release = fuse_release, 3465 .fsync = fuse_fsync, 3466 .lock = fuse_file_lock, 3467 .get_unmapped_area = thp_get_unmapped_area, 3468 .flock = fuse_file_flock, 3469 .splice_read = generic_file_splice_read, 3470 .splice_write = iter_file_splice_write, 3471 .unlocked_ioctl = fuse_file_ioctl, 3472 .compat_ioctl = fuse_file_compat_ioctl, 3473 .poll = fuse_file_poll, 3474 .fallocate = fuse_file_fallocate, 3475 .copy_file_range = fuse_copy_file_range, 3476 }; 3477 3478 static const struct address_space_operations fuse_file_aops = { 3479 .readpage = fuse_readpage, 3480 .readahead = fuse_readahead, 3481 .writepage = fuse_writepage, 3482 .writepages = fuse_writepages, 3483 .launder_page = fuse_launder_page, 3484 .set_page_dirty = __set_page_dirty_nobuffers, 3485 .bmap = fuse_bmap, 3486 .direct_IO = fuse_direct_IO, 3487 .write_begin = fuse_write_begin, 3488 .write_end = fuse_write_end, 3489 }; 3490 3491 void fuse_init_file_inode(struct inode *inode) 3492 { 3493 struct fuse_inode *fi = get_fuse_inode(inode); 3494 3495 inode->i_fop = &fuse_file_operations; 3496 inode->i_data.a_ops = &fuse_file_aops; 3497 3498 INIT_LIST_HEAD(&fi->write_files); 3499 INIT_LIST_HEAD(&fi->queued_writes); 3500 fi->writectr = 0; 3501 init_waitqueue_head(&fi->page_waitq); 3502 fi->writepages = RB_ROOT; 3503 3504 if (IS_ENABLED(CONFIG_FUSE_DAX)) 3505 fuse_dax_inode_init(inode); 3506 } 3507