1 /* 2 FUSE: Filesystem in Userspace 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 4 5 This program can be distributed under the terms of the GNU GPL. 6 See the file COPYING. 7 */ 8 9 #include "fuse_i.h" 10 11 #include <linux/pagemap.h> 12 #include <linux/slab.h> 13 #include <linux/kernel.h> 14 #include <linux/sched.h> 15 #include <linux/module.h> 16 #include <linux/compat.h> 17 #include <linux/swap.h> 18 #include <linux/aio.h> 19 20 static const struct file_operations fuse_direct_io_file_operations; 21 22 static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 23 int opcode, struct fuse_open_out *outargp) 24 { 25 struct fuse_open_in inarg; 26 struct fuse_req *req; 27 int err; 28 29 req = fuse_get_req_nopages(fc); 30 if (IS_ERR(req)) 31 return PTR_ERR(req); 32 33 memset(&inarg, 0, sizeof(inarg)); 34 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); 35 if (!fc->atomic_o_trunc) 36 inarg.flags &= ~O_TRUNC; 37 req->in.h.opcode = opcode; 38 req->in.h.nodeid = nodeid; 39 req->in.numargs = 1; 40 req->in.args[0].size = sizeof(inarg); 41 req->in.args[0].value = &inarg; 42 req->out.numargs = 1; 43 req->out.args[0].size = sizeof(*outargp); 44 req->out.args[0].value = outargp; 45 fuse_request_send(fc, req); 46 err = req->out.h.error; 47 fuse_put_request(fc, req); 48 49 return err; 50 } 51 52 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) 53 { 54 struct fuse_file *ff; 55 56 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); 57 if (unlikely(!ff)) 58 return NULL; 59 60 ff->fc = fc; 61 ff->reserved_req = fuse_request_alloc(0); 62 if (unlikely(!ff->reserved_req)) { 63 kfree(ff); 64 return NULL; 65 } 66 67 INIT_LIST_HEAD(&ff->write_entry); 68 atomic_set(&ff->count, 0); 69 RB_CLEAR_NODE(&ff->polled_node); 70 init_waitqueue_head(&ff->poll_wait); 71 72 spin_lock(&fc->lock); 73 ff->kh = ++fc->khctr; 74 spin_unlock(&fc->lock); 75 76 return ff; 77 } 78 79 void fuse_file_free(struct fuse_file *ff) 80 { 81 fuse_request_free(ff->reserved_req); 82 kfree(ff); 83 } 84 85 struct fuse_file *fuse_file_get(struct fuse_file *ff) 86 { 87 atomic_inc(&ff->count); 88 return ff; 89 } 90 91 static void fuse_release_async(struct work_struct *work) 92 { 93 struct fuse_req *req; 94 struct fuse_conn *fc; 95 struct path path; 96 97 req = container_of(work, struct fuse_req, misc.release.work); 98 path = req->misc.release.path; 99 fc = get_fuse_conn(path.dentry->d_inode); 100 101 fuse_put_request(fc, req); 102 path_put(&path); 103 } 104 105 static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 106 { 107 if (fc->destroy_req) { 108 /* 109 * If this is a fuseblk mount, then it's possible that 110 * releasing the path will result in releasing the 111 * super block and sending the DESTROY request. If 112 * the server is single threaded, this would hang. 113 * For this reason do the path_put() in a separate 114 * thread. 115 */ 116 atomic_inc(&req->count); 117 INIT_WORK(&req->misc.release.work, fuse_release_async); 118 schedule_work(&req->misc.release.work); 119 } else { 120 path_put(&req->misc.release.path); 121 } 122 } 123 124 static void fuse_file_put(struct fuse_file *ff, bool sync) 125 { 126 if (atomic_dec_and_test(&ff->count)) { 127 struct fuse_req *req = ff->reserved_req; 128 129 if (sync) { 130 req->background = 0; 131 fuse_request_send(ff->fc, req); 132 path_put(&req->misc.release.path); 133 fuse_put_request(ff->fc, req); 134 } else { 135 req->end = fuse_release_end; 136 req->background = 1; 137 fuse_request_send_background(ff->fc, req); 138 } 139 kfree(ff); 140 } 141 } 142 143 int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 144 bool isdir) 145 { 146 struct fuse_open_out outarg; 147 struct fuse_file *ff; 148 int err; 149 int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; 150 151 ff = fuse_file_alloc(fc); 152 if (!ff) 153 return -ENOMEM; 154 155 err = fuse_send_open(fc, nodeid, file, opcode, &outarg); 156 if (err) { 157 fuse_file_free(ff); 158 return err; 159 } 160 161 if (isdir) 162 outarg.open_flags &= ~FOPEN_DIRECT_IO; 163 164 ff->fh = outarg.fh; 165 ff->nodeid = nodeid; 166 ff->open_flags = outarg.open_flags; 167 file->private_data = fuse_file_get(ff); 168 169 return 0; 170 } 171 EXPORT_SYMBOL_GPL(fuse_do_open); 172 173 void fuse_finish_open(struct inode *inode, struct file *file) 174 { 175 struct fuse_file *ff = file->private_data; 176 struct fuse_conn *fc = get_fuse_conn(inode); 177 178 if (ff->open_flags & FOPEN_DIRECT_IO) 179 file->f_op = &fuse_direct_io_file_operations; 180 if (!(ff->open_flags & FOPEN_KEEP_CACHE)) 181 invalidate_inode_pages2(inode->i_mapping); 182 if (ff->open_flags & FOPEN_NONSEEKABLE) 183 nonseekable_open(inode, file); 184 if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) { 185 struct fuse_inode *fi = get_fuse_inode(inode); 186 187 spin_lock(&fc->lock); 188 fi->attr_version = ++fc->attr_version; 189 i_size_write(inode, 0); 190 spin_unlock(&fc->lock); 191 fuse_invalidate_attr(inode); 192 } 193 } 194 195 int fuse_open_common(struct inode *inode, struct file *file, bool isdir) 196 { 197 struct fuse_conn *fc = get_fuse_conn(inode); 198 int err; 199 200 err = generic_file_open(inode, file); 201 if (err) 202 return err; 203 204 err = fuse_do_open(fc, get_node_id(inode), file, isdir); 205 if (err) 206 return err; 207 208 fuse_finish_open(inode, file); 209 210 return 0; 211 } 212 213 static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) 214 { 215 struct fuse_conn *fc = ff->fc; 216 struct fuse_req *req = ff->reserved_req; 217 struct fuse_release_in *inarg = &req->misc.release.in; 218 219 spin_lock(&fc->lock); 220 list_del(&ff->write_entry); 221 if (!RB_EMPTY_NODE(&ff->polled_node)) 222 rb_erase(&ff->polled_node, &fc->polled_files); 223 spin_unlock(&fc->lock); 224 225 wake_up_interruptible_all(&ff->poll_wait); 226 227 inarg->fh = ff->fh; 228 inarg->flags = flags; 229 req->in.h.opcode = opcode; 230 req->in.h.nodeid = ff->nodeid; 231 req->in.numargs = 1; 232 req->in.args[0].size = sizeof(struct fuse_release_in); 233 req->in.args[0].value = inarg; 234 } 235 236 void fuse_release_common(struct file *file, int opcode) 237 { 238 struct fuse_file *ff; 239 struct fuse_req *req; 240 241 ff = file->private_data; 242 if (unlikely(!ff)) 243 return; 244 245 req = ff->reserved_req; 246 fuse_prepare_release(ff, file->f_flags, opcode); 247 248 if (ff->flock) { 249 struct fuse_release_in *inarg = &req->misc.release.in; 250 inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; 251 inarg->lock_owner = fuse_lock_owner_id(ff->fc, 252 (fl_owner_t) file); 253 } 254 /* Hold vfsmount and dentry until release is finished */ 255 path_get(&file->f_path); 256 req->misc.release.path = file->f_path; 257 258 /* 259 * Normally this will send the RELEASE request, however if 260 * some asynchronous READ or WRITE requests are outstanding, 261 * the sending will be delayed. 262 * 263 * Make the release synchronous if this is a fuseblk mount, 264 * synchronous RELEASE is allowed (and desirable) in this case 265 * because the server can be trusted not to screw up. 266 */ 267 fuse_file_put(ff, ff->fc->destroy_req != NULL); 268 } 269 270 static int fuse_open(struct inode *inode, struct file *file) 271 { 272 return fuse_open_common(inode, file, false); 273 } 274 275 static int fuse_release(struct inode *inode, struct file *file) 276 { 277 fuse_release_common(file, FUSE_RELEASE); 278 279 /* return value is ignored by VFS */ 280 return 0; 281 } 282 283 void fuse_sync_release(struct fuse_file *ff, int flags) 284 { 285 WARN_ON(atomic_read(&ff->count) > 1); 286 fuse_prepare_release(ff, flags, FUSE_RELEASE); 287 ff->reserved_req->force = 1; 288 ff->reserved_req->background = 0; 289 fuse_request_send(ff->fc, ff->reserved_req); 290 fuse_put_request(ff->fc, ff->reserved_req); 291 kfree(ff); 292 } 293 EXPORT_SYMBOL_GPL(fuse_sync_release); 294 295 /* 296 * Scramble the ID space with XTEA, so that the value of the files_struct 297 * pointer is not exposed to userspace. 298 */ 299 u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) 300 { 301 u32 *k = fc->scramble_key; 302 u64 v = (unsigned long) id; 303 u32 v0 = v; 304 u32 v1 = v >> 32; 305 u32 sum = 0; 306 int i; 307 308 for (i = 0; i < 32; i++) { 309 v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]); 310 sum += 0x9E3779B9; 311 v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]); 312 } 313 314 return (u64) v0 + ((u64) v1 << 32); 315 } 316 317 /* 318 * Check if page is under writeback 319 * 320 * This is currently done by walking the list of writepage requests 321 * for the inode, which can be pretty inefficient. 322 */ 323 static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) 324 { 325 struct fuse_conn *fc = get_fuse_conn(inode); 326 struct fuse_inode *fi = get_fuse_inode(inode); 327 struct fuse_req *req; 328 bool found = false; 329 330 spin_lock(&fc->lock); 331 list_for_each_entry(req, &fi->writepages, writepages_entry) { 332 pgoff_t curr_index; 333 334 BUG_ON(req->inode != inode); 335 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; 336 if (curr_index == index) { 337 found = true; 338 break; 339 } 340 } 341 spin_unlock(&fc->lock); 342 343 return found; 344 } 345 346 /* 347 * Wait for page writeback to be completed. 348 * 349 * Since fuse doesn't rely on the VM writeback tracking, this has to 350 * use some other means. 351 */ 352 static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) 353 { 354 struct fuse_inode *fi = get_fuse_inode(inode); 355 356 wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index)); 357 return 0; 358 } 359 360 static int fuse_flush(struct file *file, fl_owner_t id) 361 { 362 struct inode *inode = file_inode(file); 363 struct fuse_conn *fc = get_fuse_conn(inode); 364 struct fuse_file *ff = file->private_data; 365 struct fuse_req *req; 366 struct fuse_flush_in inarg; 367 int err; 368 369 if (is_bad_inode(inode)) 370 return -EIO; 371 372 if (fc->no_flush) 373 return 0; 374 375 req = fuse_get_req_nofail_nopages(fc, file); 376 memset(&inarg, 0, sizeof(inarg)); 377 inarg.fh = ff->fh; 378 inarg.lock_owner = fuse_lock_owner_id(fc, id); 379 req->in.h.opcode = FUSE_FLUSH; 380 req->in.h.nodeid = get_node_id(inode); 381 req->in.numargs = 1; 382 req->in.args[0].size = sizeof(inarg); 383 req->in.args[0].value = &inarg; 384 req->force = 1; 385 fuse_request_send(fc, req); 386 err = req->out.h.error; 387 fuse_put_request(fc, req); 388 if (err == -ENOSYS) { 389 fc->no_flush = 1; 390 err = 0; 391 } 392 return err; 393 } 394 395 /* 396 * Wait for all pending writepages on the inode to finish. 397 * 398 * This is currently done by blocking further writes with FUSE_NOWRITE 399 * and waiting for all sent writes to complete. 400 * 401 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage 402 * could conflict with truncation. 403 */ 404 static void fuse_sync_writes(struct inode *inode) 405 { 406 fuse_set_nowrite(inode); 407 fuse_release_nowrite(inode); 408 } 409 410 int fuse_fsync_common(struct file *file, loff_t start, loff_t end, 411 int datasync, int isdir) 412 { 413 struct inode *inode = file->f_mapping->host; 414 struct fuse_conn *fc = get_fuse_conn(inode); 415 struct fuse_file *ff = file->private_data; 416 struct fuse_req *req; 417 struct fuse_fsync_in inarg; 418 int err; 419 420 if (is_bad_inode(inode)) 421 return -EIO; 422 423 err = filemap_write_and_wait_range(inode->i_mapping, start, end); 424 if (err) 425 return err; 426 427 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 428 return 0; 429 430 mutex_lock(&inode->i_mutex); 431 432 /* 433 * Start writeback against all dirty pages of the inode, then 434 * wait for all outstanding writes, before sending the FSYNC 435 * request. 436 */ 437 err = write_inode_now(inode, 0); 438 if (err) 439 goto out; 440 441 fuse_sync_writes(inode); 442 443 req = fuse_get_req_nopages(fc); 444 if (IS_ERR(req)) { 445 err = PTR_ERR(req); 446 goto out; 447 } 448 449 memset(&inarg, 0, sizeof(inarg)); 450 inarg.fh = ff->fh; 451 inarg.fsync_flags = datasync ? 1 : 0; 452 req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC; 453 req->in.h.nodeid = get_node_id(inode); 454 req->in.numargs = 1; 455 req->in.args[0].size = sizeof(inarg); 456 req->in.args[0].value = &inarg; 457 fuse_request_send(fc, req); 458 err = req->out.h.error; 459 fuse_put_request(fc, req); 460 if (err == -ENOSYS) { 461 if (isdir) 462 fc->no_fsyncdir = 1; 463 else 464 fc->no_fsync = 1; 465 err = 0; 466 } 467 out: 468 mutex_unlock(&inode->i_mutex); 469 return err; 470 } 471 472 static int fuse_fsync(struct file *file, loff_t start, loff_t end, 473 int datasync) 474 { 475 return fuse_fsync_common(file, start, end, datasync, 0); 476 } 477 478 void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, 479 size_t count, int opcode) 480 { 481 struct fuse_read_in *inarg = &req->misc.read.in; 482 struct fuse_file *ff = file->private_data; 483 484 inarg->fh = ff->fh; 485 inarg->offset = pos; 486 inarg->size = count; 487 inarg->flags = file->f_flags; 488 req->in.h.opcode = opcode; 489 req->in.h.nodeid = ff->nodeid; 490 req->in.numargs = 1; 491 req->in.args[0].size = sizeof(struct fuse_read_in); 492 req->in.args[0].value = inarg; 493 req->out.argvar = 1; 494 req->out.numargs = 1; 495 req->out.args[0].size = count; 496 } 497 498 static void fuse_release_user_pages(struct fuse_req *req, int write) 499 { 500 unsigned i; 501 502 for (i = 0; i < req->num_pages; i++) { 503 struct page *page = req->pages[i]; 504 if (write) 505 set_page_dirty_lock(page); 506 put_page(page); 507 } 508 } 509 510 /** 511 * In case of short read, the caller sets 'pos' to the position of 512 * actual end of fuse request in IO request. Otherwise, if bytes_requested 513 * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1. 514 * 515 * An example: 516 * User requested DIO read of 64K. It was splitted into two 32K fuse requests, 517 * both submitted asynchronously. The first of them was ACKed by userspace as 518 * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The 519 * second request was ACKed as short, e.g. only 1K was read, resulting in 520 * pos == 33K. 521 * 522 * Thus, when all fuse requests are completed, the minimal non-negative 'pos' 523 * will be equal to the length of the longest contiguous fragment of 524 * transferred data starting from the beginning of IO request. 525 */ 526 static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) 527 { 528 int left; 529 530 spin_lock(&io->lock); 531 if (err) 532 io->err = io->err ? : err; 533 else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes)) 534 io->bytes = pos; 535 536 left = --io->reqs; 537 spin_unlock(&io->lock); 538 539 if (!left) { 540 long res; 541 542 if (io->err) 543 res = io->err; 544 else if (io->bytes >= 0 && io->write) 545 res = -EIO; 546 else { 547 res = io->bytes < 0 ? io->size : io->bytes; 548 549 if (!is_sync_kiocb(io->iocb)) { 550 struct path *path = &io->iocb->ki_filp->f_path; 551 struct inode *inode = path->dentry->d_inode; 552 struct fuse_conn *fc = get_fuse_conn(inode); 553 struct fuse_inode *fi = get_fuse_inode(inode); 554 555 spin_lock(&fc->lock); 556 fi->attr_version = ++fc->attr_version; 557 spin_unlock(&fc->lock); 558 } 559 } 560 561 aio_complete(io->iocb, res, 0); 562 kfree(io); 563 } 564 } 565 566 static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) 567 { 568 struct fuse_io_priv *io = req->io; 569 ssize_t pos = -1; 570 571 fuse_release_user_pages(req, !io->write); 572 573 if (io->write) { 574 if (req->misc.write.in.size != req->misc.write.out.size) 575 pos = req->misc.write.in.offset - io->offset + 576 req->misc.write.out.size; 577 } else { 578 if (req->misc.read.in.size != req->out.args[0].size) 579 pos = req->misc.read.in.offset - io->offset + 580 req->out.args[0].size; 581 } 582 583 fuse_aio_complete(io, req->out.h.error, pos); 584 } 585 586 static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req, 587 size_t num_bytes, struct fuse_io_priv *io) 588 { 589 spin_lock(&io->lock); 590 io->size += num_bytes; 591 io->reqs++; 592 spin_unlock(&io->lock); 593 594 req->io = io; 595 req->end = fuse_aio_complete_req; 596 597 __fuse_get_request(req); 598 fuse_request_send_background(fc, req); 599 600 return num_bytes; 601 } 602 603 static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io, 604 loff_t pos, size_t count, fl_owner_t owner) 605 { 606 struct file *file = io->file; 607 struct fuse_file *ff = file->private_data; 608 struct fuse_conn *fc = ff->fc; 609 610 fuse_read_fill(req, file, pos, count, FUSE_READ); 611 if (owner != NULL) { 612 struct fuse_read_in *inarg = &req->misc.read.in; 613 614 inarg->read_flags |= FUSE_READ_LOCKOWNER; 615 inarg->lock_owner = fuse_lock_owner_id(fc, owner); 616 } 617 618 if (io->async) 619 return fuse_async_req_send(fc, req, count, io); 620 621 fuse_request_send(fc, req); 622 return req->out.args[0].size; 623 } 624 625 static void fuse_read_update_size(struct inode *inode, loff_t size, 626 u64 attr_ver) 627 { 628 struct fuse_conn *fc = get_fuse_conn(inode); 629 struct fuse_inode *fi = get_fuse_inode(inode); 630 631 spin_lock(&fc->lock); 632 if (attr_ver == fi->attr_version && size < inode->i_size) { 633 fi->attr_version = ++fc->attr_version; 634 i_size_write(inode, size); 635 } 636 spin_unlock(&fc->lock); 637 } 638 639 static int fuse_readpage(struct file *file, struct page *page) 640 { 641 struct fuse_io_priv io = { .async = 0, .file = file }; 642 struct inode *inode = page->mapping->host; 643 struct fuse_conn *fc = get_fuse_conn(inode); 644 struct fuse_req *req; 645 size_t num_read; 646 loff_t pos = page_offset(page); 647 size_t count = PAGE_CACHE_SIZE; 648 u64 attr_ver; 649 int err; 650 651 err = -EIO; 652 if (is_bad_inode(inode)) 653 goto out; 654 655 /* 656 * Page writeback can extend beyond the lifetime of the 657 * page-cache page, so make sure we read a properly synced 658 * page. 659 */ 660 fuse_wait_on_page_writeback(inode, page->index); 661 662 req = fuse_get_req(fc, 1); 663 err = PTR_ERR(req); 664 if (IS_ERR(req)) 665 goto out; 666 667 attr_ver = fuse_get_attr_version(fc); 668 669 req->out.page_zeroing = 1; 670 req->out.argpages = 1; 671 req->num_pages = 1; 672 req->pages[0] = page; 673 req->page_descs[0].length = count; 674 num_read = fuse_send_read(req, &io, pos, count, NULL); 675 err = req->out.h.error; 676 fuse_put_request(fc, req); 677 678 if (!err) { 679 /* 680 * Short read means EOF. If file size is larger, truncate it 681 */ 682 if (num_read < count) 683 fuse_read_update_size(inode, pos + num_read, attr_ver); 684 685 SetPageUptodate(page); 686 } 687 688 fuse_invalidate_attr(inode); /* atime changed */ 689 out: 690 unlock_page(page); 691 return err; 692 } 693 694 static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) 695 { 696 int i; 697 size_t count = req->misc.read.in.size; 698 size_t num_read = req->out.args[0].size; 699 struct address_space *mapping = NULL; 700 701 for (i = 0; mapping == NULL && i < req->num_pages; i++) 702 mapping = req->pages[i]->mapping; 703 704 if (mapping) { 705 struct inode *inode = mapping->host; 706 707 /* 708 * Short read means EOF. If file size is larger, truncate it 709 */ 710 if (!req->out.h.error && num_read < count) { 711 loff_t pos; 712 713 pos = page_offset(req->pages[0]) + num_read; 714 fuse_read_update_size(inode, pos, 715 req->misc.read.attr_ver); 716 } 717 fuse_invalidate_attr(inode); /* atime changed */ 718 } 719 720 for (i = 0; i < req->num_pages; i++) { 721 struct page *page = req->pages[i]; 722 if (!req->out.h.error) 723 SetPageUptodate(page); 724 else 725 SetPageError(page); 726 unlock_page(page); 727 page_cache_release(page); 728 } 729 if (req->ff) 730 fuse_file_put(req->ff, false); 731 } 732 733 static void fuse_send_readpages(struct fuse_req *req, struct file *file) 734 { 735 struct fuse_file *ff = file->private_data; 736 struct fuse_conn *fc = ff->fc; 737 loff_t pos = page_offset(req->pages[0]); 738 size_t count = req->num_pages << PAGE_CACHE_SHIFT; 739 740 req->out.argpages = 1; 741 req->out.page_zeroing = 1; 742 req->out.page_replace = 1; 743 fuse_read_fill(req, file, pos, count, FUSE_READ); 744 req->misc.read.attr_ver = fuse_get_attr_version(fc); 745 if (fc->async_read) { 746 req->ff = fuse_file_get(ff); 747 req->end = fuse_readpages_end; 748 fuse_request_send_background(fc, req); 749 } else { 750 fuse_request_send(fc, req); 751 fuse_readpages_end(fc, req); 752 fuse_put_request(fc, req); 753 } 754 } 755 756 struct fuse_fill_data { 757 struct fuse_req *req; 758 struct file *file; 759 struct inode *inode; 760 unsigned nr_pages; 761 }; 762 763 static int fuse_readpages_fill(void *_data, struct page *page) 764 { 765 struct fuse_fill_data *data = _data; 766 struct fuse_req *req = data->req; 767 struct inode *inode = data->inode; 768 struct fuse_conn *fc = get_fuse_conn(inode); 769 770 fuse_wait_on_page_writeback(inode, page->index); 771 772 if (req->num_pages && 773 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 774 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 775 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 776 int nr_alloc = min_t(unsigned, data->nr_pages, 777 FUSE_MAX_PAGES_PER_REQ); 778 fuse_send_readpages(req, data->file); 779 if (fc->async_read) 780 req = fuse_get_req_for_background(fc, nr_alloc); 781 else 782 req = fuse_get_req(fc, nr_alloc); 783 784 data->req = req; 785 if (IS_ERR(req)) { 786 unlock_page(page); 787 return PTR_ERR(req); 788 } 789 } 790 791 if (WARN_ON(req->num_pages >= req->max_pages)) { 792 fuse_put_request(fc, req); 793 return -EIO; 794 } 795 796 page_cache_get(page); 797 req->pages[req->num_pages] = page; 798 req->page_descs[req->num_pages].length = PAGE_SIZE; 799 req->num_pages++; 800 data->nr_pages--; 801 return 0; 802 } 803 804 static int fuse_readpages(struct file *file, struct address_space *mapping, 805 struct list_head *pages, unsigned nr_pages) 806 { 807 struct inode *inode = mapping->host; 808 struct fuse_conn *fc = get_fuse_conn(inode); 809 struct fuse_fill_data data; 810 int err; 811 int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ); 812 813 err = -EIO; 814 if (is_bad_inode(inode)) 815 goto out; 816 817 data.file = file; 818 data.inode = inode; 819 if (fc->async_read) 820 data.req = fuse_get_req_for_background(fc, nr_alloc); 821 else 822 data.req = fuse_get_req(fc, nr_alloc); 823 data.nr_pages = nr_pages; 824 err = PTR_ERR(data.req); 825 if (IS_ERR(data.req)) 826 goto out; 827 828 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); 829 if (!err) { 830 if (data.req->num_pages) 831 fuse_send_readpages(data.req, file); 832 else 833 fuse_put_request(fc, data.req); 834 } 835 out: 836 return err; 837 } 838 839 static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, 840 unsigned long nr_segs, loff_t pos) 841 { 842 struct inode *inode = iocb->ki_filp->f_mapping->host; 843 struct fuse_conn *fc = get_fuse_conn(inode); 844 845 /* 846 * In auto invalidate mode, always update attributes on read. 847 * Otherwise, only update if we attempt to read past EOF (to ensure 848 * i_size is up to date). 849 */ 850 if (fc->auto_inval_data || 851 (pos + iov_length(iov, nr_segs) > i_size_read(inode))) { 852 int err; 853 err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL); 854 if (err) 855 return err; 856 } 857 858 return generic_file_aio_read(iocb, iov, nr_segs, pos); 859 } 860 861 static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, 862 loff_t pos, size_t count) 863 { 864 struct fuse_write_in *inarg = &req->misc.write.in; 865 struct fuse_write_out *outarg = &req->misc.write.out; 866 867 inarg->fh = ff->fh; 868 inarg->offset = pos; 869 inarg->size = count; 870 req->in.h.opcode = FUSE_WRITE; 871 req->in.h.nodeid = ff->nodeid; 872 req->in.numargs = 2; 873 if (ff->fc->minor < 9) 874 req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; 875 else 876 req->in.args[0].size = sizeof(struct fuse_write_in); 877 req->in.args[0].value = inarg; 878 req->in.args[1].size = count; 879 req->out.numargs = 1; 880 req->out.args[0].size = sizeof(struct fuse_write_out); 881 req->out.args[0].value = outarg; 882 } 883 884 static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, 885 loff_t pos, size_t count, fl_owner_t owner) 886 { 887 struct file *file = io->file; 888 struct fuse_file *ff = file->private_data; 889 struct fuse_conn *fc = ff->fc; 890 struct fuse_write_in *inarg = &req->misc.write.in; 891 892 fuse_write_fill(req, ff, pos, count); 893 inarg->flags = file->f_flags; 894 if (owner != NULL) { 895 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 896 inarg->lock_owner = fuse_lock_owner_id(fc, owner); 897 } 898 899 if (io->async) 900 return fuse_async_req_send(fc, req, count, io); 901 902 fuse_request_send(fc, req); 903 return req->misc.write.out.size; 904 } 905 906 void fuse_write_update_size(struct inode *inode, loff_t pos) 907 { 908 struct fuse_conn *fc = get_fuse_conn(inode); 909 struct fuse_inode *fi = get_fuse_inode(inode); 910 911 spin_lock(&fc->lock); 912 fi->attr_version = ++fc->attr_version; 913 if (pos > inode->i_size) 914 i_size_write(inode, pos); 915 spin_unlock(&fc->lock); 916 } 917 918 static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, 919 struct inode *inode, loff_t pos, 920 size_t count) 921 { 922 size_t res; 923 unsigned offset; 924 unsigned i; 925 struct fuse_io_priv io = { .async = 0, .file = file }; 926 927 for (i = 0; i < req->num_pages; i++) 928 fuse_wait_on_page_writeback(inode, req->pages[i]->index); 929 930 res = fuse_send_write(req, &io, pos, count, NULL); 931 932 offset = req->page_descs[0].offset; 933 count = res; 934 for (i = 0; i < req->num_pages; i++) { 935 struct page *page = req->pages[i]; 936 937 if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE) 938 SetPageUptodate(page); 939 940 if (count > PAGE_CACHE_SIZE - offset) 941 count -= PAGE_CACHE_SIZE - offset; 942 else 943 count = 0; 944 offset = 0; 945 946 unlock_page(page); 947 page_cache_release(page); 948 } 949 950 return res; 951 } 952 953 static ssize_t fuse_fill_write_pages(struct fuse_req *req, 954 struct address_space *mapping, 955 struct iov_iter *ii, loff_t pos) 956 { 957 struct fuse_conn *fc = get_fuse_conn(mapping->host); 958 unsigned offset = pos & (PAGE_CACHE_SIZE - 1); 959 size_t count = 0; 960 int err; 961 962 req->in.argpages = 1; 963 req->page_descs[0].offset = offset; 964 965 do { 966 size_t tmp; 967 struct page *page; 968 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 969 size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset, 970 iov_iter_count(ii)); 971 972 bytes = min_t(size_t, bytes, fc->max_write - count); 973 974 again: 975 err = -EFAULT; 976 if (iov_iter_fault_in_readable(ii, bytes)) 977 break; 978 979 err = -ENOMEM; 980 page = grab_cache_page_write_begin(mapping, index, 0); 981 if (!page) 982 break; 983 984 if (mapping_writably_mapped(mapping)) 985 flush_dcache_page(page); 986 987 pagefault_disable(); 988 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); 989 pagefault_enable(); 990 flush_dcache_page(page); 991 992 mark_page_accessed(page); 993 994 if (!tmp) { 995 unlock_page(page); 996 page_cache_release(page); 997 bytes = min(bytes, iov_iter_single_seg_count(ii)); 998 goto again; 999 } 1000 1001 err = 0; 1002 req->pages[req->num_pages] = page; 1003 req->page_descs[req->num_pages].length = tmp; 1004 req->num_pages++; 1005 1006 iov_iter_advance(ii, tmp); 1007 count += tmp; 1008 pos += tmp; 1009 offset += tmp; 1010 if (offset == PAGE_CACHE_SIZE) 1011 offset = 0; 1012 1013 if (!fc->big_writes) 1014 break; 1015 } while (iov_iter_count(ii) && count < fc->max_write && 1016 req->num_pages < req->max_pages && offset == 0); 1017 1018 return count > 0 ? count : err; 1019 } 1020 1021 static inline unsigned fuse_wr_pages(loff_t pos, size_t len) 1022 { 1023 return min_t(unsigned, 1024 ((pos + len - 1) >> PAGE_CACHE_SHIFT) - 1025 (pos >> PAGE_CACHE_SHIFT) + 1, 1026 FUSE_MAX_PAGES_PER_REQ); 1027 } 1028 1029 static ssize_t fuse_perform_write(struct file *file, 1030 struct address_space *mapping, 1031 struct iov_iter *ii, loff_t pos) 1032 { 1033 struct inode *inode = mapping->host; 1034 struct fuse_conn *fc = get_fuse_conn(inode); 1035 int err = 0; 1036 ssize_t res = 0; 1037 1038 if (is_bad_inode(inode)) 1039 return -EIO; 1040 1041 do { 1042 struct fuse_req *req; 1043 ssize_t count; 1044 unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii)); 1045 1046 req = fuse_get_req(fc, nr_pages); 1047 if (IS_ERR(req)) { 1048 err = PTR_ERR(req); 1049 break; 1050 } 1051 1052 count = fuse_fill_write_pages(req, mapping, ii, pos); 1053 if (count <= 0) { 1054 err = count; 1055 } else { 1056 size_t num_written; 1057 1058 num_written = fuse_send_write_pages(req, file, inode, 1059 pos, count); 1060 err = req->out.h.error; 1061 if (!err) { 1062 res += num_written; 1063 pos += num_written; 1064 1065 /* break out of the loop on short write */ 1066 if (num_written != count) 1067 err = -EIO; 1068 } 1069 } 1070 fuse_put_request(fc, req); 1071 } while (!err && iov_iter_count(ii)); 1072 1073 if (res > 0) 1074 fuse_write_update_size(inode, pos); 1075 1076 fuse_invalidate_attr(inode); 1077 1078 return res > 0 ? res : err; 1079 } 1080 1081 static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 1082 unsigned long nr_segs, loff_t pos) 1083 { 1084 struct file *file = iocb->ki_filp; 1085 struct address_space *mapping = file->f_mapping; 1086 size_t count = 0; 1087 size_t ocount = 0; 1088 ssize_t written = 0; 1089 ssize_t written_buffered = 0; 1090 struct inode *inode = mapping->host; 1091 ssize_t err; 1092 struct iov_iter i; 1093 loff_t endbyte = 0; 1094 1095 WARN_ON(iocb->ki_pos != pos); 1096 1097 ocount = 0; 1098 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); 1099 if (err) 1100 return err; 1101 1102 count = ocount; 1103 mutex_lock(&inode->i_mutex); 1104 1105 /* We can write back this queue in page reclaim */ 1106 current->backing_dev_info = mapping->backing_dev_info; 1107 1108 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1109 if (err) 1110 goto out; 1111 1112 if (count == 0) 1113 goto out; 1114 1115 err = file_remove_suid(file); 1116 if (err) 1117 goto out; 1118 1119 err = file_update_time(file); 1120 if (err) 1121 goto out; 1122 1123 if (file->f_flags & O_DIRECT) { 1124 written = generic_file_direct_write(iocb, iov, &nr_segs, 1125 pos, &iocb->ki_pos, 1126 count, ocount); 1127 if (written < 0 || written == count) 1128 goto out; 1129 1130 pos += written; 1131 count -= written; 1132 1133 iov_iter_init(&i, iov, nr_segs, count, written); 1134 written_buffered = fuse_perform_write(file, mapping, &i, pos); 1135 if (written_buffered < 0) { 1136 err = written_buffered; 1137 goto out; 1138 } 1139 endbyte = pos + written_buffered - 1; 1140 1141 err = filemap_write_and_wait_range(file->f_mapping, pos, 1142 endbyte); 1143 if (err) 1144 goto out; 1145 1146 invalidate_mapping_pages(file->f_mapping, 1147 pos >> PAGE_CACHE_SHIFT, 1148 endbyte >> PAGE_CACHE_SHIFT); 1149 1150 written += written_buffered; 1151 iocb->ki_pos = pos + written_buffered; 1152 } else { 1153 iov_iter_init(&i, iov, nr_segs, count, 0); 1154 written = fuse_perform_write(file, mapping, &i, pos); 1155 if (written >= 0) 1156 iocb->ki_pos = pos + written; 1157 } 1158 out: 1159 current->backing_dev_info = NULL; 1160 mutex_unlock(&inode->i_mutex); 1161 1162 return written ? written : err; 1163 } 1164 1165 static inline void fuse_page_descs_length_init(struct fuse_req *req, 1166 unsigned index, unsigned nr_pages) 1167 { 1168 int i; 1169 1170 for (i = index; i < index + nr_pages; i++) 1171 req->page_descs[i].length = PAGE_SIZE - 1172 req->page_descs[i].offset; 1173 } 1174 1175 static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii) 1176 { 1177 return (unsigned long)ii->iov->iov_base + ii->iov_offset; 1178 } 1179 1180 static inline size_t fuse_get_frag_size(const struct iov_iter *ii, 1181 size_t max_size) 1182 { 1183 return min(iov_iter_single_seg_count(ii), max_size); 1184 } 1185 1186 static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, 1187 size_t *nbytesp, int write) 1188 { 1189 size_t nbytes = 0; /* # bytes already packed in req */ 1190 1191 /* Special case for kernel I/O: can copy directly into the buffer */ 1192 if (segment_eq(get_fs(), KERNEL_DS)) { 1193 unsigned long user_addr = fuse_get_user_addr(ii); 1194 size_t frag_size = fuse_get_frag_size(ii, *nbytesp); 1195 1196 if (write) 1197 req->in.args[1].value = (void *) user_addr; 1198 else 1199 req->out.args[0].value = (void *) user_addr; 1200 1201 iov_iter_advance(ii, frag_size); 1202 *nbytesp = frag_size; 1203 return 0; 1204 } 1205 1206 while (nbytes < *nbytesp && req->num_pages < req->max_pages) { 1207 unsigned npages; 1208 unsigned long user_addr = fuse_get_user_addr(ii); 1209 unsigned offset = user_addr & ~PAGE_MASK; 1210 size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes); 1211 int ret; 1212 1213 unsigned n = req->max_pages - req->num_pages; 1214 frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT); 1215 1216 npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 1217 npages = clamp(npages, 1U, n); 1218 1219 ret = get_user_pages_fast(user_addr, npages, !write, 1220 &req->pages[req->num_pages]); 1221 if (ret < 0) 1222 return ret; 1223 1224 npages = ret; 1225 frag_size = min_t(size_t, frag_size, 1226 (npages << PAGE_SHIFT) - offset); 1227 iov_iter_advance(ii, frag_size); 1228 1229 req->page_descs[req->num_pages].offset = offset; 1230 fuse_page_descs_length_init(req, req->num_pages, npages); 1231 1232 req->num_pages += npages; 1233 req->page_descs[req->num_pages - 1].length -= 1234 (npages << PAGE_SHIFT) - offset - frag_size; 1235 1236 nbytes += frag_size; 1237 } 1238 1239 if (write) 1240 req->in.argpages = 1; 1241 else 1242 req->out.argpages = 1; 1243 1244 *nbytesp = nbytes; 1245 1246 return 0; 1247 } 1248 1249 static inline int fuse_iter_npages(const struct iov_iter *ii_p) 1250 { 1251 struct iov_iter ii = *ii_p; 1252 int npages = 0; 1253 1254 while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) { 1255 unsigned long user_addr = fuse_get_user_addr(&ii); 1256 unsigned offset = user_addr & ~PAGE_MASK; 1257 size_t frag_size = iov_iter_single_seg_count(&ii); 1258 1259 npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 1260 iov_iter_advance(&ii, frag_size); 1261 } 1262 1263 return min(npages, FUSE_MAX_PAGES_PER_REQ); 1264 } 1265 1266 ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, 1267 unsigned long nr_segs, size_t count, loff_t *ppos, 1268 int write) 1269 { 1270 struct file *file = io->file; 1271 struct fuse_file *ff = file->private_data; 1272 struct fuse_conn *fc = ff->fc; 1273 size_t nmax = write ? fc->max_write : fc->max_read; 1274 loff_t pos = *ppos; 1275 ssize_t res = 0; 1276 struct fuse_req *req; 1277 struct iov_iter ii; 1278 1279 iov_iter_init(&ii, iov, nr_segs, count, 0); 1280 1281 req = fuse_get_req(fc, fuse_iter_npages(&ii)); 1282 if (IS_ERR(req)) 1283 return PTR_ERR(req); 1284 1285 while (count) { 1286 size_t nres; 1287 fl_owner_t owner = current->files; 1288 size_t nbytes = min(count, nmax); 1289 int err = fuse_get_user_pages(req, &ii, &nbytes, write); 1290 if (err) { 1291 res = err; 1292 break; 1293 } 1294 1295 if (write) 1296 nres = fuse_send_write(req, io, pos, nbytes, owner); 1297 else 1298 nres = fuse_send_read(req, io, pos, nbytes, owner); 1299 1300 if (!io->async) 1301 fuse_release_user_pages(req, !write); 1302 if (req->out.h.error) { 1303 if (!res) 1304 res = req->out.h.error; 1305 break; 1306 } else if (nres > nbytes) { 1307 res = -EIO; 1308 break; 1309 } 1310 count -= nres; 1311 res += nres; 1312 pos += nres; 1313 if (nres != nbytes) 1314 break; 1315 if (count) { 1316 fuse_put_request(fc, req); 1317 req = fuse_get_req(fc, fuse_iter_npages(&ii)); 1318 if (IS_ERR(req)) 1319 break; 1320 } 1321 } 1322 if (!IS_ERR(req)) 1323 fuse_put_request(fc, req); 1324 if (res > 0) 1325 *ppos = pos; 1326 1327 return res; 1328 } 1329 EXPORT_SYMBOL_GPL(fuse_direct_io); 1330 1331 static ssize_t __fuse_direct_read(struct fuse_io_priv *io, 1332 const struct iovec *iov, 1333 unsigned long nr_segs, loff_t *ppos, 1334 size_t count) 1335 { 1336 ssize_t res; 1337 struct file *file = io->file; 1338 struct inode *inode = file_inode(file); 1339 1340 if (is_bad_inode(inode)) 1341 return -EIO; 1342 1343 res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0); 1344 1345 fuse_invalidate_attr(inode); 1346 1347 return res; 1348 } 1349 1350 static ssize_t fuse_direct_read(struct file *file, char __user *buf, 1351 size_t count, loff_t *ppos) 1352 { 1353 struct fuse_io_priv io = { .async = 0, .file = file }; 1354 struct iovec iov = { .iov_base = buf, .iov_len = count }; 1355 return __fuse_direct_read(&io, &iov, 1, ppos, count); 1356 } 1357 1358 static ssize_t __fuse_direct_write(struct fuse_io_priv *io, 1359 const struct iovec *iov, 1360 unsigned long nr_segs, loff_t *ppos) 1361 { 1362 struct file *file = io->file; 1363 struct inode *inode = file_inode(file); 1364 size_t count = iov_length(iov, nr_segs); 1365 ssize_t res; 1366 1367 res = generic_write_checks(file, ppos, &count, 0); 1368 if (!res) 1369 res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); 1370 1371 fuse_invalidate_attr(inode); 1372 1373 return res; 1374 } 1375 1376 static ssize_t fuse_direct_write(struct file *file, const char __user *buf, 1377 size_t count, loff_t *ppos) 1378 { 1379 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; 1380 struct inode *inode = file_inode(file); 1381 ssize_t res; 1382 struct fuse_io_priv io = { .async = 0, .file = file }; 1383 1384 if (is_bad_inode(inode)) 1385 return -EIO; 1386 1387 /* Don't allow parallel writes to the same file */ 1388 mutex_lock(&inode->i_mutex); 1389 res = __fuse_direct_write(&io, &iov, 1, ppos); 1390 if (res > 0) 1391 fuse_write_update_size(inode, *ppos); 1392 mutex_unlock(&inode->i_mutex); 1393 1394 return res; 1395 } 1396 1397 static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) 1398 { 1399 __free_page(req->pages[0]); 1400 fuse_file_put(req->ff, false); 1401 } 1402 1403 static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) 1404 { 1405 struct inode *inode = req->inode; 1406 struct fuse_inode *fi = get_fuse_inode(inode); 1407 struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; 1408 1409 list_del(&req->writepages_entry); 1410 dec_bdi_stat(bdi, BDI_WRITEBACK); 1411 dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP); 1412 bdi_writeout_inc(bdi); 1413 wake_up(&fi->page_waitq); 1414 } 1415 1416 /* Called under fc->lock, may release and reacquire it */ 1417 static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) 1418 __releases(fc->lock) 1419 __acquires(fc->lock) 1420 { 1421 struct fuse_inode *fi = get_fuse_inode(req->inode); 1422 loff_t size = i_size_read(req->inode); 1423 struct fuse_write_in *inarg = &req->misc.write.in; 1424 1425 if (!fc->connected) 1426 goto out_free; 1427 1428 if (inarg->offset + PAGE_CACHE_SIZE <= size) { 1429 inarg->size = PAGE_CACHE_SIZE; 1430 } else if (inarg->offset < size) { 1431 inarg->size = size & (PAGE_CACHE_SIZE - 1); 1432 } else { 1433 /* Got truncated off completely */ 1434 goto out_free; 1435 } 1436 1437 req->in.args[1].size = inarg->size; 1438 fi->writectr++; 1439 fuse_request_send_background_locked(fc, req); 1440 return; 1441 1442 out_free: 1443 fuse_writepage_finish(fc, req); 1444 spin_unlock(&fc->lock); 1445 fuse_writepage_free(fc, req); 1446 fuse_put_request(fc, req); 1447 spin_lock(&fc->lock); 1448 } 1449 1450 /* 1451 * If fi->writectr is positive (no truncate or fsync going on) send 1452 * all queued writepage requests. 1453 * 1454 * Called with fc->lock 1455 */ 1456 void fuse_flush_writepages(struct inode *inode) 1457 __releases(fc->lock) 1458 __acquires(fc->lock) 1459 { 1460 struct fuse_conn *fc = get_fuse_conn(inode); 1461 struct fuse_inode *fi = get_fuse_inode(inode); 1462 struct fuse_req *req; 1463 1464 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { 1465 req = list_entry(fi->queued_writes.next, struct fuse_req, list); 1466 list_del_init(&req->list); 1467 fuse_send_writepage(fc, req); 1468 } 1469 } 1470 1471 static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) 1472 { 1473 struct inode *inode = req->inode; 1474 struct fuse_inode *fi = get_fuse_inode(inode); 1475 1476 mapping_set_error(inode->i_mapping, req->out.h.error); 1477 spin_lock(&fc->lock); 1478 fi->writectr--; 1479 fuse_writepage_finish(fc, req); 1480 spin_unlock(&fc->lock); 1481 fuse_writepage_free(fc, req); 1482 } 1483 1484 static int fuse_writepage_locked(struct page *page) 1485 { 1486 struct address_space *mapping = page->mapping; 1487 struct inode *inode = mapping->host; 1488 struct fuse_conn *fc = get_fuse_conn(inode); 1489 struct fuse_inode *fi = get_fuse_inode(inode); 1490 struct fuse_req *req; 1491 struct fuse_file *ff; 1492 struct page *tmp_page; 1493 1494 set_page_writeback(page); 1495 1496 req = fuse_request_alloc_nofs(1); 1497 if (!req) 1498 goto err; 1499 1500 req->background = 1; /* writeback always goes to bg_queue */ 1501 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 1502 if (!tmp_page) 1503 goto err_free; 1504 1505 spin_lock(&fc->lock); 1506 BUG_ON(list_empty(&fi->write_files)); 1507 ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); 1508 req->ff = fuse_file_get(ff); 1509 spin_unlock(&fc->lock); 1510 1511 fuse_write_fill(req, ff, page_offset(page), 0); 1512 1513 copy_highpage(tmp_page, page); 1514 req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; 1515 req->in.argpages = 1; 1516 req->num_pages = 1; 1517 req->pages[0] = tmp_page; 1518 req->page_descs[0].offset = 0; 1519 req->page_descs[0].length = PAGE_SIZE; 1520 req->end = fuse_writepage_end; 1521 req->inode = inode; 1522 1523 inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); 1524 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); 1525 end_page_writeback(page); 1526 1527 spin_lock(&fc->lock); 1528 list_add(&req->writepages_entry, &fi->writepages); 1529 list_add_tail(&req->list, &fi->queued_writes); 1530 fuse_flush_writepages(inode); 1531 spin_unlock(&fc->lock); 1532 1533 return 0; 1534 1535 err_free: 1536 fuse_request_free(req); 1537 err: 1538 end_page_writeback(page); 1539 return -ENOMEM; 1540 } 1541 1542 static int fuse_writepage(struct page *page, struct writeback_control *wbc) 1543 { 1544 int err; 1545 1546 err = fuse_writepage_locked(page); 1547 unlock_page(page); 1548 1549 return err; 1550 } 1551 1552 static int fuse_launder_page(struct page *page) 1553 { 1554 int err = 0; 1555 if (clear_page_dirty_for_io(page)) { 1556 struct inode *inode = page->mapping->host; 1557 err = fuse_writepage_locked(page); 1558 if (!err) 1559 fuse_wait_on_page_writeback(inode, page->index); 1560 } 1561 return err; 1562 } 1563 1564 /* 1565 * Write back dirty pages now, because there may not be any suitable 1566 * open files later 1567 */ 1568 static void fuse_vma_close(struct vm_area_struct *vma) 1569 { 1570 filemap_write_and_wait(vma->vm_file->f_mapping); 1571 } 1572 1573 /* 1574 * Wait for writeback against this page to complete before allowing it 1575 * to be marked dirty again, and hence written back again, possibly 1576 * before the previous writepage completed. 1577 * 1578 * Block here, instead of in ->writepage(), so that the userspace fs 1579 * can only block processes actually operating on the filesystem. 1580 * 1581 * Otherwise unprivileged userspace fs would be able to block 1582 * unrelated: 1583 * 1584 * - page migration 1585 * - sync(2) 1586 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER 1587 */ 1588 static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1589 { 1590 struct page *page = vmf->page; 1591 /* 1592 * Don't use page->mapping as it may become NULL from a 1593 * concurrent truncate. 1594 */ 1595 struct inode *inode = vma->vm_file->f_mapping->host; 1596 1597 fuse_wait_on_page_writeback(inode, page->index); 1598 return 0; 1599 } 1600 1601 static const struct vm_operations_struct fuse_file_vm_ops = { 1602 .close = fuse_vma_close, 1603 .fault = filemap_fault, 1604 .page_mkwrite = fuse_page_mkwrite, 1605 .remap_pages = generic_file_remap_pages, 1606 }; 1607 1608 static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 1609 { 1610 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { 1611 struct inode *inode = file_inode(file); 1612 struct fuse_conn *fc = get_fuse_conn(inode); 1613 struct fuse_inode *fi = get_fuse_inode(inode); 1614 struct fuse_file *ff = file->private_data; 1615 /* 1616 * file may be written through mmap, so chain it onto the 1617 * inodes's write_file list 1618 */ 1619 spin_lock(&fc->lock); 1620 if (list_empty(&ff->write_entry)) 1621 list_add(&ff->write_entry, &fi->write_files); 1622 spin_unlock(&fc->lock); 1623 } 1624 file_accessed(file); 1625 vma->vm_ops = &fuse_file_vm_ops; 1626 return 0; 1627 } 1628 1629 static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) 1630 { 1631 /* Can't provide the coherency needed for MAP_SHARED */ 1632 if (vma->vm_flags & VM_MAYSHARE) 1633 return -ENODEV; 1634 1635 invalidate_inode_pages2(file->f_mapping); 1636 1637 return generic_file_mmap(file, vma); 1638 } 1639 1640 static int convert_fuse_file_lock(const struct fuse_file_lock *ffl, 1641 struct file_lock *fl) 1642 { 1643 switch (ffl->type) { 1644 case F_UNLCK: 1645 break; 1646 1647 case F_RDLCK: 1648 case F_WRLCK: 1649 if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX || 1650 ffl->end < ffl->start) 1651 return -EIO; 1652 1653 fl->fl_start = ffl->start; 1654 fl->fl_end = ffl->end; 1655 fl->fl_pid = ffl->pid; 1656 break; 1657 1658 default: 1659 return -EIO; 1660 } 1661 fl->fl_type = ffl->type; 1662 return 0; 1663 } 1664 1665 static void fuse_lk_fill(struct fuse_req *req, struct file *file, 1666 const struct file_lock *fl, int opcode, pid_t pid, 1667 int flock) 1668 { 1669 struct inode *inode = file_inode(file); 1670 struct fuse_conn *fc = get_fuse_conn(inode); 1671 struct fuse_file *ff = file->private_data; 1672 struct fuse_lk_in *arg = &req->misc.lk_in; 1673 1674 arg->fh = ff->fh; 1675 arg->owner = fuse_lock_owner_id(fc, fl->fl_owner); 1676 arg->lk.start = fl->fl_start; 1677 arg->lk.end = fl->fl_end; 1678 arg->lk.type = fl->fl_type; 1679 arg->lk.pid = pid; 1680 if (flock) 1681 arg->lk_flags |= FUSE_LK_FLOCK; 1682 req->in.h.opcode = opcode; 1683 req->in.h.nodeid = get_node_id(inode); 1684 req->in.numargs = 1; 1685 req->in.args[0].size = sizeof(*arg); 1686 req->in.args[0].value = arg; 1687 } 1688 1689 static int fuse_getlk(struct file *file, struct file_lock *fl) 1690 { 1691 struct inode *inode = file_inode(file); 1692 struct fuse_conn *fc = get_fuse_conn(inode); 1693 struct fuse_req *req; 1694 struct fuse_lk_out outarg; 1695 int err; 1696 1697 req = fuse_get_req_nopages(fc); 1698 if (IS_ERR(req)) 1699 return PTR_ERR(req); 1700 1701 fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0); 1702 req->out.numargs = 1; 1703 req->out.args[0].size = sizeof(outarg); 1704 req->out.args[0].value = &outarg; 1705 fuse_request_send(fc, req); 1706 err = req->out.h.error; 1707 fuse_put_request(fc, req); 1708 if (!err) 1709 err = convert_fuse_file_lock(&outarg.lk, fl); 1710 1711 return err; 1712 } 1713 1714 static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) 1715 { 1716 struct inode *inode = file_inode(file); 1717 struct fuse_conn *fc = get_fuse_conn(inode); 1718 struct fuse_req *req; 1719 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; 1720 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; 1721 int err; 1722 1723 if (fl->fl_lmops && fl->fl_lmops->lm_grant) { 1724 /* NLM needs asynchronous locks, which we don't support yet */ 1725 return -ENOLCK; 1726 } 1727 1728 /* Unlock on close is handled by the flush method */ 1729 if (fl->fl_flags & FL_CLOSE) 1730 return 0; 1731 1732 req = fuse_get_req_nopages(fc); 1733 if (IS_ERR(req)) 1734 return PTR_ERR(req); 1735 1736 fuse_lk_fill(req, file, fl, opcode, pid, flock); 1737 fuse_request_send(fc, req); 1738 err = req->out.h.error; 1739 /* locking is restartable */ 1740 if (err == -EINTR) 1741 err = -ERESTARTSYS; 1742 fuse_put_request(fc, req); 1743 return err; 1744 } 1745 1746 static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) 1747 { 1748 struct inode *inode = file_inode(file); 1749 struct fuse_conn *fc = get_fuse_conn(inode); 1750 int err; 1751 1752 if (cmd == F_CANCELLK) { 1753 err = 0; 1754 } else if (cmd == F_GETLK) { 1755 if (fc->no_lock) { 1756 posix_test_lock(file, fl); 1757 err = 0; 1758 } else 1759 err = fuse_getlk(file, fl); 1760 } else { 1761 if (fc->no_lock) 1762 err = posix_lock_file(file, fl, NULL); 1763 else 1764 err = fuse_setlk(file, fl, 0); 1765 } 1766 return err; 1767 } 1768 1769 static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) 1770 { 1771 struct inode *inode = file_inode(file); 1772 struct fuse_conn *fc = get_fuse_conn(inode); 1773 int err; 1774 1775 if (fc->no_flock) { 1776 err = flock_lock_file_wait(file, fl); 1777 } else { 1778 struct fuse_file *ff = file->private_data; 1779 1780 /* emulate flock with POSIX locks */ 1781 fl->fl_owner = (fl_owner_t) file; 1782 ff->flock = true; 1783 err = fuse_setlk(file, fl, 1); 1784 } 1785 1786 return err; 1787 } 1788 1789 static sector_t fuse_bmap(struct address_space *mapping, sector_t block) 1790 { 1791 struct inode *inode = mapping->host; 1792 struct fuse_conn *fc = get_fuse_conn(inode); 1793 struct fuse_req *req; 1794 struct fuse_bmap_in inarg; 1795 struct fuse_bmap_out outarg; 1796 int err; 1797 1798 if (!inode->i_sb->s_bdev || fc->no_bmap) 1799 return 0; 1800 1801 req = fuse_get_req_nopages(fc); 1802 if (IS_ERR(req)) 1803 return 0; 1804 1805 memset(&inarg, 0, sizeof(inarg)); 1806 inarg.block = block; 1807 inarg.blocksize = inode->i_sb->s_blocksize; 1808 req->in.h.opcode = FUSE_BMAP; 1809 req->in.h.nodeid = get_node_id(inode); 1810 req->in.numargs = 1; 1811 req->in.args[0].size = sizeof(inarg); 1812 req->in.args[0].value = &inarg; 1813 req->out.numargs = 1; 1814 req->out.args[0].size = sizeof(outarg); 1815 req->out.args[0].value = &outarg; 1816 fuse_request_send(fc, req); 1817 err = req->out.h.error; 1818 fuse_put_request(fc, req); 1819 if (err == -ENOSYS) 1820 fc->no_bmap = 1; 1821 1822 return err ? 0 : outarg.block; 1823 } 1824 1825 static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) 1826 { 1827 loff_t retval; 1828 struct inode *inode = file_inode(file); 1829 1830 /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */ 1831 if (whence == SEEK_CUR || whence == SEEK_SET) 1832 return generic_file_llseek(file, offset, whence); 1833 1834 mutex_lock(&inode->i_mutex); 1835 retval = fuse_update_attributes(inode, NULL, file, NULL); 1836 if (!retval) 1837 retval = generic_file_llseek(file, offset, whence); 1838 mutex_unlock(&inode->i_mutex); 1839 1840 return retval; 1841 } 1842 1843 static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, 1844 unsigned int nr_segs, size_t bytes, bool to_user) 1845 { 1846 struct iov_iter ii; 1847 int page_idx = 0; 1848 1849 if (!bytes) 1850 return 0; 1851 1852 iov_iter_init(&ii, iov, nr_segs, bytes, 0); 1853 1854 while (iov_iter_count(&ii)) { 1855 struct page *page = pages[page_idx++]; 1856 size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii)); 1857 void *kaddr; 1858 1859 kaddr = kmap(page); 1860 1861 while (todo) { 1862 char __user *uaddr = ii.iov->iov_base + ii.iov_offset; 1863 size_t iov_len = ii.iov->iov_len - ii.iov_offset; 1864 size_t copy = min(todo, iov_len); 1865 size_t left; 1866 1867 if (!to_user) 1868 left = copy_from_user(kaddr, uaddr, copy); 1869 else 1870 left = copy_to_user(uaddr, kaddr, copy); 1871 1872 if (unlikely(left)) 1873 return -EFAULT; 1874 1875 iov_iter_advance(&ii, copy); 1876 todo -= copy; 1877 kaddr += copy; 1878 } 1879 1880 kunmap(page); 1881 } 1882 1883 return 0; 1884 } 1885 1886 /* 1887 * CUSE servers compiled on 32bit broke on 64bit kernels because the 1888 * ABI was defined to be 'struct iovec' which is different on 32bit 1889 * and 64bit. Fortunately we can determine which structure the server 1890 * used from the size of the reply. 1891 */ 1892 static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, 1893 size_t transferred, unsigned count, 1894 bool is_compat) 1895 { 1896 #ifdef CONFIG_COMPAT 1897 if (count * sizeof(struct compat_iovec) == transferred) { 1898 struct compat_iovec *ciov = src; 1899 unsigned i; 1900 1901 /* 1902 * With this interface a 32bit server cannot support 1903 * non-compat (i.e. ones coming from 64bit apps) ioctl 1904 * requests 1905 */ 1906 if (!is_compat) 1907 return -EINVAL; 1908 1909 for (i = 0; i < count; i++) { 1910 dst[i].iov_base = compat_ptr(ciov[i].iov_base); 1911 dst[i].iov_len = ciov[i].iov_len; 1912 } 1913 return 0; 1914 } 1915 #endif 1916 1917 if (count * sizeof(struct iovec) != transferred) 1918 return -EIO; 1919 1920 memcpy(dst, src, transferred); 1921 return 0; 1922 } 1923 1924 /* Make sure iov_length() won't overflow */ 1925 static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) 1926 { 1927 size_t n; 1928 u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; 1929 1930 for (n = 0; n < count; n++, iov++) { 1931 if (iov->iov_len > (size_t) max) 1932 return -ENOMEM; 1933 max -= iov->iov_len; 1934 } 1935 return 0; 1936 } 1937 1938 static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst, 1939 void *src, size_t transferred, unsigned count, 1940 bool is_compat) 1941 { 1942 unsigned i; 1943 struct fuse_ioctl_iovec *fiov = src; 1944 1945 if (fc->minor < 16) { 1946 return fuse_copy_ioctl_iovec_old(dst, src, transferred, 1947 count, is_compat); 1948 } 1949 1950 if (count * sizeof(struct fuse_ioctl_iovec) != transferred) 1951 return -EIO; 1952 1953 for (i = 0; i < count; i++) { 1954 /* Did the server supply an inappropriate value? */ 1955 if (fiov[i].base != (unsigned long) fiov[i].base || 1956 fiov[i].len != (unsigned long) fiov[i].len) 1957 return -EIO; 1958 1959 dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base; 1960 dst[i].iov_len = (size_t) fiov[i].len; 1961 1962 #ifdef CONFIG_COMPAT 1963 if (is_compat && 1964 (ptr_to_compat(dst[i].iov_base) != fiov[i].base || 1965 (compat_size_t) dst[i].iov_len != fiov[i].len)) 1966 return -EIO; 1967 #endif 1968 } 1969 1970 return 0; 1971 } 1972 1973 1974 /* 1975 * For ioctls, there is no generic way to determine how much memory 1976 * needs to be read and/or written. Furthermore, ioctls are allowed 1977 * to dereference the passed pointer, so the parameter requires deep 1978 * copying but FUSE has no idea whatsoever about what to copy in or 1979 * out. 1980 * 1981 * This is solved by allowing FUSE server to retry ioctl with 1982 * necessary in/out iovecs. Let's assume the ioctl implementation 1983 * needs to read in the following structure. 1984 * 1985 * struct a { 1986 * char *buf; 1987 * size_t buflen; 1988 * } 1989 * 1990 * On the first callout to FUSE server, inarg->in_size and 1991 * inarg->out_size will be NULL; then, the server completes the ioctl 1992 * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and 1993 * the actual iov array to 1994 * 1995 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } } 1996 * 1997 * which tells FUSE to copy in the requested area and retry the ioctl. 1998 * On the second round, the server has access to the structure and 1999 * from that it can tell what to look for next, so on the invocation, 2000 * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to 2001 * 2002 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) }, 2003 * { .iov_base = a.buf, .iov_len = a.buflen } } 2004 * 2005 * FUSE will copy both struct a and the pointed buffer from the 2006 * process doing the ioctl and retry ioctl with both struct a and the 2007 * buffer. 2008 * 2009 * This time, FUSE server has everything it needs and completes ioctl 2010 * without FUSE_IOCTL_RETRY which finishes the ioctl call. 2011 * 2012 * Copying data out works the same way. 2013 * 2014 * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel 2015 * automatically initializes in and out iovs by decoding @cmd with 2016 * _IOC_* macros and the server is not allowed to request RETRY. This 2017 * limits ioctl data transfers to well-formed ioctls and is the forced 2018 * behavior for all FUSE servers. 2019 */ 2020 long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 2021 unsigned int flags) 2022 { 2023 struct fuse_file *ff = file->private_data; 2024 struct fuse_conn *fc = ff->fc; 2025 struct fuse_ioctl_in inarg = { 2026 .fh = ff->fh, 2027 .cmd = cmd, 2028 .arg = arg, 2029 .flags = flags 2030 }; 2031 struct fuse_ioctl_out outarg; 2032 struct fuse_req *req = NULL; 2033 struct page **pages = NULL; 2034 struct iovec *iov_page = NULL; 2035 struct iovec *in_iov = NULL, *out_iov = NULL; 2036 unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages; 2037 size_t in_size, out_size, transferred; 2038 int err; 2039 2040 #if BITS_PER_LONG == 32 2041 inarg.flags |= FUSE_IOCTL_32BIT; 2042 #else 2043 if (flags & FUSE_IOCTL_COMPAT) 2044 inarg.flags |= FUSE_IOCTL_32BIT; 2045 #endif 2046 2047 /* assume all the iovs returned by client always fits in a page */ 2048 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 2049 2050 err = -ENOMEM; 2051 pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL); 2052 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); 2053 if (!pages || !iov_page) 2054 goto out; 2055 2056 /* 2057 * If restricted, initialize IO parameters as encoded in @cmd. 2058 * RETRY from server is not allowed. 2059 */ 2060 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { 2061 struct iovec *iov = iov_page; 2062 2063 iov->iov_base = (void __user *)arg; 2064 iov->iov_len = _IOC_SIZE(cmd); 2065 2066 if (_IOC_DIR(cmd) & _IOC_WRITE) { 2067 in_iov = iov; 2068 in_iovs = 1; 2069 } 2070 2071 if (_IOC_DIR(cmd) & _IOC_READ) { 2072 out_iov = iov; 2073 out_iovs = 1; 2074 } 2075 } 2076 2077 retry: 2078 inarg.in_size = in_size = iov_length(in_iov, in_iovs); 2079 inarg.out_size = out_size = iov_length(out_iov, out_iovs); 2080 2081 /* 2082 * Out data can be used either for actual out data or iovs, 2083 * make sure there always is at least one page. 2084 */ 2085 out_size = max_t(size_t, out_size, PAGE_SIZE); 2086 max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE); 2087 2088 /* make sure there are enough buffer pages and init request with them */ 2089 err = -ENOMEM; 2090 if (max_pages > FUSE_MAX_PAGES_PER_REQ) 2091 goto out; 2092 while (num_pages < max_pages) { 2093 pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 2094 if (!pages[num_pages]) 2095 goto out; 2096 num_pages++; 2097 } 2098 2099 req = fuse_get_req(fc, num_pages); 2100 if (IS_ERR(req)) { 2101 err = PTR_ERR(req); 2102 req = NULL; 2103 goto out; 2104 } 2105 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); 2106 req->num_pages = num_pages; 2107 fuse_page_descs_length_init(req, 0, req->num_pages); 2108 2109 /* okay, let's send it to the client */ 2110 req->in.h.opcode = FUSE_IOCTL; 2111 req->in.h.nodeid = ff->nodeid; 2112 req->in.numargs = 1; 2113 req->in.args[0].size = sizeof(inarg); 2114 req->in.args[0].value = &inarg; 2115 if (in_size) { 2116 req->in.numargs++; 2117 req->in.args[1].size = in_size; 2118 req->in.argpages = 1; 2119 2120 err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size, 2121 false); 2122 if (err) 2123 goto out; 2124 } 2125 2126 req->out.numargs = 2; 2127 req->out.args[0].size = sizeof(outarg); 2128 req->out.args[0].value = &outarg; 2129 req->out.args[1].size = out_size; 2130 req->out.argpages = 1; 2131 req->out.argvar = 1; 2132 2133 fuse_request_send(fc, req); 2134 err = req->out.h.error; 2135 transferred = req->out.args[1].size; 2136 fuse_put_request(fc, req); 2137 req = NULL; 2138 if (err) 2139 goto out; 2140 2141 /* did it ask for retry? */ 2142 if (outarg.flags & FUSE_IOCTL_RETRY) { 2143 void *vaddr; 2144 2145 /* no retry if in restricted mode */ 2146 err = -EIO; 2147 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) 2148 goto out; 2149 2150 in_iovs = outarg.in_iovs; 2151 out_iovs = outarg.out_iovs; 2152 2153 /* 2154 * Make sure things are in boundary, separate checks 2155 * are to protect against overflow. 2156 */ 2157 err = -ENOMEM; 2158 if (in_iovs > FUSE_IOCTL_MAX_IOV || 2159 out_iovs > FUSE_IOCTL_MAX_IOV || 2160 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) 2161 goto out; 2162 2163 vaddr = kmap_atomic(pages[0]); 2164 err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, 2165 transferred, in_iovs + out_iovs, 2166 (flags & FUSE_IOCTL_COMPAT) != 0); 2167 kunmap_atomic(vaddr); 2168 if (err) 2169 goto out; 2170 2171 in_iov = iov_page; 2172 out_iov = in_iov + in_iovs; 2173 2174 err = fuse_verify_ioctl_iov(in_iov, in_iovs); 2175 if (err) 2176 goto out; 2177 2178 err = fuse_verify_ioctl_iov(out_iov, out_iovs); 2179 if (err) 2180 goto out; 2181 2182 goto retry; 2183 } 2184 2185 err = -EIO; 2186 if (transferred > inarg.out_size) 2187 goto out; 2188 2189 err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true); 2190 out: 2191 if (req) 2192 fuse_put_request(fc, req); 2193 free_page((unsigned long) iov_page); 2194 while (num_pages) 2195 __free_page(pages[--num_pages]); 2196 kfree(pages); 2197 2198 return err ? err : outarg.result; 2199 } 2200 EXPORT_SYMBOL_GPL(fuse_do_ioctl); 2201 2202 long fuse_ioctl_common(struct file *file, unsigned int cmd, 2203 unsigned long arg, unsigned int flags) 2204 { 2205 struct inode *inode = file_inode(file); 2206 struct fuse_conn *fc = get_fuse_conn(inode); 2207 2208 if (!fuse_allow_current_process(fc)) 2209 return -EACCES; 2210 2211 if (is_bad_inode(inode)) 2212 return -EIO; 2213 2214 return fuse_do_ioctl(file, cmd, arg, flags); 2215 } 2216 2217 static long fuse_file_ioctl(struct file *file, unsigned int cmd, 2218 unsigned long arg) 2219 { 2220 return fuse_ioctl_common(file, cmd, arg, 0); 2221 } 2222 2223 static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, 2224 unsigned long arg) 2225 { 2226 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); 2227 } 2228 2229 /* 2230 * All files which have been polled are linked to RB tree 2231 * fuse_conn->polled_files which is indexed by kh. Walk the tree and 2232 * find the matching one. 2233 */ 2234 static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh, 2235 struct rb_node **parent_out) 2236 { 2237 struct rb_node **link = &fc->polled_files.rb_node; 2238 struct rb_node *last = NULL; 2239 2240 while (*link) { 2241 struct fuse_file *ff; 2242 2243 last = *link; 2244 ff = rb_entry(last, struct fuse_file, polled_node); 2245 2246 if (kh < ff->kh) 2247 link = &last->rb_left; 2248 else if (kh > ff->kh) 2249 link = &last->rb_right; 2250 else 2251 return link; 2252 } 2253 2254 if (parent_out) 2255 *parent_out = last; 2256 return link; 2257 } 2258 2259 /* 2260 * The file is about to be polled. Make sure it's on the polled_files 2261 * RB tree. Note that files once added to the polled_files tree are 2262 * not removed before the file is released. This is because a file 2263 * polled once is likely to be polled again. 2264 */ 2265 static void fuse_register_polled_file(struct fuse_conn *fc, 2266 struct fuse_file *ff) 2267 { 2268 spin_lock(&fc->lock); 2269 if (RB_EMPTY_NODE(&ff->polled_node)) { 2270 struct rb_node **link, *parent; 2271 2272 link = fuse_find_polled_node(fc, ff->kh, &parent); 2273 BUG_ON(*link); 2274 rb_link_node(&ff->polled_node, parent, link); 2275 rb_insert_color(&ff->polled_node, &fc->polled_files); 2276 } 2277 spin_unlock(&fc->lock); 2278 } 2279 2280 unsigned fuse_file_poll(struct file *file, poll_table *wait) 2281 { 2282 struct fuse_file *ff = file->private_data; 2283 struct fuse_conn *fc = ff->fc; 2284 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; 2285 struct fuse_poll_out outarg; 2286 struct fuse_req *req; 2287 int err; 2288 2289 if (fc->no_poll) 2290 return DEFAULT_POLLMASK; 2291 2292 poll_wait(file, &ff->poll_wait, wait); 2293 inarg.events = (__u32)poll_requested_events(wait); 2294 2295 /* 2296 * Ask for notification iff there's someone waiting for it. 2297 * The client may ignore the flag and always notify. 2298 */ 2299 if (waitqueue_active(&ff->poll_wait)) { 2300 inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; 2301 fuse_register_polled_file(fc, ff); 2302 } 2303 2304 req = fuse_get_req_nopages(fc); 2305 if (IS_ERR(req)) 2306 return POLLERR; 2307 2308 req->in.h.opcode = FUSE_POLL; 2309 req->in.h.nodeid = ff->nodeid; 2310 req->in.numargs = 1; 2311 req->in.args[0].size = sizeof(inarg); 2312 req->in.args[0].value = &inarg; 2313 req->out.numargs = 1; 2314 req->out.args[0].size = sizeof(outarg); 2315 req->out.args[0].value = &outarg; 2316 fuse_request_send(fc, req); 2317 err = req->out.h.error; 2318 fuse_put_request(fc, req); 2319 2320 if (!err) 2321 return outarg.revents; 2322 if (err == -ENOSYS) { 2323 fc->no_poll = 1; 2324 return DEFAULT_POLLMASK; 2325 } 2326 return POLLERR; 2327 } 2328 EXPORT_SYMBOL_GPL(fuse_file_poll); 2329 2330 /* 2331 * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and 2332 * wakes up the poll waiters. 2333 */ 2334 int fuse_notify_poll_wakeup(struct fuse_conn *fc, 2335 struct fuse_notify_poll_wakeup_out *outarg) 2336 { 2337 u64 kh = outarg->kh; 2338 struct rb_node **link; 2339 2340 spin_lock(&fc->lock); 2341 2342 link = fuse_find_polled_node(fc, kh, NULL); 2343 if (*link) { 2344 struct fuse_file *ff; 2345 2346 ff = rb_entry(*link, struct fuse_file, polled_node); 2347 wake_up_interruptible_sync(&ff->poll_wait); 2348 } 2349 2350 spin_unlock(&fc->lock); 2351 return 0; 2352 } 2353 2354 static void fuse_do_truncate(struct file *file) 2355 { 2356 struct inode *inode = file->f_mapping->host; 2357 struct iattr attr; 2358 2359 attr.ia_valid = ATTR_SIZE; 2360 attr.ia_size = i_size_read(inode); 2361 2362 attr.ia_file = file; 2363 attr.ia_valid |= ATTR_FILE; 2364 2365 fuse_do_setattr(inode, &attr, file); 2366 } 2367 2368 static ssize_t 2369 fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 2370 loff_t offset, unsigned long nr_segs) 2371 { 2372 ssize_t ret = 0; 2373 struct file *file = iocb->ki_filp; 2374 struct fuse_file *ff = file->private_data; 2375 loff_t pos = 0; 2376 struct inode *inode; 2377 loff_t i_size; 2378 size_t count = iov_length(iov, nr_segs); 2379 struct fuse_io_priv *io; 2380 2381 pos = offset; 2382 inode = file->f_mapping->host; 2383 i_size = i_size_read(inode); 2384 2385 /* optimization for short read */ 2386 if (rw != WRITE && offset + count > i_size) { 2387 if (offset >= i_size) 2388 return 0; 2389 count = i_size - offset; 2390 } 2391 2392 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); 2393 if (!io) 2394 return -ENOMEM; 2395 spin_lock_init(&io->lock); 2396 io->reqs = 1; 2397 io->bytes = -1; 2398 io->size = 0; 2399 io->offset = offset; 2400 io->write = (rw == WRITE); 2401 io->err = 0; 2402 io->file = file; 2403 /* 2404 * By default, we want to optimize all I/Os with async request 2405 * submission to the client filesystem if supported. 2406 */ 2407 io->async = ff->fc->async_dio; 2408 io->iocb = iocb; 2409 2410 /* 2411 * We cannot asynchronously extend the size of a file. We have no method 2412 * to wait on real async I/O requests, so we must submit this request 2413 * synchronously. 2414 */ 2415 if (!is_sync_kiocb(iocb) && (offset + count > i_size)) 2416 io->async = false; 2417 2418 if (rw == WRITE) 2419 ret = __fuse_direct_write(io, iov, nr_segs, &pos); 2420 else 2421 ret = __fuse_direct_read(io, iov, nr_segs, &pos, count); 2422 2423 if (io->async) { 2424 fuse_aio_complete(io, ret < 0 ? ret : 0, -1); 2425 2426 /* we have a non-extending, async request, so return */ 2427 if (ret > 0 && !is_sync_kiocb(iocb)) 2428 return -EIOCBQUEUED; 2429 2430 ret = wait_on_sync_kiocb(iocb); 2431 } else { 2432 kfree(io); 2433 } 2434 2435 if (rw == WRITE) { 2436 if (ret > 0) 2437 fuse_write_update_size(inode, pos); 2438 else if (ret < 0 && offset + count > i_size) 2439 fuse_do_truncate(file); 2440 } 2441 2442 return ret; 2443 } 2444 2445 static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, 2446 loff_t length) 2447 { 2448 struct fuse_file *ff = file->private_data; 2449 struct fuse_conn *fc = ff->fc; 2450 struct fuse_req *req; 2451 struct fuse_fallocate_in inarg = { 2452 .fh = ff->fh, 2453 .offset = offset, 2454 .length = length, 2455 .mode = mode 2456 }; 2457 int err; 2458 2459 if (fc->no_fallocate) 2460 return -EOPNOTSUPP; 2461 2462 req = fuse_get_req_nopages(fc); 2463 if (IS_ERR(req)) 2464 return PTR_ERR(req); 2465 2466 req->in.h.opcode = FUSE_FALLOCATE; 2467 req->in.h.nodeid = ff->nodeid; 2468 req->in.numargs = 1; 2469 req->in.args[0].size = sizeof(inarg); 2470 req->in.args[0].value = &inarg; 2471 fuse_request_send(fc, req); 2472 err = req->out.h.error; 2473 if (err == -ENOSYS) { 2474 fc->no_fallocate = 1; 2475 err = -EOPNOTSUPP; 2476 } 2477 fuse_put_request(fc, req); 2478 2479 return err; 2480 } 2481 2482 static const struct file_operations fuse_file_operations = { 2483 .llseek = fuse_file_llseek, 2484 .read = do_sync_read, 2485 .aio_read = fuse_file_aio_read, 2486 .write = do_sync_write, 2487 .aio_write = fuse_file_aio_write, 2488 .mmap = fuse_file_mmap, 2489 .open = fuse_open, 2490 .flush = fuse_flush, 2491 .release = fuse_release, 2492 .fsync = fuse_fsync, 2493 .lock = fuse_file_lock, 2494 .flock = fuse_file_flock, 2495 .splice_read = generic_file_splice_read, 2496 .unlocked_ioctl = fuse_file_ioctl, 2497 .compat_ioctl = fuse_file_compat_ioctl, 2498 .poll = fuse_file_poll, 2499 .fallocate = fuse_file_fallocate, 2500 }; 2501 2502 static const struct file_operations fuse_direct_io_file_operations = { 2503 .llseek = fuse_file_llseek, 2504 .read = fuse_direct_read, 2505 .write = fuse_direct_write, 2506 .mmap = fuse_direct_mmap, 2507 .open = fuse_open, 2508 .flush = fuse_flush, 2509 .release = fuse_release, 2510 .fsync = fuse_fsync, 2511 .lock = fuse_file_lock, 2512 .flock = fuse_file_flock, 2513 .unlocked_ioctl = fuse_file_ioctl, 2514 .compat_ioctl = fuse_file_compat_ioctl, 2515 .poll = fuse_file_poll, 2516 .fallocate = fuse_file_fallocate, 2517 /* no splice_read */ 2518 }; 2519 2520 static const struct address_space_operations fuse_file_aops = { 2521 .readpage = fuse_readpage, 2522 .writepage = fuse_writepage, 2523 .launder_page = fuse_launder_page, 2524 .readpages = fuse_readpages, 2525 .set_page_dirty = __set_page_dirty_nobuffers, 2526 .bmap = fuse_bmap, 2527 .direct_IO = fuse_direct_IO, 2528 }; 2529 2530 void fuse_init_file_inode(struct inode *inode) 2531 { 2532 inode->i_fop = &fuse_file_operations; 2533 inode->i_data.a_ops = &fuse_file_aops; 2534 } 2535