/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/compat.h>

static const struct file_operations fuse_direct_io_file_operations;

static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
			  int opcode, struct fuse_open_out *outargp)
{
	struct fuse_open_in inarg;
	struct fuse_req *req;
	int err;

	req = fuse_get_req(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);

	memset(&inarg, 0, sizeof(inarg));
	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
	if (!fc->atomic_o_trunc)
		inarg.flags &= ~O_TRUNC;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(*outargp);
	req->out.args[0].value = outargp;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);

	return err;
}

struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
{
	struct fuse_file *ff;

	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
	if (unlikely(!ff))
		return NULL;

	ff->fc = fc;
	ff->reserved_req = fuse_request_alloc();
	if (unlikely(!ff->reserved_req)) {
		kfree(ff);
		return NULL;
	}

	INIT_LIST_HEAD(&ff->write_entry);
	atomic_set(&ff->count, 0);
	RB_CLEAR_NODE(&ff->polled_node);
	init_waitqueue_head(&ff->poll_wait);

	spin_lock(&fc->lock);
	ff->kh = ++fc->khctr;
	spin_unlock(&fc->lock);

	return ff;
}

void fuse_file_free(struct fuse_file *ff)
{
	fuse_request_free(ff->reserved_req);
	kfree(ff);
}

struct fuse_file *fuse_file_get(struct fuse_file *ff)
{
	atomic_inc(&ff->count);
	return ff;
}

static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
	path_put(&req->misc.release.path);
}

static void fuse_file_put(struct fuse_file *ff)
{
	if (atomic_dec_and_test(&ff->count)) {
		struct fuse_req *req = ff->reserved_req;

		req->end = fuse_release_end;
		fuse_request_send_background(ff->fc, req);
		kfree(ff);
	}
}

int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
		 bool isdir)
{
	struct fuse_open_out outarg;
	struct fuse_file *ff;
	int err;
	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
	ff = fuse_file_alloc(fc);
	if (!ff)
		return -ENOMEM;

	err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
	if (err) {
		fuse_file_free(ff);
		return err;
	}

	if (isdir)
		outarg.open_flags &= ~FOPEN_DIRECT_IO;

	ff->fh = outarg.fh;
	ff->nodeid = nodeid;
	ff->open_flags = outarg.open_flags;
	file->private_data = fuse_file_get(ff);

	return 0;
}
EXPORT_SYMBOL_GPL(fuse_do_open);

void fuse_finish_open(struct inode *inode, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (ff->open_flags & FOPEN_DIRECT_IO)
		file->f_op = &fuse_direct_io_file_operations;
	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
		invalidate_inode_pages2(inode->i_mapping);
	if (ff->open_flags & FOPEN_NONSEEKABLE)
		nonseekable_open(inode, file);
	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		spin_lock(&fc->lock);
		fi->attr_version = ++fc->attr_version;
		i_size_write(inode, 0);
		spin_unlock(&fc->lock);
		fuse_invalidate_attr(inode);
	}
}

int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;

	/* VFS checks this, but only _after_ ->open() */
	if (file->f_flags & O_DIRECT)
		return -EINVAL;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	err = fuse_do_open(fc, get_node_id(inode), file, isdir);
	if (err)
		return err;

	fuse_finish_open(inode, file);

	return 0;
}

static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
{
	struct fuse_conn *fc = ff->fc;
	struct fuse_req *req = ff->reserved_req;
	struct fuse_release_in *inarg = &req->misc.release.in;

	spin_lock(&fc->lock);
	list_del(&ff->write_entry);
	if (!RB_EMPTY_NODE(&ff->polled_node))
		rb_erase(&ff->polled_node, &fc->polled_files);
	spin_unlock(&fc->lock);

	wake_up_interruptible_sync(&ff->poll_wait);

	inarg->fh = ff->fh;
	inarg->flags = flags;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct fuse_release_in);
	req->in.args[0].value = inarg;
}

void fuse_release_common(struct file *file, int opcode)
{
	struct fuse_file *ff;
	struct fuse_req *req;

	ff = file->private_data;
	if (unlikely(!ff))
		return;

	req = ff->reserved_req;
	fuse_prepare_release(ff, file->f_flags, opcode);

	/* Hold vfsmount and dentry until release is finished */
	path_get(&file->f_path);
	req->misc.release.path = file->f_path;

	/*
	 * Normally this will send the RELEASE request; however, if
	 * some asynchronous READ or WRITE requests are outstanding,
	 * the sending will be delayed.
	 */
	fuse_file_put(ff);
}

static int fuse_open(struct inode *inode, struct file *file)
{
	return fuse_open_common(inode, file, false);
}

static int fuse_release(struct inode *inode, struct file *file)
{
	fuse_release_common(file, FUSE_RELEASE);

	/* return value is ignored by VFS */
	return 0;
}

void fuse_sync_release(struct fuse_file *ff, int flags)
{
	WARN_ON(atomic_read(&ff->count) > 1);
	fuse_prepare_release(ff, flags, FUSE_RELEASE);
	ff->reserved_req->force = 1;
	fuse_request_send(ff->fc, ff->reserved_req);
	fuse_put_request(ff->fc, ff->reserved_req);
	kfree(ff);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);

/*
 * Scramble the ID space with XTEA, so that the value of the files_struct
 * pointer is not exposed to userspace.
 */
u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
{
	u32 *k = fc->scramble_key;
	u64 v = (unsigned long) id;
	u32 v0 = v;
	u32 v1 = v >> 32;
	u32 sum = 0;
	int i;

	for (i = 0; i < 32; i++) {
		v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
		sum += 0x9E3779B9;
		v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
	}

	return (u64) v0 + ((u64) v1 << 32);
}

/*
 * Check if page is under writeback
 *
 * This is currently done by walking the list of writepage requests
 * for the inode, which can be pretty inefficient.
 */
static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_req *req;
	bool found = false;

	spin_lock(&fc->lock);
	list_for_each_entry(req, &fi->writepages, writepages_entry) {
		pgoff_t curr_index;

		BUG_ON(req->inode != inode);
		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		if (curr_index == index) {
			found = true;
			break;
		}
	}
	spin_unlock(&fc->lock);

	return found;
}

/*
 * Wait for page writeback to be completed.
 *
 * Since fuse doesn't rely on the VM writeback tracking, this has to
 * use some other means.
 */
static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
	return 0;
}

static int fuse_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_req *req;
	struct fuse_flush_in inarg;
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	if (fc->no_flush)
		return 0;

	req = fuse_get_req_nofail(fc, file);
	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.lock_owner = fuse_lock_owner_id(fc, id);
	req->in.h.opcode = FUSE_FLUSH;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->force = 1;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err == -ENOSYS) {
		fc->no_flush = 1;
		err = 0;
	}
	return err;
}
/*
 * Wait for all pending writepages on the inode to finish.
 *
 * This is currently done by blocking further writes with FUSE_NOWRITE
 * and waiting for all sent writes to complete.
 *
 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 * could conflict with truncation.
 */
static void fuse_sync_writes(struct inode *inode)
{
	fuse_set_nowrite(inode);
	fuse_release_nowrite(inode);
}

int fuse_fsync_common(struct file *file, int datasync, int isdir)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_req *req;
	struct fuse_fsync_in inarg;
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
		return 0;

	/*
	 * Start writeback against all dirty pages of the inode, then
	 * wait for all outstanding writes, before sending the FSYNC
	 * request.
	 */
	err = write_inode_now(inode, 0);
	if (err)
		return err;

	fuse_sync_writes(inode);

	req = fuse_get_req(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);

	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.fsync_flags = datasync ? 1 : 0;
	req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err == -ENOSYS) {
		if (isdir)
			fc->no_fsyncdir = 1;
		else
			fc->no_fsync = 1;
		err = 0;
	}
	return err;
}

static int fuse_fsync(struct file *file, int datasync)
{
	return fuse_fsync_common(file, datasync, 0);
}

void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
		    size_t count, int opcode)
{
	struct fuse_read_in *inarg = &req->misc.read.in;
	struct fuse_file *ff = file->private_data;

	inarg->fh = ff->fh;
	inarg->offset = pos;
	inarg->size = count;
	inarg->flags = file->f_flags;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct fuse_read_in);
	req->in.args[0].value = inarg;
	req->out.argvar = 1;
	req->out.numargs = 1;
	req->out.args[0].size = count;
}

static size_t fuse_send_read(struct fuse_req *req, struct file *file,
			     loff_t pos, size_t count, fl_owner_t owner)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;

	fuse_read_fill(req, file, pos, count, FUSE_READ);
	if (owner != NULL) {
		struct fuse_read_in *inarg = &req->misc.read.in;

		inarg->read_flags |= FUSE_READ_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
	}
	fuse_request_send(fc, req);
	return req->out.args[0].size;
}

static void fuse_read_update_size(struct inode *inode, loff_t size,
				  u64 attr_ver)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	if (attr_ver == fi->attr_version && size < inode->i_size) {
		fi->attr_version = ++fc->attr_version;
		i_size_write(inode, size);
	}
	spin_unlock(&fc->lock);
}

static int fuse_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	size_t num_read;
	loff_t pos = page_offset(page);
	size_t count = PAGE_CACHE_SIZE;
	u64 attr_ver;
	int err;

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	/*
	 * Page writeback can extend beyond the lifetime of the
	 * page-cache page, so make sure we read a properly synced
	 * page.
	 */
	fuse_wait_on_page_writeback(inode, page->index);

	req = fuse_get_req(fc);
	err = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	attr_ver = fuse_get_attr_version(fc);

	req->out.page_zeroing = 1;
	req->out.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = page;
	num_read = fuse_send_read(req, file, pos, count, NULL);
	err = req->out.h.error;
	fuse_put_request(fc, req);

	if (!err) {
		/*
		 * Short read means EOF.  If file size is larger, truncate it
		 */
		if (num_read < count)
			fuse_read_update_size(inode, pos + num_read, attr_ver);

		SetPageUptodate(page);
	}

	fuse_invalidate_attr(inode); /* atime changed */
 out:
	unlock_page(page);
	return err;
}

static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
{
	int i;
	size_t count = req->misc.read.in.size;
	size_t num_read = req->out.args[0].size;
	struct address_space *mapping = NULL;

	for (i = 0; mapping == NULL && i < req->num_pages; i++)
		mapping = req->pages[i]->mapping;

	if (mapping) {
		struct inode *inode = mapping->host;

		/*
		 * Short read means EOF.  If file size is larger, truncate it
		 */
		if (!req->out.h.error && num_read < count) {
			loff_t pos;

			pos = page_offset(req->pages[0]) + num_read;
			fuse_read_update_size(inode, pos,
					      req->misc.read.attr_ver);
		}
		fuse_invalidate_attr(inode); /* atime changed */
	}

	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];
		if (!req->out.h.error)
			SetPageUptodate(page);
		else
			SetPageError(page);
		unlock_page(page);
		page_cache_release(page);
	}
	if (req->ff)
		fuse_file_put(req->ff);
}

static void fuse_send_readpages(struct fuse_req *req, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	loff_t pos = page_offset(req->pages[0]);
	size_t count = req->num_pages << PAGE_CACHE_SHIFT;

	req->out.argpages = 1;
	req->out.page_zeroing = 1;
	req->out.page_replace = 1;
	fuse_read_fill(req, file, pos, count, FUSE_READ);
	req->misc.read.attr_ver = fuse_get_attr_version(fc);
	if (fc->async_read) {
		req->ff = fuse_file_get(ff);
		req->end = fuse_readpages_end;
		fuse_request_send_background(fc, req);
	} else {
		fuse_request_send(fc, req);
		fuse_readpages_end(fc, req);
		fuse_put_request(fc, req);
	}
}

struct fuse_fill_data {
	struct fuse_req *req;
	struct file *file;
	struct inode *inode;
};

static int fuse_readpages_fill(void *_data, struct page *page)
{
	struct fuse_fill_data *data = _data;
	struct fuse_req *req = data->req;
	struct inode *inode = data->inode;
	struct fuse_conn *fc = get_fuse_conn(inode);

	fuse_wait_on_page_writeback(inode, page->index);

	if (req->num_pages &&
	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
		fuse_send_readpages(req, data->file);
		data->req = req = fuse_get_req(fc);
		if (IS_ERR(req)) {
			unlock_page(page);
			return PTR_ERR(req);
		}
	}
	page_cache_get(page);
	req->pages[req->num_pages] = page;
	req->num_pages++;
	return 0;
}

static int fuse_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_fill_data data;
	int err;

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	data.file = file;
	data.inode = inode;
	data.req = fuse_get_req(fc);
	err = PTR_ERR(data.req);
	if (IS_ERR(data.req))
		goto out;

	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
	if (!err) {
		if (data.req->num_pages)
			fuse_send_readpages(data.req, file);
		else
			fuse_put_request(fc, data.req);
	}
out:
	return err;
}

static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
				  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;

	if (pos + iov_length(iov, nr_segs) > i_size_read(inode)) {
		int err;
		/*
		 * If trying to read past EOF, make sure the i_size
		 * attribute is up-to-date.
		 */
		err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
		if (err)
			return err;
	}

	return generic_file_aio_read(iocb, iov, nr_segs, pos);
}

static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
			    loff_t pos, size_t count)
{
	struct fuse_write_in *inarg = &req->misc.write.in;
	struct fuse_write_out *outarg = &req->misc.write.out;

	inarg->fh = ff->fh;
	inarg->offset = pos;
	inarg->size = count;
	req->in.h.opcode = FUSE_WRITE;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 2;
	if (ff->fc->minor < 9)
		req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
	else
		req->in.args[0].size = sizeof(struct fuse_write_in);
	req->in.args[0].value = inarg;
	req->in.args[1].size = count;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(struct fuse_write_out);
	req->out.args[0].value = outarg;
}

static size_t fuse_send_write(struct fuse_req *req, struct file *file,
			      loff_t pos, size_t count, fl_owner_t owner)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	struct fuse_write_in *inarg = &req->misc.write.in;

	fuse_write_fill(req, ff, pos, count);
	inarg->flags = file->f_flags;
	if (owner != NULL) {
		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
	}
	fuse_request_send(fc, req);
	return req->misc.write.out.size;
}

static int fuse_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;

	*pagep = grab_cache_page_write_begin(mapping, index, flags);
	if (!*pagep)
		return -ENOMEM;
	return 0;
}

void fuse_write_update_size(struct inode *inode, loff_t pos)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	fi->attr_version = ++fc->attr_version;
	if (pos > inode->i_size)
		i_size_write(inode, pos);
	spin_unlock(&fc->lock);
}
static int fuse_buffered_write(struct file *file, struct inode *inode,
			       loff_t pos, unsigned count, struct page *page)
{
	int err;
	size_t nres;
	struct fuse_conn *fc = get_fuse_conn(inode);
	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
	struct fuse_req *req;

	if (is_bad_inode(inode))
		return -EIO;

	/*
	 * Make sure writepages on the same page are not mixed up with
	 * plain writes.
	 */
	fuse_wait_on_page_writeback(inode, page->index);

	req = fuse_get_req(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->in.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = page;
	req->page_offset = offset;
	nres = fuse_send_write(req, file, pos, count, NULL);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (!err && !nres)
		err = -EIO;
	if (!err) {
		pos += nres;
		fuse_write_update_size(inode, pos);
		if (count == PAGE_CACHE_SIZE)
			SetPageUptodate(page);
	}
	fuse_invalidate_attr(inode);
	return err ? err : nres;
}

static int fuse_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	int res = 0;

	if (copied)
		res = fuse_buffered_write(file, inode, pos, copied, page);

	unlock_page(page);
	page_cache_release(page);
	return res;
}

static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
				    struct inode *inode, loff_t pos,
				    size_t count)
{
	size_t res;
	unsigned offset;
	unsigned i;

	for (i = 0; i < req->num_pages; i++)
		fuse_wait_on_page_writeback(inode, req->pages[i]->index);

	res = fuse_send_write(req, file, pos, count, NULL);

	offset = req->page_offset;
	count = res;
	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];

		if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
			SetPageUptodate(page);

		if (count > PAGE_CACHE_SIZE - offset)
			count -= PAGE_CACHE_SIZE - offset;
		else
			count = 0;
		offset = 0;

		unlock_page(page);
		page_cache_release(page);
	}

	return res;
}

static ssize_t fuse_fill_write_pages(struct fuse_req *req,
				     struct address_space *mapping,
				     struct iov_iter *ii, loff_t pos)
{
	struct fuse_conn *fc = get_fuse_conn(mapping->host);
	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
	size_t count = 0;
	int err;

	req->in.argpages = 1;
	req->page_offset = offset;

	do {
		size_t tmp;
		struct page *page;
		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
		size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
				     iov_iter_count(ii));

		bytes = min_t(size_t, bytes, fc->max_write - count);

 again:
		err = -EFAULT;
		if (iov_iter_fault_in_readable(ii, bytes))
			break;

		err = -ENOMEM;
		page = grab_cache_page_write_begin(mapping, index, 0);
		if (!page)
			break;

		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		pagefault_disable();
		tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
		pagefault_enable();
		flush_dcache_page(page);

		if (!tmp) {
			unlock_page(page);
			page_cache_release(page);
			bytes = min(bytes, iov_iter_single_seg_count(ii));
			goto again;
		}

		err = 0;
		req->pages[req->num_pages] = page;
		req->num_pages++;

		iov_iter_advance(ii, tmp);
		count += tmp;
		pos += tmp;
		offset += tmp;
		if (offset == PAGE_CACHE_SIZE)
			offset = 0;

		if (!fc->big_writes)
			break;
	} while (iov_iter_count(ii) && count < fc->max_write &&
		 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);

	return count > 0 ? count : err;
}

static ssize_t fuse_perform_write(struct file *file,
				  struct address_space *mapping,
				  struct iov_iter *ii, loff_t pos)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err = 0;
	ssize_t res = 0;

	if (is_bad_inode(inode))
		return -EIO;

	do {
		struct fuse_req *req;
		ssize_t count;

		req = fuse_get_req(fc);
		if (IS_ERR(req)) {
			err = PTR_ERR(req);
			break;
		}

		count = fuse_fill_write_pages(req, mapping, ii, pos);
		if (count <= 0) {
			err = count;
		} else {
			size_t num_written;

			num_written = fuse_send_write_pages(req, file, inode,
							    pos, count);
			err = req->out.h.error;
			if (!err) {
				res += num_written;
				pos += num_written;

				/* break out of the loop on short write */
				if (num_written != count)
					err = -EIO;
			}
		}
		fuse_put_request(fc, req);
	} while (!err && iov_iter_count(ii));

	if (res > 0)
		fuse_write_update_size(inode, pos);

	fuse_invalidate_attr(inode);

	return res > 0 ? res : err;
}

static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	size_t count = 0;
	ssize_t written = 0;
	struct inode *inode = mapping->host;
	ssize_t err;
	struct iov_iter i;

	WARN_ON(iocb->ki_pos != pos);

	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
	if (err)
		return err;

	mutex_lock(&inode->i_mutex);
	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out;

	if (count == 0)
		goto out;

	err = file_remove_suid(file);
	if (err)
		goto out;

	file_update_time(file);

	iov_iter_init(&i, iov, nr_segs, count, 0);
	written = fuse_perform_write(file, mapping, &i, pos);
	if (written >= 0)
		iocb->ki_pos = pos + written;

out:
	current->backing_dev_info = NULL;
	mutex_unlock(&inode->i_mutex);

	return written ? written : err;
}
static void fuse_release_user_pages(struct fuse_req *req, int write)
{
	unsigned i;

	for (i = 0; i < req->num_pages; i++) {
		struct page *page = req->pages[i];
		if (write)
			set_page_dirty_lock(page);
		put_page(page);
	}
}

static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
			       size_t *nbytesp, int write)
{
	size_t nbytes = *nbytesp;
	unsigned long user_addr = (unsigned long) buf;
	unsigned offset = user_addr & ~PAGE_MASK;
	int npages;

	/* Special case for kernel I/O: can copy directly into the buffer */
	if (segment_eq(get_fs(), KERNEL_DS)) {
		if (write)
			req->in.args[1].value = (void *) user_addr;
		else
			req->out.args[0].value = (void *) user_addr;

		return 0;
	}

	nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
	npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
	npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
	npages = get_user_pages_fast(user_addr, npages, !write, req->pages);
	if (npages < 0)
		return npages;

	req->num_pages = npages;
	req->page_offset = offset;

	if (write)
		req->in.argpages = 1;
	else
		req->out.argpages = 1;

	nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
	*nbytesp = min(*nbytesp, nbytes);

	return 0;
}

ssize_t fuse_direct_io(struct file *file, const char __user *buf,
		       size_t count, loff_t *ppos, int write)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	size_t nmax = write ? fc->max_write : fc->max_read;
	loff_t pos = *ppos;
	ssize_t res = 0;
	struct fuse_req *req;

	req = fuse_get_req(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);

	while (count) {
		size_t nres;
		fl_owner_t owner = current->files;
		size_t nbytes = min(count, nmax);
		int err = fuse_get_user_pages(req, buf, &nbytes, write);
		if (err) {
			res = err;
			break;
		}

		if (write)
			nres = fuse_send_write(req, file, pos, nbytes, owner);
		else
			nres = fuse_send_read(req, file, pos, nbytes, owner);

		fuse_release_user_pages(req, !write);
		if (req->out.h.error) {
			if (!res)
				res = req->out.h.error;
			break;
		} else if (nres > nbytes) {
			res = -EIO;
			break;
		}
		count -= nres;
		res += nres;
		pos += nres;
		buf += nres;
		if (nres != nbytes)
			break;
		if (count) {
			fuse_put_request(fc, req);
			req = fuse_get_req(fc);
			if (IS_ERR(req))
				break;
		}
	}
	if (!IS_ERR(req))
		fuse_put_request(fc, req);
	if (res > 0)
		*ppos = pos;

	return res;
}
EXPORT_SYMBOL_GPL(fuse_direct_io);

static ssize_t fuse_direct_read(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	ssize_t res;
	struct inode *inode = file->f_path.dentry->d_inode;

	if (is_bad_inode(inode))
		return -EIO;

	res = fuse_direct_io(file, buf, count, ppos, 0);

	fuse_invalidate_attr(inode);

	return res;
}

static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
				 size_t count, loff_t *ppos)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	ssize_t res;

	if (is_bad_inode(inode))
		return -EIO;

	/* Don't allow parallel writes to the same file */
	mutex_lock(&inode->i_mutex);
	res = generic_write_checks(file, ppos, &count, 0);
	if (!res) {
		res = fuse_direct_io(file, buf, count, ppos, 1);
		if (res > 0)
			fuse_write_update_size(inode, *ppos);
	}
	mutex_unlock(&inode->i_mutex);

	fuse_invalidate_attr(inode);

	return res;
}

static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
	__free_page(req->pages[0]);
	fuse_file_put(req->ff);
}

static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
{
	struct inode *inode = req->inode;
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;

	list_del(&req->writepages_entry);
	dec_bdi_stat(bdi, BDI_WRITEBACK);
	dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
	bdi_writeout_inc(bdi);
	wake_up(&fi->page_waitq);
}

/* Called under fc->lock, may release and reacquire it */
static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
__releases(fc->lock)
__acquires(fc->lock)
{
	struct fuse_inode *fi = get_fuse_inode(req->inode);
	loff_t size = i_size_read(req->inode);
	struct fuse_write_in *inarg = &req->misc.write.in;

	if (!fc->connected)
		goto out_free;

	if (inarg->offset + PAGE_CACHE_SIZE <= size) {
		inarg->size = PAGE_CACHE_SIZE;
	} else if (inarg->offset < size) {
		inarg->size = size & (PAGE_CACHE_SIZE - 1);
	} else {
		/* Got truncated off completely */
		goto out_free;
	}

	req->in.args[1].size = inarg->size;
	fi->writectr++;
	fuse_request_send_background_locked(fc, req);
	return;

 out_free:
	fuse_writepage_finish(fc, req);
	spin_unlock(&fc->lock);
	fuse_writepage_free(fc, req);
	fuse_put_request(fc, req);
	spin_lock(&fc->lock);
}
/*
 * If fi->writectr is positive (no truncate or fsync going on) send
 * all queued writepage requests.
 *
 * Called with fc->lock
 */
void fuse_flush_writepages(struct inode *inode)
__releases(fc->lock)
__acquires(fc->lock)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_req *req;

	while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
		req = list_entry(fi->queued_writes.next, struct fuse_req, list);
		list_del_init(&req->list);
		fuse_send_writepage(fc, req);
	}
}

static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
{
	struct inode *inode = req->inode;
	struct fuse_inode *fi = get_fuse_inode(inode);

	mapping_set_error(inode->i_mapping, req->out.h.error);
	spin_lock(&fc->lock);
	fi->writectr--;
	fuse_writepage_finish(fc, req);
	spin_unlock(&fc->lock);
	fuse_writepage_free(fc, req);
}
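/*
 * Write out a single page using a temporary page: the contents are
 * copied into a freshly allocated page, which is accounted as
 * NR_WRITEBACK_TEMP and queued on fi->queued_writes, and writeback on
 * the original page-cache page is ended immediately.  Completion is
 * tracked through fi->writepages and fi->page_waitq rather than the
 * generic VM writeback machinery (see fuse_page_is_writeback() above).
 */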
static int fuse_writepage_locked(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_req *req;
	struct fuse_file *ff;
	struct page *tmp_page;

	set_page_writeback(page);

	req = fuse_request_alloc_nofs();
	if (!req)
		goto err;

	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	if (!tmp_page)
		goto err_free;

	spin_lock(&fc->lock);
	BUG_ON(list_empty(&fi->write_files));
	ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
	req->ff = fuse_file_get(ff);
	spin_unlock(&fc->lock);

	fuse_write_fill(req, ff, page_offset(page), 0);

	copy_highpage(tmp_page, page);
	req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
	req->in.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = tmp_page;
	req->page_offset = 0;
	req->end = fuse_writepage_end;
	req->inode = inode;

	inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
	end_page_writeback(page);

	spin_lock(&fc->lock);
	list_add(&req->writepages_entry, &fi->writepages);
	list_add_tail(&req->list, &fi->queued_writes);
	fuse_flush_writepages(inode);
	spin_unlock(&fc->lock);

	return 0;

err_free:
	fuse_request_free(req);
err:
	end_page_writeback(page);
	return -ENOMEM;
}

static int fuse_writepage(struct page *page, struct writeback_control *wbc)
{
	int err;

	err = fuse_writepage_locked(page);
	unlock_page(page);

	return err;
}

static int fuse_launder_page(struct page *page)
{
	int err = 0;
	if (clear_page_dirty_for_io(page)) {
		struct inode *inode = page->mapping->host;
		err = fuse_writepage_locked(page);
		if (!err)
			fuse_wait_on_page_writeback(inode, page->index);
	}
	return err;
}

/*
 * Write back dirty pages now, because there may not be any suitable
 * open files later
 */
static void fuse_vma_close(struct vm_area_struct *vma)
{
	filemap_write_and_wait(vma->vm_file->f_mapping);
}

/*
 * Wait for writeback against this page to complete before allowing it
 * to be marked dirty again, and hence written back again, possibly
 * before the previous writepage completed.
 *
 * Block here, instead of in ->writepage(), so that the userspace fs
 * can only block processes actually operating on the filesystem.
 *
 * Otherwise an unprivileged userspace fs would be able to block
 * unrelated activity:
 *
 * - page migration
 * - sync(2)
 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
 */
static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	/*
	 * Don't use page->mapping as it may become NULL from a
	 * concurrent truncate.
	 */
	struct inode *inode = vma->vm_file->f_mapping->host;

	fuse_wait_on_page_writeback(inode, page->index);
	return 0;
}

static const struct vm_operations_struct fuse_file_vm_ops = {
	.close		= fuse_vma_close,
	.fault		= filemap_fault,
	.page_mkwrite	= fuse_page_mkwrite,
};

static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
		struct inode *inode = file->f_dentry->d_inode;
		struct fuse_conn *fc = get_fuse_conn(inode);
		struct fuse_inode *fi = get_fuse_inode(inode);
		struct fuse_file *ff = file->private_data;
		/*
		 * The file may be written through mmap, so chain it onto
		 * the inode's write_files list
		 */
		spin_lock(&fc->lock);
		if (list_empty(&ff->write_entry))
			list_add(&ff->write_entry, &fi->write_files);
		spin_unlock(&fc->lock);
	}
	file_accessed(file);
	vma->vm_ops = &fuse_file_vm_ops;
	return 0;
}

static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* Can't provide the coherency needed for MAP_SHARED */
	if (vma->vm_flags & VM_MAYSHARE)
		return -ENODEV;

	invalidate_inode_pages2(file->f_mapping);

	return generic_file_mmap(file, vma);
}

static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
				  struct file_lock *fl)
{
	switch (ffl->type) {
	case F_UNLCK:
		break;

	case F_RDLCK:
	case F_WRLCK:
		if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX ||
		    ffl->end < ffl->start)
			return -EIO;

		fl->fl_start = ffl->start;
		fl->fl_end = ffl->end;
		fl->fl_pid = ffl->pid;
		break;

	default:
		return -EIO;
	}
	fl->fl_type = ffl->type;
	return 0;
}

static void fuse_lk_fill(struct fuse_req *req, struct file *file,
			 const struct file_lock *fl, int opcode, pid_t pid,
			 int flock)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_lk_in *arg = &req->misc.lk_in;

	arg->fh = ff->fh;
	arg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
	arg->lk.start = fl->fl_start;
	arg->lk.end = fl->fl_end;
	arg->lk.type = fl->fl_type;
	arg->lk.pid = pid;
	if (flock)
		arg->lk_flags |= FUSE_LK_FLOCK;
	req->in.h.opcode = opcode;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(*arg);
	req->in.args[0].value = arg;
}

static int fuse_getlk(struct file *file, struct file_lock *fl)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	struct fuse_lk_out outarg;
	int err;

	req = fuse_get_req(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);

	fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0);
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(outarg);
	req->out.args[0].value = &outarg;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (!err)
		err = convert_fuse_file_lock(&outarg.lk, fl);

	return err;
}

static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
	pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
	int err;

	if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
		/* NLM needs asynchronous locks, which we don't support yet */
		return -ENOLCK;
	}

	/* Unlock on close is handled by the flush method */
	if (fl->fl_flags & FL_CLOSE)
		return 0;

	req = fuse_get_req(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);

	fuse_lk_fill(req, file, fl, opcode, pid, flock);
	fuse_request_send(fc, req);
	err = req->out.h.error;
	/* locking is restartable */
	if (err == -EINTR)
		err = -ERESTARTSYS;
	fuse_put_request(fc, req);
	return err;
}

static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;

	if (cmd == F_CANCELLK) {
		err = 0;
	} else if (cmd == F_GETLK) {
		if (fc->no_lock) {
			posix_test_lock(file, fl);
			err = 0;
		} else
			err = fuse_getlk(file, fl);
	} else {
		if (fc->no_lock)
			err = posix_lock_file(file, fl, NULL);
		else
			err = fuse_setlk(file, fl, 0);
	}
	return err;
}

static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;

	if (fc->no_lock) {
		err = flock_lock_file_wait(file, fl);
	} else {
		/* emulate flock with POSIX locks */
		fl->fl_owner = (fl_owner_t) file;
		err = fuse_setlk(file, fl, 1);
	}

	return err;
}

static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	struct fuse_bmap_in inarg;
	struct fuse_bmap_out outarg;
	int err;

	if (!inode->i_sb->s_bdev || fc->no_bmap)
		return 0;

	req = fuse_get_req(fc);
	if (IS_ERR(req))
		return 0;

	memset(&inarg, 0, sizeof(inarg));
	inarg.block = block;
	inarg.blocksize = inode->i_sb->s_blocksize;
	req->in.h.opcode = FUSE_BMAP;
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(outarg);
	req->out.args[0].value = &outarg;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err == -ENOSYS)
		fc->no_bmap = 1;

	return err ? 0 : outarg.block;
}
static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t retval;
	struct inode *inode = file->f_path.dentry->d_inode;

	mutex_lock(&inode->i_mutex);
	switch (origin) {
	case SEEK_END:
		retval = fuse_update_attributes(inode, NULL, file, NULL);
		if (retval)
			goto exit;
		offset += i_size_read(inode);
		break;
	case SEEK_CUR:
		offset += file->f_pos;
	}
	retval = -EINVAL;
	if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
		}
		retval = offset;
	}
exit:
	mutex_unlock(&inode->i_mutex);
	return retval;
}

static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
				unsigned int nr_segs, size_t bytes,
				bool to_user)
{
	struct iov_iter ii;
	int page_idx = 0;

	if (!bytes)
		return 0;

	iov_iter_init(&ii, iov, nr_segs, bytes, 0);

	while (iov_iter_count(&ii)) {
		struct page *page = pages[page_idx++];
		size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
		void *kaddr;

		kaddr = kmap(page);

		while (todo) {
			char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
			size_t iov_len = ii.iov->iov_len - ii.iov_offset;
			size_t copy = min(todo, iov_len);
			size_t left;

			if (!to_user)
				left = copy_from_user(kaddr, uaddr, copy);
			else
				left = copy_to_user(uaddr, kaddr, copy);

			if (unlikely(left))
				return -EFAULT;

			iov_iter_advance(&ii, copy);
			todo -= copy;
			kaddr += copy;
		}

		kunmap(page);
	}

	return 0;
}

/*
 * CUSE servers compiled on 32bit broke on 64bit kernels because the
 * ABI was defined to be 'struct iovec' which is different on 32bit
 * and 64bit.  Fortunately we can determine which structure the server
 * used from the size of the reply.
 */
static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src,
				 size_t transferred, unsigned count,
				 bool is_compat)
{
#ifdef CONFIG_COMPAT
	if (count * sizeof(struct compat_iovec) == transferred) {
		struct compat_iovec *ciov = src;
		unsigned i;

		/*
		 * With this interface a 32bit server cannot support
		 * non-compat (i.e. ones coming from 64bit apps) ioctl
		 * requests
		 */
		if (!is_compat)
			return -EINVAL;

		for (i = 0; i < count; i++) {
			dst[i].iov_base = compat_ptr(ciov[i].iov_base);
			dst[i].iov_len = ciov[i].iov_len;
		}
		return 0;
	}
#endif

	if (count * sizeof(struct iovec) != transferred)
		return -EIO;

	memcpy(dst, src, transferred);
	return 0;
}
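/*
 * For example, on a 64-bit kernel sizeof(struct iovec) is 16 bytes
 * while sizeof(struct compat_iovec) is 8, so a reply carrying three
 * iovecs is 48 bytes from a native server but only 24 bytes from a
 * compat (32-bit) one; that is how the size checks above distinguish
 * the two layouts.
 */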
/* Make sure iov_length() won't overflow */
static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
{
	size_t n;
	u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;

	for (n = 0; n < count; n++) {
		if (iov->iov_len > (size_t) max)
			return -ENOMEM;
		max -= iov->iov_len;
		iov++;
	}
	return 0;
}

/*
 * For ioctls, there is no generic way to determine how much memory
 * needs to be read and/or written.  Furthermore, ioctls are allowed
 * to dereference the passed pointer, so the parameter requires deep
 * copying but FUSE has no idea whatsoever about what to copy in or
 * out.
 *
 * This is solved by allowing FUSE server to retry ioctl with
 * necessary in/out iovecs.  Let's assume the ioctl implementation
 * needs to read in the following structure.
 *
 * struct a {
 *	char	*buf;
 *	size_t	buflen;
 * }
 *
 * On the first callout to FUSE server, inarg->in_size and
 * inarg->out_size will be zero; then, the server completes the ioctl
 * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and
 * the actual iov array to
 *
 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } }
 *
 * which tells FUSE to copy in the requested area and retry the ioctl.
 * On the second round, the server has access to the structure and
 * from that it can tell what to look for next, so on this invocation
 * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and the iov array to
 *
 * { { .iov_base = inarg.arg,	.iov_len = sizeof(struct a)	},
 *   { .iov_base = a.buf,	.iov_len = a.buflen		} }
 *
 * FUSE will copy both struct a and the pointed buffer from the
 * process doing the ioctl and retry ioctl with both struct a and the
 * buffer.
 *
 * This time, FUSE server has everything it needs and completes ioctl
 * without FUSE_IOCTL_RETRY which finishes the ioctl call.
 *
 * Copying data out works the same way.
 *
 * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel
 * automatically initializes in and out iovs by decoding @cmd with
 * _IOC_* macros and the server is not allowed to request RETRY.  This
 * limits ioctl data transfers to well-formed ioctls and is the forced
 * behavior for all FUSE servers.
 */
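/*
 * Sketch of the server side of the first retry described above (purely
 * illustrative; a real server builds its reply through whatever FUSE
 * library it uses):
 *
 *	struct fuse_ioctl_out out = {
 *		.flags	 = FUSE_IOCTL_RETRY,
 *		.in_iovs = 1,
 *	};
 *	struct iovec iov = {
 *		.iov_base = (void *) inarg.arg,
 *		.iov_len  = sizeof(struct a),
 *	};
 *
 * The reply payload is @out followed by the iov array; fuse_do_ioctl()
 * below copies in the area it describes and reissues FUSE_IOCTL.
 */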
long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
		   unsigned int flags)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	struct fuse_ioctl_in inarg = {
		.fh = ff->fh,
		.cmd = cmd,
		.arg = arg,
		.flags = flags
	};
	struct fuse_ioctl_out outarg;
	struct fuse_req *req = NULL;
	struct page **pages = NULL;
	struct page *iov_page = NULL;
	struct iovec *in_iov = NULL, *out_iov = NULL;
	unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
	size_t in_size, out_size, transferred;
	int err;

	/* assume all the iovs returned by client always fits in a page */
	BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);

	err = -ENOMEM;
	pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
	iov_page = alloc_page(GFP_KERNEL);
	if (!pages || !iov_page)
		goto out;

	/*
	 * If restricted, initialize IO parameters as encoded in @cmd.
	 * RETRY from server is not allowed.
	 */
	if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
		struct iovec *iov = page_address(iov_page);

		iov->iov_base = (void __user *)arg;
		iov->iov_len = _IOC_SIZE(cmd);

		if (_IOC_DIR(cmd) & _IOC_WRITE) {
			in_iov = iov;
			in_iovs = 1;
		}

		if (_IOC_DIR(cmd) & _IOC_READ) {
			out_iov = iov;
			out_iovs = 1;
		}
	}

 retry:
	inarg.in_size = in_size = iov_length(in_iov, in_iovs);
	inarg.out_size = out_size = iov_length(out_iov, out_iovs);

	/*
	 * Out data can be used either for actual out data or iovs,
	 * make sure there always is at least one page.
	 */
	out_size = max_t(size_t, out_size, PAGE_SIZE);
	max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE);

	/* make sure there are enough buffer pages and init request with them */
	err = -ENOMEM;
	if (max_pages > FUSE_MAX_PAGES_PER_REQ)
		goto out;
	while (num_pages < max_pages) {
		pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
		if (!pages[num_pages])
			goto out;
		num_pages++;
	}

	req = fuse_get_req(fc);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		req = NULL;
		goto out;
	}
	memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
	req->num_pages = num_pages;

	/* okay, let's send it to the client */
	req->in.h.opcode = FUSE_IOCTL;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	if (in_size) {
		req->in.numargs++;
		req->in.args[1].size = in_size;
		req->in.argpages = 1;

		err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size,
					   false);
		if (err)
			goto out;
	}

	req->out.numargs = 2;
	req->out.args[0].size = sizeof(outarg);
	req->out.args[0].value = &outarg;
	req->out.args[1].size = out_size;
	req->out.argpages = 1;
	req->out.argvar = 1;

	fuse_request_send(fc, req);
	err = req->out.h.error;
	transferred = req->out.args[1].size;
	fuse_put_request(fc, req);
	req = NULL;
	if (err)
		goto out;

	/* did it ask for retry? */
	if (outarg.flags & FUSE_IOCTL_RETRY) {
		char *vaddr;

		/* no retry if in restricted mode */
		err = -EIO;
		if (!(flags & FUSE_IOCTL_UNRESTRICTED))
			goto out;

		in_iovs = outarg.in_iovs;
		out_iovs = outarg.out_iovs;

		/*
		 * Make sure things are in boundary, separate checks
		 * are to protect against overflow.
		 */
		err = -ENOMEM;
		if (in_iovs > FUSE_IOCTL_MAX_IOV ||
		    out_iovs > FUSE_IOCTL_MAX_IOV ||
		    in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
			goto out;

		vaddr = kmap_atomic(pages[0], KM_USER0);
		err = fuse_copy_ioctl_iovec(page_address(iov_page), vaddr,
					    transferred, in_iovs + out_iovs,
					    (flags & FUSE_IOCTL_COMPAT) != 0);
		kunmap_atomic(vaddr, KM_USER0);
		if (err)
			goto out;

		in_iov = page_address(iov_page);
		out_iov = in_iov + in_iovs;

		err = fuse_verify_ioctl_iov(in_iov, in_iovs);
		if (err)
			goto out;

		err = fuse_verify_ioctl_iov(out_iov, out_iovs);
		if (err)
			goto out;

		goto retry;
	}

	err = -EIO;
	if (transferred > inarg.out_size)
		goto out;

	err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true);
 out:
	if (req)
		fuse_put_request(fc, req);
	if (iov_page)
		__free_page(iov_page);
	while (num_pages)
		__free_page(pages[--num_pages]);
	kfree(pages);

	return err ? err : outarg.result;
}
EXPORT_SYMBOL_GPL(fuse_do_ioctl);
static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
				   unsigned long arg, unsigned int flags)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (!fuse_allow_task(fc, current))
		return -EACCES;

	if (is_bad_inode(inode))
		return -EIO;

	return fuse_do_ioctl(file, cmd, arg, flags);
}

static long fuse_file_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	return fuse_file_ioctl_common(file, cmd, arg, 0);
}

static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
				   unsigned long arg)
{
	return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
}

/*
 * All files which have been polled are linked to RB tree
 * fuse_conn->polled_files which is indexed by kh.  Walk the tree and
 * find the matching one.
 */
static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh,
					      struct rb_node **parent_out)
{
	struct rb_node **link = &fc->polled_files.rb_node;
	struct rb_node *last = NULL;

	while (*link) {
		struct fuse_file *ff;

		last = *link;
		ff = rb_entry(last, struct fuse_file, polled_node);

		if (kh < ff->kh)
			link = &last->rb_left;
		else if (kh > ff->kh)
			link = &last->rb_right;
		else
			return link;
	}

	if (parent_out)
		*parent_out = last;
	return link;
}

/*
 * The file is about to be polled.  Make sure it's on the polled_files
 * RB tree.  Note that files once added to the polled_files tree are
 * not removed before the file is released.  This is because a file
 * polled once is likely to be polled again.
 */
static void fuse_register_polled_file(struct fuse_conn *fc,
				      struct fuse_file *ff)
{
	spin_lock(&fc->lock);
	if (RB_EMPTY_NODE(&ff->polled_node)) {
		struct rb_node **link, *parent;

		link = fuse_find_polled_node(fc, ff->kh, &parent);
		BUG_ON(*link);
		rb_link_node(&ff->polled_node, parent, link);
		rb_insert_color(&ff->polled_node, &fc->polled_files);
	}
	spin_unlock(&fc->lock);
}

unsigned fuse_file_poll(struct file *file, poll_table *wait)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
	struct fuse_poll_out outarg;
	struct fuse_req *req;
	int err;

	if (fc->no_poll)
		return DEFAULT_POLLMASK;

	poll_wait(file, &ff->poll_wait, wait);

	/*
	 * Ask for notification iff there's someone waiting for it.
	 * The client may ignore the flag and always notify.
	 */
	if (waitqueue_active(&ff->poll_wait)) {
		inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
		fuse_register_polled_file(fc, ff);
	}

	req = fuse_get_req(fc);
	if (IS_ERR(req))
		return POLLERR;

	req->in.h.opcode = FUSE_POLL;
	req->in.h.nodeid = ff->nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(outarg);
	req->out.args[0].value = &outarg;
	fuse_request_send(fc, req);
	err = req->out.h.error;
	fuse_put_request(fc, req);

	if (!err)
		return outarg.revents;
	if (err == -ENOSYS) {
		fc->no_poll = 1;
		return DEFAULT_POLLMASK;
	}
	return POLLERR;
}
EXPORT_SYMBOL_GPL(fuse_file_poll);

/*
 * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
 * wakes up the poll waiters.
 */
int fuse_notify_poll_wakeup(struct fuse_conn *fc,
			    struct fuse_notify_poll_wakeup_out *outarg)
{
	u64 kh = outarg->kh;
	struct rb_node **link;

	spin_lock(&fc->lock);

	link = fuse_find_polled_node(fc, kh, NULL);
	if (*link) {
		struct fuse_file *ff;

		ff = rb_entry(*link, struct fuse_file, polled_node);
		wake_up_interruptible_sync(&ff->poll_wait);
	}

	spin_unlock(&fc->lock);
	return 0;
}

static const struct file_operations fuse_file_operations = {
	.llseek		= fuse_file_llseek,
	.read		= do_sync_read,
	.aio_read	= fuse_file_aio_read,
	.write		= do_sync_write,
	.aio_write	= fuse_file_aio_write,
	.mmap		= fuse_file_mmap,
	.open		= fuse_open,
	.flush		= fuse_flush,
	.release	= fuse_release,
	.fsync		= fuse_fsync,
	.lock		= fuse_file_lock,
	.flock		= fuse_file_flock,
	.splice_read	= generic_file_splice_read,
	.unlocked_ioctl	= fuse_file_ioctl,
	.compat_ioctl	= fuse_file_compat_ioctl,
	.poll		= fuse_file_poll,
};

static const struct file_operations fuse_direct_io_file_operations = {
	.llseek		= fuse_file_llseek,
	.read		= fuse_direct_read,
	.write		= fuse_direct_write,
	.mmap		= fuse_direct_mmap,
	.open		= fuse_open,
	.flush		= fuse_flush,
	.release	= fuse_release,
	.fsync		= fuse_fsync,
	.lock		= fuse_file_lock,
	.flock		= fuse_file_flock,
	.unlocked_ioctl	= fuse_file_ioctl,
	.compat_ioctl	= fuse_file_compat_ioctl,
	.poll		= fuse_file_poll,
	/* no splice_read */
};

static const struct address_space_operations fuse_file_aops = {
	.readpage	= fuse_readpage,
	.writepage	= fuse_writepage,
	.launder_page	= fuse_launder_page,
	.write_begin	= fuse_write_begin,
	.write_end	= fuse_write_end,
	.readpages	= fuse_readpages,
	.set_page_dirty	= __set_page_dirty_nobuffers,
	.bmap		= fuse_bmap,
};

void fuse_init_file_inode(struct inode *inode)
{
	inode->i_fop = &fuse_file_operations;
	inode->i_data.a_ops = &fuse_file_aops;
}