/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/pipe_fs_i.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);

static struct kmem_cache *fuse_req_cachep;

static struct fuse_conn *fuse_get_conn(struct file *file)
{
	/*
	 * Lockless access is OK, because file->private_data is set
	 * once during mount and is valid until the file is released.
	 */
	return file->private_data;
}

static void fuse_request_init(struct fuse_req *req)
{
	memset(req, 0, sizeof(*req));
	INIT_LIST_HEAD(&req->list);
	INIT_LIST_HEAD(&req->intr_entry);
	init_waitqueue_head(&req->waitq);
	atomic_set(&req->count, 1);
}

struct fuse_req *fuse_request_alloc(void)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
	if (req)
		fuse_request_init(req);
	return req;
}
EXPORT_SYMBOL_GPL(fuse_request_alloc);

struct fuse_req *fuse_request_alloc_nofs(void)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
	if (req)
		fuse_request_init(req);
	return req;
}

void fuse_request_free(struct fuse_req *req)
{
	kmem_cache_free(fuse_req_cachep, req);
}

static void block_sigs(sigset_t *oldset)
{
	sigset_t mask;

	siginitsetinv(&mask, sigmask(SIGKILL));
	sigprocmask(SIG_BLOCK, &mask, oldset);
}

static void restore_sigs(sigset_t *oldset)
{
	sigprocmask(SIG_SETMASK, oldset, NULL);
}

static void __fuse_get_request(struct fuse_req *req)
{
	atomic_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
	BUG_ON(atomic_read(&req->count) < 2);
	atomic_dec(&req->count);
}

static void fuse_req_init_context(struct fuse_req *req)
{
	req->in.h.uid = current_fsuid();
	req->in.h.gid = current_fsgid();
	req->in.h.pid = current->pid;
}

struct fuse_req *fuse_get_req(struct fuse_conn *fc)
{
	struct fuse_req *req;
	sigset_t oldset;
	int intr;
	int err;

	atomic_inc(&fc->num_waiting);
	block_sigs(&oldset);
	intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
	restore_sigs(&oldset);
	err = -EINTR;
	if (intr)
		goto out;

	err = -ENOTCONN;
	if (!fc->connected)
		goto out;

	req = fuse_request_alloc();
	err = -ENOMEM;
	if (!req)
		goto out;

	fuse_req_init_context(req);
	req->waiting = 1;
	return req;

 out:
	atomic_dec(&fc->num_waiting);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(fuse_get_req);
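/*
 * Typical caller pattern (illustrative sketch only; real callers live
 * in fs/fuse/file.c and friends and fill in opcode-specific args):
 *
 *	req = fuse_get_req(fc);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *	... fill in req->in.h.opcode and req->in.args[] ...
 *	fuse_request_send(fc, req);
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */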
/*
 * Return request in fuse_file->reserved_req.  However that may
 * currently be in use.  If that is the case, wait for it to become
 * available.
 */
static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
					 struct file *file)
{
	struct fuse_req *req = NULL;
	struct fuse_file *ff = file->private_data;

	do {
		wait_event(fc->reserved_req_waitq, ff->reserved_req);
		spin_lock(&fc->lock);
		if (ff->reserved_req) {
			req = ff->reserved_req;
			ff->reserved_req = NULL;
			get_file(file);
			req->stolen_file = file;
		}
		spin_unlock(&fc->lock);
	} while (!req);

	return req;
}

/*
 * Put stolen request back into fuse_file->reserved_req
 */
static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
	struct file *file = req->stolen_file;
	struct fuse_file *ff = file->private_data;

	spin_lock(&fc->lock);
	fuse_request_init(req);
	BUG_ON(ff->reserved_req);
	ff->reserved_req = req;
	wake_up_all(&fc->reserved_req_waitq);
	spin_unlock(&fc->lock);
	fput(file);
}

/*
 * Get a request for a file operation; always succeeds.
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
 */
struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
{
	struct fuse_req *req;

	atomic_inc(&fc->num_waiting);
	wait_event(fc->blocked_waitq, !fc->blocked);
	req = fuse_request_alloc();
	if (!req)
		req = get_reserved_req(fc, file);

	fuse_req_init_context(req);
	req->waiting = 1;
	return req;
}

void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (atomic_dec_and_test(&req->count)) {
		if (req->waiting)
			atomic_dec(&fc->num_waiting);

		if (req->stolen_file)
			put_reserved_req(fc, req);
		else
			fuse_request_free(req);
	}
}
EXPORT_SYMBOL_GPL(fuse_put_request);

static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
	unsigned nbytes = 0;
	unsigned i;

	for (i = 0; i < numargs; i++)
		nbytes += args[i].size;

	return nbytes;
}

static u64 fuse_get_unique(struct fuse_conn *fc)
{
	fc->reqctr++;
	/* zero is special */
	if (fc->reqctr == 0)
		fc->reqctr = 1;

	return fc->reqctr;
}

static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
	req->in.h.unique = fuse_get_unique(fc);
	req->in.h.len = sizeof(struct fuse_in_header) +
		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
	list_add_tail(&req->list, &fc->pending);
	req->state = FUSE_REQ_PENDING;
	if (!req->waiting) {
		req->waiting = 1;
		atomic_inc(&fc->num_waiting);
	}
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

static void flush_bg_queue(struct fuse_conn *fc)
{
	while (fc->active_background < fc->max_background &&
	       !list_empty(&fc->bg_queue)) {
		struct fuse_req *req;

		req = list_entry(fc->bg_queue.next, struct fuse_req, list);
		list_del(&req->list);
		fc->active_background++;
		queue_request(fc, req);
	}
}
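/*
 * Request life cycle, as implemented below (summary added for
 * orientation; the state constants are defined in fuse_i.h):
 *
 *	FUSE_REQ_PENDING   queued on fc->pending by queue_request()
 *	FUSE_REQ_READING   being copied to userspace in fuse_dev_read()
 *	FUSE_REQ_SENT      on fc->processing, awaiting a reply
 *	FUSE_REQ_WRITING   reply being copied in fuse_dev_do_write()
 *	FUSE_REQ_FINISHED  completed by request_end()
 */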
/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, else the reference to the
 * request is released.
 *
 * Called with fc->lock, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
__releases(&fc->lock)
{
	void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
	req->end = NULL;
	list_del(&req->list);
	list_del(&req->intr_entry);
	req->state = FUSE_REQ_FINISHED;
	if (req->background) {
		if (fc->num_background == fc->max_background) {
			fc->blocked = 0;
			wake_up_all(&fc->blocked_waitq);
		}
		if (fc->num_background == fc->congestion_threshold &&
		    fc->connected && fc->bdi_initialized) {
			clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
			clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
		}
		fc->num_background--;
		fc->active_background--;
		flush_bg_queue(fc);
	}
	spin_unlock(&fc->lock);
	wake_up(&req->waitq);
	if (end)
		end(fc, req);
	fuse_put_request(fc, req);
}

static void wait_answer_interruptible(struct fuse_conn *fc,
				      struct fuse_req *req)
__releases(&fc->lock)
__acquires(&fc->lock)
{
	if (signal_pending(current))
		return;

	spin_unlock(&fc->lock);
	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
	spin_lock(&fc->lock);
}

static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
{
	list_add_tail(&req->intr_entry, &fc->interrupts);
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
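/*
 * Interrupt handling in a nutshell (overview added for orientation):
 * if a signal arrives while a synchronous request is already in
 * userspace, request_wait_answer() marks it interrupted and
 * queue_interrupt() puts it on fc->interrupts.  The next read on the
 * device then emits a FUSE_INTERRUPT request (see
 * fuse_read_interrupt()) whose body carries the unique id of the
 * original request.  A daemon that replies -EAGAIN to the interrupt
 * gets it re-queued (see fuse_dev_do_write()); a daemon that replies
 * -ENOSYS sets fc->no_interrupt, disabling the mechanism entirely.
 */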
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
__releases(&fc->lock)
__acquires(&fc->lock)
{
	if (!fc->no_interrupt) {
		/* Any signal may interrupt this */
		wait_answer_interruptible(fc, req);

		if (req->aborted)
			goto aborted;
		if (req->state == FUSE_REQ_FINISHED)
			return;

		req->interrupted = 1;
		if (req->state == FUSE_REQ_SENT)
			queue_interrupt(fc, req);
	}

	if (!req->force) {
		sigset_t oldset;

		/* Only fatal signals may interrupt this */
		block_sigs(&oldset);
		wait_answer_interruptible(fc, req);
		restore_sigs(&oldset);

		if (req->aborted)
			goto aborted;
		if (req->state == FUSE_REQ_FINISHED)
			return;

		/* Request is not yet in userspace, bail out */
		if (req->state == FUSE_REQ_PENDING) {
			list_del(&req->list);
			__fuse_put_request(req);
			req->out.h.error = -EINTR;
			return;
		}
	}

	/*
	 * Either request is already in userspace, or it was forced.
	 * Wait it out.
	 */
	spin_unlock(&fc->lock);
	wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
	spin_lock(&fc->lock);

	if (!req->aborted)
		return;

 aborted:
	BUG_ON(req->state != FUSE_REQ_FINISHED);
	if (req->locked) {
		/* This is uninterruptible sleep, because data is
		   being copied to/from the buffers of req.  During
		   locked state, there mustn't be any filesystem
		   operation (e.g. page fault), since that could lead
		   to deadlock */
		spin_unlock(&fc->lock);
		wait_event(req->waitq, !req->locked);
		spin_lock(&fc->lock);
	}
}

void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	spin_lock(&fc->lock);
	if (!fc->connected)
		req->out.h.error = -ENOTCONN;
	else if (fc->conn_error)
		req->out.h.error = -ECONNREFUSED;
	else {
		queue_request(fc, req);
		/* acquire extra reference, since request is still needed
		   after request_end() */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_request_send);

static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
					    struct fuse_req *req)
{
	req->background = 1;
	fc->num_background++;
	if (fc->num_background == fc->max_background)
		fc->blocked = 1;
	if (fc->num_background == fc->congestion_threshold &&
	    fc->bdi_initialized) {
		set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
		set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
	}
	list_add_tail(&req->list, &fc->bg_queue);
	flush_bg_queue(fc);
}

static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		fuse_request_send_nowait_locked(fc, req);
		spin_unlock(&fc->lock);
	} else {
		req->out.h.error = -ENOTCONN;
		request_end(fc, req);
	}
}

void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 0;
	fuse_request_send_nowait(fc, req);
}

void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	fuse_request_send_nowait(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);

/*
 * Called under fc->lock
 *
 * fc->connected must have been checked previously
 */
void fuse_request_send_background_locked(struct fuse_conn *fc,
					 struct fuse_req *req)
{
	req->isreply = 1;
	fuse_request_send_nowait_locked(fc, req);
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted bail out.
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	int err = 0;
	if (req) {
		spin_lock(&fc->lock);
		if (req->aborted)
			err = -ENOENT;
		else
			req->locked = 1;
		spin_unlock(&fc->lock);
	}
	return err;
}
/*
 * Unlock request.  If it was aborted while locked, the requester
 * thread is currently waiting for it to be unlocked, so wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (req) {
		spin_lock(&fc->lock);
		req->locked = 0;
		if (req->aborted)
			wake_up(&req->waitq);
		spin_unlock(&fc->lock);
	}
}

struct fuse_copy_state {
	struct fuse_conn *fc;
	int write;
	struct fuse_req *req;
	const struct iovec *iov;
	struct pipe_buffer *pipebufs;
	struct pipe_buffer *currbuf;
	struct pipe_inode_info *pipe;
	unsigned long nr_segs;
	unsigned long seglen;
	unsigned long addr;
	struct page *pg;
	void *mapaddr;
	void *buf;
	unsigned len;
};

static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
			   int write, struct fuse_req *req,
			   const struct iovec *iov, unsigned long nr_segs)
{
	memset(cs, 0, sizeof(*cs));
	cs->fc = fc;
	cs->write = write;
	cs->req = req;
	cs->iov = iov;
	cs->nr_segs = nr_segs;
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
	if (cs->currbuf) {
		struct pipe_buffer *buf = cs->currbuf;

		buf->ops->unmap(cs->pipe, buf, cs->mapaddr);

		cs->currbuf = NULL;
		cs->mapaddr = NULL;
	} else if (cs->mapaddr) {
		kunmap_atomic(cs->mapaddr, KM_USER0);
		if (cs->write) {
			flush_dcache_page(cs->pg);
			set_page_dirty_lock(cs->pg);
		}
		put_page(cs->pg);
		cs->mapaddr = NULL;
	}
}

/*
 * Get another pageful of userspace buffer, map it into kernel
 * address space, and lock the request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
	unsigned long offset;
	int err;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);
	if (cs->pipebufs) {
		struct pipe_buffer *buf = cs->pipebufs;

		err = buf->ops->confirm(cs->pipe, buf);
		if (err)
			return err;

		BUG_ON(!cs->nr_segs);
		cs->currbuf = buf;
		cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
		cs->len = buf->len;
		cs->buf = cs->mapaddr + buf->offset;
		cs->pipebufs++;
		cs->nr_segs--;
	} else {
		if (!cs->seglen) {
			BUG_ON(!cs->nr_segs);
			cs->seglen = cs->iov[0].iov_len;
			cs->addr = (unsigned long) cs->iov[0].iov_base;
			cs->iov++;
			cs->nr_segs--;
		}
		err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
		if (err < 0)
			return err;
		BUG_ON(err != 1);
		offset = cs->addr % PAGE_SIZE;
		cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
		cs->buf = cs->mapaddr + offset;
		cs->len = min(PAGE_SIZE - offset, cs->seglen);
		cs->seglen -= cs->len;
		cs->addr += cs->len;
	}

	return lock_request(cs->fc, cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
	unsigned ncpy = min(*size, cs->len);
	if (val) {
		if (cs->write)
			memcpy(cs->buf, *val, ncpy);
		else
			memcpy(*val, cs->buf, ncpy);
		*val += ncpy;
	}
	*size -= ncpy;
	cs->len -= ncpy;
	cs->buf += ncpy;
	return ncpy;
}
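/*
 * Note on the copy helpers above (summary added for orientation): a
 * fuse_copy_state walks either an iovec from read()/write() or an
 * array of pipe_buffers from splice, one page-sized window at a time.
 * fuse_copy_fill() advances the window and re-locks the request;
 * fuse_copy_do() copies within the current window.  The request is
 * unlocked around each fill so that an abort can always make
 * progress.
 */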
/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
			  unsigned offset, unsigned count, int zeroing)
{
	if (page && zeroing && count < PAGE_SIZE) {
		void *mapaddr = kmap_atomic(page, KM_USER1);
		memset(mapaddr, 0, PAGE_SIZE);
		kunmap_atomic(mapaddr, KM_USER1);
	}
	while (count) {
		if (!cs->len) {
			int err = fuse_copy_fill(cs);
			if (err)
				return err;
		}
		if (page) {
			void *mapaddr = kmap_atomic(page, KM_USER1);
			void *buf = mapaddr + offset;
			offset += fuse_copy_do(cs, &buf, &count);
			kunmap_atomic(mapaddr, KM_USER1);
		} else
			offset += fuse_copy_do(cs, NULL, &count);
	}
	if (page && !cs->write)
		flush_dcache_page(page);
	return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
			   int zeroing)
{
	unsigned i;
	struct fuse_req *req = cs->req;
	unsigned offset = req->page_offset;
	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);

	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
		struct page *page = req->pages[i];
		int err = fuse_copy_page(cs, page, offset, count, zeroing);
		if (err)
			return err;

		nbytes -= count;
		count = min(nbytes, (unsigned) PAGE_SIZE);
		offset = 0;
	}
	return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
	while (size) {
		if (!cs->len) {
			int err = fuse_copy_fill(cs);
			if (err)
				return err;
		}
		fuse_copy_do(cs, &val, &size);
	}
	return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
			  unsigned argpages, struct fuse_arg *args,
			  int zeroing)
{
	int err = 0;
	unsigned i;

	for (i = 0; !err && i < numargs; i++) {
		struct fuse_arg *arg = &args[i];
		if (i == numargs - 1 && argpages)
			err = fuse_copy_pages(cs, arg->size, zeroing);
		else
			err = fuse_copy_one(cs, arg->value, arg->size);
	}
	return err;
}

static int request_pending(struct fuse_conn *fc)
{
	return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
}

/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
__releases(&fc->lock)
__acquires(&fc->lock)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&fc->waitq, &wait);
	while (fc->connected && !request_pending(fc)) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (signal_pending(current))
			break;

		spin_unlock(&fc->lock);
		schedule();
		spin_lock(&fc->lock);
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&fc->waitq, &wait);
}
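/*
 * Wire format on the read side (sketch added for orientation): each
 * read from the device returns exactly one request,
 *
 *	struct fuse_in_header   (len, opcode, unique, uid, gid, pid, ...)
 *	opcode-specific in-args (req->in.args[], the last one possibly
 *	                         backed by req->pages[])
 *
 * in->h.len is the total size, so the daemon's buffer must be at
 * least that large or the request is failed with -EIO below.
 */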
/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests this is assembled on demand, without a need
 * to allocate a separate fuse_req structure.
 *
 * Called with fc->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
			       const struct iovec *iov, unsigned long nr_segs)
__releases(&fc->lock)
{
	struct fuse_copy_state cs;
	struct fuse_in_header ih;
	struct fuse_interrupt_in arg;
	unsigned reqsize = sizeof(ih) + sizeof(arg);
	int err;

	list_del_init(&req->intr_entry);
	req->intr_unique = fuse_get_unique(fc);
	memset(&ih, 0, sizeof(ih));
	memset(&arg, 0, sizeof(arg));
	ih.len = reqsize;
	ih.opcode = FUSE_INTERRUPT;
	ih.unique = req->intr_unique;
	arg.unique = req->in.h.unique;

	spin_unlock(&fc->lock);
	if (iov_length(iov, nr_segs) < reqsize)
		return -EINVAL;

	fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
	err = fuse_copy_one(&cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(&cs, &arg, sizeof(arg));
	fuse_copy_finish(&cs);

	return err ? err : reqsize;
}

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies the request data to the userspace
 * buffer.  If no reply is needed (FORGET), or the request has been
 * aborted, or there was an error during the copying, then it's
 * finished by calling request_end().  Otherwise add it to the
 * processing list and mark it as sent.
 */
static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
			     unsigned long nr_segs, loff_t pos)
{
	int err;
	struct fuse_req *req;
	struct fuse_in *in;
	struct fuse_copy_state cs;
	unsigned reqsize;
	struct file *file = iocb->ki_filp;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

 restart:
	spin_lock(&fc->lock);
	err = -EAGAIN;
	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
	    !request_pending(fc))
		goto err_unlock;

	request_wait(fc);
	err = -ENODEV;
	if (!fc->connected)
		goto err_unlock;
	err = -ERESTARTSYS;
	if (!request_pending(fc))
		goto err_unlock;

	if (!list_empty(&fc->interrupts)) {
		req = list_entry(fc->interrupts.next, struct fuse_req,
				 intr_entry);
		return fuse_read_interrupt(fc, req, iov, nr_segs);
	}

	req = list_entry(fc->pending.next, struct fuse_req, list);
	req->state = FUSE_REQ_READING;
	list_move(&req->list, &fc->io);

	in = &req->in;
	reqsize = in->h.len;
	/* If request is too large, reply with an error and restart the read */
	if (iov_length(iov, nr_segs) < reqsize) {
		req->out.h.error = -EIO;
		/* SETXATTR is special, since it may contain too large data */
		if (in->h.opcode == FUSE_SETXATTR)
			req->out.h.error = -E2BIG;
		request_end(fc, req);
		goto restart;
	}
	spin_unlock(&fc->lock);
	fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
	if (!err)
		err = fuse_copy_args(&cs, in->numargs, in->argpages,
				     (struct fuse_arg *) in->args, 0);
	fuse_copy_finish(&cs);
	spin_lock(&fc->lock);
	req->locked = 0;
	if (req->aborted) {
		request_end(fc, req);
		return -ENODEV;
	}
	if (err) {
		req->out.h.error = -EIO;
		request_end(fc, req);
		return err;
	}
	if (!req->isreply)
		request_end(fc, req);
	else {
		req->state = FUSE_REQ_SENT;
		list_move_tail(&req->list, &fc->processing);
		if (req->interrupted)
			queue_interrupt(fc, req);
		spin_unlock(&fc->lock);
	}
	return reqsize;

 err_unlock:
	spin_unlock(&fc->lock);
	return err;
}
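/*
 * Reverse notifications (overview added for orientation): these are
 * daemon-initiated messages, written to the device with a zero
 * 'unique' field and the notification code in the 'error' field of
 * the header (see fuse_dev_do_write()).  The handlers below decode
 * the per-code payload and act on the kernel-side caches.
 */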
static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
			    struct fuse_copy_state *cs)
{
	struct fuse_notify_poll_wakeup_out outarg;
	int err = -EINVAL;

	if (size != sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	fuse_copy_finish(cs);
	return fuse_notify_poll_wakeup(fc, &outarg);

err:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
				   struct fuse_copy_state *cs)
{
	struct fuse_notify_inval_inode_out outarg;
	int err = -EINVAL;

	if (size != sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;
	fuse_copy_finish(cs);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb) {
		err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
					       outarg.off, outarg.len);
	}
	up_read(&fc->killsb);
	return err;

err:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
				   struct fuse_copy_state *cs)
{
	struct fuse_notify_inval_entry_out outarg;
	int err = -ENOMEM;
	char *buf;
	struct qstr name;

	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
	if (!buf)
		goto err;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	err = -ENAMETOOLONG;
	if (outarg.namelen > FUSE_NAME_MAX)
		goto err;

	name.name = buf;
	name.len = outarg.namelen;
	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
	if (err)
		goto err;
	fuse_copy_finish(cs);
	buf[outarg.namelen] = 0;
	name.hash = full_name_hash(name.name, name.len);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb)
		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
	up_read(&fc->killsb);
	kfree(buf);
	return err;

err:
	kfree(buf);
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
		       unsigned int size, struct fuse_copy_state *cs)
{
	switch (code) {
	case FUSE_NOTIFY_POLL:
		return fuse_notify_poll(fc, size, cs);

	case FUSE_NOTIFY_INVAL_INODE:
		return fuse_notify_inval_inode(fc, size, cs);

	case FUSE_NOTIFY_INVAL_ENTRY:
		return fuse_notify_inval_entry(fc, size, cs);

	default:
		fuse_copy_finish(cs);
		return -EINVAL;
	}
}

/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
	struct list_head *entry;

	list_for_each(entry, &fc->processing) {
		struct fuse_req *req;
		req = list_entry(entry, struct fuse_req, list);
		if (req->in.h.unique == unique || req->intr_unique == unique)
			return req;
	}
	return NULL;
}
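/*
 * Wire format on the write side (sketch added for orientation): a
 * reply starts with
 *
 *	struct fuse_out_header  (len, error, unique)
 *
 * followed by the opcode-specific out-args.  oh.len must equal the
 * number of bytes written; a short final argument is only accepted
 * when the request was marked 'argvar' (see copy_out_args() below).
 */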
static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
			 unsigned nbytes)
{
	unsigned reqsize = sizeof(struct fuse_out_header);

	if (out->h.error)
		return nbytes != reqsize ? -EINVAL : 0;

	reqsize += len_args(out->numargs, out->args);

	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
		return -EINVAL;
	else if (reqsize > nbytes) {
		struct fuse_arg *lastarg = &out->args[out->numargs-1];
		unsigned diffsize = reqsize - nbytes;
		if (diffsize > lastarg->size)
			return -EINVAL;
		lastarg->size -= diffsize;
	}
	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
			      out->page_zeroing);
}

/*
 * Write a single reply to a request.  First the header is copied from
 * the write buffer.  The request is then searched on the processing
 * list by the unique ID found in the header.  If found, then remove
 * it from the list and copy the rest of the buffer to the request.
 * The request is finished by calling request_end()
 */
static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
				 struct fuse_copy_state *cs, size_t nbytes)
{
	int err;
	struct fuse_req *req;
	struct fuse_out_header oh;

	if (nbytes < sizeof(struct fuse_out_header))
		return -EINVAL;

	err = fuse_copy_one(cs, &oh, sizeof(oh));
	if (err)
		goto err_finish;

	err = -EINVAL;
	if (oh.len != nbytes)
		goto err_finish;

	/*
	 * Zero oh.unique indicates unsolicited notification message
	 * and error contains notification code.
	 */
	if (!oh.unique) {
		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
		return err ? err : nbytes;
	}

	err = -EINVAL;
	if (oh.error <= -1000 || oh.error > 0)
		goto err_finish;

	spin_lock(&fc->lock);
	err = -ENOENT;
	if (!fc->connected)
		goto err_unlock;

	req = request_find(fc, oh.unique);
	if (!req)
		goto err_unlock;

	if (req->aborted) {
		spin_unlock(&fc->lock);
		fuse_copy_finish(cs);
		spin_lock(&fc->lock);
		request_end(fc, req);
		return -ENOENT;
	}
	/* Is it an interrupt reply? */
	if (req->intr_unique == oh.unique) {
		err = -EINVAL;
		if (nbytes != sizeof(struct fuse_out_header))
			goto err_unlock;

		if (oh.error == -ENOSYS)
			fc->no_interrupt = 1;
		else if (oh.error == -EAGAIN)
			queue_interrupt(fc, req);

		spin_unlock(&fc->lock);
		fuse_copy_finish(cs);
		return nbytes;
	}

	req->state = FUSE_REQ_WRITING;
	list_move(&req->list, &fc->io);
	req->out.h = oh;
	req->locked = 1;
	cs->req = req;
	spin_unlock(&fc->lock);

	err = copy_out_args(cs, &req->out, nbytes);
	fuse_copy_finish(cs);

	spin_lock(&fc->lock);
	req->locked = 0;
	if (!err) {
		if (req->aborted)
			err = -ENOENT;
	} else if (!req->aborted)
		req->out.h.error = -EIO;
	request_end(fc, req);

	return err ? err : nbytes;

 err_unlock:
	spin_unlock(&fc->lock);
 err_finish:
	fuse_copy_finish(cs);
	return err;
}
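/*
 * The write path below has two entry points (note added for
 * orientation): fuse_dev_write() consumes a plain iovec, while
 * fuse_dev_splice_write() consumes pipe buffers directly.  Where a
 * whole pipe buffer is covered by the reply, the buffer is moved
 * rather than copied (the 'ibuf->ops = NULL' case), which is what
 * makes the splice path zero-copy.
 */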
static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
			      unsigned long nr_segs, loff_t pos)
{
	struct fuse_copy_state cs;
	struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
	if (!fc)
		return -EPERM;

	fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);

	return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
}

static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
				     struct file *out, loff_t *ppos,
				     size_t len, unsigned int flags)
{
	unsigned nbuf;
	unsigned idx;
	struct pipe_buffer *bufs;
	struct fuse_copy_state cs;
	struct fuse_conn *fc;
	size_t rem;
	ssize_t ret;

	fc = fuse_get_conn(out);
	if (!fc)
		return -EPERM;

	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
	if (!bufs)
		return -ENOMEM;

	pipe_lock(pipe);
	nbuf = 0;
	rem = 0;
	for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
		rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;

	ret = -EINVAL;
	if (rem < len) {
		pipe_unlock(pipe);
		goto out;
	}

	rem = len;
	while (rem) {
		struct pipe_buffer *ibuf;
		struct pipe_buffer *obuf;

		BUG_ON(nbuf >= pipe->buffers);
		BUG_ON(!pipe->nrbufs);
		ibuf = &pipe->bufs[pipe->curbuf];
		obuf = &bufs[nbuf];

		if (rem >= ibuf->len) {
			*obuf = *ibuf;
			ibuf->ops = NULL;
			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
			pipe->nrbufs--;
		} else {
			ibuf->ops->get(pipe, ibuf);
			*obuf = *ibuf;
			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
			obuf->len = rem;
			ibuf->offset += obuf->len;
			ibuf->len -= obuf->len;
		}
		nbuf++;
		rem -= obuf->len;
	}
	pipe_unlock(pipe);

	memset(&cs, 0, sizeof(struct fuse_copy_state));
	cs.fc = fc;
	cs.write = 0;
	cs.pipebufs = bufs;
	cs.nr_segs = nbuf;
	cs.pipe = pipe;

	ret = fuse_dev_do_write(fc, &cs, len);

	for (idx = 0; idx < nbuf; idx++) {
		struct pipe_buffer *buf = &bufs[idx];
		buf->ops->release(pipe, buf);
	}
out:
	kfree(bufs);
	return ret;
}

static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
	unsigned mask = POLLOUT | POLLWRNORM;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return POLLERR;

	poll_wait(file, &fc->waitq, wait);

	spin_lock(&fc->lock);
	if (!fc->connected)
		mask = POLLERR;
	else if (request_pending(fc))
		mask |= POLLIN | POLLRDNORM;
	spin_unlock(&fc->lock);

	return mask;
}

/*
 * Abort all requests on the given list (pending or processing)
 *
 * This function releases and reacquires fc->lock
 */
static void end_requests(struct fuse_conn *fc, struct list_head *head)
__releases(&fc->lock)
__acquires(&fc->lock)
{
	while (!list_empty(head)) {
		struct fuse_req *req;
		req = list_entry(head->next, struct fuse_req, list);
		req->out.h.error = -ECONNABORTED;
		request_end(fc, req);
		spin_lock(&fc->lock);
	}
}
This will make request_wait_answer() wait 1238 * until the request is unlocked and then return. 1239 * 1240 * If the request is asynchronous, then the end function needs to be 1241 * called after waiting for the request to be unlocked (if it was 1242 * locked). 1243 */ 1244 static void end_io_requests(struct fuse_conn *fc) 1245 __releases(&fc->lock) 1246 __acquires(&fc->lock) 1247 { 1248 while (!list_empty(&fc->io)) { 1249 struct fuse_req *req = 1250 list_entry(fc->io.next, struct fuse_req, list); 1251 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 1252 1253 req->aborted = 1; 1254 req->out.h.error = -ECONNABORTED; 1255 req->state = FUSE_REQ_FINISHED; 1256 list_del_init(&req->list); 1257 wake_up(&req->waitq); 1258 if (end) { 1259 req->end = NULL; 1260 __fuse_get_request(req); 1261 spin_unlock(&fc->lock); 1262 wait_event(req->waitq, !req->locked); 1263 end(fc, req); 1264 fuse_put_request(fc, req); 1265 spin_lock(&fc->lock); 1266 } 1267 } 1268 } 1269 1270 /* 1271 * Abort all requests. 1272 * 1273 * Emergency exit in case of a malicious or accidental deadlock, or 1274 * just a hung filesystem. 1275 * 1276 * The same effect is usually achievable through killing the 1277 * filesystem daemon and all users of the filesystem. The exception 1278 * is the combination of an asynchronous request and the tricky 1279 * deadlock (see Documentation/filesystems/fuse.txt). 1280 * 1281 * During the aborting, progression of requests from the pending and 1282 * processing lists onto the io list, and progression of new requests 1283 * onto the pending list is prevented by req->connected being false. 1284 * 1285 * Progression of requests under I/O to the processing list is 1286 * prevented by the req->aborted flag being true for these requests. 1287 * For this reason requests on the io list must be aborted first. 
void fuse_abort_conn(struct fuse_conn *fc)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		fc->connected = 0;
		fc->blocked = 0;
		end_io_requests(fc);
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		wake_up_all(&fc->waitq);
		wake_up_all(&fc->blocked_waitq);
		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
	}
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_abort_conn);

int fuse_dev_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (fc) {
		spin_lock(&fc->lock);
		fc->connected = 0;
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		spin_unlock(&fc->lock);
		fuse_conn_put(fc);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(fuse_dev_release);

static int fuse_dev_fasync(int fd, struct file *file, int on)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	/* No locking - fasync_helper does its own locking */
	return fasync_helper(fd, file, on, &fc->fasync);
}

const struct file_operations fuse_dev_operations = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.read		= do_sync_read,
	.aio_read	= fuse_dev_read,
	.write		= do_sync_write,
	.aio_write	= fuse_dev_write,
	.splice_write	= fuse_dev_splice_write,
	.poll		= fuse_dev_poll,
	.release	= fuse_dev_release,
	.fasync		= fuse_dev_fasync,
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);

static struct miscdevice fuse_miscdevice = {
	.minor = FUSE_MINOR,
	.name  = "fuse",
	.fops = &fuse_dev_operations,
};

int __init fuse_dev_init(void)
{
	int err = -ENOMEM;
	fuse_req_cachep = kmem_cache_create("fuse_request",
					    sizeof(struct fuse_req),
					    0, 0, NULL);
	if (!fuse_req_cachep)
		goto out;

	err = misc_register(&fuse_miscdevice);
	if (err)
		goto out_cache_clean;

	return 0;

 out_cache_clean:
	kmem_cache_destroy(fuse_req_cachep);
 out:
	return err;
}

void fuse_dev_cleanup(void)
{
	misc_deregister(&fuse_miscdevice);
	kmem_cache_destroy(fuse_req_cachep);
}