// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/highmem.h>
#include "fuse_i.h"

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion in device removal and mounting path
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct delayed_work dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	char name[24];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
struct virtio_fs {
	struct kref refcount;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
};

struct virtio_fs_forget {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
};

static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
	struct virtio_fs *fs = vq->vdev->priv;

	return &fs->vqs[vq->index];
}

static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
{
	return &vq_to_fsvq(vq)->fud->pq;
}

static void release_virtio_fs_obj(struct kref *ref)
{
	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);

	kfree(vfs->vqs);
	kfree(vfs);
}

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
	kref_put(&fs->refcount, release_virtio_fs_obj);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
	struct virtio_fs *vfs = fiq->priv;

	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put(vfs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight < 0);

	/* Wait for in flight requests to finish. */
	while (1) {
		spin_lock(&fsvq->lock);
		if (!fsvq->in_flight) {
			spin_unlock(&fsvq->lock);
			break;
		}
		spin_unlock(&fsvq->lock);
		/* TODO use completion instead of timeout */
		usleep_range(1000, 2000);
	}

	flush_work(&fsvq->done_work);
	flush_delayed_work(&fsvq->dispatch_work);
}

static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq)
{
	struct virtio_fs_forget *forget;

	spin_lock(&fsvq->lock);
	while (1) {
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
						  struct virtio_fs_forget, list);
		if (!forget)
			break;
		list_del(&forget->list);
		kfree(forget);
	}
	spin_unlock(&fsvq->lock);
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		if (i == VQ_HIPRIO)
			drain_hiprio_queued_reqs(fsvq);

		virtio_fs_drain_queue(fsvq);
	}
}

static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

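	/* Mark every virtqueue as connected so new requests may be queued again */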
	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}

/* Add a new instance to the list or return -EEXIST if tag name exists */
static int virtio_fs_add_instance(struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	bool duplicate = false;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0)
			duplicate = true;
	}

	if (!duplicate)
		list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	if (duplicate)
		return -EEXIST;
	return 0;
}

/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
	struct virtio_fs *fs;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, tag) == 0) {
			kref_get(&fs->refcount);
			goto found;
		}
	}

	fs = NULL; /* not found */

found:
	mutex_unlock(&virtio_fs_mutex);

	return fs;
}

static void virtio_fs_free_devs(struct virtio_fs *fs)
{
	unsigned int i;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (!fsvq->fud)
			continue;

		fuse_dev_free(fsvq->fud);
		fsvq->fud = NULL;
	}
}

/* Read filesystem name from virtio config into fs->tag (must kfree()). */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)];

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';
	return 0;
}

/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			fsvq->in_flight--;
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);
}

static void virtio_fs_dummy_dispatch_work(struct work_struct *work)
{
}

static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	struct virtqueue *vq = fsvq->vq;
	struct scatterlist sg;
	struct scatterlist *sgs[] = {&sg};
	bool notify;
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
						  struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

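		/* Detach this queued forget request; it is resubmitted to the
		 * hiprio virtqueue below, or dropped if the queue has been
		 * disconnected.
		 */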
		list_del(&forget->list);
		if (!fsvq->connected) {
			spin_unlock(&fsvq->lock);
			kfree(forget);
			continue;
		}

		sg_init_one(&sg, forget, sizeof(*forget));

		/* Enqueue the request */
		dev_dbg(&vq->vdev->dev, "%s\n", __func__);
		ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
					 ret);
				list_add_tail(&forget->list,
					      &fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						      msecs_to_jiffies(1));
			} else {
				pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
					 ret);
				kfree(forget);
			}
			spin_unlock(&fsvq->lock);
			return;
		}

		fsvq->in_flight++;
		notify = virtqueue_kick_prepare(vq);
		spin_unlock(&fsvq->lock);

		if (notify)
			virtqueue_notify(vq);
		pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
			 __func__);
	}
}

/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	unsigned int offset = 0;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int len;
	unsigned int i;

	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	req->argbuf = kmalloc(len, GFP_ATOMIC);
	if (!req->argbuf)
		return -ENOMEM;

	for (i = 0; i < num_in; i++) {
		memcpy(req->argbuf + offset,
		       args->in_args[i].value,
		       args->in_args[i].size);
		offset += args->in_args[i].size;
	}

	return 0;
}

/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
	unsigned int remaining;
	unsigned int offset;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int i;

	remaining = req->out.h.len - sizeof(req->out.h);
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

	for (i = 0; i < num_out; i++) {
		unsigned int argsize = args->out_args[i].size;

		if (args->out_argvar &&
		    i == args->out_numargs - 1 &&
		    argsize > remaining) {
			argsize = remaining;
		}

		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
		offset += argsize;

		if (i != args->out_numargs - 1)
			remaining -= argsize;
	}

	/* Store the actual size of the variable-length arg */
	if (args->out_argvar)
		args->out_args[args->out_numargs - 1].size = remaining;

	kfree(req->argbuf);
	req->argbuf = NULL;
}

/* Work function for request completion */
static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_conn *fc = fsvq->fud->fc;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_args_pages *ap;
	struct fuse_req *next;
	struct fuse_args *args;
	unsigned int len, i, thislen;
	struct page *page;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		/*
		 * TODO verify that server properly follows FUSE protocol
		 * (oh.uniq, oh.len)
		 */
		args = req->args;
		copy_args_from_argbuf(args, req);

		if (args->out_pages && args->page_zeroing) {
			len = args->out_args[args->out_numargs - 1].size;
			ap = container_of(args, typeof(*ap), args);
			for (i = 0; i < ap->num_pages; i++) {
				thislen = ap->descs[i].length;
				if (len < thislen) {
					WARN_ON(ap->descs[i].offset);
					page = ap->pages[i];
					zero_user_segment(page, len, thislen);
					len = 0;
				} else {
					len -= thislen;
				}
			}
		}

		spin_lock(&fpq->lock);
		clear_bit(FR_SENT, &req->flags);
		list_del_init(&req->list);
		spin_unlock(&fpq->lock);

		fuse_request_end(fc, req);
		spin_lock(&fsvq->lock);
		fsvq->in_flight--;
		spin_unlock(&fsvq->lock);
	}
}

/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

	schedule_work(&fsvq->done_work);
}

/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
			       struct virtio_fs *fs)
{
	struct virtqueue **vqs;
	vq_callback_t **callbacks;
	const char **names;
	unsigned int i;
	int ret = 0;

	virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
		     &fs->num_request_queues);
	if (fs->num_request_queues == 0)
		return -EINVAL;

	fs->nvqs = 1 + fs->num_request_queues;
	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
	if (!fs->vqs)
		return -ENOMEM;

	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
				  GFP_KERNEL);
	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
	if (!vqs || !callbacks || !names) {
		ret = -ENOMEM;
		goto out;
	}

	callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
	snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
		 "hiprio");
	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
	INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
	INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
			  virtio_fs_hiprio_dispatch_work);
	spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);

	/* Initialize the requests virtqueues */
	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
		spin_lock_init(&fs->vqs[i].lock);
		INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
		INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
				  virtio_fs_dummy_dispatch_work);
		INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
		snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
			 "requests.%u", i - VQ_REQUEST);
		callbacks[i] = virtio_fs_vq_done;
		names[i] = fs->vqs[i].name;
	}

	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
	if (ret < 0)
		goto out;

	for (i = 0; i < fs->nvqs; i++)
		fs->vqs[i].vq = vqs[i];

	virtio_fs_start_all_queues(fs);
out:
	kfree(names);
	kfree(callbacks);
	kfree(vqs);
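	/* The temporary arrays are always freed; per-queue state only on error */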
	if (ret)
		kfree(fs->vqs);
	return ret;
}

/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	vdev->config->del_vqs(vdev);
}

static int virtio_fs_probe(struct virtio_device *vdev)
{
	struct virtio_fs *fs;
	int ret;

	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	kref_init(&fs->refcount);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
	if (ret < 0)
		goto out;

	ret = virtio_fs_setup_vqs(vdev, fs);
	if (ret < 0)
		goto out;

	/* TODO vq affinity */

	/* Bring the device online in case the filesystem is mounted and
	 * requests need to be sent before we return.
	 */
	virtio_device_ready(vdev);

	ret = virtio_fs_add_instance(fs);
	if (ret < 0)
		goto out_vqs;

	return 0;

out_vqs:
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

out:
	vdev->priv = NULL;
	kfree(fs);
	return ret;
}

static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = false;
		spin_unlock(&fsvq->lock);
	}
}

static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get new reference */
	list_del_init(&fs->list);
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues(fs);
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
	{},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
	.driver.name = KBUILD_MODNAME,
	.driver.owner = THIS_MODULE,
	.id_table = id_table,
	.feature_table = feature_table,
	.feature_table_size = ARRAY_SIZE(feature_table),
	.probe = virtio_fs_probe,
	.remove = virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze = virtio_fs_freeze,
	.restore = virtio_fs_restore,
#endif
};

static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	struct fuse_forget_link *link;
	struct virtio_fs_forget *forget;
	struct scatterlist sg;
	struct scatterlist *sgs[] = {&sg};
	struct virtio_fs *fs;
	struct virtqueue *vq;
	struct virtio_fs_vq *fsvq;
	bool notify;
	u64 unique;
	int ret;

	link = fuse_dequeue_forget(fiq, 1, NULL);
	unique = fuse_get_unique(fiq);

	fs = fiq->priv;
	fsvq = &fs->vqs[VQ_HIPRIO];
	spin_unlock(&fiq->lock);

	/* Allocate a buffer for the request */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);

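	/* Fill in the FORGET request; header and argument are sent to the
	 * device as a single buffer and freed when the completion is reaped.
	 */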
	forget->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*forget),
	};
	forget->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	sg_init_one(&sg, forget, sizeof(*forget));

	/* Enqueue the request */
	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		kfree(forget);
		spin_unlock(&fsvq->lock);
		goto out;
	}

	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
		}
		spin_unlock(&fsvq->lock);
		goto out;
	}

	fsvq->in_flight++;
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
out:
	kfree(link);
}

static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystem aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with shared lock between host and guest.
	 */
	spin_unlock(&fiq->lock);
}

/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
	unsigned int total_sgs = 1 /* fuse_in_header */;

	if (args->in_numargs - args->in_pages)
		total_sgs += 1;

	if (args->in_pages)
		total_sgs += ap->num_pages;

	if (!test_bit(FR_ISREPLY, &req->flags))
		return total_sgs;

	total_sgs += 1 /* fuse_out_header */;

	if (args->out_numargs - args->out_pages)
		total_sgs += 1;

	if (args->out_pages)
		total_sgs += ap->num_pages;

	return total_sgs;
}

/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
				       struct page **pages,
				       struct fuse_page_desc *page_descs,
				       unsigned int num_pages,
				       unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		sg_init_table(&sg[i], 1);
		this_len = min(page_descs[i].length, total_len);
		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
		total_len -= this_len;
	}

	return i;
}

/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
				      struct fuse_req *req,
				      struct fuse_arg *args,
				      unsigned int numargs,
				      bool argpages,
				      void *argbuf,
				      unsigned int *len_used)
{
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
	unsigned int total_sgs = 0;
	unsigned int len;

	len = fuse_len_args(numargs - argpages, args);
	if (len)
		sg_init_one(&sg[total_sgs++], argbuf, len);

	if (argpages)
		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
						ap->pages, ap->descs,
						ap->num_pages,
						args[numargs - 1].size);

	if (len_used)
		*len_used = len;

	return total_sgs;
}

/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req)
{
	/* requests need at least 4 elements */
	struct scatterlist *stack_sgs[6];
	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
	struct scatterlist **sgs = stack_sgs;
	struct scatterlist *sg = stack_sg;
	struct virtqueue *vq;
	struct fuse_args *args = req->args;
	unsigned int argbuf_used = 0;
	unsigned int out_sgs = 0;
	unsigned int in_sgs = 0;
	unsigned int total_sgs;
	unsigned int i;
	int ret;
	bool notify;

	/* Does the sglist fit on the stack? */
	total_sgs = sg_count_fuse_req(req);
	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
		if (!sgs || !sg) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Use a bounce buffer since stack args cannot be mapped */
	ret = copy_args_to_argbuf(req);
	if (ret < 0)
		goto out;

	/* Request elements */
	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);

	/* Reply elements */
	if (test_bit(FR_ISREPLY, &req->flags)) {
		sg_init_one(&sg[out_sgs + in_sgs++],
			    &req->out.h, sizeof(req->out.h));
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
	}

	WARN_ON(out_sgs + in_sgs != total_sgs);

	for (i = 0; i < total_sgs; i++)
		sgs[i] = &sg[i];

	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	fsvq->in_flight++;
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);

out:
	if (ret < 0 && req->argbuf) {
		kfree(req->argbuf);
		req->argbuf = NULL;
	}
	if (sgs != stack_sgs) {
		kfree(sgs);
		kfree(sg);
	}

	return ret;
}

static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
	struct virtio_fs *fs;
	struct fuse_conn *fc;
	struct fuse_req *req;
	struct fuse_pqueue *fpq;
	int ret;

	WARN_ON(list_empty(&fiq->pending));
	req = list_last_entry(&fiq->pending, struct fuse_req, list);
	clear_bit(FR_PENDING, &req->flags);
	list_del_init(&req->list);
	WARN_ON(!list_empty(&fiq->pending));
	spin_unlock(&fiq->lock);

	fs = fiq->priv;
	fc = fs->vqs[queue_id].fud->fc;

	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
		 __func__, req->in.h.opcode, req->in.h.unique,
		 req->in.h.nodeid, req->in.h.len,
		 fuse_len_args(req->args->out_numargs, req->args->out_args));

	fpq = &fs->vqs[queue_id].fud->pq;
	spin_lock(&fpq->lock);
	if (!fpq->connected) {
		spin_unlock(&fpq->lock);
		req->out.h.error = -ENODEV;
		pr_err("virtio-fs: %s disconnected\n", __func__);
		fuse_request_end(fc, req);
		return;
	}
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();

retry:
	ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			/* Virtqueue full. Retry submission */
			/* TODO use completion instead of timeout */
			usleep_range(20, 30);
			goto retry;
		}
		req->out.h.error = ret;
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
		spin_lock(&fpq->lock);
		clear_bit(FR_SENT, &req->flags);
		list_del_init(&req->list);
		spin_unlock(&fpq->lock);
		fuse_request_end(fc, req);
		return;
	}
}

static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock,
	.wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock,
	.wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock,
	.release = virtio_fs_fiq_release,
};

static int virtio_fs_fill_super(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct virtio_fs *fs = fc->iq.priv;
	unsigned int i;
	int err;
	struct fuse_fs_context ctx = {
		.rootmode = S_IFDIR,
		.default_permissions = 1,
		.allow_other = 1,
		.max_read = UINT_MAX,
		.blksize = 512,
		.destroy = true,
		.no_control = true,
		.no_force_umount = true,
	};

	mutex_lock(&virtio_fs_mutex);

	/* After holding mutex, make sure virtiofs device is still there.
	 * Though we are holding a reference to it, driver ->remove might
	 * still have cleaned up virtual queues. In that case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
		goto err;
	}

	err = -ENOMEM;
	/* Allocate fuse_dev for hiprio and notification queues */
	for (i = 0; i < VQ_REQUEST; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fsvq->fud = fuse_dev_alloc();
		if (!fsvq->fud)
			goto err_free_fuse_devs;
	}

	ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
	err = fuse_fill_super_common(sb, &ctx);
	if (err < 0)
		goto err_free_fuse_devs;

	fc = fs->vqs[VQ_REQUEST].fud->fc;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (i == VQ_REQUEST)
			continue; /* already initialized */
		fuse_dev_install(fsvq->fud, fc);
	}

	/* Previous unmount will stop all queues. Start these again */
	virtio_fs_start_all_queues(fs);
	fuse_send_init(fc);
	mutex_unlock(&virtio_fs_mutex);
	return 0;

err_free_fuse_devs:
	virtio_fs_free_devs(fs);
err:
	mutex_unlock(&virtio_fs_mutex);
	return err;
}

static void virtio_kill_sb(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct virtio_fs *vfs;
	struct virtio_fs_vq *fsvq;

	/* If mount failed, we can still be called without any fc */
	if (!fc)
		return fuse_kill_sb_anon(sb);

	vfs = fc->iq.priv;
	fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_kill_sb_anon(sb);

	/* fuse_kill_sb_anon() must have sent destroy. Stop all queues
	 * and drain one more time and free fuse devices. Freeing fuse
	 * devices will drop their reference on fuse_conn and that in
	 * turn will drop its reference on virtio_fs object.
	 */
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
}

static int virtio_fs_test_super(struct super_block *sb,
				struct fs_context *fsc)
{
	struct fuse_conn *fc = fsc->s_fs_info;

	return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
}

static int virtio_fs_set_super(struct super_block *sb,
			       struct fs_context *fsc)
{
	int err;

	err = get_anon_bdev(&sb->s_dev);
	if (!err)
		fuse_conn_get(fsc->s_fs_info);

	return err;
}

static int virtio_fs_get_tree(struct fs_context *fsc)
{
	struct virtio_fs *fs;
	struct super_block *sb;
	struct fuse_conn *fc;
	int err;

	/* This gets a reference on virtio_fs object. This ptr gets installed
	 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
	 * to drop the reference to this object.
	 */
	fs = virtio_fs_find_instance(fsc->source);
	if (!fs) {
		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
		return -EINVAL;
	}

	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
	if (!fc) {
		mutex_lock(&virtio_fs_mutex);
		virtio_fs_put(fs);
		mutex_unlock(&virtio_fs_mutex);
		return -ENOMEM;
	}

	fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
		       fs);
	fc->release = fuse_free_conn;
	fc->delete_stale = true;

	fsc->s_fs_info = fc;
	sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
	fuse_conn_put(fc);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		err = virtio_fs_fill_super(sb);
		if (err) {
			deactivate_locked_super(sb);
			return err;
		}

		sb->s_flags |= SB_ACTIVE;
	}

	WARN_ON(fsc->root);
	fsc->root = dget(sb->s_root);
	return 0;
}

static const struct fs_context_operations virtio_fs_context_ops = {
	.get_tree = virtio_fs_get_tree,
};

static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}

static struct file_system_type virtio_fs_type = {
	.owner = THIS_MODULE,
	.name = "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb = virtio_kill_sb,
};

static int __init virtio_fs_init(void)
{
	int ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	if (ret < 0)
		return ret;

	ret = register_filesystem(&virtio_fs_type);
	if (ret < 0) {
		unregister_virtio_driver(&virtio_fs_driver);
		return ret;
	}

	return 0;
}
module_init(virtio_fs_init);

static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
}
module_exit(virtio_fs_exit);

MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);