1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * virtio-fs: Virtio Filesystem 4 * Copyright (C) 2018 Red Hat, Inc. 5 */ 6 7 #include <linux/fs.h> 8 #include <linux/module.h> 9 #include <linux/virtio.h> 10 #include <linux/virtio_fs.h> 11 #include <linux/delay.h> 12 #include <linux/fs_context.h> 13 #include <linux/highmem.h> 14 #include "fuse_i.h" 15 16 /* List of virtio-fs device instances and a lock for the list. Also provides 17 * mutual exclusion in device removal and mounting path 18 */ 19 static DEFINE_MUTEX(virtio_fs_mutex); 20 static LIST_HEAD(virtio_fs_instances); 21 22 enum { 23 VQ_HIPRIO, 24 VQ_REQUEST 25 }; 26 27 /* Per-virtqueue state */ 28 struct virtio_fs_vq { 29 spinlock_t lock; 30 struct virtqueue *vq; /* protected by ->lock */ 31 struct work_struct done_work; 32 struct list_head queued_reqs; 33 struct list_head end_reqs; /* End these requests */ 34 struct delayed_work dispatch_work; 35 struct fuse_dev *fud; 36 bool connected; 37 long in_flight; 38 char name[24]; 39 } ____cacheline_aligned_in_smp; 40 41 /* A virtio-fs device instance */ 42 struct virtio_fs { 43 struct kref refcount; 44 struct list_head list; /* on virtio_fs_instances */ 45 char *tag; 46 struct virtio_fs_vq *vqs; 47 unsigned int nvqs; /* number of virtqueues */ 48 unsigned int num_request_queues; /* number of request queues */ 49 }; 50 51 struct virtio_fs_forget { 52 struct fuse_in_header ih; 53 struct fuse_forget_in arg; 54 /* This request can be temporarily queued on virt queue */ 55 struct list_head list; 56 }; 57 58 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 59 struct fuse_req *req, bool in_flight); 60 61 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) 62 { 63 struct virtio_fs *fs = vq->vdev->priv; 64 65 return &fs->vqs[vq->index]; 66 } 67 68 static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) 69 { 70 return &vq_to_fsvq(vq)->fud->pq; 71 } 72 73 /* Should be called with fsvq->lock held. */ 74 static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) 75 { 76 fsvq->in_flight++; 77 } 78 79 /* Should be called with fsvq->lock held. */ 80 static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) 81 { 82 WARN_ON(fsvq->in_flight <= 0); 83 fsvq->in_flight--; 84 } 85 86 static void release_virtio_fs_obj(struct kref *ref) 87 { 88 struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); 89 90 kfree(vfs->vqs); 91 kfree(vfs); 92 } 93 94 /* Make sure virtiofs_mutex is held */ 95 static void virtio_fs_put(struct virtio_fs *fs) 96 { 97 kref_put(&fs->refcount, release_virtio_fs_obj); 98 } 99 100 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) 101 { 102 struct virtio_fs *vfs = fiq->priv; 103 104 mutex_lock(&virtio_fs_mutex); 105 virtio_fs_put(vfs); 106 mutex_unlock(&virtio_fs_mutex); 107 } 108 109 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) 110 { 111 WARN_ON(fsvq->in_flight < 0); 112 113 /* Wait for in flight requests to finish.*/ 114 while (1) { 115 spin_lock(&fsvq->lock); 116 if (!fsvq->in_flight) { 117 spin_unlock(&fsvq->lock); 118 break; 119 } 120 spin_unlock(&fsvq->lock); 121 /* TODO use completion instead of timeout */ 122 usleep_range(1000, 2000); 123 } 124 125 flush_work(&fsvq->done_work); 126 flush_delayed_work(&fsvq->dispatch_work); 127 } 128 129 static void virtio_fs_drain_all_queues(struct virtio_fs *fs) 130 { 131 struct virtio_fs_vq *fsvq; 132 int i; 133 134 for (i = 0; i < fs->nvqs; i++) { 135 fsvq = &fs->vqs[i]; 136 virtio_fs_drain_queue(fsvq); 137 } 138 } 139 140 static void virtio_fs_start_all_queues(struct virtio_fs *fs) 141 { 142 struct virtio_fs_vq *fsvq; 143 int i; 144 145 for (i = 0; i < fs->nvqs; i++) { 146 fsvq = &fs->vqs[i]; 147 spin_lock(&fsvq->lock); 148 fsvq->connected = true; 149 spin_unlock(&fsvq->lock); 150 } 151 } 152 153 /* Add a new instance to the list or return -EEXIST if tag name exists*/ 154 static int virtio_fs_add_instance(struct virtio_fs *fs) 155 { 156 struct virtio_fs *fs2; 157 bool duplicate = false; 158 159 mutex_lock(&virtio_fs_mutex); 160 161 list_for_each_entry(fs2, &virtio_fs_instances, list) { 162 if (strcmp(fs->tag, fs2->tag) == 0) 163 duplicate = true; 164 } 165 166 if (!duplicate) 167 list_add_tail(&fs->list, &virtio_fs_instances); 168 169 mutex_unlock(&virtio_fs_mutex); 170 171 if (duplicate) 172 return -EEXIST; 173 return 0; 174 } 175 176 /* Return the virtio_fs with a given tag, or NULL */ 177 static struct virtio_fs *virtio_fs_find_instance(const char *tag) 178 { 179 struct virtio_fs *fs; 180 181 mutex_lock(&virtio_fs_mutex); 182 183 list_for_each_entry(fs, &virtio_fs_instances, list) { 184 if (strcmp(fs->tag, tag) == 0) { 185 kref_get(&fs->refcount); 186 goto found; 187 } 188 } 189 190 fs = NULL; /* not found */ 191 192 found: 193 mutex_unlock(&virtio_fs_mutex); 194 195 return fs; 196 } 197 198 static void virtio_fs_free_devs(struct virtio_fs *fs) 199 { 200 unsigned int i; 201 202 for (i = 0; i < fs->nvqs; i++) { 203 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 204 205 if (!fsvq->fud) 206 continue; 207 208 fuse_dev_free(fsvq->fud); 209 fsvq->fud = NULL; 210 } 211 } 212 213 /* Read filesystem name from virtio config into fs->tag (must kfree()). */ 214 static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) 215 { 216 char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; 217 char *end; 218 size_t len; 219 220 virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), 221 &tag_buf, sizeof(tag_buf)); 222 end = memchr(tag_buf, '\0', sizeof(tag_buf)); 223 if (end == tag_buf) 224 return -EINVAL; /* empty tag */ 225 if (!end) 226 end = &tag_buf[sizeof(tag_buf)]; 227 228 len = end - tag_buf; 229 fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); 230 if (!fs->tag) 231 return -ENOMEM; 232 memcpy(fs->tag, tag_buf, len); 233 fs->tag[len] = '\0'; 234 return 0; 235 } 236 237 /* Work function for hiprio completion */ 238 static void virtio_fs_hiprio_done_work(struct work_struct *work) 239 { 240 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 241 done_work); 242 struct virtqueue *vq = fsvq->vq; 243 244 /* Free completed FUSE_FORGET requests */ 245 spin_lock(&fsvq->lock); 246 do { 247 unsigned int len; 248 void *req; 249 250 virtqueue_disable_cb(vq); 251 252 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 253 kfree(req); 254 dec_in_flight_req(fsvq); 255 } 256 } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); 257 spin_unlock(&fsvq->lock); 258 } 259 260 static void virtio_fs_request_dispatch_work(struct work_struct *work) 261 { 262 struct fuse_req *req; 263 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 264 dispatch_work.work); 265 struct fuse_conn *fc = fsvq->fud->fc; 266 int ret; 267 268 pr_debug("virtio-fs: worker %s called.\n", __func__); 269 while (1) { 270 spin_lock(&fsvq->lock); 271 req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, 272 list); 273 if (!req) { 274 spin_unlock(&fsvq->lock); 275 break; 276 } 277 278 list_del_init(&req->list); 279 spin_unlock(&fsvq->lock); 280 fuse_request_end(fc, req); 281 } 282 283 /* Dispatch pending requests */ 284 while (1) { 285 spin_lock(&fsvq->lock); 286 req = list_first_entry_or_null(&fsvq->queued_reqs, 287 struct fuse_req, list); 288 if (!req) { 289 spin_unlock(&fsvq->lock); 290 return; 291 } 292 list_del_init(&req->list); 293 spin_unlock(&fsvq->lock); 294 295 ret = virtio_fs_enqueue_req(fsvq, req, true); 296 if (ret < 0) { 297 if (ret == -ENOMEM || ret == -ENOSPC) { 298 spin_lock(&fsvq->lock); 299 list_add_tail(&req->list, &fsvq->queued_reqs); 300 schedule_delayed_work(&fsvq->dispatch_work, 301 msecs_to_jiffies(1)); 302 spin_unlock(&fsvq->lock); 303 return; 304 } 305 req->out.h.error = ret; 306 spin_lock(&fsvq->lock); 307 dec_in_flight_req(fsvq); 308 spin_unlock(&fsvq->lock); 309 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 310 ret); 311 fuse_request_end(fc, req); 312 } 313 } 314 } 315 316 /* 317 * Returns 1 if queue is full and sender should wait a bit before sending 318 * next request, 0 otherwise. 319 */ 320 static int send_forget_request(struct virtio_fs_vq *fsvq, 321 struct virtio_fs_forget *forget, 322 bool in_flight) 323 { 324 struct scatterlist sg; 325 struct virtqueue *vq; 326 int ret = 0; 327 bool notify; 328 329 spin_lock(&fsvq->lock); 330 if (!fsvq->connected) { 331 if (in_flight) 332 dec_in_flight_req(fsvq); 333 kfree(forget); 334 goto out; 335 } 336 337 sg_init_one(&sg, forget, sizeof(*forget)); 338 vq = fsvq->vq; 339 dev_dbg(&vq->vdev->dev, "%s\n", __func__); 340 341 ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC); 342 if (ret < 0) { 343 if (ret == -ENOMEM || ret == -ENOSPC) { 344 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", 345 ret); 346 list_add_tail(&forget->list, &fsvq->queued_reqs); 347 schedule_delayed_work(&fsvq->dispatch_work, 348 msecs_to_jiffies(1)); 349 if (!in_flight) 350 inc_in_flight_req(fsvq); 351 /* Queue is full */ 352 ret = 1; 353 } else { 354 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", 355 ret); 356 kfree(forget); 357 if (in_flight) 358 dec_in_flight_req(fsvq); 359 } 360 goto out; 361 } 362 363 if (!in_flight) 364 inc_in_flight_req(fsvq); 365 notify = virtqueue_kick_prepare(vq); 366 spin_unlock(&fsvq->lock); 367 368 if (notify) 369 virtqueue_notify(vq); 370 return ret; 371 out: 372 spin_unlock(&fsvq->lock); 373 return ret; 374 } 375 376 static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) 377 { 378 struct virtio_fs_forget *forget; 379 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 380 dispatch_work.work); 381 pr_debug("virtio-fs: worker %s called.\n", __func__); 382 while (1) { 383 spin_lock(&fsvq->lock); 384 forget = list_first_entry_or_null(&fsvq->queued_reqs, 385 struct virtio_fs_forget, list); 386 if (!forget) { 387 spin_unlock(&fsvq->lock); 388 return; 389 } 390 391 list_del(&forget->list); 392 spin_unlock(&fsvq->lock); 393 if (send_forget_request(fsvq, forget, true)) 394 return; 395 } 396 } 397 398 /* Allocate and copy args into req->argbuf */ 399 static int copy_args_to_argbuf(struct fuse_req *req) 400 { 401 struct fuse_args *args = req->args; 402 unsigned int offset = 0; 403 unsigned int num_in; 404 unsigned int num_out; 405 unsigned int len; 406 unsigned int i; 407 408 num_in = args->in_numargs - args->in_pages; 409 num_out = args->out_numargs - args->out_pages; 410 len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + 411 fuse_len_args(num_out, args->out_args); 412 413 req->argbuf = kmalloc(len, GFP_ATOMIC); 414 if (!req->argbuf) 415 return -ENOMEM; 416 417 for (i = 0; i < num_in; i++) { 418 memcpy(req->argbuf + offset, 419 args->in_args[i].value, 420 args->in_args[i].size); 421 offset += args->in_args[i].size; 422 } 423 424 return 0; 425 } 426 427 /* Copy args out of and free req->argbuf */ 428 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) 429 { 430 unsigned int remaining; 431 unsigned int offset; 432 unsigned int num_in; 433 unsigned int num_out; 434 unsigned int i; 435 436 remaining = req->out.h.len - sizeof(req->out.h); 437 num_in = args->in_numargs - args->in_pages; 438 num_out = args->out_numargs - args->out_pages; 439 offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); 440 441 for (i = 0; i < num_out; i++) { 442 unsigned int argsize = args->out_args[i].size; 443 444 if (args->out_argvar && 445 i == args->out_numargs - 1 && 446 argsize > remaining) { 447 argsize = remaining; 448 } 449 450 memcpy(args->out_args[i].value, req->argbuf + offset, argsize); 451 offset += argsize; 452 453 if (i != args->out_numargs - 1) 454 remaining -= argsize; 455 } 456 457 /* Store the actual size of the variable-length arg */ 458 if (args->out_argvar) 459 args->out_args[args->out_numargs - 1].size = remaining; 460 461 kfree(req->argbuf); 462 req->argbuf = NULL; 463 } 464 465 /* Work function for request completion */ 466 static void virtio_fs_requests_done_work(struct work_struct *work) 467 { 468 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 469 done_work); 470 struct fuse_pqueue *fpq = &fsvq->fud->pq; 471 struct fuse_conn *fc = fsvq->fud->fc; 472 struct virtqueue *vq = fsvq->vq; 473 struct fuse_req *req; 474 struct fuse_args_pages *ap; 475 struct fuse_req *next; 476 struct fuse_args *args; 477 unsigned int len, i, thislen; 478 struct page *page; 479 LIST_HEAD(reqs); 480 481 /* Collect completed requests off the virtqueue */ 482 spin_lock(&fsvq->lock); 483 do { 484 virtqueue_disable_cb(vq); 485 486 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 487 spin_lock(&fpq->lock); 488 list_move_tail(&req->list, &reqs); 489 spin_unlock(&fpq->lock); 490 } 491 } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); 492 spin_unlock(&fsvq->lock); 493 494 /* End requests */ 495 list_for_each_entry_safe(req, next, &reqs, list) { 496 /* 497 * TODO verify that server properly follows FUSE protocol 498 * (oh.uniq, oh.len) 499 */ 500 args = req->args; 501 copy_args_from_argbuf(args, req); 502 503 if (args->out_pages && args->page_zeroing) { 504 len = args->out_args[args->out_numargs - 1].size; 505 ap = container_of(args, typeof(*ap), args); 506 for (i = 0; i < ap->num_pages; i++) { 507 thislen = ap->descs[i].length; 508 if (len < thislen) { 509 WARN_ON(ap->descs[i].offset); 510 page = ap->pages[i]; 511 zero_user_segment(page, len, thislen); 512 len = 0; 513 } else { 514 len -= thislen; 515 } 516 } 517 } 518 519 spin_lock(&fpq->lock); 520 clear_bit(FR_SENT, &req->flags); 521 list_del_init(&req->list); 522 spin_unlock(&fpq->lock); 523 524 fuse_request_end(fc, req); 525 spin_lock(&fsvq->lock); 526 dec_in_flight_req(fsvq); 527 spin_unlock(&fsvq->lock); 528 } 529 } 530 531 /* Virtqueue interrupt handler */ 532 static void virtio_fs_vq_done(struct virtqueue *vq) 533 { 534 struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); 535 536 dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); 537 538 schedule_work(&fsvq->done_work); 539 } 540 541 /* Initialize virtqueues */ 542 static int virtio_fs_setup_vqs(struct virtio_device *vdev, 543 struct virtio_fs *fs) 544 { 545 struct virtqueue **vqs; 546 vq_callback_t **callbacks; 547 const char **names; 548 unsigned int i; 549 int ret = 0; 550 551 virtio_cread(vdev, struct virtio_fs_config, num_request_queues, 552 &fs->num_request_queues); 553 if (fs->num_request_queues == 0) 554 return -EINVAL; 555 556 fs->nvqs = 1 + fs->num_request_queues; 557 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); 558 if (!fs->vqs) 559 return -ENOMEM; 560 561 vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); 562 callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]), 563 GFP_KERNEL); 564 names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL); 565 if (!vqs || !callbacks || !names) { 566 ret = -ENOMEM; 567 goto out; 568 } 569 570 callbacks[VQ_HIPRIO] = virtio_fs_vq_done; 571 snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name), 572 "hiprio"); 573 names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; 574 INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); 575 INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); 576 INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs); 577 INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, 578 virtio_fs_hiprio_dispatch_work); 579 spin_lock_init(&fs->vqs[VQ_HIPRIO].lock); 580 581 /* Initialize the requests virtqueues */ 582 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 583 spin_lock_init(&fs->vqs[i].lock); 584 INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); 585 INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, 586 virtio_fs_request_dispatch_work); 587 INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); 588 INIT_LIST_HEAD(&fs->vqs[i].end_reqs); 589 snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), 590 "requests.%u", i - VQ_REQUEST); 591 callbacks[i] = virtio_fs_vq_done; 592 names[i] = fs->vqs[i].name; 593 } 594 595 ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL); 596 if (ret < 0) 597 goto out; 598 599 for (i = 0; i < fs->nvqs; i++) 600 fs->vqs[i].vq = vqs[i]; 601 602 virtio_fs_start_all_queues(fs); 603 out: 604 kfree(names); 605 kfree(callbacks); 606 kfree(vqs); 607 if (ret) 608 kfree(fs->vqs); 609 return ret; 610 } 611 612 /* Free virtqueues (device must already be reset) */ 613 static void virtio_fs_cleanup_vqs(struct virtio_device *vdev, 614 struct virtio_fs *fs) 615 { 616 vdev->config->del_vqs(vdev); 617 } 618 619 static int virtio_fs_probe(struct virtio_device *vdev) 620 { 621 struct virtio_fs *fs; 622 int ret; 623 624 fs = kzalloc(sizeof(*fs), GFP_KERNEL); 625 if (!fs) 626 return -ENOMEM; 627 kref_init(&fs->refcount); 628 vdev->priv = fs; 629 630 ret = virtio_fs_read_tag(vdev, fs); 631 if (ret < 0) 632 goto out; 633 634 ret = virtio_fs_setup_vqs(vdev, fs); 635 if (ret < 0) 636 goto out; 637 638 /* TODO vq affinity */ 639 640 /* Bring the device online in case the filesystem is mounted and 641 * requests need to be sent before we return. 642 */ 643 virtio_device_ready(vdev); 644 645 ret = virtio_fs_add_instance(fs); 646 if (ret < 0) 647 goto out_vqs; 648 649 return 0; 650 651 out_vqs: 652 vdev->config->reset(vdev); 653 virtio_fs_cleanup_vqs(vdev, fs); 654 655 out: 656 vdev->priv = NULL; 657 kfree(fs); 658 return ret; 659 } 660 661 static void virtio_fs_stop_all_queues(struct virtio_fs *fs) 662 { 663 struct virtio_fs_vq *fsvq; 664 int i; 665 666 for (i = 0; i < fs->nvqs; i++) { 667 fsvq = &fs->vqs[i]; 668 spin_lock(&fsvq->lock); 669 fsvq->connected = false; 670 spin_unlock(&fsvq->lock); 671 } 672 } 673 674 static void virtio_fs_remove(struct virtio_device *vdev) 675 { 676 struct virtio_fs *fs = vdev->priv; 677 678 mutex_lock(&virtio_fs_mutex); 679 /* This device is going away. No one should get new reference */ 680 list_del_init(&fs->list); 681 virtio_fs_stop_all_queues(fs); 682 virtio_fs_drain_all_queues(fs); 683 vdev->config->reset(vdev); 684 virtio_fs_cleanup_vqs(vdev, fs); 685 686 vdev->priv = NULL; 687 /* Put device reference on virtio_fs object */ 688 virtio_fs_put(fs); 689 mutex_unlock(&virtio_fs_mutex); 690 } 691 692 #ifdef CONFIG_PM_SLEEP 693 static int virtio_fs_freeze(struct virtio_device *vdev) 694 { 695 /* TODO need to save state here */ 696 pr_warn("virtio-fs: suspend/resume not yet supported\n"); 697 return -EOPNOTSUPP; 698 } 699 700 static int virtio_fs_restore(struct virtio_device *vdev) 701 { 702 /* TODO need to restore state here */ 703 return 0; 704 } 705 #endif /* CONFIG_PM_SLEEP */ 706 707 static const struct virtio_device_id id_table[] = { 708 { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, 709 {}, 710 }; 711 712 static const unsigned int feature_table[] = {}; 713 714 static struct virtio_driver virtio_fs_driver = { 715 .driver.name = KBUILD_MODNAME, 716 .driver.owner = THIS_MODULE, 717 .id_table = id_table, 718 .feature_table = feature_table, 719 .feature_table_size = ARRAY_SIZE(feature_table), 720 .probe = virtio_fs_probe, 721 .remove = virtio_fs_remove, 722 #ifdef CONFIG_PM_SLEEP 723 .freeze = virtio_fs_freeze, 724 .restore = virtio_fs_restore, 725 #endif 726 }; 727 728 static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) 729 __releases(fiq->lock) 730 { 731 struct fuse_forget_link *link; 732 struct virtio_fs_forget *forget; 733 struct virtio_fs *fs; 734 struct virtio_fs_vq *fsvq; 735 u64 unique; 736 737 link = fuse_dequeue_forget(fiq, 1, NULL); 738 unique = fuse_get_unique(fiq); 739 740 fs = fiq->priv; 741 fsvq = &fs->vqs[VQ_HIPRIO]; 742 spin_unlock(&fiq->lock); 743 744 /* Allocate a buffer for the request */ 745 forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); 746 747 forget->ih = (struct fuse_in_header){ 748 .opcode = FUSE_FORGET, 749 .nodeid = link->forget_one.nodeid, 750 .unique = unique, 751 .len = sizeof(*forget), 752 }; 753 forget->arg = (struct fuse_forget_in){ 754 .nlookup = link->forget_one.nlookup, 755 }; 756 757 send_forget_request(fsvq, forget, false); 758 kfree(link); 759 } 760 761 static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) 762 __releases(fiq->lock) 763 { 764 /* 765 * TODO interrupts. 766 * 767 * Normal fs operations on a local filesystems aren't interruptible. 768 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) 769 * with shared lock between host and guest. 770 */ 771 spin_unlock(&fiq->lock); 772 } 773 774 /* Return the number of scatter-gather list elements required */ 775 static unsigned int sg_count_fuse_req(struct fuse_req *req) 776 { 777 struct fuse_args *args = req->args; 778 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 779 unsigned int total_sgs = 1 /* fuse_in_header */; 780 781 if (args->in_numargs - args->in_pages) 782 total_sgs += 1; 783 784 if (args->in_pages) 785 total_sgs += ap->num_pages; 786 787 if (!test_bit(FR_ISREPLY, &req->flags)) 788 return total_sgs; 789 790 total_sgs += 1 /* fuse_out_header */; 791 792 if (args->out_numargs - args->out_pages) 793 total_sgs += 1; 794 795 if (args->out_pages) 796 total_sgs += ap->num_pages; 797 798 return total_sgs; 799 } 800 801 /* Add pages to scatter-gather list and return number of elements used */ 802 static unsigned int sg_init_fuse_pages(struct scatterlist *sg, 803 struct page **pages, 804 struct fuse_page_desc *page_descs, 805 unsigned int num_pages, 806 unsigned int total_len) 807 { 808 unsigned int i; 809 unsigned int this_len; 810 811 for (i = 0; i < num_pages && total_len; i++) { 812 sg_init_table(&sg[i], 1); 813 this_len = min(page_descs[i].length, total_len); 814 sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset); 815 total_len -= this_len; 816 } 817 818 return i; 819 } 820 821 /* Add args to scatter-gather list and return number of elements used */ 822 static unsigned int sg_init_fuse_args(struct scatterlist *sg, 823 struct fuse_req *req, 824 struct fuse_arg *args, 825 unsigned int numargs, 826 bool argpages, 827 void *argbuf, 828 unsigned int *len_used) 829 { 830 struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); 831 unsigned int total_sgs = 0; 832 unsigned int len; 833 834 len = fuse_len_args(numargs - argpages, args); 835 if (len) 836 sg_init_one(&sg[total_sgs++], argbuf, len); 837 838 if (argpages) 839 total_sgs += sg_init_fuse_pages(&sg[total_sgs], 840 ap->pages, ap->descs, 841 ap->num_pages, 842 args[numargs - 1].size); 843 844 if (len_used) 845 *len_used = len; 846 847 return total_sgs; 848 } 849 850 /* Add a request to a virtqueue and kick the device */ 851 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 852 struct fuse_req *req, bool in_flight) 853 { 854 /* requests need at least 4 elements */ 855 struct scatterlist *stack_sgs[6]; 856 struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; 857 struct scatterlist **sgs = stack_sgs; 858 struct scatterlist *sg = stack_sg; 859 struct virtqueue *vq; 860 struct fuse_args *args = req->args; 861 unsigned int argbuf_used = 0; 862 unsigned int out_sgs = 0; 863 unsigned int in_sgs = 0; 864 unsigned int total_sgs; 865 unsigned int i; 866 int ret; 867 bool notify; 868 struct fuse_pqueue *fpq; 869 870 /* Does the sglist fit on the stack? */ 871 total_sgs = sg_count_fuse_req(req); 872 if (total_sgs > ARRAY_SIZE(stack_sgs)) { 873 sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC); 874 sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC); 875 if (!sgs || !sg) { 876 ret = -ENOMEM; 877 goto out; 878 } 879 } 880 881 /* Use a bounce buffer since stack args cannot be mapped */ 882 ret = copy_args_to_argbuf(req); 883 if (ret < 0) 884 goto out; 885 886 /* Request elements */ 887 sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); 888 out_sgs += sg_init_fuse_args(&sg[out_sgs], req, 889 (struct fuse_arg *)args->in_args, 890 args->in_numargs, args->in_pages, 891 req->argbuf, &argbuf_used); 892 893 /* Reply elements */ 894 if (test_bit(FR_ISREPLY, &req->flags)) { 895 sg_init_one(&sg[out_sgs + in_sgs++], 896 &req->out.h, sizeof(req->out.h)); 897 in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, 898 args->out_args, args->out_numargs, 899 args->out_pages, 900 req->argbuf + argbuf_used, NULL); 901 } 902 903 WARN_ON(out_sgs + in_sgs != total_sgs); 904 905 for (i = 0; i < total_sgs; i++) 906 sgs[i] = &sg[i]; 907 908 spin_lock(&fsvq->lock); 909 910 if (!fsvq->connected) { 911 spin_unlock(&fsvq->lock); 912 ret = -ENOTCONN; 913 goto out; 914 } 915 916 vq = fsvq->vq; 917 ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); 918 if (ret < 0) { 919 spin_unlock(&fsvq->lock); 920 goto out; 921 } 922 923 /* Request successfully sent. */ 924 fpq = &fsvq->fud->pq; 925 spin_lock(&fpq->lock); 926 list_add_tail(&req->list, fpq->processing); 927 spin_unlock(&fpq->lock); 928 set_bit(FR_SENT, &req->flags); 929 /* matches barrier in request_wait_answer() */ 930 smp_mb__after_atomic(); 931 932 if (!in_flight) 933 inc_in_flight_req(fsvq); 934 notify = virtqueue_kick_prepare(vq); 935 936 spin_unlock(&fsvq->lock); 937 938 if (notify) 939 virtqueue_notify(vq); 940 941 out: 942 if (ret < 0 && req->argbuf) { 943 kfree(req->argbuf); 944 req->argbuf = NULL; 945 } 946 if (sgs != stack_sgs) { 947 kfree(sgs); 948 kfree(sg); 949 } 950 951 return ret; 952 } 953 954 static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) 955 __releases(fiq->lock) 956 { 957 unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ 958 struct virtio_fs *fs; 959 struct fuse_req *req; 960 struct virtio_fs_vq *fsvq; 961 int ret; 962 963 WARN_ON(list_empty(&fiq->pending)); 964 req = list_last_entry(&fiq->pending, struct fuse_req, list); 965 clear_bit(FR_PENDING, &req->flags); 966 list_del_init(&req->list); 967 WARN_ON(!list_empty(&fiq->pending)); 968 spin_unlock(&fiq->lock); 969 970 fs = fiq->priv; 971 972 pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", 973 __func__, req->in.h.opcode, req->in.h.unique, 974 req->in.h.nodeid, req->in.h.len, 975 fuse_len_args(req->args->out_numargs, req->args->out_args)); 976 977 fsvq = &fs->vqs[queue_id]; 978 ret = virtio_fs_enqueue_req(fsvq, req, false); 979 if (ret < 0) { 980 if (ret == -ENOMEM || ret == -ENOSPC) { 981 /* 982 * Virtqueue full. Retry submission from worker 983 * context as we might be holding fc->bg_lock. 984 */ 985 spin_lock(&fsvq->lock); 986 list_add_tail(&req->list, &fsvq->queued_reqs); 987 inc_in_flight_req(fsvq); 988 schedule_delayed_work(&fsvq->dispatch_work, 989 msecs_to_jiffies(1)); 990 spin_unlock(&fsvq->lock); 991 return; 992 } 993 req->out.h.error = ret; 994 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); 995 996 /* Can't end request in submission context. Use a worker */ 997 spin_lock(&fsvq->lock); 998 list_add_tail(&req->list, &fsvq->end_reqs); 999 schedule_delayed_work(&fsvq->dispatch_work, 0); 1000 spin_unlock(&fsvq->lock); 1001 return; 1002 } 1003 } 1004 1005 static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { 1006 .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, 1007 .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, 1008 .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, 1009 .release = virtio_fs_fiq_release, 1010 }; 1011 1012 static int virtio_fs_fill_super(struct super_block *sb) 1013 { 1014 struct fuse_conn *fc = get_fuse_conn_super(sb); 1015 struct virtio_fs *fs = fc->iq.priv; 1016 unsigned int i; 1017 int err; 1018 struct fuse_fs_context ctx = { 1019 .rootmode = S_IFDIR, 1020 .default_permissions = 1, 1021 .allow_other = 1, 1022 .max_read = UINT_MAX, 1023 .blksize = 512, 1024 .destroy = true, 1025 .no_control = true, 1026 .no_force_umount = true, 1027 .no_mount_options = true, 1028 }; 1029 1030 mutex_lock(&virtio_fs_mutex); 1031 1032 /* After holding mutex, make sure virtiofs device is still there. 1033 * Though we are holding a reference to it, drive ->remove might 1034 * still have cleaned up virtual queues. In that case bail out. 1035 */ 1036 err = -EINVAL; 1037 if (list_empty(&fs->list)) { 1038 pr_info("virtio-fs: tag <%s> not found\n", fs->tag); 1039 goto err; 1040 } 1041 1042 err = -ENOMEM; 1043 /* Allocate fuse_dev for hiprio and notification queues */ 1044 for (i = 0; i < VQ_REQUEST; i++) { 1045 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1046 1047 fsvq->fud = fuse_dev_alloc(); 1048 if (!fsvq->fud) 1049 goto err_free_fuse_devs; 1050 } 1051 1052 ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud; 1053 err = fuse_fill_super_common(sb, &ctx); 1054 if (err < 0) 1055 goto err_free_fuse_devs; 1056 1057 fc = fs->vqs[VQ_REQUEST].fud->fc; 1058 1059 for (i = 0; i < fs->nvqs; i++) { 1060 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1061 1062 if (i == VQ_REQUEST) 1063 continue; /* already initialized */ 1064 fuse_dev_install(fsvq->fud, fc); 1065 } 1066 1067 /* Previous unmount will stop all queues. Start these again */ 1068 virtio_fs_start_all_queues(fs); 1069 fuse_send_init(fc); 1070 mutex_unlock(&virtio_fs_mutex); 1071 return 0; 1072 1073 err_free_fuse_devs: 1074 virtio_fs_free_devs(fs); 1075 err: 1076 mutex_unlock(&virtio_fs_mutex); 1077 return err; 1078 } 1079 1080 static void virtio_kill_sb(struct super_block *sb) 1081 { 1082 struct fuse_conn *fc = get_fuse_conn_super(sb); 1083 struct virtio_fs *vfs; 1084 struct virtio_fs_vq *fsvq; 1085 1086 /* If mount failed, we can still be called without any fc */ 1087 if (!fc) 1088 return fuse_kill_sb_anon(sb); 1089 1090 vfs = fc->iq.priv; 1091 fsvq = &vfs->vqs[VQ_HIPRIO]; 1092 1093 /* Stop forget queue. Soon destroy will be sent */ 1094 spin_lock(&fsvq->lock); 1095 fsvq->connected = false; 1096 spin_unlock(&fsvq->lock); 1097 virtio_fs_drain_all_queues(vfs); 1098 1099 fuse_kill_sb_anon(sb); 1100 1101 /* fuse_kill_sb_anon() must have sent destroy. Stop all queues 1102 * and drain one more time and free fuse devices. Freeing fuse 1103 * devices will drop their reference on fuse_conn and that in 1104 * turn will drop its reference on virtio_fs object. 1105 */ 1106 virtio_fs_stop_all_queues(vfs); 1107 virtio_fs_drain_all_queues(vfs); 1108 virtio_fs_free_devs(vfs); 1109 } 1110 1111 static int virtio_fs_test_super(struct super_block *sb, 1112 struct fs_context *fsc) 1113 { 1114 struct fuse_conn *fc = fsc->s_fs_info; 1115 1116 return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv; 1117 } 1118 1119 static int virtio_fs_set_super(struct super_block *sb, 1120 struct fs_context *fsc) 1121 { 1122 int err; 1123 1124 err = get_anon_bdev(&sb->s_dev); 1125 if (!err) 1126 fuse_conn_get(fsc->s_fs_info); 1127 1128 return err; 1129 } 1130 1131 static int virtio_fs_get_tree(struct fs_context *fsc) 1132 { 1133 struct virtio_fs *fs; 1134 struct super_block *sb; 1135 struct fuse_conn *fc; 1136 int err; 1137 1138 /* This gets a reference on virtio_fs object. This ptr gets installed 1139 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() 1140 * to drop the reference to this object. 1141 */ 1142 fs = virtio_fs_find_instance(fsc->source); 1143 if (!fs) { 1144 pr_info("virtio-fs: tag <%s> not found\n", fsc->source); 1145 return -EINVAL; 1146 } 1147 1148 fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); 1149 if (!fc) { 1150 mutex_lock(&virtio_fs_mutex); 1151 virtio_fs_put(fs); 1152 mutex_unlock(&virtio_fs_mutex); 1153 return -ENOMEM; 1154 } 1155 1156 fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops, 1157 fs); 1158 fc->release = fuse_free_conn; 1159 fc->delete_stale = true; 1160 1161 fsc->s_fs_info = fc; 1162 sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super); 1163 fuse_conn_put(fc); 1164 if (IS_ERR(sb)) 1165 return PTR_ERR(sb); 1166 1167 if (!sb->s_root) { 1168 err = virtio_fs_fill_super(sb); 1169 if (err) { 1170 deactivate_locked_super(sb); 1171 return err; 1172 } 1173 1174 sb->s_flags |= SB_ACTIVE; 1175 } 1176 1177 WARN_ON(fsc->root); 1178 fsc->root = dget(sb->s_root); 1179 return 0; 1180 } 1181 1182 static const struct fs_context_operations virtio_fs_context_ops = { 1183 .get_tree = virtio_fs_get_tree, 1184 }; 1185 1186 static int virtio_fs_init_fs_context(struct fs_context *fsc) 1187 { 1188 fsc->ops = &virtio_fs_context_ops; 1189 return 0; 1190 } 1191 1192 static struct file_system_type virtio_fs_type = { 1193 .owner = THIS_MODULE, 1194 .name = "virtiofs", 1195 .init_fs_context = virtio_fs_init_fs_context, 1196 .kill_sb = virtio_kill_sb, 1197 }; 1198 1199 static int __init virtio_fs_init(void) 1200 { 1201 int ret; 1202 1203 ret = register_virtio_driver(&virtio_fs_driver); 1204 if (ret < 0) 1205 return ret; 1206 1207 ret = register_filesystem(&virtio_fs_type); 1208 if (ret < 0) { 1209 unregister_virtio_driver(&virtio_fs_driver); 1210 return ret; 1211 } 1212 1213 return 0; 1214 } 1215 module_init(virtio_fs_init); 1216 1217 static void __exit virtio_fs_exit(void) 1218 { 1219 unregister_filesystem(&virtio_fs_type); 1220 unregister_virtio_driver(&virtio_fs_driver); 1221 } 1222 module_exit(virtio_fs_exit); 1223 1224 MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>"); 1225 MODULE_DESCRIPTION("Virtio Filesystem"); 1226 MODULE_LICENSE("GPL"); 1227 MODULE_ALIAS_FS(KBUILD_MODNAME); 1228 MODULE_DEVICE_TABLE(virtio, id_table); 1229