// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/highmem.h>
#include "fuse_i.h"

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion between device removal and the mount path.
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;		/* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	struct delayed_work dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	char name[24];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
struct virtio_fs {
	struct kref refcount;
	struct list_head list;		/* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;		 /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
};

struct virtio_fs_forget {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
};

static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight);

static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
	struct virtio_fs *fs = vq->vdev->priv;

	return &fs->vqs[vq->index];
}

static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
{
	return &vq_to_fsvq(vq)->fud->pq;
}
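
/*
 * fsvq->in_flight counts requests that have been placed on a virtqueue
 * (or queued for retry) but not yet completed. virtio_fs_drain_queue()
 * polls this counter to wait out pending requests during unmount and
 * device removal.
 */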

/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
	fsvq->in_flight++;
}

/* Should be called with fsvq->lock held. */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight <= 0);
	fsvq->in_flight--;
}

static void release_virtio_fs_obj(struct kref *ref)
{
	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);

	kfree(vfs->vqs);
	kfree(vfs);
}

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
	kref_put(&fs->refcount, release_virtio_fs_obj);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
	struct virtio_fs *vfs = fiq->priv;

	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put(vfs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight < 0);

	/* Wait for in flight requests to finish. */
	while (1) {
		spin_lock(&fsvq->lock);
		if (!fsvq->in_flight) {
			spin_unlock(&fsvq->lock);
			break;
		}
		spin_unlock(&fsvq->lock);
		/* TODO use completion instead of timeout */
		usleep_range(1000, 2000);
	}

	flush_work(&fsvq->done_work);
	flush_delayed_work(&fsvq->dispatch_work);
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		virtio_fs_drain_queue(fsvq);
	}
}

static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}

/* Add a new instance to the list or return -EEXIST if tag name already exists */
static int virtio_fs_add_instance(struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	bool duplicate = false;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0)
			duplicate = true;
	}

	if (!duplicate)
		list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	if (duplicate)
		return -EEXIST;
	return 0;
}

/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
	struct virtio_fs *fs;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, tag) == 0) {
			kref_get(&fs->refcount);
			goto found;
		}
	}

	fs = NULL; /* not found */

found:
	mutex_unlock(&virtio_fs_mutex);

	return fs;
}
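
/*
 * A virtio_fs instance is reference counted: the device holds one
 * reference from probe until remove, and every mount takes another via
 * virtio_fs_find_instance(). The object is freed only when the last
 * reference is dropped, so a mounted filesystem can outlive hot-unplug
 * of the underlying device.
 */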

static void virtio_fs_free_devs(struct virtio_fs *fs)
{
	unsigned int i;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (!fsvq->fud)
			continue;

		fuse_dev_free(fsvq->fud);
		fsvq->fud = NULL;
	}
}

/* Read filesystem name from virtio config into fs->tag (allocated with
 * devm_kmalloc() and freed automatically along with the device).
 */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)];

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';
	return 0;
}

/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			dec_in_flight_req(fsvq);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);
}

static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	struct fuse_conn *fc = fsvq->fud->fc;
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
					       list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			break;
		}

		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);
		fuse_request_end(fc, req);
	}

	/* Dispatch pending requests */
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->queued_reqs,
					       struct fuse_req, list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			return;
		}
		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);

		ret = virtio_fs_enqueue_req(fsvq, req, true);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				spin_lock(&fsvq->lock);
				list_add_tail(&req->list, &fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						      msecs_to_jiffies(1));
				spin_unlock(&fsvq->lock);
				return;
			}
			req->out.h.error = ret;
			spin_lock(&fsvq->lock);
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
			       ret);
			fuse_request_end(fc, req);
		}
	}
}
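
/*
 * Both dispatch workers treat -ENOMEM and -ENOSPC from the virtqueue as
 * transient: the request stays on queued_reqs and the work item is
 * rescheduled to retry about a millisecond later. Any other error is
 * fatal for the request.
 */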

static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	struct virtqueue *vq = fsvq->vq;
	struct scatterlist sg;
	struct scatterlist *sgs[] = {&sg};
	bool notify;
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
						  struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		if (!fsvq->connected) {
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			kfree(forget);
			continue;
		}

		sg_init_one(&sg, forget, sizeof(*forget));

		/* Enqueue the request */
		dev_dbg(&vq->vdev->dev, "%s\n", __func__);
		ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
					 ret);
				list_add_tail(&forget->list,
					      &fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						      msecs_to_jiffies(1));
			} else {
				pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
					 ret);
				dec_in_flight_req(fsvq);
				kfree(forget);
			}
			spin_unlock(&fsvq->lock);
			return;
		}

		notify = virtqueue_kick_prepare(vq);
		spin_unlock(&fsvq->lock);

		if (notify)
			virtqueue_notify(vq);
		pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
			 __func__);
	}
}

/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	unsigned int offset = 0;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int len;
	unsigned int i;

	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	req->argbuf = kmalloc(len, GFP_ATOMIC);
	if (!req->argbuf)
		return -ENOMEM;

	for (i = 0; i < num_in; i++) {
		memcpy(req->argbuf + offset,
		       args->in_args[i].value,
		       args->in_args[i].size);
		offset += args->in_args[i].size;
	}

	return 0;
}

/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
	unsigned int remaining;
	unsigned int offset;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int i;

	remaining = req->out.h.len - sizeof(req->out.h);
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

	for (i = 0; i < num_out; i++) {
		unsigned int argsize = args->out_args[i].size;

		if (args->out_argvar &&
		    i == args->out_numargs - 1 &&
		    argsize > remaining) {
			argsize = remaining;
		}

		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
		offset += argsize;

		if (i != args->out_numargs - 1)
			remaining -= argsize;
	}

	/* Store the actual size of the variable-length arg */
	if (args->out_argvar)
		args->out_args[args->out_numargs - 1].size = remaining;

	kfree(req->argbuf);
	req->argbuf = NULL;
}
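
/*
 * Layout of req->argbuf: the fixed-size input arguments are packed
 * back-to-back, immediately followed by space for the output arguments.
 * Page-backed arguments are not part of argbuf; their pages go directly
 * into the scatter-gather list.
 */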

/* Work function for request completion */
static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_conn *fc = fsvq->fud->fc;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_args_pages *ap;
	struct fuse_req *next;
	struct fuse_args *args;
	unsigned int len, i, thislen;
	struct page *page;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		/*
		 * TODO verify that server properly follows FUSE protocol
		 * (oh.uniq, oh.len)
		 */
		args = req->args;
		copy_args_from_argbuf(args, req);

		if (args->out_pages && args->page_zeroing) {
			len = args->out_args[args->out_numargs - 1].size;
			ap = container_of(args, typeof(*ap), args);
			for (i = 0; i < ap->num_pages; i++) {
				thislen = ap->descs[i].length;
				if (len < thislen) {
					WARN_ON(ap->descs[i].offset);
					page = ap->pages[i];
					zero_user_segment(page, len, thislen);
					len = 0;
				} else {
					len -= thislen;
				}
			}
		}

		spin_lock(&fpq->lock);
		clear_bit(FR_SENT, &req->flags);
		list_del_init(&req->list);
		spin_unlock(&fpq->lock);

		fuse_request_end(fc, req);
		spin_lock(&fsvq->lock);
		dec_in_flight_req(fsvq);
		spin_unlock(&fsvq->lock);
	}
}

/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

	schedule_work(&fsvq->done_work);
}

/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
			       struct virtio_fs *fs)
{
	struct virtqueue **vqs;
	vq_callback_t **callbacks;
	const char **names;
	unsigned int i;
	int ret = 0;

	virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
		     &fs->num_request_queues);
	if (fs->num_request_queues == 0)
		return -EINVAL;

	fs->nvqs = 1 + fs->num_request_queues;
	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
	if (!fs->vqs)
		return -ENOMEM;

	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
				  GFP_KERNEL);
	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
	if (!vqs || !callbacks || !names) {
		ret = -ENOMEM;
		goto out;
	}

	callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
	snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
		 "hiprio");
	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
	INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
	INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
			  virtio_fs_hiprio_dispatch_work);
	spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);

	/* Initialize the request virtqueues */
	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
		spin_lock_init(&fs->vqs[i].lock);
		INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
		INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
				  virtio_fs_request_dispatch_work);
		INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
		INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
		snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
			 "requests.%u", i - VQ_REQUEST);
		callbacks[i] = virtio_fs_vq_done;
		names[i] = fs->vqs[i].name;
	}

	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
	if (ret < 0)
		goto out;

	for (i = 0; i < fs->nvqs; i++)
		fs->vqs[i].vq = vqs[i];

	virtio_fs_start_all_queues(fs);
out:
	kfree(names);
	kfree(callbacks);
	kfree(vqs);
	if (ret)
		kfree(fs->vqs);
	return ret;
}
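
/*
 * The virtqueue order set up above matches the layout of fs->vqs[]:
 * index 0 is the hiprio queue and indices 1..nvqs-1 are request queues.
 * vq_to_fsvq() relies on this mapping to translate vq->index back into
 * per-queue state.
 */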

/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	vdev->config->del_vqs(vdev);
}

static int virtio_fs_probe(struct virtio_device *vdev)
{
	struct virtio_fs *fs;
	int ret;

	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	kref_init(&fs->refcount);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
	if (ret < 0)
		goto out;

	ret = virtio_fs_setup_vqs(vdev, fs);
	if (ret < 0)
		goto out;

	/* TODO vq affinity */

	/* Bring the device online in case the filesystem is mounted and
	 * requests need to be sent before we return.
	 */
	virtio_device_ready(vdev);

	ret = virtio_fs_add_instance(fs);
	if (ret < 0)
		goto out_vqs;

	return 0;

out_vqs:
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

out:
	vdev->priv = NULL;
	kfree(fs);
	return ret;
}

static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = false;
		spin_unlock(&fsvq->lock);
	}
}

static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get a new reference */
	list_del_init(&fs->list);
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues(fs);
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
	{},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
	.driver.name = KBUILD_MODNAME,
	.driver.owner = THIS_MODULE,
	.id_table = id_table,
	.feature_table = feature_table,
	.feature_table_size = ARRAY_SIZE(feature_table),
	.probe = virtio_fs_probe,
	.remove = virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze = virtio_fs_freeze,
	.restore = virtio_fs_restore,
#endif
};
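
/*
 * FORGET requests are fire-and-forget: no reply is expected, so the
 * whole struct virtio_fs_forget (header plus argument) is sent on the
 * hiprio queue as a single device-readable buffer and kfree()d once the
 * device has consumed it.
 */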

static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	struct fuse_forget_link *link;
	struct virtio_fs_forget *forget;
	struct scatterlist sg;
	struct scatterlist *sgs[] = {&sg};
	struct virtio_fs *fs;
	struct virtqueue *vq;
	struct virtio_fs_vq *fsvq;
	bool notify;
	u64 unique;
	int ret;

	link = fuse_dequeue_forget(fiq, 1, NULL);
	unique = fuse_get_unique(fiq);

	fs = fiq->priv;
	fsvq = &fs->vqs[VQ_HIPRIO];
	spin_unlock(&fiq->lock);

	/* Allocate a buffer for the request */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);

	forget->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*forget),
	};
	forget->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	sg_init_one(&sg, forget, sizeof(*forget));

	/* Enqueue the request */
	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		kfree(forget);
		spin_unlock(&fsvq->lock);
		goto out;
	}

	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			inc_in_flight_req(fsvq);
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
		}
		spin_unlock(&fsvq->lock);
		goto out;
	}

	inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
out:
	kfree(link);
}

static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystem aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with a shared lock between host and guest.
	 */
	spin_unlock(&fiq->lock);
}

/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
	unsigned int total_sgs = 1 /* fuse_in_header */;

	if (args->in_numargs - args->in_pages)
		total_sgs += 1;

	if (args->in_pages)
		total_sgs += ap->num_pages;

	if (!test_bit(FR_ISREPLY, &req->flags))
		return total_sgs;

	total_sgs += 1 /* fuse_out_header */;

	if (args->out_numargs - args->out_pages)
		total_sgs += 1;

	if (args->out_pages)
		total_sgs += ap->num_pages;

	return total_sgs;
}
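
/*
 * On the virtqueue a request is laid out as:
 *
 *   [fuse_in_header][packed in args][in pages ...]    device-readable
 *   [fuse_out_header][packed out args][out pages ...] device-writable
 *
 * Each bracketed element is one scatter-gather entry; the writable half
 * is present only when a reply is expected (FR_ISREPLY).
 */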

/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
				       struct page **pages,
				       struct fuse_page_desc *page_descs,
				       unsigned int num_pages,
				       unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		sg_init_table(&sg[i], 1);
		this_len = min(page_descs[i].length, total_len);
		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
		total_len -= this_len;
	}

	return i;
}

/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
				      struct fuse_req *req,
				      struct fuse_arg *args,
				      unsigned int numargs,
				      bool argpages,
				      void *argbuf,
				      unsigned int *len_used)
{
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
	unsigned int total_sgs = 0;
	unsigned int len;

	len = fuse_len_args(numargs - argpages, args);
	if (len)
		sg_init_one(&sg[total_sgs++], argbuf, len);

	if (argpages)
		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
						ap->pages, ap->descs,
						ap->num_pages,
						args[numargs - 1].size);

	if (len_used)
		*len_used = len;

	return total_sgs;
}

/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight)
{
	/* requests need at least 4 elements */
	struct scatterlist *stack_sgs[6];
	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
	struct scatterlist **sgs = stack_sgs;
	struct scatterlist *sg = stack_sg;
	struct virtqueue *vq;
	struct fuse_args *args = req->args;
	unsigned int argbuf_used = 0;
	unsigned int out_sgs = 0;
	unsigned int in_sgs = 0;
	unsigned int total_sgs;
	unsigned int i;
	int ret;
	bool notify;
	struct fuse_pqueue *fpq;

	/* Does the sglist fit on the stack? */
	total_sgs = sg_count_fuse_req(req);
	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
		if (!sgs || !sg) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Use a bounce buffer since stack args cannot be mapped */
	ret = copy_args_to_argbuf(req);
	if (ret < 0)
		goto out;

	/* Request elements */
	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);

	/* Reply elements */
	if (test_bit(FR_ISREPLY, &req->flags)) {
		sg_init_one(&sg[out_sgs + in_sgs++],
			    &req->out.h, sizeof(req->out.h));
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
	}

	WARN_ON(out_sgs + in_sgs != total_sgs);

	for (i = 0; i < total_sgs; i++)
		sgs[i] = &sg[i];

	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Request successfully sent. */
	fpq = &fsvq->fud->pq;
	spin_lock(&fpq->lock);
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);

out:
	if (ret < 0 && req->argbuf) {
		kfree(req->argbuf);
		req->argbuf = NULL;
	}
	if (sgs != stack_sgs) {
		kfree(sgs);
		kfree(sg);
	}

	return ret;
}

static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
	struct virtio_fs *fs;
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	int ret;

	WARN_ON(list_empty(&fiq->pending));
	req = list_last_entry(&fiq->pending, struct fuse_req, list);
	clear_bit(FR_PENDING, &req->flags);
	list_del_init(&req->list);
	WARN_ON(!list_empty(&fiq->pending));
	spin_unlock(&fiq->lock);

	fs = fiq->priv;

	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
		 __func__, req->in.h.opcode, req->in.h.unique,
		 req->in.h.nodeid, req->in.h.len,
		 fuse_len_args(req->args->out_numargs, req->args->out_args));

	fsvq = &fs->vqs[queue_id];
	ret = virtio_fs_enqueue_req(fsvq, req, false);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			/*
			 * Virtqueue full. Retry submission from worker
			 * context as we might be holding fc->bg_lock.
			 */
			spin_lock(&fsvq->lock);
			list_add_tail(&req->list, &fsvq->queued_reqs);
			inc_in_flight_req(fsvq);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			spin_unlock(&fsvq->lock);
			return;
		}
		req->out.h.error = ret;
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);

		/* Can't end request in submission context. Use a worker */
		spin_lock(&fsvq->lock);
		list_add_tail(&req->list, &fsvq->end_reqs);
		schedule_delayed_work(&fsvq->dispatch_work, 0);
		spin_unlock(&fsvq->lock);
		return;
	}
}

static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
	.release			= virtio_fs_fiq_release,
};
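
/*
 * These fuse_iqueue_ops replace the /dev/fuse read path: instead of
 * queuing requests for a userspace daemon to pick up, each wake
 * callback takes the request off the fuse input queue and places it
 * directly on a virtqueue.
 */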

static int virtio_fs_fill_super(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct virtio_fs *fs = fc->iq.priv;
	unsigned int i;
	int err;
	struct fuse_fs_context ctx = {
		.rootmode = S_IFDIR,
		.default_permissions = 1,
		.allow_other = 1,
		.max_read = UINT_MAX,
		.blksize = 512,
		.destroy = true,
		.no_control = true,
		.no_force_umount = true,
		.no_mount_options = true,
	};

	mutex_lock(&virtio_fs_mutex);

	/* After taking the mutex, make sure the virtiofs device is still
	 * there. Though we are holding a reference to it, the driver's
	 * ->remove might still have cleaned up the virtqueues. In that
	 * case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
		goto err;
	}

	err = -ENOMEM;
	/* Allocate fuse_dev for the queues before the request queue
	 * (currently just hiprio)
	 */
	for (i = 0; i < VQ_REQUEST; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fsvq->fud = fuse_dev_alloc();
		if (!fsvq->fud)
			goto err_free_fuse_devs;
	}

	ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
	err = fuse_fill_super_common(sb, &ctx);
	if (err < 0)
		goto err_free_fuse_devs;

	fc = fs->vqs[VQ_REQUEST].fud->fc;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (i == VQ_REQUEST)
			continue; /* already initialized */
		fuse_dev_install(fsvq->fud, fc);
	}

	/* A previous unmount will have stopped all queues. Start them again */
	virtio_fs_start_all_queues(fs);
	fuse_send_init(fc);
	mutex_unlock(&virtio_fs_mutex);
	return 0;

err_free_fuse_devs:
	virtio_fs_free_devs(fs);
err:
	mutex_unlock(&virtio_fs_mutex);
	return err;
}

static void virtio_kill_sb(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct virtio_fs *vfs;
	struct virtio_fs_vq *fsvq;

	/* If mount failed, we can still be called without any fc */
	if (!fc)
		return fuse_kill_sb_anon(sb);

	vfs = fc->iq.priv;
	fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_kill_sb_anon(sb);

	/* fuse_kill_sb_anon() must have sent destroy. Stop all queues,
	 * drain one more time and free fuse devices. Freeing fuse
	 * devices will drop their reference on fuse_conn and that in
	 * turn will drop its reference on the virtio_fs object.
	 */
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
}

static int virtio_fs_test_super(struct super_block *sb,
				struct fs_context *fsc)
{
	struct fuse_conn *fc = fsc->s_fs_info;

	return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
}

static int virtio_fs_set_super(struct super_block *sb,
			       struct fs_context *fsc)
{
	int err;

	err = get_anon_bdev(&sb->s_dev);
	if (!err)
		fuse_conn_get(fsc->s_fs_info);

	return err;
}
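
/*
 * Mounting the same tag more than once must yield the same superblock:
 * virtio_fs_test_super() matches superblocks by the virtio_fs instance
 * stored in fc->iq.priv, so sget_fc() in virtio_fs_get_tree() reuses an
 * existing superblock for a tag that is already mounted.
 */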

static int virtio_fs_get_tree(struct fs_context *fsc)
{
	struct virtio_fs *fs;
	struct super_block *sb;
	struct fuse_conn *fc;
	int err;

	/* This gets a reference on the virtio_fs object. The pointer is
	 * installed in fc->iq.priv. When the fuse_conn goes away, it calls
	 * ->release() to drop the reference to this object.
	 */
	fs = virtio_fs_find_instance(fsc->source);
	if (!fs) {
		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
		return -EINVAL;
	}

	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
	if (!fc) {
		mutex_lock(&virtio_fs_mutex);
		virtio_fs_put(fs);
		mutex_unlock(&virtio_fs_mutex);
		return -ENOMEM;
	}

	fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
		       fs);
	fc->release = fuse_free_conn;
	fc->delete_stale = true;

	fsc->s_fs_info = fc;
	sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
	fuse_conn_put(fc);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		err = virtio_fs_fill_super(sb);
		if (err) {
			deactivate_locked_super(sb);
			return err;
		}

		sb->s_flags |= SB_ACTIVE;
	}

	WARN_ON(fsc->root);
	fsc->root = dget(sb->s_root);
	return 0;
}

static const struct fs_context_operations virtio_fs_context_ops = {
	.get_tree = virtio_fs_get_tree,
};

static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}

static struct file_system_type virtio_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb	= virtio_kill_sb,
};

static int __init virtio_fs_init(void)
{
	int ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	if (ret < 0)
		return ret;

	ret = register_filesystem(&virtio_fs_type);
	if (ret < 0) {
		unregister_virtio_driver(&virtio_fs_driver);
		return ret;
	}

	return 0;
}
module_init(virtio_fs_init);

static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
}
module_exit(virtio_fs_exit);

MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);
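
/*
 * Example usage from a guest, assuming the host exports a device with
 * tag "myfs" (the tag name is illustrative):
 *
 *   mount -t virtiofs myfs /mnt
 *
 * The mount source is matched against fs->tag by
 * virtio_fs_find_instance().
 */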