// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/highmem.h>
#include "fuse_i.h"

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion in device removal and mounting path
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	struct delayed_work dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	struct completion in_flight_zero; /* No inflight requests */
	char name[24];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
struct virtio_fs {
	struct kref refcount;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
};

struct virtio_fs_forget_req {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
};

struct virtio_fs_forget {
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
	struct virtio_fs_forget_req req;
};

struct virtio_fs_req_work {
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	struct work_struct done_work;
};

static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight);

static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
	struct virtio_fs *fs = vq->vdev->priv;

	return &fs->vqs[vq->index];
}

static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
{
	return &vq_to_fsvq(vq)->fud->pq;
}

/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
	fsvq->in_flight++;
}

/* Should be called with fsvq->lock held. */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight <= 0);
	fsvq->in_flight--;
	if (!fsvq->in_flight)
		complete(&fsvq->in_flight_zero);
}

static void release_virtio_fs_obj(struct kref *ref)
{
	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);

	kfree(vfs->vqs);
	kfree(vfs);
}

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
	kref_put(&fs->refcount, release_virtio_fs_obj);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
	struct virtio_fs *vfs = fiq->priv;

	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put(vfs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight < 0);

	/* Wait for in flight requests to finish. */
	spin_lock(&fsvq->lock);
	if (fsvq->in_flight) {
		/* We are holding virtio_fs_mutex. There should not be any
		 * waiters waiting for completion.
		 */
		reinit_completion(&fsvq->in_flight_zero);
		spin_unlock(&fsvq->lock);
		wait_for_completion(&fsvq->in_flight_zero);
	} else {
		spin_unlock(&fsvq->lock);
	}

	flush_work(&fsvq->done_work);
	flush_delayed_work(&fsvq->dispatch_work);
}

static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		virtio_fs_drain_queue(fsvq);
	}
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	/* Provides mutual exclusion between ->remove and ->kill_sb
	 * paths. We don't want both of these draining queue at the
	 * same time. Current completion logic reinits completion
	 * and that means there should not be any other thread
	 * doing reinit or waiting for completion already.
	 */
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_drain_all_queues_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}

/* Add a new instance to the list or return -EEXIST if tag name exists */
static int virtio_fs_add_instance(struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	bool duplicate = false;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0)
			duplicate = true;
	}

	if (!duplicate)
		list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	if (duplicate)
		return -EEXIST;
	return 0;
}

/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
	struct virtio_fs *fs;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, tag) == 0) {
			kref_get(&fs->refcount);
			goto found;
		}
	}

	fs = NULL; /* not found */

found:
	mutex_unlock(&virtio_fs_mutex);

	return fs;
}

static void virtio_fs_free_devs(struct virtio_fs *fs)
{
	unsigned int i;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (!fsvq->fud)
			continue;

		fuse_dev_free(fsvq->fud);
		fsvq->fud = NULL;
	}
}

/* Read filesystem name from virtio config into fs->tag (must kfree()). */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)];

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';
	return 0;
}

/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			dec_in_flight_req(fsvq);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);
}

static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	struct fuse_conn *fc = fsvq->fud->fc;
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
					       list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			break;
		}

		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);
		fuse_request_end(fc, req);
	}

	/* Dispatch pending requests */
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->queued_reqs,
					       struct fuse_req, list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			return;
		}
		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);

		ret = virtio_fs_enqueue_req(fsvq, req, true);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				spin_lock(&fsvq->lock);
				list_add_tail(&req->list, &fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						      msecs_to_jiffies(1));
				spin_unlock(&fsvq->lock);
				return;
			}
			req->out.h.error = ret;
			spin_lock(&fsvq->lock);
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
			       ret);
			fuse_request_end(fc, req);
		}
	}
}

/*
 * Returns 1 if queue is full and sender should wait a bit before sending
 * next request, 0 otherwise.
 */
static int send_forget_request(struct virtio_fs_vq *fsvq,
			       struct virtio_fs_forget *forget,
			       bool in_flight)
{
	struct scatterlist sg;
	struct virtqueue *vq;
	int ret = 0;
	bool notify;
	struct virtio_fs_forget_req *req = &forget->req;

	spin_lock(&fsvq->lock);
	if (!fsvq->connected) {
		if (in_flight)
			dec_in_flight_req(fsvq);
		kfree(forget);
		goto out;
	}

	sg_init_one(&sg, req, sizeof(*req));
	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			if (!in_flight)
				inc_in_flight_req(fsvq);
			/* Queue is full */
			ret = 1;
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
			if (in_flight)
				dec_in_flight_req(fsvq);
		}
		goto out;
	}

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);
	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
	return ret;
out:
	spin_unlock(&fsvq->lock);
	return ret;
}

static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
						  struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		spin_unlock(&fsvq->lock);
		if (send_forget_request(fsvq, forget, true))
			return;
	}
}

/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	unsigned int offset = 0;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int len;
	unsigned int i;

	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	req->argbuf = kmalloc(len, GFP_ATOMIC);
	if (!req->argbuf)
		return -ENOMEM;

	for (i = 0; i < num_in; i++) {
		memcpy(req->argbuf + offset,
		       args->in_args[i].value,
		       args->in_args[i].size);
		offset += args->in_args[i].size;
	}

	return 0;
}

/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
	unsigned int remaining;
	unsigned int offset;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int i;

	remaining = req->out.h.len - sizeof(req->out.h);
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

	for (i = 0; i < num_out; i++) {
		unsigned int argsize = args->out_args[i].size;

		if (args->out_argvar &&
		    i == args->out_numargs - 1 &&
		    argsize > remaining) {
			argsize = remaining;
		}

		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
		offset += argsize;

		if (i != args->out_numargs - 1)
			remaining -= argsize;
	}

	/* Store the actual size of the variable-length arg */
	if (args->out_argvar)
		args->out_args[args->out_numargs - 1].size = remaining;

	kfree(req->argbuf);
	req->argbuf = NULL;
}

/* Work function for request completion */
static void virtio_fs_request_complete(struct fuse_req *req,
				       struct virtio_fs_vq *fsvq)
{
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_conn *fc = fsvq->fud->fc;
	struct fuse_args *args;
	struct fuse_args_pages *ap;
	unsigned int len, i, thislen;
	struct page *page;

	/*
	 * TODO verify that server properly follows FUSE protocol
	 * (oh.uniq, oh.len)
	 */
	args = req->args;
	copy_args_from_argbuf(args, req);

	if (args->out_pages && args->page_zeroing) {
		len = args->out_args[args->out_numargs - 1].size;
		ap = container_of(args, typeof(*ap), args);
		for (i = 0; i < ap->num_pages; i++) {
			thislen = ap->descs[i].length;
			if (len < thislen) {
				WARN_ON(ap->descs[i].offset);
				page = ap->pages[i];
				zero_user_segment(page, len, thislen);
				len = 0;
			} else {
				len -= thislen;
			}
		}
	}

	spin_lock(&fpq->lock);
	clear_bit(FR_SENT, &req->flags);
	spin_unlock(&fpq->lock);

	fuse_request_end(fc, req);
	spin_lock(&fsvq->lock);
	dec_in_flight_req(fsvq);
	spin_unlock(&fsvq->lock);
}

static void virtio_fs_complete_req_work(struct work_struct *work)
{
	struct virtio_fs_req_work *w =
		container_of(work, typeof(*w), done_work);

	virtio_fs_request_complete(w->req, w->fsvq);
	kfree(w);
}

static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_req *next;
	unsigned int len;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		list_del_init(&req->list);

		/* blocking async request completes in a worker context */
		if (req->args->may_block) {
			struct virtio_fs_req_work *w;

			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
			w->fsvq = fsvq;
			w->req = req;
			schedule_work(&w->done_work);
		} else {
			virtio_fs_request_complete(req, fsvq);
		}
	}
}

/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

	schedule_work(&fsvq->done_work);
}

/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
			       struct virtio_fs *fs)
{
	struct virtqueue **vqs;
	vq_callback_t **callbacks;
	const char **names;
	unsigned int i;
	int ret = 0;

	virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
			&fs->num_request_queues);
	if (fs->num_request_queues == 0)
		return -EINVAL;

	fs->nvqs = 1 + fs->num_request_queues;
	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
	if (!fs->vqs)
		return -ENOMEM;

	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
				  GFP_KERNEL);
	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
	if (!vqs || !callbacks || !names) {
		ret = -ENOMEM;
		goto out;
	}

	callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
	snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
		 "hiprio");
	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
	INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
	INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
			  virtio_fs_hiprio_dispatch_work);
	init_completion(&fs->vqs[VQ_HIPRIO].in_flight_zero);
	spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);

	/* Initialize the requests virtqueues */
	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
		spin_lock_init(&fs->vqs[i].lock);
		INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
		INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
				  virtio_fs_request_dispatch_work);
		INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
		INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
		init_completion(&fs->vqs[i].in_flight_zero);
		snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
			 "requests.%u", i - VQ_REQUEST);
		callbacks[i] = virtio_fs_vq_done;
		names[i] = fs->vqs[i].name;
	}

	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
	if (ret < 0)
		goto out;

	for (i = 0; i < fs->nvqs; i++)
		fs->vqs[i].vq = vqs[i];

	virtio_fs_start_all_queues(fs);
out:
	kfree(names);
	kfree(callbacks);
	kfree(vqs);
	if (ret)
		kfree(fs->vqs);
	return ret;
}

/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	vdev->config->del_vqs(vdev);
}

static int virtio_fs_probe(struct virtio_device *vdev)
{
	struct virtio_fs *fs;
	int ret;

	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	kref_init(&fs->refcount);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
	if (ret < 0)
		goto out;

	ret = virtio_fs_setup_vqs(vdev, fs);
	if (ret < 0)
		goto out;

	/* TODO vq affinity */

	/* Bring the device online in case the filesystem is mounted and
	 * requests need to be sent before we return.
	 */
	virtio_device_ready(vdev);

	ret = virtio_fs_add_instance(fs);
	if (ret < 0)
		goto out_vqs;

	return 0;

out_vqs:
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

out:
	vdev->priv = NULL;
	kfree(fs);
	return ret;
}

static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = false;
		spin_unlock(&fsvq->lock);
	}
}

static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get new reference */
	list_del_init(&fs->list);
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues_locked(fs);
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
	{},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.feature_table		= feature_table,
	.feature_table_size	= ARRAY_SIZE(feature_table),
	.probe			= virtio_fs_probe,
	.remove			= virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze			= virtio_fs_freeze,
	.restore		= virtio_fs_restore,
#endif
};

static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	struct fuse_forget_link *link;
	struct virtio_fs_forget *forget;
	struct virtio_fs_forget_req *req;
	struct virtio_fs *fs;
	struct virtio_fs_vq *fsvq;
	u64 unique;

	link = fuse_dequeue_forget(fiq, 1, NULL);
	unique = fuse_get_unique(fiq);

	fs = fiq->priv;
	fsvq = &fs->vqs[VQ_HIPRIO];
	spin_unlock(&fiq->lock);

	/* Allocate a buffer for the request */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
	req = &forget->req;

	req->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*req),
	};
	req->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	send_forget_request(fsvq, forget, false);
	kfree(link);
}

static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on local filesystems aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with shared lock between host and guest.
	 */
	spin_unlock(&fiq->lock);
}

/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
	unsigned int total_sgs = 1 /* fuse_in_header */;

	if (args->in_numargs - args->in_pages)
		total_sgs += 1;

	if (args->in_pages)
		total_sgs += ap->num_pages;

	if (!test_bit(FR_ISREPLY, &req->flags))
		return total_sgs;

	total_sgs += 1 /* fuse_out_header */;

	if (args->out_numargs - args->out_pages)
		total_sgs += 1;

	if (args->out_pages)
		total_sgs += ap->num_pages;

	return total_sgs;
}

/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
				       struct page **pages,
				       struct fuse_page_desc *page_descs,
				       unsigned int num_pages,
				       unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		sg_init_table(&sg[i], 1);
		this_len = min(page_descs[i].length, total_len);
		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
		total_len -= this_len;
	}

	return i;
}

/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
				      struct fuse_req *req,
				      struct fuse_arg *args,
				      unsigned int numargs,
				      bool argpages,
				      void *argbuf,
				      unsigned int *len_used)
{
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
	unsigned int total_sgs = 0;
	unsigned int len;

	len = fuse_len_args(numargs - argpages, args);
	if (len)
		sg_init_one(&sg[total_sgs++], argbuf, len);

	if (argpages)
		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
						ap->pages, ap->descs,
						ap->num_pages,
						args[numargs - 1].size);

	if (len_used)
		*len_used = len;

	return total_sgs;
}

/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight)
{
	/* requests need at least 4 elements */
	struct scatterlist *stack_sgs[6];
	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
	struct scatterlist **sgs = stack_sgs;
	struct scatterlist *sg = stack_sg;
	struct virtqueue *vq;
	struct fuse_args *args = req->args;
	unsigned int argbuf_used = 0;
	unsigned int out_sgs = 0;
	unsigned int in_sgs = 0;
	unsigned int total_sgs;
	unsigned int i;
	int ret;
	bool notify;
	struct fuse_pqueue *fpq;

	/* Does the sglist fit on the stack? */
	total_sgs = sg_count_fuse_req(req);
	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
		if (!sgs || !sg) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Use a bounce buffer since stack args cannot be mapped */
	ret = copy_args_to_argbuf(req);
	if (ret < 0)
		goto out;

	/* Request elements */
	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);

	/* Reply elements */
	if (test_bit(FR_ISREPLY, &req->flags)) {
		sg_init_one(&sg[out_sgs + in_sgs++],
			    &req->out.h, sizeof(req->out.h));
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
	}

	WARN_ON(out_sgs + in_sgs != total_sgs);

	for (i = 0; i < total_sgs; i++)
		sgs[i] = &sg[i];

	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Request successfully sent. */
	fpq = &fsvq->fud->pq;
	spin_lock(&fpq->lock);
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);

out:
	if (ret < 0 && req->argbuf) {
		kfree(req->argbuf);
		req->argbuf = NULL;
	}
	if (sgs != stack_sgs) {
		kfree(sgs);
		kfree(sg);
	}

	return ret;
}

static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
	struct virtio_fs *fs;
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	int ret;

	WARN_ON(list_empty(&fiq->pending));
	req = list_last_entry(&fiq->pending, struct fuse_req, list);
	clear_bit(FR_PENDING, &req->flags);
	list_del_init(&req->list);
	WARN_ON(!list_empty(&fiq->pending));
	spin_unlock(&fiq->lock);

	fs = fiq->priv;

	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
		 __func__, req->in.h.opcode, req->in.h.unique,
		 req->in.h.nodeid, req->in.h.len,
		 fuse_len_args(req->args->out_numargs, req->args->out_args));

	fsvq = &fs->vqs[queue_id];
	ret = virtio_fs_enqueue_req(fsvq, req, false);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			/*
			 * Virtqueue full. Retry submission from worker
			 * context as we might be holding fc->bg_lock.
			 */
			spin_lock(&fsvq->lock);
			list_add_tail(&req->list, &fsvq->queued_reqs);
			inc_in_flight_req(fsvq);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			spin_unlock(&fsvq->lock);
			return;
		}
		req->out.h.error = ret;
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);

		/* Can't end request in submission context. Use a worker */
		spin_lock(&fsvq->lock);
		list_add_tail(&req->list, &fsvq->end_reqs);
		schedule_delayed_work(&fsvq->dispatch_work, 0);
		spin_unlock(&fsvq->lock);
		return;
	}
}

static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
	.release			= virtio_fs_fiq_release,
};

static int virtio_fs_fill_super(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct virtio_fs *fs = fc->iq.priv;
	unsigned int i;
	int err;
	struct fuse_fs_context ctx = {
		.rootmode = S_IFDIR,
		.default_permissions = 1,
		.allow_other = 1,
		.max_read = UINT_MAX,
		.blksize = 512,
		.destroy = true,
		.no_control = true,
		.no_force_umount = true,
		.no_mount_options = true,
	};

	mutex_lock(&virtio_fs_mutex);

	/* After holding mutex, make sure virtiofs device is still there.
	 * Though we are holding a reference to it, driver ->remove might
	 * still have cleaned up virtual queues. In that case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
		goto err;
	}

	err = -ENOMEM;
	/* Allocate fuse_dev for hiprio and notification queues */
	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fsvq->fud = fuse_dev_alloc();
		if (!fsvq->fud)
			goto err_free_fuse_devs;
	}

	/* virtiofs allocates and installs its own fuse devices */
	ctx.fudptr = NULL;
	err = fuse_fill_super_common(sb, &ctx);
	if (err < 0)
		goto err_free_fuse_devs;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fuse_dev_install(fsvq->fud, fc);
	}

	/* Previous unmount will stop all queues. Start these again */
	virtio_fs_start_all_queues(fs);
	fuse_send_init(fc);
	mutex_unlock(&virtio_fs_mutex);
	return 0;

err_free_fuse_devs:
	virtio_fs_free_devs(fs);
err:
	mutex_unlock(&virtio_fs_mutex);
	return err;
}

static void virtio_kill_sb(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct virtio_fs *vfs;
	struct virtio_fs_vq *fsvq;

	/* If mount failed, we can still be called without any fc */
	if (!fc)
		return fuse_kill_sb_anon(sb);

	vfs = fc->iq.priv;
	fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_kill_sb_anon(sb);

	/* fuse_kill_sb_anon() must have sent destroy. Stop all queues
	 * and drain one more time and free fuse devices. Freeing fuse
	 * devices will drop their reference on fuse_conn and that in
	 * turn will drop its reference on virtio_fs object.
	 */
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
}

static int virtio_fs_test_super(struct super_block *sb,
				struct fs_context *fsc)
{
	struct fuse_conn *fc = fsc->s_fs_info;

	return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
}

static int virtio_fs_set_super(struct super_block *sb,
			       struct fs_context *fsc)
{
	int err;

	err = get_anon_bdev(&sb->s_dev);
	if (!err)
		fuse_conn_get(fsc->s_fs_info);

	return err;
}

static int virtio_fs_get_tree(struct fs_context *fsc)
{
	struct virtio_fs *fs;
	struct super_block *sb;
	struct fuse_conn *fc;
	int err;

	/* This gets a reference on virtio_fs object. This ptr gets installed
	 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
	 * to drop the reference to this object.
	 */
	fs = virtio_fs_find_instance(fsc->source);
	if (!fs) {
		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
		return -EINVAL;
	}

	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
	if (!fc) {
		mutex_lock(&virtio_fs_mutex);
		virtio_fs_put(fs);
		mutex_unlock(&virtio_fs_mutex);
		return -ENOMEM;
	}

	fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
		       fs);
	fc->release = fuse_free_conn;
	fc->delete_stale = true;

	fsc->s_fs_info = fc;
	sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
	fuse_conn_put(fc);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		err = virtio_fs_fill_super(sb);
		if (err) {
			deactivate_locked_super(sb);
			return err;
		}

		sb->s_flags |= SB_ACTIVE;
	}

	WARN_ON(fsc->root);
	fsc->root = dget(sb->s_root);
	return 0;
}

static const struct fs_context_operations virtio_fs_context_ops = {
	.get_tree	= virtio_fs_get_tree,
};

static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}

static struct file_system_type virtio_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb	= virtio_kill_sb,
};

static int __init virtio_fs_init(void)
{
	int ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	if (ret < 0)
		return ret;

	ret = register_filesystem(&virtio_fs_type);
	if (ret < 0) {
		unregister_virtio_driver(&virtio_fs_driver);
		return ret;
	}

	return 0;
}
module_init(virtio_fs_init);

static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
}
module_exit(virtio_fs_exit);

MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);