// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/dax.h>
#include <linux/pci.h>
#include <linux/pfn_t.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/highmem.h>
#include <linux/uio.h>
#include "fuse_i.h"

/* Used to help calculate the FUSE connection's max_pages limit for a request's
 * size. Parts of the struct fuse_req are sliced into scatter-gather lists in
 * addition to the pages used, so this can help account for that overhead.
 */
#define FUSE_HEADER_OVERHEAD	4

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion in the device removal and mounting paths.
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

#define VQ_NAME_LEN	24

/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	struct delayed_work dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	struct completion in_flight_zero; /* No inflight requests */
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
struct virtio_fs {
	struct kref refcount;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
	struct dax_device *dax_dev;

	/* DAX memory window where file contents are mapped */
	void *window_kaddr;
	phys_addr_t window_phys_addr;
	size_t window_len;
};

struct virtio_fs_forget_req {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
};

struct virtio_fs_forget {
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
	struct virtio_fs_forget_req req;
};

struct virtio_fs_req_work {
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	struct work_struct done_work;
};

static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight);

enum {
	OPT_DAX,
};

static const struct fs_parameter_spec virtio_fs_parameters[] = {
	fsparam_flag("dax", OPT_DAX),
	{}
};

static int virtio_fs_parse_param(struct fs_context *fc,
				 struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fc->fs_private;
	int opt;

	opt = fs_parse(fc, virtio_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_DAX:
		ctx->dax = 1;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static void virtio_fs_free_fc(struct fs_context *fc)
{
	struct fuse_fs_context *ctx = fc->fs_private;

	kfree(ctx);
}

static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
	struct virtio_fs *fs = vq->vdev->priv;

	return &fs->vqs[vq->index];
}

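/*
 * In-flight accounting for a virtqueue: in_flight counts requests this queue
 * has taken ownership of (submitted to the device or parked for retry), and
 * in_flight_zero is completed when the count drops back to zero so that queue
 * draining can wait for outstanding requests.
 */
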
/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
	fsvq->in_flight++;
}

/* Should be called with fsvq->lock held. */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight <= 0);
	fsvq->in_flight--;
	if (!fsvq->in_flight)
		complete(&fsvq->in_flight_zero);
}

static void release_virtio_fs_obj(struct kref *ref)
{
	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);

	kfree(vfs->vqs);
	kfree(vfs);
}

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
	kref_put(&fs->refcount, release_virtio_fs_obj);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
	struct virtio_fs *vfs = fiq->priv;

	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put(vfs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight < 0);

	/* Wait for in flight requests to finish. */
	spin_lock(&fsvq->lock);
	if (fsvq->in_flight) {
		/* We are holding virtio_fs_mutex. There should not be any
		 * waiters waiting for completion.
		 */
		reinit_completion(&fsvq->in_flight_zero);
		spin_unlock(&fsvq->lock);
		wait_for_completion(&fsvq->in_flight_zero);
	} else {
		spin_unlock(&fsvq->lock);
	}

	flush_work(&fsvq->done_work);
	flush_delayed_work(&fsvq->dispatch_work);
}

static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		virtio_fs_drain_queue(fsvq);
	}
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	/* Provides mutual exclusion between ->remove and ->kill_sb
	 * paths. We don't want both of these draining queues at the
	 * same time. Current completion logic reinits completion
	 * and that means there should not be any other thread
	 * doing reinit or waiting for completion already.
	 */
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_drain_all_queues_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}

/* Add a new instance to the list or return -EEXIST if tag name exists */
static int virtio_fs_add_instance(struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	bool duplicate = false;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0)
			duplicate = true;
	}

	if (!duplicate)
		list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	if (duplicate)
		return -EEXIST;
	return 0;
}

/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
	struct virtio_fs *fs;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, tag) == 0) {
			kref_get(&fs->refcount);
			goto found;
		}
	}

	fs = NULL; /* not found */

found:
	mutex_unlock(&virtio_fs_mutex);

	return fs;
}

static void virtio_fs_free_devs(struct virtio_fs *fs)
{
	unsigned int i;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (!fsvq->fud)
			continue;

		fuse_dev_free(fsvq->fud);
		fsvq->fud = NULL;
	}
}

/*
 * Read filesystem name from virtio config into fs->tag (allocated with
 * devm_kmalloc() and freed automatically with the device).
 */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)];

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';
	return 0;
}

/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			dec_in_flight_req(fsvq);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);
}

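/*
 * Work function for a request queue's dispatch_work: end any requests parked
 * on ->end_reqs, then retry submission of requests that were queued because
 * the virtqueue was out of memory or descriptors.
 */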
static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
					       list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			break;
		}

		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);
		fuse_request_end(req);
	}

	/* Dispatch pending requests */
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->queued_reqs,
					       struct fuse_req, list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			return;
		}
		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);

		ret = virtio_fs_enqueue_req(fsvq, req, true);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				spin_lock(&fsvq->lock);
				list_add_tail(&req->list, &fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						      msecs_to_jiffies(1));
				spin_unlock(&fsvq->lock);
				return;
			}
			req->out.h.error = ret;
			spin_lock(&fsvq->lock);
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
			       ret);
			fuse_request_end(req);
		}
	}
}

/*
 * Returns 1 if queue is full and sender should wait a bit before sending
 * next request, 0 otherwise.
 */
static int send_forget_request(struct virtio_fs_vq *fsvq,
			       struct virtio_fs_forget *forget,
			       bool in_flight)
{
	struct scatterlist sg;
	struct virtqueue *vq;
	int ret = 0;
	bool notify;
	struct virtio_fs_forget_req *req = &forget->req;

	spin_lock(&fsvq->lock);
	if (!fsvq->connected) {
		if (in_flight)
			dec_in_flight_req(fsvq);
		kfree(forget);
		goto out;
	}

	sg_init_one(&sg, req, sizeof(*req));
	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			if (!in_flight)
				inc_in_flight_req(fsvq);
			/* Queue is full */
			ret = 1;
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
			if (in_flight)
				dec_in_flight_req(fsvq);
		}
		goto out;
	}

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);
	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
	return ret;
out:
	spin_unlock(&fsvq->lock);
	return ret;
}

static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
						  struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		spin_unlock(&fsvq->lock);
		if (send_forget_request(fsvq, forget, true))
			return;
	}
}

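/*
 * FUSE request arguments may live on the caller's stack and therefore cannot
 * be mapped into a scatter-gather list directly. The helpers below bounce the
 * plain (non-page) arguments through a kmalloc'd req->argbuf: in-args are
 * copied in before submission and out-args are copied back on completion.
 */
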
/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	unsigned int offset = 0;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int len;
	unsigned int i;

	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	req->argbuf = kmalloc(len, GFP_ATOMIC);
	if (!req->argbuf)
		return -ENOMEM;

	for (i = 0; i < num_in; i++) {
		memcpy(req->argbuf + offset,
		       args->in_args[i].value,
		       args->in_args[i].size);
		offset += args->in_args[i].size;
	}

	return 0;
}

/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
	unsigned int remaining;
	unsigned int offset;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int i;

	remaining = req->out.h.len - sizeof(req->out.h);
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

	for (i = 0; i < num_out; i++) {
		unsigned int argsize = args->out_args[i].size;

		if (args->out_argvar &&
		    i == args->out_numargs - 1 &&
		    argsize > remaining) {
			argsize = remaining;
		}

		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
		offset += argsize;

		if (i != args->out_numargs - 1)
			remaining -= argsize;
	}

	/* Store the actual size of the variable-length arg */
	if (args->out_argvar)
		args->out_args[args->out_numargs - 1].size = remaining;

	kfree(req->argbuf);
	req->argbuf = NULL;
}

/* Complete a request (called directly or via virtio_fs_complete_req_work()) */
static void virtio_fs_request_complete(struct fuse_req *req,
				       struct virtio_fs_vq *fsvq)
{
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_args *args;
	struct fuse_args_pages *ap;
	unsigned int len, i, thislen;
	struct page *page;

	/*
	 * TODO verify that server properly follows FUSE protocol
	 * (oh.uniq, oh.len)
	 */
	args = req->args;
	copy_args_from_argbuf(args, req);

	if (args->out_pages && args->page_zeroing) {
		len = args->out_args[args->out_numargs - 1].size;
		ap = container_of(args, typeof(*ap), args);
		for (i = 0; i < ap->num_pages; i++) {
			thislen = ap->descs[i].length;
			if (len < thislen) {
				WARN_ON(ap->descs[i].offset);
				page = ap->pages[i];
				zero_user_segment(page, len, thislen);
				len = 0;
			} else {
				len -= thislen;
			}
		}
	}

	spin_lock(&fpq->lock);
	clear_bit(FR_SENT, &req->flags);
	spin_unlock(&fpq->lock);

	fuse_request_end(req);
	spin_lock(&fsvq->lock);
	dec_in_flight_req(fsvq);
	spin_unlock(&fsvq->lock);
}

static void virtio_fs_complete_req_work(struct work_struct *work)
{
	struct virtio_fs_req_work *w =
		container_of(work, typeof(*w), done_work);

	virtio_fs_request_complete(w->req, w->fsvq);
	kfree(w);
}

static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_req *next;
	unsigned int len;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		list_del_init(&req->list);

		/* blocking async request completes in a worker context */
		if (req->args->may_block) {
			struct virtio_fs_req_work *w;

			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
			w->fsvq = fsvq;
			w->req = req;
			schedule_work(&w->done_work);
		} else {
			virtio_fs_request_complete(req, fsvq);
		}
	}
}

/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

	schedule_work(&fsvq->done_work);
}

static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
			      int vq_type)
{
	strncpy(fsvq->name, name, VQ_NAME_LEN);
	spin_lock_init(&fsvq->lock);
	INIT_LIST_HEAD(&fsvq->queued_reqs);
	INIT_LIST_HEAD(&fsvq->end_reqs);
	init_completion(&fsvq->in_flight_zero);

	if (vq_type == VQ_REQUEST) {
		INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
		INIT_DELAYED_WORK(&fsvq->dispatch_work,
				  virtio_fs_request_dispatch_work);
	} else {
		INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
		INIT_DELAYED_WORK(&fsvq->dispatch_work,
				  virtio_fs_hiprio_dispatch_work);
	}
}

/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
			       struct virtio_fs *fs)
{
	struct virtqueue **vqs;
	vq_callback_t **callbacks;
	const char **names;
	unsigned int i;
	int ret = 0;

	virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
			&fs->num_request_queues);
	if (fs->num_request_queues == 0)
		return -EINVAL;

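	/*
	 * Virtqueue layout: index 0 (VQ_HIPRIO) carries FUSE_FORGET requests,
	 * indices VQ_REQUEST.. carry normal requests. The guest currently
	 * submits on the first request queue only (see
	 * virtio_fs_wake_pending_and_unlock()).
	 */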
	fs->nvqs = VQ_REQUEST + fs->num_request_queues;
	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
	if (!fs->vqs)
		return -ENOMEM;

	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
				  GFP_KERNEL);
	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
	if (!vqs || !callbacks || !names) {
		ret = -ENOMEM;
		goto out;
	}

	/* Initialize the hiprio/forget request virtqueue */
	callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
	virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;

	/* Initialize the requests virtqueues */
	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
		char vq_name[VQ_NAME_LEN];

		snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
		virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
		callbacks[i] = virtio_fs_vq_done;
		names[i] = fs->vqs[i].name;
	}

	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
	if (ret < 0)
		goto out;

	for (i = 0; i < fs->nvqs; i++)
		fs->vqs[i].vq = vqs[i];

	virtio_fs_start_all_queues(fs);
out:
	kfree(names);
	kfree(callbacks);
	kfree(vqs);
	if (ret)
		kfree(fs->vqs);
	return ret;
}

/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	vdev->config->del_vqs(vdev);
}

/* Map a window offset to a page frame number. The window offset will have
 * been produced by .iomap_begin(), which maps a file offset to a window
 * offset.
 */
static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
				    long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct virtio_fs *fs = dax_get_private(dax_dev);
	phys_addr_t offset = PFN_PHYS(pgoff);
	size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff;

	if (kaddr)
		*kaddr = fs->window_kaddr + offset;
	if (pfn)
		*pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
				     PFN_DEV | PFN_MAP);
	return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}

static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
				       pgoff_t pgoff, void *addr,
				       size_t bytes, struct iov_iter *i)
{
	return copy_from_iter(addr, bytes, i);
}

static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
				     pgoff_t pgoff, void *addr,
				     size_t bytes, struct iov_iter *i)
{
	return copy_to_iter(addr, bytes, i);
}

static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
				     pgoff_t pgoff, size_t nr_pages)
{
	long rc;
	void *kaddr;

	rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
	if (rc < 0)
		return rc;
	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
	return 0;
}

static const struct dax_operations virtio_fs_dax_ops = {
	.direct_access = virtio_fs_direct_access,
	.copy_from_iter = virtio_fs_copy_from_iter,
	.copy_to_iter = virtio_fs_copy_to_iter,
	.zero_page_range = virtio_fs_zero_page_range,
};

static void virtio_fs_cleanup_dax(void *data)
{
	struct dax_device *dax_dev = data;

	kill_dax(dax_dev);
	put_dax(dax_dev);
}

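/*
 * Set up the DAX window: map the device's shared memory region (the DAX
 * "cache") into the kernel and register a dax_device on top of it, so file
 * contents can be mapped directly instead of being copied through virtqueues.
 * Returns 0 when FUSE_DAX is not built in or the device offers no cache
 * region; the filesystem then simply runs without DAX.
 */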
static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
{
	struct virtio_shm_region cache_reg;
	struct dev_pagemap *pgmap;
	bool have_cache;

	if (!IS_ENABLED(CONFIG_FUSE_DAX))
		return 0;

	/* Get cache region */
	have_cache = virtio_get_shm_region(vdev, &cache_reg,
					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
	if (!have_cache) {
		dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
		return 0;
	}

	if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
				     dev_name(&vdev->dev))) {
		dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
			 cache_reg.addr, cache_reg.len);
		return -EBUSY;
	}

	dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
		   cache_reg.addr);

	pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_FS_DAX;

	/* Ideally we would directly use the PCI BAR resource but
	 * devm_memremap_pages() wants its own copy in pgmap. So
	 * initialize a struct range from scratch (only the start
	 * and end fields will be used).
	 */
	pgmap->range = (struct range) {
		.start = (phys_addr_t) cache_reg.addr,
		.end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
	};
	pgmap->nr_range = 1;

	fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
	if (IS_ERR(fs->window_kaddr))
		return PTR_ERR(fs->window_kaddr);

	fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
	fs->window_len = (phys_addr_t) cache_reg.len;

	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);

	fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
	if (IS_ERR(fs->dax_dev))
		return PTR_ERR(fs->dax_dev);

	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
					fs->dax_dev);
}

static int virtio_fs_probe(struct virtio_device *vdev)
{
	struct virtio_fs *fs;
	int ret;

	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	kref_init(&fs->refcount);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
	if (ret < 0)
		goto out;

	ret = virtio_fs_setup_vqs(vdev, fs);
	if (ret < 0)
		goto out;

	/* TODO vq affinity */

	ret = virtio_fs_setup_dax(vdev, fs);
	if (ret < 0)
		goto out_vqs;

	/* Bring the device online in case the filesystem is mounted and
	 * requests need to be sent before we return.
	 */
	virtio_device_ready(vdev);

	ret = virtio_fs_add_instance(fs);
	if (ret < 0)
		goto out_vqs;

	return 0;

out_vqs:
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);
	kfree(fs->vqs);

out:
	vdev->priv = NULL;
	kfree(fs);
	return ret;
}

static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = false;
		spin_unlock(&fsvq->lock);
	}
}

static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get a new reference. */
	list_del_init(&fs->list);
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues_locked(fs);
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
	{},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.feature_table		= feature_table,
	.feature_table_size	= ARRAY_SIZE(feature_table),
	.probe			= virtio_fs_probe,
	.remove			= virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze			= virtio_fs_freeze,
	.restore		= virtio_fs_restore,
#endif
};

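/*
 * fuse_iqueue_ops callbacks: instead of handing requests to a userspace
 * daemon through /dev/fuse, virtio-fs feeds them directly into virtqueues.
 */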
static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	struct fuse_forget_link *link;
	struct virtio_fs_forget *forget;
	struct virtio_fs_forget_req *req;
	struct virtio_fs *fs;
	struct virtio_fs_vq *fsvq;
	u64 unique;

	link = fuse_dequeue_forget(fiq, 1, NULL);
	unique = fuse_get_unique(fiq);

	fs = fiq->priv;
	fsvq = &fs->vqs[VQ_HIPRIO];
	spin_unlock(&fiq->lock);

	/* Allocate a buffer for the request */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
	req = &forget->req;

	req->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*req),
	};
	req->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	send_forget_request(fsvq, forget, false);
	kfree(link);
}

static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystem aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with a shared lock between host and guest.
	 */
	spin_unlock(&fiq->lock);
}

/* Count number of scatter-gather elements required */
static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
					unsigned int num_pages,
					unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		this_len = min(page_descs[i].length, total_len);
		total_len -= this_len;
	}

	return i;
}

/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
	unsigned int size, total_sgs = 1 /* fuse_in_header */;

	if (args->in_numargs - args->in_pages)
		total_sgs += 1;

	if (args->in_pages) {
		size = args->in_args[args->in_numargs - 1].size;
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
	}

	if (!test_bit(FR_ISREPLY, &req->flags))
		return total_sgs;

	total_sgs += 1 /* fuse_out_header */;

	if (args->out_numargs - args->out_pages)
		total_sgs += 1;

	if (args->out_pages) {
		size = args->out_args[args->out_numargs - 1].size;
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
	}

	return total_sgs;
}

/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
				       struct page **pages,
				       struct fuse_page_desc *page_descs,
				       unsigned int num_pages,
				       unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		sg_init_table(&sg[i], 1);
		this_len = min(page_descs[i].length, total_len);
		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
		total_len -= this_len;
	}

	return i;
}

/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
				      struct fuse_req *req,
				      struct fuse_arg *args,
				      unsigned int numargs,
				      bool argpages,
				      void *argbuf,
				      unsigned int *len_used)
{
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
	unsigned int total_sgs = 0;
	unsigned int len;

	len = fuse_len_args(numargs - argpages, args);
	if (len)
		sg_init_one(&sg[total_sgs++], argbuf, len);

	if (argpages)
		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
						ap->pages, ap->descs,
						ap->num_pages,
						args[numargs - 1].size);

	if (len_used)
		*len_used = len;

	return total_sgs;
}

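/*
 * Scatter-gather layout of a request as submitted by virtio_fs_enqueue_req(),
 * in virtqueue order:
 *   device-readable: fuse_in_header, in-args bounce buffer, in pages
 *   device-writable: fuse_out_header, out-args bounce buffer, out pages
 */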
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight)
{
	/* requests need at least 4 elements */
	struct scatterlist *stack_sgs[6];
	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
	struct scatterlist **sgs = stack_sgs;
	struct scatterlist *sg = stack_sg;
	struct virtqueue *vq;
	struct fuse_args *args = req->args;
	unsigned int argbuf_used = 0;
	unsigned int out_sgs = 0;
	unsigned int in_sgs = 0;
	unsigned int total_sgs;
	unsigned int i;
	int ret;
	bool notify;
	struct fuse_pqueue *fpq;

	/* Does the sglist fit on the stack? */
	total_sgs = sg_count_fuse_req(req);
	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
		if (!sgs || !sg) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Use a bounce buffer since stack args cannot be mapped */
	ret = copy_args_to_argbuf(req);
	if (ret < 0)
		goto out;

	/* Request elements */
	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);

	/* Reply elements */
	if (test_bit(FR_ISREPLY, &req->flags)) {
		sg_init_one(&sg[out_sgs + in_sgs++],
			    &req->out.h, sizeof(req->out.h));
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
	}

	WARN_ON(out_sgs + in_sgs != total_sgs);

	for (i = 0; i < total_sgs; i++)
		sgs[i] = &sg[i];

	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Request successfully sent. */
	fpq = &fsvq->fud->pq;
	spin_lock(&fpq->lock);
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);

out:
	if (ret < 0 && req->argbuf) {
		kfree(req->argbuf);
		req->argbuf = NULL;
	}
	if (sgs != stack_sgs) {
		kfree(sgs);
		kfree(sg);
	}

	return ret;
}

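/*
 * fuse_iqueue_ops callback: pull the pending request off fiq->pending and
 * submit it on the first request virtqueue. If the ring is full (-ENOMEM or
 * -ENOSPC) the request is queued and retried from the dispatch worker; other
 * errors are ended from a worker as well, since we cannot end requests in
 * submission context.
 */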
static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
	struct virtio_fs *fs;
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	int ret;

	WARN_ON(list_empty(&fiq->pending));
	req = list_last_entry(&fiq->pending, struct fuse_req, list);
	clear_bit(FR_PENDING, &req->flags);
	list_del_init(&req->list);
	WARN_ON(!list_empty(&fiq->pending));
	spin_unlock(&fiq->lock);

	fs = fiq->priv;

	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
		 __func__, req->in.h.opcode, req->in.h.unique,
		 req->in.h.nodeid, req->in.h.len,
		 fuse_len_args(req->args->out_numargs, req->args->out_args));

	fsvq = &fs->vqs[queue_id];
	ret = virtio_fs_enqueue_req(fsvq, req, false);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			/*
			 * Virtqueue full. Retry submission from worker
			 * context as we might be holding fc->bg_lock.
			 */
			spin_lock(&fsvq->lock);
			list_add_tail(&req->list, &fsvq->queued_reqs);
			inc_in_flight_req(fsvq);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			spin_unlock(&fsvq->lock);
			return;
		}
		req->out.h.error = ret;
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);

		/* Can't end request in submission context. Use a worker */
		spin_lock(&fsvq->lock);
		list_add_tail(&req->list, &fsvq->end_reqs);
		schedule_delayed_work(&fsvq->dispatch_work, 0);
		spin_unlock(&fsvq->lock);
		return;
	}
}

static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
	.release			= virtio_fs_fiq_release,
};

static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
{
	ctx->rootmode = S_IFDIR;
	ctx->default_permissions = 1;
	ctx->allow_other = 1;
	ctx->max_read = UINT_MAX;
	ctx->blksize = 512;
	ctx->destroy = true;
	ctx->no_control = true;
	ctx->no_force_umount = true;
}

static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *fs = fc->iq.priv;
	struct fuse_fs_context *ctx = fsc->fs_private;
	unsigned int i;
	int err;

	virtio_fs_ctx_set_defaults(ctx);
	mutex_lock(&virtio_fs_mutex);

	/* After holding mutex, make sure virtiofs device is still there.
	 * Though we are holding a reference to it, driver ->remove might
	 * still have cleaned up virtual queues. In that case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
		goto err;
	}

	err = -ENOMEM;
	/* Allocate fuse_dev for hiprio and notification queues */
	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fsvq->fud = fuse_dev_alloc();
		if (!fsvq->fud)
			goto err_free_fuse_devs;
	}

	/* virtiofs allocates and installs its own fuse devices */
	ctx->fudptr = NULL;
	if (ctx->dax) {
		if (!fs->dax_dev) {
			err = -EINVAL;
			pr_err("virtio-fs: dax can't be enabled as filesystem"
			       " device does not support it.\n");
			goto err_free_fuse_devs;
		}
		ctx->dax_dev = fs->dax_dev;
	}
	err = fuse_fill_super_common(sb, ctx);
	if (err < 0)
		goto err_free_fuse_devs;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fuse_dev_install(fsvq->fud, fc);
	}

	/* Previous unmount will stop all queues. Start these again */
	virtio_fs_start_all_queues(fs);
	fuse_send_init(fm);
	mutex_unlock(&virtio_fs_mutex);
	return 0;

err_free_fuse_devs:
	virtio_fs_free_devs(fs);
err:
	mutex_unlock(&virtio_fs_mutex);
	return err;
}

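/*
 * Tear down the FUSE connection on the final unmount: cancel DAX reclaim work
 * (if enabled), stop the hiprio/forget queue and drain, let fuse_conn_destroy()
 * send FUSE_DESTROY, then stop and drain all queues once more and free the
 * per-queue fuse devices.
 */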
static void virtio_fs_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *vfs = fc->iq.priv;
	struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop dax worker. Soon evict_inodes() will be called which
	 * will free all memory ranges belonging to all inodes.
	 */
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_cancel_work(fc);

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_conn_destroy(fm);

	/* fuse_conn_destroy() must have sent destroy. Stop all queues
	 * and drain one more time and free fuse devices. Freeing fuse
	 * devices will drop their reference on fuse_conn and that in
	 * turn will drop its reference on virtio_fs object.
	 */
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
}

static void virtio_kill_sb(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	bool last;

	/* If mount failed, we can still be called without any fc */
	if (fm) {
		last = fuse_mount_remove(fm);
		if (last)
			virtio_fs_conn_destroy(fm);
	}
	kill_anon_super(sb);
}

static int virtio_fs_test_super(struct super_block *sb,
				struct fs_context *fsc)
{
	struct fuse_mount *fsc_fm = fsc->s_fs_info;
	struct fuse_mount *sb_fm = get_fuse_mount_super(sb);

	return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
}

static int virtio_fs_get_tree(struct fs_context *fsc)
{
	struct virtio_fs *fs;
	struct super_block *sb;
	struct fuse_conn *fc = NULL;
	struct fuse_mount *fm;
	unsigned int virtqueue_size;
	int err = -EIO;

	/* This gets a reference on the virtio_fs object. This ptr gets
	 * installed in fc->iq.priv. Once fuse_conn is going away, it calls
	 * ->release() to drop the reference to this object.
	 */
	fs = virtio_fs_find_instance(fsc->source);
	if (!fs) {
		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
		return -EINVAL;
	}

	virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
	if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
		goto out_err;

	err = -ENOMEM;
	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
	if (!fc)
		goto out_err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		goto out_err;

	fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
	fc->release = fuse_free_conn;
	fc->delete_stale = true;
	fc->auto_submounts = true;
	fc->sync_fs = true;

	/* Tell FUSE to split requests that exceed the virtqueue's size */
	fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
				    virtqueue_size - FUSE_HEADER_OVERHEAD);

	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
	if (fsc->s_fs_info) {
		fuse_conn_put(fc);
		kfree(fm);
	}
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		err = virtio_fs_fill_super(sb, fsc);
		if (err) {
			fuse_conn_put(fc);
			kfree(fm);
			sb->s_fs_info = NULL;
			deactivate_locked_super(sb);
			return err;
		}

		sb->s_flags |= SB_ACTIVE;
	}

	WARN_ON(fsc->root);
	fsc->root = dget(sb->s_root);
	return 0;

out_err:
	kfree(fc);
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
	return err;
}

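/*
 * fs_context/mount plumbing. Userspace selects an exported filesystem by its
 * tag, e.g. "mount -t virtiofs <tag> /mnt"; the tag arrives here as
 * fsc->source and is matched in virtio_fs_get_tree().
 */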
static const struct fs_context_operations virtio_fs_context_ops = {
	.free		= virtio_fs_free_fc,
	.parse_param	= virtio_fs_parse_param,
	.get_tree	= virtio_fs_get_tree,
};

static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx;

	if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT)
		return fuse_init_fs_context_submount(fsc);

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	fsc->fs_private = ctx;
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}

static struct file_system_type virtio_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb	= virtio_kill_sb,
};

static int __init virtio_fs_init(void)
{
	int ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	if (ret < 0)
		return ret;

	ret = register_filesystem(&virtio_fs_type);
	if (ret < 0) {
		unregister_virtio_driver(&virtio_fs_driver);
		return ret;
	}

	return 0;
}
module_init(virtio_fs_init);

static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
}
module_exit(virtio_fs_exit);

MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);