// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/dax.h>
#include <linux/pci.h>
#include <linux/pfn_t.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/highmem.h>
#include <linux/uio.h>
#include "fuse_i.h"

/* Used to help calculate the FUSE connection's max_pages limit for a request's
 * size. Parts of the struct fuse_req are sliced into scatter-gather lists in
 * addition to the pages used, so this can help account for that overhead.
 */
#define FUSE_HEADER_OVERHEAD	4

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion between the device removal and mount paths.
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

#define VQ_NAME_LEN	24

/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	struct delayed_work dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	struct completion in_flight_zero; /* No inflight requests */
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
struct virtio_fs {
	struct kref refcount;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
	struct dax_device *dax_dev;

	/* DAX memory window where file contents are mapped */
	void *window_kaddr;
	phys_addr_t window_phys_addr;
	size_t window_len;
};

struct virtio_fs_forget_req {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
};

struct virtio_fs_forget {
	/* This request can be temporarily queued on a virtqueue */
	struct list_head list;
	struct virtio_fs_forget_req req;
};

struct virtio_fs_req_work {
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	struct work_struct done_work;
};

static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight);

static const struct constant_table dax_param_enums[] = {
	{"always",	FUSE_DAX_ALWAYS },
	{"never",	FUSE_DAX_NEVER },
	{"inode",	FUSE_DAX_INODE_USER },
	{}
};

enum {
	OPT_DAX,
	OPT_DAX_ENUM,
};

static const struct fs_parameter_spec virtio_fs_parameters[] = {
	fsparam_flag("dax", OPT_DAX),
	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
	{}
};

static int virtio_fs_parse_param(struct fs_context *fsc,
				 struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fsc->fs_private;
	int opt;

	opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_DAX:
		ctx->dax_mode = FUSE_DAX_ALWAYS;
		break;
	case OPT_DAX_ENUM:
		ctx->dax_mode = result.uint_32;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static void virtio_fs_free_fsc(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;

	kfree(ctx);
}

static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
	struct virtio_fs *fs = vq->vdev->priv;

	return &fs->vqs[vq->index];
}

/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
	fsvq->in_flight++;
}

/* Should be called with fsvq->lock held. */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight <= 0);
	fsvq->in_flight--;
	if (!fsvq->in_flight)
		complete(&fsvq->in_flight_zero);
}

static void release_virtio_fs_obj(struct kref *ref)
{
	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);

	kfree(vfs->vqs);
	kfree(vfs);
}

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
	kref_put(&fs->refcount, release_virtio_fs_obj);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
	struct virtio_fs *vfs = fiq->priv;

	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put(vfs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight < 0);

	/* Wait for in flight requests to finish. */
	spin_lock(&fsvq->lock);
	if (fsvq->in_flight) {
		/* We are holding virtio_fs_mutex. There should not be any
		 * waiters waiting for completion.
		 */
		reinit_completion(&fsvq->in_flight_zero);
		spin_unlock(&fsvq->lock);
		wait_for_completion(&fsvq->in_flight_zero);
	} else {
		spin_unlock(&fsvq->lock);
	}

	flush_work(&fsvq->done_work);
	flush_delayed_work(&fsvq->dispatch_work);
}

static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		virtio_fs_drain_queue(fsvq);
	}
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	/* Provides mutual exclusion between the ->remove and ->kill_sb
	 * paths. We don't want both of these draining queues at the
	 * same time. The completion logic reinits the completion,
	 * which means there should not be any other thread doing a
	 * reinit or waiting for completion already.
	 */
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_drain_all_queues_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}

/* Add a new instance to the list or return -EEXIST if tag name exists */
static int virtio_fs_add_instance(struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	bool duplicate = false;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0)
			duplicate = true;
	}

	if (!duplicate)
		list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	if (duplicate)
		return -EEXIST;
	return 0;
}

/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
	struct virtio_fs *fs;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, tag) == 0) {
			kref_get(&fs->refcount);
			goto found;
		}
	}

	fs = NULL; /* not found */

found:
	mutex_unlock(&virtio_fs_mutex);

	return fs;
}

static void virtio_fs_free_devs(struct virtio_fs *fs)
{
	unsigned int i;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (!fsvq->fud)
			continue;

		fuse_dev_free(fsvq->fud);
		fsvq->fud = NULL;
	}
}

/* Read the filesystem name from the virtio config space into fs->tag
 * (devm-allocated, so it is freed automatically along with the device).
 */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)];

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';
	return 0;
}

/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			dec_in_flight_req(fsvq);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);
}

static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
					       list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			break;
		}

		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);
		fuse_request_end(req);
	}

	/* Dispatch pending requests */
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->queued_reqs,
					       struct fuse_req, list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			return;
		}
		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);

		ret = virtio_fs_enqueue_req(fsvq, req, true);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				spin_lock(&fsvq->lock);
				list_add_tail(&req->list, &fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						      msecs_to_jiffies(1));
				spin_unlock(&fsvq->lock);
				return;
			}
			req->out.h.error = ret;
			spin_lock(&fsvq->lock);
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
			       ret);
			fuse_request_end(req);
		}
	}
}

/*
 * Returns 1 if queue is full and sender should wait a bit before sending
 * next request, 0 otherwise.
 */
static int send_forget_request(struct virtio_fs_vq *fsvq,
			       struct virtio_fs_forget *forget,
			       bool in_flight)
{
	struct scatterlist sg;
	struct virtqueue *vq;
	int ret = 0;
	bool notify;
	struct virtio_fs_forget_req *req = &forget->req;

	spin_lock(&fsvq->lock);
	if (!fsvq->connected) {
		if (in_flight)
			dec_in_flight_req(fsvq);
		kfree(forget);
		goto out;
	}

	sg_init_one(&sg, req, sizeof(*req));
	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			if (!in_flight)
				inc_in_flight_req(fsvq);
			/* Queue is full */
			ret = 1;
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
			if (in_flight)
				dec_in_flight_req(fsvq);
		}
		goto out;
	}

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);
	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
	return ret;
out:
	spin_unlock(&fsvq->lock);
	return ret;
}

static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
						  struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		spin_unlock(&fsvq->lock);
		if (send_forget_request(fsvq, forget, true))
			return;
	}
}

/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	unsigned int offset = 0;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int len;
	unsigned int i;

	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	req->argbuf = kmalloc(len, GFP_ATOMIC);
	if (!req->argbuf)
		return -ENOMEM;

	for (i = 0; i < num_in; i++) {
		memcpy(req->argbuf + offset,
		       args->in_args[i].value,
		       args->in_args[i].size);
		offset += args->in_args[i].size;
	}

	return 0;
}

/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
	unsigned int remaining;
	unsigned int offset;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int i;

	remaining = req->out.h.len - sizeof(req->out.h);
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

	for (i = 0; i < num_out; i++) {
		unsigned int argsize = args->out_args[i].size;

		if (args->out_argvar &&
		    i == args->out_numargs - 1 &&
		    argsize > remaining) {
			argsize = remaining;
		}

		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
		offset += argsize;

		if (i != args->out_numargs - 1)
			remaining -= argsize;
	}

	/* Store the actual size of the variable-length arg */
	if (args->out_argvar)
		args->out_args[args->out_numargs - 1].size = remaining;

	kfree(req->argbuf);
	req->argbuf = NULL;
}
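
/*
 * Note on the argbuf layout: copy_args_to_argbuf() sizes the bounce
 * buffer for both directions but packs only the in-args at the front;
 * the out-args region that follows is filled in by the device and
 * unpacked by copy_args_from_argbuf() at the matching offset, i.e.
 * fuse_len_args() of the non-page in-args.
 */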

/* Work function for request completion */
static void virtio_fs_request_complete(struct fuse_req *req,
				       struct virtio_fs_vq *fsvq)
{
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_args *args;
	struct fuse_args_pages *ap;
	unsigned int len, i, thislen;
	struct page *page;

	/*
	 * TODO verify that server properly follows FUSE protocol
	 * (oh.uniq, oh.len)
	 */
	args = req->args;
	copy_args_from_argbuf(args, req);

	if (args->out_pages && args->page_zeroing) {
		len = args->out_args[args->out_numargs - 1].size;
		ap = container_of(args, typeof(*ap), args);
		for (i = 0; i < ap->num_pages; i++) {
			thislen = ap->descs[i].length;
			if (len < thislen) {
				WARN_ON(ap->descs[i].offset);
				page = ap->pages[i];
				zero_user_segment(page, len, thislen);
				len = 0;
			} else {
				len -= thislen;
			}
		}
	}

	spin_lock(&fpq->lock);
	clear_bit(FR_SENT, &req->flags);
	spin_unlock(&fpq->lock);

	fuse_request_end(req);
	spin_lock(&fsvq->lock);
	dec_in_flight_req(fsvq);
	spin_unlock(&fsvq->lock);
}

static void virtio_fs_complete_req_work(struct work_struct *work)
{
	struct virtio_fs_req_work *w =
		container_of(work, typeof(*w), done_work);

	virtio_fs_request_complete(w->req, w->fsvq);
	kfree(w);
}

static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_req *next;
	unsigned int len;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		list_del_init(&req->list);

		/* blocking async request completes in a worker context */
		if (req->args->may_block) {
			struct virtio_fs_req_work *w;

			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
			w->fsvq = fsvq;
			w->req = req;
			schedule_work(&w->done_work);
		} else {
			virtio_fs_request_complete(req, fsvq);
		}
	}
}

/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

	schedule_work(&fsvq->done_work);
}

static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
			      int vq_type)
{
	strscpy(fsvq->name, name, VQ_NAME_LEN);
	spin_lock_init(&fsvq->lock);
	INIT_LIST_HEAD(&fsvq->queued_reqs);
	INIT_LIST_HEAD(&fsvq->end_reqs);
	init_completion(&fsvq->in_flight_zero);

	if (vq_type == VQ_REQUEST) {
		INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
		INIT_DELAYED_WORK(&fsvq->dispatch_work,
				  virtio_fs_request_dispatch_work);
	} else {
		INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
		INIT_DELAYED_WORK(&fsvq->dispatch_work,
				  virtio_fs_hiprio_dispatch_work);
	}
}
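
/*
 * Note: the virtqueues are created below in fs->vqs[] order, so a
 * virtqueue's index doubles as an index into fs->vqs[]; vq_to_fsvq()
 * relies on this invariant. Slot VQ_HIPRIO carries FUSE_FORGET
 * requests, slots VQ_REQUEST onwards carry regular requests.
 */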

/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
			       struct virtio_fs *fs)
{
	struct virtqueue **vqs;
	vq_callback_t **callbacks;
	const char **names;
	unsigned int i;
	int ret = 0;

	virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
			&fs->num_request_queues);
	if (fs->num_request_queues == 0)
		return -EINVAL;

	fs->nvqs = VQ_REQUEST + fs->num_request_queues;
	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
	if (!fs->vqs)
		return -ENOMEM;

	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
				  GFP_KERNEL);
	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
	if (!vqs || !callbacks || !names) {
		ret = -ENOMEM;
		goto out;
	}

	/* Initialize the hiprio/forget request virtqueue */
	callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
	virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;

	/* Initialize the requests virtqueues */
	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
		char vq_name[VQ_NAME_LEN];

		snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
		virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
		callbacks[i] = virtio_fs_vq_done;
		names[i] = fs->vqs[i].name;
	}

	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
	if (ret < 0)
		goto out;

	for (i = 0; i < fs->nvqs; i++)
		fs->vqs[i].vq = vqs[i];

	virtio_fs_start_all_queues(fs);
out:
	kfree(names);
	kfree(callbacks);
	kfree(vqs);
	if (ret)
		kfree(fs->vqs);
	return ret;
}

/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	vdev->config->del_vqs(vdev);
}

/* Map a window offset to a page frame number. The window offset will have
 * been produced by .iomap_begin(), which maps a file offset to a window
 * offset.
 */
static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
				    long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct virtio_fs *fs = dax_get_private(dax_dev);
	phys_addr_t offset = PFN_PHYS(pgoff);
	size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff;

	if (kaddr)
		*kaddr = fs->window_kaddr + offset;
	if (pfn)
		*pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
				     PFN_DEV | PFN_MAP);
	return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}

static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
				     pgoff_t pgoff, size_t nr_pages)
{
	long rc;
	void *kaddr;

	rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
	if (rc < 0)
		return rc;
	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
	return 0;
}

static const struct dax_operations virtio_fs_dax_ops = {
	.direct_access = virtio_fs_direct_access,
	.zero_page_range = virtio_fs_zero_page_range,
};

static void virtio_fs_cleanup_dax(void *data)
{
	struct dax_device *dax_dev = data;

	kill_dax(dax_dev);
	put_dax(dax_dev);
}

static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
{
	struct virtio_shm_region cache_reg;
	struct dev_pagemap *pgmap;
	bool have_cache;

	if (!IS_ENABLED(CONFIG_FUSE_DAX))
		return 0;

	/* Get cache region */
	have_cache = virtio_get_shm_region(vdev, &cache_reg,
					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
	if (!have_cache) {
		dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
		return 0;
	}

	if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
				     dev_name(&vdev->dev))) {
		dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
			 cache_reg.addr, cache_reg.len);
		return -EBUSY;
	}

	dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
		   cache_reg.addr);

	pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_FS_DAX;

	/* Ideally we would directly use the PCI BAR resource but
	 * devm_memremap_pages() wants its own copy in pgmap. So
	 * initialize a struct resource from scratch (only the start
	 * and end fields will be used).
	 */
	pgmap->range = (struct range) {
		.start = (phys_addr_t) cache_reg.addr,
		.end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
	};
	pgmap->nr_range = 1;

	fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
	if (IS_ERR(fs->window_kaddr))
		return PTR_ERR(fs->window_kaddr);

	fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
	fs->window_len = (phys_addr_t) cache_reg.len;

	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);

	fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
	if (IS_ERR(fs->dax_dev))
		return PTR_ERR(fs->dax_dev);

	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
					fs->dax_dev);
}

static int virtio_fs_probe(struct virtio_device *vdev)
{
	struct virtio_fs *fs;
	int ret;

	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	kref_init(&fs->refcount);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
	if (ret < 0)
		goto out;

	ret = virtio_fs_setup_vqs(vdev, fs);
	if (ret < 0)
		goto out;

	/* TODO vq affinity */

	ret = virtio_fs_setup_dax(vdev, fs);
	if (ret < 0)
		goto out_vqs;

	/* Bring the device online in case the filesystem is mounted and
	 * requests need to be sent before we return.
	 */
	virtio_device_ready(vdev);

	ret = virtio_fs_add_instance(fs);
	if (ret < 0)
		goto out_vqs;

	return 0;

out_vqs:
	virtio_reset_device(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);
	kfree(fs->vqs);

out:
	vdev->priv = NULL;
	kfree(fs);
	return ret;
}

static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = false;
		spin_unlock(&fsvq->lock);
	}
}

static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get a new reference */
	list_del_init(&fs->list);
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues_locked(fs);
	virtio_reset_device(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
	{},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.feature_table		= feature_table,
	.feature_table_size	= ARRAY_SIZE(feature_table),
	.probe			= virtio_fs_probe,
	.remove			= virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze			= virtio_fs_freeze,
	.restore		= virtio_fs_restore,
#endif
};

static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	struct fuse_forget_link *link;
	struct virtio_fs_forget *forget;
	struct virtio_fs_forget_req *req;
	struct virtio_fs *fs;
	struct virtio_fs_vq *fsvq;
	u64 unique;

	link = fuse_dequeue_forget(fiq, 1, NULL);
	unique = fuse_get_unique(fiq);

	fs = fiq->priv;
	fsvq = &fs->vqs[VQ_HIPRIO];
	spin_unlock(&fiq->lock);

	/* Allocate a buffer for the request */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
	req = &forget->req;

	req->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*req),
	};
	req->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	send_forget_request(fsvq, forget, false);
	kfree(link);
}

static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystem aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with a shared lock between host and guest.
	 */
	spin_unlock(&fiq->lock);
}

/* Count number of scatter-gather elements required */
static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
					unsigned int num_pages,
					unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		this_len = min(page_descs[i].length, total_len);
		total_len -= this_len;
	}

	return i;
}

/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
	unsigned int size, total_sgs = 1 /* fuse_in_header */;

	if (args->in_numargs - args->in_pages)
		total_sgs += 1;

	if (args->in_pages) {
		size = args->in_args[args->in_numargs - 1].size;
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
	}

	if (!test_bit(FR_ISREPLY, &req->flags))
		return total_sgs;

	total_sgs += 1 /* fuse_out_header */;

	if (args->out_numargs - args->out_pages)
		total_sgs += 1;

	if (args->out_pages) {
		size = args->out_args[args->out_numargs - 1].size;
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
	}

	return total_sgs;
}

/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
				       struct page **pages,
				       struct fuse_page_desc *page_descs,
				       unsigned int num_pages,
				       unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		sg_init_table(&sg[i], 1);
		this_len = min(page_descs[i].length, total_len);
		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
		total_len -= this_len;
	}

	return i;
}

/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
				      struct fuse_req *req,
				      struct fuse_arg *args,
				      unsigned int numargs,
				      bool argpages,
				      void *argbuf,
				      unsigned int *len_used)
{
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
	unsigned int total_sgs = 0;
	unsigned int len;

	len = fuse_len_args(numargs - argpages, args);
	if (len)
		sg_init_one(&sg[total_sgs++], argbuf, len);

	if (argpages)
		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
						ap->pages, ap->descs,
						ap->num_pages,
						args[numargs - 1].size);

	if (len_used)
		*len_used = len;

	return total_sgs;
}
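
/*
 * Note on the scatter-gather layout built below: the device-readable
 * part is the fuse_in_header, the packed in-args (bounced via
 * req->argbuf) and any in pages; the device-writable part is the
 * fuse_out_header, the out-args region of argbuf and any out pages.
 * sg_count_fuse_req() above walks the same layout, which is what the
 * WARN_ON() in virtio_fs_enqueue_req() cross-checks.
 */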

/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight)
{
	/* requests need at least 4 elements */
	struct scatterlist *stack_sgs[6];
	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
	struct scatterlist **sgs = stack_sgs;
	struct scatterlist *sg = stack_sg;
	struct virtqueue *vq;
	struct fuse_args *args = req->args;
	unsigned int argbuf_used = 0;
	unsigned int out_sgs = 0;
	unsigned int in_sgs = 0;
	unsigned int total_sgs;
	unsigned int i;
	int ret;
	bool notify;
	struct fuse_pqueue *fpq;

	/* Does the sglist fit on the stack? */
	total_sgs = sg_count_fuse_req(req);
	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
		if (!sgs || !sg) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Use a bounce buffer since stack args cannot be mapped */
	ret = copy_args_to_argbuf(req);
	if (ret < 0)
		goto out;

	/* Request elements */
	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);

	/* Reply elements */
	if (test_bit(FR_ISREPLY, &req->flags)) {
		sg_init_one(&sg[out_sgs + in_sgs++],
			    &req->out.h, sizeof(req->out.h));
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
	}

	WARN_ON(out_sgs + in_sgs != total_sgs);

	for (i = 0; i < total_sgs; i++)
		sgs[i] = &sg[i];

	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Request successfully sent. */
	fpq = &fsvq->fud->pq;
	spin_lock(&fpq->lock);
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);

out:
	if (ret < 0 && req->argbuf) {
		kfree(req->argbuf);
		req->argbuf = NULL;
	}
	if (sgs != stack_sgs) {
		kfree(sgs);
		kfree(sg);
	}

	return ret;
}

static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
	struct virtio_fs *fs;
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	int ret;

	WARN_ON(list_empty(&fiq->pending));
	req = list_last_entry(&fiq->pending, struct fuse_req, list);
	clear_bit(FR_PENDING, &req->flags);
	list_del_init(&req->list);
	WARN_ON(!list_empty(&fiq->pending));
	spin_unlock(&fiq->lock);

	fs = fiq->priv;

	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
		 __func__, req->in.h.opcode, req->in.h.unique,
		 req->in.h.nodeid, req->in.h.len,
		 fuse_len_args(req->args->out_numargs, req->args->out_args));

	fsvq = &fs->vqs[queue_id];
	ret = virtio_fs_enqueue_req(fsvq, req, false);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			/*
			 * Virtqueue full. Retry submission from worker
			 * context as we might be holding fc->bg_lock.
			 */
			spin_lock(&fsvq->lock);
			list_add_tail(&req->list, &fsvq->queued_reqs);
			inc_in_flight_req(fsvq);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			spin_unlock(&fsvq->lock);
			return;
		}
		req->out.h.error = ret;
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);

		/* Can't end request in submission context. Use a worker */
		spin_lock(&fsvq->lock);
		list_add_tail(&req->list, &fsvq->end_reqs);
		schedule_delayed_work(&fsvq->dispatch_work, 0);
		spin_unlock(&fsvq->lock);
		return;
	}
}

static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
	.release			= virtio_fs_fiq_release,
};

static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
{
	ctx->rootmode = S_IFDIR;
	ctx->default_permissions = 1;
	ctx->allow_other = 1;
	ctx->max_read = UINT_MAX;
	ctx->blksize = 512;
	ctx->destroy = true;
	ctx->no_control = true;
	ctx->no_force_umount = true;
}

static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *fs = fc->iq.priv;
	struct fuse_fs_context *ctx = fsc->fs_private;
	unsigned int i;
	int err;

	virtio_fs_ctx_set_defaults(ctx);
	mutex_lock(&virtio_fs_mutex);

	/* After holding mutex, make sure virtiofs device is still there.
	 * Though we are holding a reference to it, driver ->remove might
	 * still have cleaned up virtual queues. In that case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
		goto err;
	}

	err = -ENOMEM;
	/* Allocate fuse_dev for hiprio and notification queues */
	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fsvq->fud = fuse_dev_alloc();
		if (!fsvq->fud)
			goto err_free_fuse_devs;
	}

	/* virtiofs allocates and installs its own fuse devices */
	ctx->fudptr = NULL;
	if (ctx->dax_mode != FUSE_DAX_NEVER) {
		if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) {
			err = -EINVAL;
			pr_err("virtio-fs: dax can't be enabled as filesystem device does not support it\n");
			goto err_free_fuse_devs;
		}
		ctx->dax_dev = fs->dax_dev;
	}
	err = fuse_fill_super_common(sb, ctx);
	if (err < 0)
		goto err_free_fuse_devs;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fuse_dev_install(fsvq->fud, fc);
	}

	/* Previous unmount will stop all queues. Start these again */
	virtio_fs_start_all_queues(fs);
	fuse_send_init(fm);
	mutex_unlock(&virtio_fs_mutex);
	return 0;

err_free_fuse_devs:
	virtio_fs_free_devs(fs);
err:
	mutex_unlock(&virtio_fs_mutex);
	return err;
}

static void virtio_fs_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *vfs = fc->iq.priv;
	struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop dax worker. Soon evict_inodes() will be called which
	 * will free all memory ranges belonging to all inodes.
	 */
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_cancel_work(fc);

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_conn_destroy(fm);

	/* fuse_conn_destroy() must have sent destroy. Stop all queues
	 * and drain one more time and free fuse devices. Freeing fuse
	 * devices will drop their reference on fuse_conn and that in
	 * turn will drop its reference on virtio_fs object.
	 */
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
}

static void virtio_kill_sb(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	bool last;

	/* If mount failed, we can still be called without any fc */
	if (sb->s_root) {
		last = fuse_mount_remove(fm);
		if (last)
			virtio_fs_conn_destroy(fm);
	}
	kill_anon_super(sb);
	fuse_mount_destroy(fm);
}

static int virtio_fs_test_super(struct super_block *sb,
				struct fs_context *fsc)
{
	struct fuse_mount *fsc_fm = fsc->s_fs_info;
	struct fuse_mount *sb_fm = get_fuse_mount_super(sb);

	return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
}
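
/*
 * Note: mounts of the same virtiofs device share one super_block.
 * virtio_fs_get_tree() below passes virtio_fs_test_super() to
 * sget_fc(), which compares the virtio_fs instance hanging off
 * fc->iq.priv, so mounting an already-mounted tag reuses the existing
 * super_block instead of filling a new one.
 */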

static int virtio_fs_get_tree(struct fs_context *fsc)
{
	struct virtio_fs *fs;
	struct super_block *sb;
	struct fuse_conn *fc = NULL;
	struct fuse_mount *fm;
	unsigned int virtqueue_size;
	int err = -EIO;

	/* This gets a reference on virtio_fs object. This ptr gets installed
	 * in fc->iq.priv. Once fuse_conn is going away, it calls ->put()
	 * to drop the reference to this object.
	 */
	fs = virtio_fs_find_instance(fsc->source);
	if (!fs) {
		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
		return -EINVAL;
	}

	virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
	if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
		goto out_err;

	err = -ENOMEM;
	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
	if (!fc)
		goto out_err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		goto out_err;

	fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
	fc->release = fuse_free_conn;
	fc->delete_stale = true;
	fc->auto_submounts = true;
	fc->sync_fs = true;

	/* Tell FUSE to split requests that exceed the virtqueue's size */
	fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
				    virtqueue_size - FUSE_HEADER_OVERHEAD);

	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		err = virtio_fs_fill_super(sb, fsc);
		if (err) {
			deactivate_locked_super(sb);
			return err;
		}

		sb->s_flags |= SB_ACTIVE;
	}

	WARN_ON(fsc->root);
	fsc->root = dget(sb->s_root);
	return 0;

out_err:
	kfree(fc);
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
	return err;
}

static const struct fs_context_operations virtio_fs_context_ops = {
	.free		= virtio_fs_free_fsc,
	.parse_param	= virtio_fs_parse_param,
	.get_tree	= virtio_fs_get_tree,
};

static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx;

	if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT)
		return fuse_init_fs_context_submount(fsc);

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	fsc->fs_private = ctx;
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}

static struct file_system_type virtio_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb	= virtio_kill_sb,
};

static int __init virtio_fs_init(void)
{
	int ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	if (ret < 0)
		return ret;

	ret = register_filesystem(&virtio_fs_type);
	if (ret < 0) {
		unregister_virtio_driver(&virtio_fs_driver);
		return ret;
	}

	return 0;
}
module_init(virtio_fs_init);

static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
}
module_exit(virtio_fs_exit);

MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);