// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_umem {
	unsigned long iova;
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue *vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
	struct vduse_umem *umem;
	struct mutex mem_lock;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}

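/*
 * Control-plane messaging between the kernel and the userspace daemon:
 * requests are queued on send_list and handed out via read() on the
 * per-device char device; the daemon answers by write()ing a response
 * that is matched back to the pending message by request_id on recv_list.
 */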
static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
			msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
			msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
			msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
			msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}

static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = &dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				u64 desc_area, u64 driver_area,
				u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd, 1);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
					u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i].num_max)
			num_max = dev->vqs[i].num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (offset > dev->config_size ||
	    len > dev->config_size - offset)
		return;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
				unsigned int asid,
				struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	.get_device_features = vduse_vdpa_get_device_features,
	.set_driver_features = vduse_vdpa_set_driver_features,
	.get_driver_features = vduse_vdpa_get_driver_features,
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.free = vduse_vdpa_free,
};

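/*
 * DMA ops installed on the vDPA device (see vduse_dev_init_vdpa()): all
 * mappings are routed through the per-device IOVA domain so that the
 * userspace daemon can reach the buffers, e.g. via the bounce pages or
 * the fd returned by VDUSE_IOTLB_GET_FD.
 */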
static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				unsigned long offset, size_t size,
				enum dma_data_direction dir,
				unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				size_t size, enum dma_data_direction dir,
				unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
					dma_addr_t *dma_addr, gfp_t flag,
					unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
				(dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
					void *vaddr, dma_addr_t dma_addr,
					unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
			struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = &dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd, 1);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i].num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_irq(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_irq(&vq->irq_lock);
}

static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				struct work_struct *irq_work)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	queue_work(vduse_irq_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	int ret;

	mutex_lock(&dev->mem_lock);
	ret = -ENOENT;
	if (!dev->umem)
		goto unlock;

	ret = -EINVAL;
	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
		goto unlock;

	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(dev->umem->pages,
				    dev->umem->npages, true);
	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
	mmdrop(dev->umem->mm);
	vfree(dev->umem->pages);
	kfree(dev->umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}

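/*
 * Register a userspace memory region as the backend of the bounce buffer:
 * the pages are pinned long-term, accounted against RLIMIT_MEMLOCK and
 * then handed to the IOVA domain as user bounce pages.
 */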
static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list, NULL);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}

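/* ioctls issued by the userspace daemon on the per-device char device */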
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct vduse_iova_domain *domain = dev->domain;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&domain->iotlb_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURE_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index].num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = &dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		index = array_index_nospec(index, dev->vq_num);
		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
		break;
	}
	case VDUSE_IOTLB_REG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_reg_umem(dev, umem.iova,
					 umem.uaddr, umem.size);
		break;
	}
	case VDUSE_IOTLB_DEREG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_dereg_umem(dev, umem.iova,
					   umem.size);
		break;
	}
	case VDUSE_IOTLB_GET_INFO: {
		struct vduse_iova_info info;
		struct vhost_iotlb_map *map;
		struct vduse_iova_domain *domain = dev->domain;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		ret = -EINVAL;
		if (info.start > info.last)
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      info.start, info.last);
		if (map) {
			info.start = map->start;
			info.last = map->last;
			info.capability = 0;
			if (domain->bounce_map && map->start == 0 &&
			    map->last == domain->bounce_size - 1)
				info.capability |= VDUSE_IOVA_CAP_UMEM;
		}
		spin_unlock(&domain->iotlb_lock);
		if (!map)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	mutex_init(&dev->mem_lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	kfree(dev->vqs);
	vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(u64 features)
{
	if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config->features))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

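/*
 * Allocate a VDUSE device, its IOVA domain and virtqueues, and expose it
 * as /dev/vduse/$NAME for the userspace daemon to connect to.
 */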
static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int i, ret;
	struct vduse_dev *dev;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
					  VDUSE_BOUNCE_SIZE);
	if (!dev->domain)
		goto err_domain;

	dev->config = config_buf;
	dev->config_size = config->config_size;
	dev->vq_align = config->vq_align;
	dev->vq_num = config->vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		goto err_vqs;

	for (i = 0; i < dev->vq_num; i++) {
		dev->vqs[i].index = i;
		INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i].kick_lock);
		spin_lock_init(&dev->vqs[i].irq_lock);
	}

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}
	__module_get(THIS_MODULE);

	return 0;
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->vqs);
err_vqs:
	vduse_domain_destroy(dev->domain);
err_domain:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}

static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner = THIS_MODULE,
	.open = vduse_open,
	.release = vduse_release,
	.unlocked_ioctl = vduse_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static char *vduse_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}

static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}

static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create(THIS_MODULE, "vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq) {
		ret = -ENOMEM;
		goto err_wq;
	}

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);