// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC     "vDPA Device in Userspace"
#define DRV_LICENSE  "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_umem {
	unsigned long iova;
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue *vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
	struct vduse_umem *umem;
	struct mutex mem_lock;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
		msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
		msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
		msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
		msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}

static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = &dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd, 1);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				     struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i].num_max)
			num_max = dev->vqs[i].num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	/* Initialize the buffer in case of partial copy. */
	memset(buf, 0, len);

	if (offset > dev->config_size)
		return;

	if (len > dev->config_size - offset)
		len = dev->config_size - offset;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
			      unsigned int asid,
			      struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	.get_device_features = vduse_vdpa_get_device_features,
	.set_driver_features = vduse_vdpa_set_driver_features,
	.get_driver_features = vduse_vdpa_get_driver_features,
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.free = vduse_vdpa_free,
};

static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir,
				     attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag,
				      unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
					   (dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
			      struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = &dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd, 1);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i].num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_irq(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_irq(&vq->irq_lock);
}

static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	queue_work(vduse_irq_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	int ret;

	mutex_lock(&dev->mem_lock);
	ret = -ENOENT;
	if (!dev->umem)
		goto unlock;

	ret = -EINVAL;
	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
		goto unlock;

	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(dev->umem->pages,
				    dev->umem->npages, true);
	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
	mmdrop(dev->umem->mm);
	vfree(dev->umem->pages);
	kfree(dev->umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}

static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list, NULL);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}

static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct vduse_iova_domain *domain = dev->domain;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&domain->iotlb_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what the driver wrote here.
		 * The driver is expected to check FEATURE_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index].num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = &dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		index = array_index_nospec(index, dev->vq_num);
		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
		break;
	}
	case VDUSE_IOTLB_REG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_reg_umem(dev, umem.iova,
					 umem.uaddr, umem.size);
		break;
	}
	case VDUSE_IOTLB_DEREG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_dereg_umem(dev, umem.iova,
					   umem.size);
		break;
	}
	case VDUSE_IOTLB_GET_INFO: {
		struct vduse_iova_info info;
		struct vhost_iotlb_map *map;
		struct vduse_iova_domain *domain = dev->domain;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		ret = -EINVAL;
		if (info.start > info.last)
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      info.start, info.last);
		if (map) {
			info.start = map->start;
			info.last = map->last;
			info.capability = 0;
			if (domain->bounce_map && map->start == 0 &&
			    map->last == domain->bounce_size - 1)
				info.capability |= VDUSE_IOVA_CAP_UMEM;
		}
		spin_unlock(&domain->iotlb_lock);
		if (!map)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	mutex_init(&dev->mem_lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	kfree(dev->vqs);
	vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(u64 features)
{
	if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config->features))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int i, ret;
	struct vduse_dev *dev;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
					  VDUSE_BOUNCE_SIZE);
	if (!dev->domain)
		goto err_domain;

	dev->config = config_buf;
	dev->config_size = config->config_size;
	dev->vq_align = config->vq_align;
	dev->vq_num = config->vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		goto err_vqs;

	for (i = 0; i < dev->vq_num; i++) {
		dev->vqs[i].index = i;
		INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i].kick_lock);
		spin_lock_init(&dev->vqs[i].irq_lock);
	}

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}
	__module_get(THIS_MODULE);

	return 0;
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->vqs);
err_vqs:
	vduse_domain_destroy(dev->domain);
err_domain:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}

static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner = THIS_MODULE,
	.open = vduse_open,
	.release = vduse_release,
	.unlocked_ioctl = vduse_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}

static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}

static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create(THIS_MODULE, "vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq) {
		ret = -ENOMEM;
		goto err_wq;
	}

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);