// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue *vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}
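/*
 * Device requests queued on ->send_list are handed to the userspace daemon
 * via read() on the /dev/vduse/$NAME fd, parked on ->recv_list while a reply
 * is outstanding, and completed once the daemon write()s back a response
 * carrying the same request_id.
 */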
static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as broken when the request times out */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}
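/*
 * The vdpa config ops below that need an answer from (or an action by) the
 * userspace daemon are built on vduse_dev_msg_sync(): the op blocks until
 * the daemon replies or the per-device msg_timeout expires.
 */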
static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
			msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
			msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
			msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
			msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}
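/*
 * Userspace side of the message channel above, as an illustrative sketch
 * (not part of this driver): the daemon read()s a struct vduse_dev_request,
 * services it and write()s back a struct vduse_dev_response carrying the
 * matching request_id, e.g.
 *
 *	struct vduse_dev_request req;
 *	struct vduse_dev_response resp = { 0 };
 *
 *	if (read(dev_fd, &req, sizeof(req)) == sizeof(req)) {
 *		resp.request_id = req.request_id;
 *		resp.result = VDUSE_REQ_RESULT_OK;
 *		write(dev_fd, &resp, sizeof(resp));
 *	}
 */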
static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}

static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = &dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}
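/*
 * Kicks are delivered through the per-virtqueue eventfd when one is
 * registered; otherwise the kick is latched in vq->kicked and replayed by
 * vduse_kickfd_setup() once userspace installs the eventfd.
 */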
static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd, 1);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				     struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i].num_max)
			num_max = dev->vqs[i].num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}
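/*
 * The status change is forwarded to userspace first; dev->status is only
 * updated once the daemon acknowledges the VDUSE_SET_STATUS request.
 */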
static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	/* Reject reads that start or run past the end of the config space */
	if (offset > dev->config_size ||
	    len > dev->config_size - offset)
		return;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
			      struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	.get_features = vduse_vdpa_get_features,
	.set_features = vduse_vdpa_set_features,
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.free = vduse_vdpa_free,
};
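/*
 * VDUSE installs its own dma_map_ops on the vDPA device so that DMA issued
 * by in-kernel virtio drivers goes through the per-device IOVA domain,
 * where it is bounced/remapped into memory the userspace daemon can reach
 * via VDUSE_IOTLB_GET_FD.
 */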
static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag,
				      unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
				(dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
			      struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = &dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd, 1);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i].num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_irq(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_irq(&vq->irq_lock);
}
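/*
 * ioctl interface of the per-device /dev/vduse/$NAME fd. For example, the
 * daemon would signal a used-ring interrupt (illustrative snippet only)
 * with:
 *
 *	__u32 index = vq_index;
 *
 *	ioctl(dev_fd, VDUSE_VQ_INJECT_IRQ, &index);
 */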
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct vduse_iova_domain *domain = dev->domain;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&domain->iotlb_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURES_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = 0;
		queue_work(vduse_irq_wq, &dev->inject);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index].num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = &dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		ret = 0;
		index = array_index_nospec(index, dev->vq_num);
		queue_work(vduse_irq_wq, &dev->vqs[index].inject);
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}
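/*
 * Only one daemon may have the device node open at a time (dev->connected);
 * on release, requests still awaiting a reply are pushed back to the send
 * queue so that a reconnecting daemon can pick them up again.
 */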
static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	kfree(dev->vqs);
	vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(u64 features)
{
	if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config->features))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int i, ret;
	struct vduse_dev *dev;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
					  VDUSE_BOUNCE_SIZE);
	if (!dev->domain)
		goto err_domain;

	dev->config = config_buf;
	dev->config_size = config->config_size;
	dev->vq_align = config->vq_align;
	dev->vq_num = config->vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		goto err_vqs;

	for (i = 0; i < dev->vq_num; i++) {
		dev->vqs[i].index = i;
		INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i].kick_lock);
		spin_lock_init(&dev->vqs[i].irq_lock);
	}

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create(vduse_class, NULL,
				 MKDEV(MAJOR(vduse_major), dev->minor),
				 dev, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}
	__module_get(THIS_MODULE);

	return 0;
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->vqs);
err_vqs:
	vduse_domain_destroy(dev->domain);
err_domain:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	kvfree(config_buf);
	return ret;
}
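/*
 * Control-node sketch from the daemon side (illustrative, not part of this
 * driver): devices are created and destroyed through /dev/vduse/control,
 * which is handled by vduse_ioctl() below, e.g.
 *
 *	int ctrl_fd = open("/dev/vduse/control", O_RDWR);
 *
 *	// conf points at a struct vduse_dev_config followed by
 *	// conf->config_size bytes of initial config space payload
 *	ioctl(ctrl_fd, VDUSE_CREATE_DEV, conf);
 */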
static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (!vduse_validate_config(&config))
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner = THIS_MODULE,
	.open = vduse_open,
	.release = vduse_release,
	.unlocked_ioctl = vduse_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static char *vduse_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

static void vduse_mgmtdev_release(struct device *dev)
{
}

static struct device vduse_mgmtdev = {
	.init_name = "vduse",
	.release = vduse_mgmtdev_release,
};

static struct vdpa_mgmt_dev mgmt_dev;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &mgmt_dev;

	return 0;
}
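/*
 * Management callbacks: a VDUSE device only shows up on the vDPA bus once
 * userspace requests it through the vdpa management interface (e.g. the
 * iproute2 "vdpa dev add ... mgmtdev vduse" command), and only after all of
 * its virtqueues have been configured via VDUSE_VQ_SETUP.
 */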
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vduse_mgmtdev,
	.id_table = id_table,
	.ops = &vdpa_dev_mgmtdev_ops,
};

static int vduse_mgmtdev_init(void)
{
	int ret;

	ret = device_register(&vduse_mgmtdev);
	if (ret)
		return ret;

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto err;

	return 0;
err:
	device_unregister(&vduse_mgmtdev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vduse_mgmtdev);
}

static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create(THIS_MODULE, "vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;
	vduse_class->dev_groups = vduse_dev_groups;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq) {
		ret = -ENOMEM;
		goto err_wq;
	}

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);