// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and thanks to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	int nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx, 1);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx, 1);

	return IRQ_HANDLED;
}

/*
 * Bind the virtqueue interrupt to its call eventfd via irq bypass so
 * the device interrupt can be delivered to the consumer directly.
 */
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	spin_lock(&vq->call_ctx.ctx_lock);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx || irq < 0) {
		spin_unlock(&vq->call_ctx.ctx_lock);
		return;
	}

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	spin_unlock(&vq->call_ctx.ctx_lock);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	spin_lock(&vq->call_ctx.ctx_lock);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	spin_unlock(&vq->call_ctx.ctx_lock);
}

static void vhost_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;

	vdpa_reset(vdpa);
	v->in_batch = 0;
}

static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	int nvqs = v->nvqs;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
	if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
		return -EINVAL;

	ops->set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	long size = 0;

	switch (v->virtio_id) {
	case VIRTIO_ID_NET:
		size = sizeof(struct virtio_net_config);
		break;
	}

	if (c->len == 0)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, c->buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	ops->set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx)
		eventfd_ctx_put(v->config_ctx);
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v->vdpa;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx))
		return PTR_ERR(v->config_ctx);

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u64 __user *featurep = argp;
	u64 features;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		vq->last_avail_idx = vq_state.avail_index;
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (copy_to_user(featurep, &features, sizeof(features)))
			return -EFAULT;
		return 0;
	case VHOST_SET_BACKEND_FEATURES:
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		vq_state.avail_index = vq->last_avail_idx;
		if (ops->set_vq_state(vdpa, idx, &vq_state))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}

static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	long r;

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	mutex_unlock(&d->mutex);
	return r;
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = map->size >> PAGE_SHIFT;
		for (pfn = map->addr >> PAGE_SHIFT;
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
{
	struct vhost_dev *dev = &v->vdev;

	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
	kfree(dev->iotlb);
	dev->iotlb = NULL;
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}

/*
 * Record the mapping in the vhost IOTLB and mirror it to the device
 * (via dma_map/set_map) or to the IOMMU domain.
 */
static int vhost_vdpa_map(struct vhost_vdpa *v,
			  u64 iova, u64 size, u64 pa, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
				  pa, perm);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, iova, size, pa, perm);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, dev->iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm));
	}

	return r;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, iova, size);
	} else if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, dev->iotlb);
	} else {
		iommu_unmap(v->domain, iova, size);
	}
}

/*
 * Pin the userspace pages backing an IOTLB update and map them,
 * coalescing physically contiguous ranges into single mappings.
 */
static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb_msg *msg)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long locked, lock_limit, pinned, i;
	u64 iova = msg->iova;
	int ret = 0;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (msg->perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
	if (!npages) {
		free_page((unsigned long)page_list);
		return -EINVAL;
	}

	mmap_read_lock(dev->mm);

	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	if (locked > lock_limit) {
		ret = -ENOMEM;
		goto out;
	}

	cur_base = msg->uaddr & PAGE_MASK;
	iova &= PAGE_MASK;

	while (npages) {
		pinned = min_t(unsigned long, npages, list_size);
		ret = pin_user_pages(cur_base, pinned,
				     gup_flags, page_list, NULL);
		if (ret != pinned)
			goto out;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < ret; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Pin a contiguous chunk of memory */
				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
				if (vhost_vdpa_map(v, iova, csize,
						   map_pfn << PAGE_SHIFT,
						   msg->perm))
					goto out;
				map_pfn = this_pfn;
				iova += csize;
			}

			last_pfn = this_pfn;
		}

		cur_base += ret << PAGE_SHIFT;
		npages -= ret;
	}

	/* Pin the remaining chunk */
	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
			     map_pfn << PAGE_SHIFT, msg->perm);
out:
	if (ret) {
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		atomic64_sub(npages, &dev->mm->pinned_vm);
	}
	mmap_read_unlock(dev->mm);
	free_page((unsigned long)page_list);
	return ret;
}

static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_dev_check_owner(dev);
	if (r)
		return r;

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, dev->iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	struct bus_type *bus;
	int ret;

	/* Device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
		return -ENOTSUPP;

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int nvqs, i, r, opened;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	vhost_vdpa_reset(v);

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	dev->iotlb = vhost_iotlb_alloc(0, 0);
	if (!dev->iotlb) {
		r = -ENOMEM;
		goto err_init_iotlb;
	}

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_init_iotlb;

	filep->private_data = v;

	return 0;

err_init_iotlb:
	vhost_dev_cleanup(&v->vdev);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	struct vhost_virtqueue *vq;
	int i;

	for (i = 0; i < v->nvqs; i++) {
		vq = &v->vqs[i];
		if (vq->call_ctx.producer.irq)
			irq_bypass_unregister_producer(&vq->call_ctx.producer);
	}
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_iotlb_free(v);
	vhost_vdpa_free_domain(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_clean_irq(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

#ifdef CONFIG_MMU
/* Lazily map the virtqueue doorbell page into userspace on first access. */
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only
	 * support a doorbell that sits on a page boundary and
	 * does not share the page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap		= vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl	= compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int r;

	/* Currently, we only accept network devices. */
	if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
		return -ENOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");