// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	int nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
	struct vdpa_iova_range range;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx, 1);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx, 1);

	return IRQ_HANDLED;
}
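/*
 * Register the virtqueue's call eventfd as an irq bypass producer for the
 * interrupt reported by the parent vDPA device (if any), so that a bypass
 * consumer holding the same eventfd token (e.g. KVM) can route the device
 * interrupt directly instead of going through the eventfd wakeup path.
 */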
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx || irq < 0)
		return;

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
}

static void vhost_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;

	vdpa_reset(vdpa);
	v->in_batch = 0;
}

static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	int nvqs = v->nvqs;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless resetting
	 * the status to 0.
	 */
	if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
		return -EINVAL;

	ops->set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	long size = 0;

	switch (v->virtio_id) {
	case VIRTIO_ID_NET:
		size = sizeof(struct virtio_net_config);
		break;
	}

	if (c->len == 0)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, c->buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	ops->set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx)
		eventfd_ctx_put(v->config_ctx);
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v->vdpa;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx))
		return PTR_ERR(v->config_ctx);

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;

	return 0;
}
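/*
 * Vring ioctls are handled in three steps: vhost-vdpa specific commands
 * (enabling a vring, reading the device's vring state) are served first,
 * then the request is passed to the generic vhost_vring_ioctl(), and
 * finally the resulting vring configuration is propagated to the vDPA
 * device.
 */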
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		vq->last_avail_idx = vq_state.avail_index;
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		vq_state.avail_index = vq->last_avail_idx;
		if (ops->set_vq_state(vdpa, idx, &vq_state))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}
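/*
 * Top-level ioctl dispatcher. VHOST_SET_BACKEND_FEATURES is handled before
 * taking the vhost device mutex; everything else runs under it, falling
 * back to the generic vhost ioctls and then to the per-vring handler.
 */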
static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	mutex_unlock(&d->mutex);
	return r;
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = map->size >> PAGE_SHIFT;
		for (pfn = map->addr >> PAGE_SHIFT;
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
{
	struct vhost_dev *dev = &v->vdev;

	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
	kfree(dev->iotlb);
	dev->iotlb = NULL;
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}
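/*
 * Install a single IOVA->PA translation. The mapping is recorded in the
 * vhost IOTLB and then pushed to whichever back-end the parent device
 * provides: a per-range dma_map() op, a whole-IOTLB set_map() op (deferred
 * until VHOST_IOTLB_BATCH_END while a batch is in flight), or the platform
 * IOMMU domain as a last resort.
 */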
static int vhost_vdpa_map(struct vhost_vdpa *v,
			  u64 iova, u64 size, u64 pa, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
				  pa, perm);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, iova, size, pa, perm);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, dev->iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm));
	}

	if (r)
		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);

	return r;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, iova, size);
	} else if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, dev->iotlb);
	} else {
		iommu_unmap(v->domain, iova, size);
	}
}

static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb_msg *msg)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long locked, lock_limit, pinned, i;
	u64 iova = msg->iova;
	int ret = 0;

	if (msg->iova < v->range.first ||
	    msg->iova + msg->size - 1 > v->range.last)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
	if (!npages)
		return -EINVAL;

	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (msg->perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	mmap_read_lock(dev->mm);

	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	if (locked > lock_limit) {
		ret = -ENOMEM;
		goto out;
	}

	cur_base = msg->uaddr & PAGE_MASK;
	iova &= PAGE_MASK;

	while (npages) {
		pinned = min_t(unsigned long, npages, list_size);
		ret = pin_user_pages(cur_base, pinned,
				     gup_flags, page_list, NULL);
		if (ret != pinned)
			goto out;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < ret; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Map the contiguous chunk of pinned pages */
				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
				if (vhost_vdpa_map(v, iova, csize,
						   map_pfn << PAGE_SHIFT,
						   msg->perm))
					goto out;
				map_pfn = this_pfn;
				iova += csize;
			}

			last_pfn = this_pfn;
		}

		cur_base += ret << PAGE_SHIFT;
		npages -= ret;
	}

	/* Map the remaining chunk */
	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
			     map_pfn << PAGE_SHIFT, msg->perm);
out:
	if (ret) {
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		atomic64_sub(npages, &dev->mm->pinned_vm);
	}
	mmap_read_unlock(dev->mm);
	free_page((unsigned long)page_list);
	return ret;
}
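/*
 * Entry point for IOTLB messages written by userspace: UPDATE pins the
 * backing pages and installs the mapping, INVALIDATE tears it down, and
 * the BATCH_BEGIN/BATCH_END pair lets devices with a set_map() op receive
 * one consolidated update instead of one call per range.
 */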
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_dev_check_owner(dev);
	if (r)
		return r;

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, dev->iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	struct bus_type *bus;
	int ret;

	/* The device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
		return -ENOTSUPP;

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_iova_range *range = &v->range;
	struct iommu_domain_geometry geo;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
	} else if (v->domain &&
		   !iommu_domain_get_attr(v->domain,
					  DOMAIN_ATTR_GEOMETRY, &geo) &&
		   geo.force_aperture) {
		range->first = geo.aperture_start;
		range->last = geo.aperture_end;
	} else {
		range->first = 0;
		range->last = ULLONG_MAX;
	}
}
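/*
 * Only one userspace open of the character device is allowed at a time
 * (enforced with the 'opened' atomic). Each open resets the device and
 * rebuilds the vhost device state: the virtqueues, the IOTLB, the DMA
 * domain and the usable IOVA range.
 */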
static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int nvqs, i, r, opened;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	vhost_vdpa_reset(v);

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	dev->iotlb = vhost_iotlb_alloc(0, 0);
	if (!dev->iotlb) {
		r = -ENOMEM;
		goto err_init_iotlb;
	}

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_init_iotlb;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;

err_init_iotlb:
	vhost_dev_cleanup(&v->vdev);
	kfree(vqs);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	struct vhost_virtqueue *vq;
	int i;

	for (i = 0; i < v->nvqs; i++) {
		vq = &v->vqs[i];
		if (vq->call_ctx.producer.irq)
			irq_bypass_unregister_producer(&vq->call_ctx.producer);
	}
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_iotlb_free(v);
	vhost_vdpa_free_domain(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_clean_irq(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

#ifdef CONFIG_MMU
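/*
 * Doorbell mapping support: vhost_vdpa_mmap() only validates the VMA and
 * installs vm_ops, and the fault handler below remaps the virtqueue's
 * notification page (uncached) into the VMA on first access.
 */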
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only
	 * support the doorbell which sits on the page boundary and
	 * does not share the page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap		= vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl	= compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int r;

	/* Currently, we only accept network devices. */
	if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
		return -ENOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};
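/*
 * Module init/exit: reserve a char device region for up to
 * VHOST_VDPA_DEV_MAX minors and register the vDPA bus driver that creates
 * a /dev/vhost-vdpa-N node per probed device.
 */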
static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");