// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and thanks to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	int nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx, 1);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx, 1);

	return IRQ_HANDLED;
}

static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	spin_lock(&vq->call_ctx.ctx_lock);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx || irq < 0) {
		spin_unlock(&vq->call_ctx.ctx_lock);
		return;
	}

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	spin_unlock(&vq->call_ctx.ctx_lock);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	spin_lock(&vq->call_ctx.ctx_lock);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	spin_unlock(&vq->call_ctx.ctx_lock);
}

static void vhost_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;

	vdpa_reset(vdpa);
	v->in_batch = 0;
}

static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	int nvqs = v->nvqs;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	ops->set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	long size = 0;

	switch (v->virtio_id) {
	case VIRTIO_ID_NET:
		size = sizeof(struct virtio_net_config);
		break;
	}

	if (c->len == 0)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, c->buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	ops->set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

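/*
 * Feature negotiation as seen from userspace: read the device features
 * with VHOST_GET_FEATURES, then write the acked subset back with
 * VHOST_SET_FEATURES before setting VIRTIO_CONFIG_S_FEATURES_OK via
 * VHOST_VDPA_SET_STATUS.  Once FEATURES_OK is set, the handler below
 * refuses further changes with -EBUSY.
 */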
static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx)
		eventfd_ctx_put(v->config_ctx);
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v->vdpa;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx))
		return PTR_ERR(v->config_ctx);

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		vq->last_avail_idx = vq_state.avail_index;
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		vq_state.avail_index = vq->last_avail_idx;
		if (ops->set_vq_state(vdpa, idx, &vq_state))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}

static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		else
			r = 0;
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	mutex_unlock(&d->mutex);
	return r;
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = map->size >> PAGE_SHIFT;
		for (pfn = map->addr >> PAGE_SHIFT;
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
{
	struct vhost_dev *dev = &v->vdev;

	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
	kfree(dev->iotlb);
	dev->iotlb = NULL;
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}

static int vhost_vdpa_map(struct vhost_vdpa *v,
			  u64 iova, u64 size, u64 pa, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
				  pa, perm);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, iova, size, pa, perm);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, dev->iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm));
	}

	if (r)
		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);

	return r;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, iova, size);
	} else if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, dev->iotlb);
	} else {
		iommu_unmap(v->domain, iova, size);
	}
}

static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb_msg *msg)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct page **page_list;
	struct vm_area_struct **vmas;
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long map_pfn, last_pfn = 0;
	unsigned long npages, lock_limit;
	unsigned long i, nmap = 0;
	u64 iova = msg->iova;
	long pinned;
	int ret = 0;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	if (msg->perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
	if (!npages)
		return -EINVAL;

	page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
	vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
			      GFP_KERNEL);
	if (!page_list || !vmas) {
		ret = -ENOMEM;
		goto free;
	}

	mmap_read_lock(dev->mm);

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
				page_list, vmas);
	if (npages != pinned) {
		if (pinned < 0) {
			ret = pinned;
		} else {
			unpin_user_pages(page_list, pinned);
			ret = -ENOMEM;
		}
		goto unlock;
	}

	iova &= PAGE_MASK;
	map_pfn = page_to_pfn(page_list[0]);

	/*
	 * One extra iteration so the last contiguous chunk is mapped
	 * inside the loop, avoiding a separate vdpa_map() call after it.
	 */
	for (i = 0; i <= npages; i++) {
		unsigned long this_pfn;
		u64 csize;

		/* The last chunk may have no valid PFN next to it */
		this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;

		if (last_pfn && (this_pfn == -1UL ||
				 this_pfn != last_pfn + 1)) {
			/* Map a contiguous chunk of memory */
			csize = last_pfn - map_pfn + 1;
			ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
					     map_pfn << PAGE_SHIFT,
					     msg->perm);
			if (ret) {
				/*
				 * Unpin the chunks still in flight, i.e.
				 * those for which no corresponding
				 * vdpa_map() call has been made yet.  The
				 * vdpa_unmap() in the failure path, on the
				 * other hand, unpins and accounts for the
				 * pages it covers on its own.  This
				 * asymmetrical accounting keeps the common
				 * case cheap by pinning all pages up front;
				 * the call above is the only callsite of
				 * vdpa_map().
				 */
				unpin_user_pages(&page_list[nmap],
						 npages - nmap);
				goto out;
			}
			atomic64_add(csize, &dev->mm->pinned_vm);
			nmap += csize;
			iova += csize << PAGE_SHIFT;
			map_pfn = this_pfn;
		}
		last_pfn = this_pfn;
	}

	WARN_ON(nmap != npages);
out:
	if (ret)
		vhost_vdpa_unmap(v, msg->iova, msg->size);
unlock:
	mmap_read_unlock(dev->mm);
free:
	kvfree(vmas);
	kvfree(page_list);
	return ret;
}

static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_dev_check_owner(dev);
	if (r)
		return r;

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, dev->iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	struct bus_type *bus;
	int ret;

	/* The device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
		return -ENOTSUPP;

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int nvqs, i, r, opened;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	vhost_vdpa_reset(v);

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	dev->iotlb = vhost_iotlb_alloc(0, 0);
	if (!dev->iotlb) {
		r = -ENOMEM;
		goto err_init_iotlb;
	}

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_init_iotlb;

	filep->private_data = v;

	return 0;

err_init_iotlb:
	vhost_dev_cleanup(&v->vdev);
	kfree(vqs);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	struct vhost_virtqueue *vq;
	int i;

	for (i = 0; i < v->nvqs; i++) {
		vq = &v->vqs[i];
		if (vq->call_ctx.producer.irq)
			irq_bypass_unregister_producer(&vq->call_ctx.producer);
	}
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_iotlb_free(v);
	vhost_vdpa_free_domain(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_clean_irq(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

#ifdef CONFIG_MMU
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only
	 * support a doorbell that sits on a page boundary and
	 * does not share its page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap		= vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl	= compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int r;

	/* Currently, we only accept network devices. */
	if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
		return -ENOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
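
/*
 * Illustrative only, not part of the driver: a rough userspace sketch of
 * the bring-up ioctl sequence this backend expects, assuming the chardev
 * created in vhost_vdpa_probe() shows up as /dev/vhost-vdpa-0 (the actual
 * path depends on udev).  The helper name vdpa_bringup() is made up for
 * illustration and all error handling is elided; vring setup
 * (VHOST_SET_VRING_*), doorbell mmap() and IOTLB updates written through
 * write() would follow the same pattern.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/vhost.h>
 *	#include <linux/virtio_config.h>
 *
 *	int vdpa_bringup(void)
 *	{
 *		int fd = open("/dev/vhost-vdpa-0", O_RDWR);
 *		uint32_t device_id;
 *		uint64_t features;
 *		uint16_t vq_num;
 *		uint8_t status;
 *
 *		ioctl(fd, VHOST_SET_OWNER);
 *
 *		// Identify the device and negotiate features before
 *		// FEATURES_OK; VHOST_SET_FEATURES fails with -EBUSY after.
 *		ioctl(fd, VHOST_VDPA_GET_DEVICE_ID, &device_id);
 *		ioctl(fd, VHOST_GET_FEATURES, &features);
 *		ioctl(fd, VHOST_SET_FEATURES, &features);
 *
 *		// Maximum queue size supported by the parent device.
 *		ioctl(fd, VHOST_VDPA_GET_VRING_NUM, &vq_num);
 *
 *		status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER |
 *			 VIRTIO_CONFIG_S_FEATURES_OK;
 *		ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
 *
 *		// ... VHOST_SET_VRING_NUM/BASE/ADDR/KICK/CALL and
 *		// VHOST_VDPA_SET_VRING_ENABLE per queue go here ...
 *
 *		status |= VIRTIO_CONFIG_S_DRIVER_OK;
 *		ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
 *		return fd;
 *	}
 */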