// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
		(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
		(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	int nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx, 1);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx, 1);

	return IRQ_HANDLED;
}

static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	spin_lock(&vq->call_ctx.ctx_lock);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx || irq < 0) {
		spin_unlock(&vq->call_ctx.ctx_lock);
		return;
	}

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	spin_unlock(&vq->call_ctx.ctx_lock);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	spin_lock(&vq->call_ctx.ctx_lock);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	spin_unlock(&vq->call_ctx.ctx_lock);
}

static void vhost_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;

	vdpa_reset(vdpa);
	v->in_batch = 0;
}

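/*
 * The helpers below back the VHOST_VDPA_* ioctls: they copy arguments
 * from or to userspace and translate each request into the
 * corresponding vdpa_config_ops callback on the underlying vDPA device.
 */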
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	int nvqs = v->nvqs;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	ops->set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	long size = 0;

	switch (v->virtio_id) {
	case VIRTIO_ID_NET:
		size = sizeof(struct virtio_net_config);
		break;
	}

	if (c->len == 0)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, c->buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	ops->set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx)
		eventfd_ctx_put(v->config_ctx);
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v->vdpa;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx))
		return PTR_ERR(v->config_ctx);

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		vq->last_avail_idx = vq_state.avail_index;
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		vq_state.avail_index = vq->last_avail_idx;
		if (ops->set_vq_state(vdpa, idx, &vq_state))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}

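/*
 * Main ioctl entry point.  VHOST_SET_BACKEND_FEATURES is handled before
 * taking the vhost_dev mutex; everything else is dispatched under it.
 * Commands that are not vhost-vdpa specific fall through to the generic
 * vhost_dev_ioctl() handler and then to vhost_vdpa_vring_ioctl().
 */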
static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		/* copy_to_user() returns the uncopied byte count, not an errno */
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		else
			r = 0;
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	mutex_unlock(&d->mutex);
	return r;
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = map->size >> PAGE_SHIFT;
		for (pfn = map->addr >> PAGE_SHIFT;
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
{
	struct vhost_dev *dev = &v->vdev;

	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
	kfree(dev->iotlb);
	dev->iotlb = NULL;
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}

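/*
 * Three mapping backends are supported, checked in this order: a
 * device-specific ->dma_map()/->dma_unmap() pair; a whole-table
 * ->set_map(), replayed on every update or, when IOTLB batching is
 * negotiated, once at batch end; or the platform IOMMU via
 * iommu_map()/iommu_unmap() on the domain allocated in
 * vhost_vdpa_alloc_domain().
 */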
static int vhost_vdpa_map(struct vhost_vdpa *v,
			  u64 iova, u64 size, u64 pa, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
				  pa, perm);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, iova, size, pa, perm);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, dev->iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm));
	}

	return r;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, iova, size);
	} else if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, dev->iotlb);
	} else {
		iommu_unmap(v->domain, iova, size);
	}
}

static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb_msg *msg)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long locked, lock_limit, pinned, i;
	u64 iova = msg->iova;
	int ret = 0;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (msg->perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
	if (!npages) {
		free_page((unsigned long)page_list);
		return -EINVAL;
	}

	mmap_read_lock(dev->mm);

	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	if (locked > lock_limit) {
		ret = -ENOMEM;
		goto out;
	}

	cur_base = msg->uaddr & PAGE_MASK;
	iova &= PAGE_MASK;

	while (npages) {
		pinned = min_t(unsigned long, npages, list_size);
		ret = pin_user_pages(cur_base, pinned,
				     gup_flags, page_list, NULL);
		if (ret != pinned) {
			/* Treat a short or failed pin as an error. */
			if (ret >= 0)
				ret = -ENOMEM;
			goto out;
		}

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < ret; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Map the contiguous chunk pinned so far */
				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
				if (vhost_vdpa_map(v, iova, csize,
						   map_pfn << PAGE_SHIFT,
						   msg->perm)) {
					/* Don't return the pin count as success */
					ret = -EINVAL;
					goto out;
				}
				map_pfn = this_pfn;
				iova += csize;
			}

			last_pfn = this_pfn;
		}

		cur_base += ret << PAGE_SHIFT;
		npages -= ret;
	}

	/* Map the remaining chunk */
	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
			     map_pfn << PAGE_SHIFT, msg->perm);
out:
	if (ret) {
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		atomic64_sub(npages, &dev->mm->pinned_vm);
	}
	mmap_read_unlock(dev->mm);
	free_page((unsigned long)page_list);
	return ret;
}

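/*
 * IOTLB messages arrive via the vhost char device write path.  UPDATE
 * pins the userspace pages backing [uaddr, uaddr + size) and maps them
 * at iova; INVALIDATE tears the range down again.  With
 * VHOST_BACKEND_F_IOTLB_BATCH negotiated, userspace brackets a series
 * of updates with BATCH_BEGIN/BATCH_END so that a ->set_map() device
 * only sees a single table replay per batch.
 */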
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_dev_check_owner(dev);
	if (r)
		return r;

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, dev->iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	struct bus_type *bus;
	int ret;

	/* The device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
		return -ENOTSUPP;

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int nvqs, i, r, opened;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	vhost_vdpa_reset(v);

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	dev->iotlb = vhost_iotlb_alloc(0, 0);
	if (!dev->iotlb) {
		r = -ENOMEM;
		goto err_init_iotlb;
	}

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_init_iotlb;

	filep->private_data = v;

	return 0;

err_init_iotlb:
	vhost_dev_cleanup(&v->vdev);
	/* vhost_dev_cleanup() doesn't free the vqs array */
	kfree(vqs);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	struct vhost_virtqueue *vq;
	int i;

	for (i = 0; i < v->nvqs; i++) {
		vq = &v->vqs[i];
		if (vq->call_ctx.producer.irq)
			irq_bypass_unregister_producer(&vq->call_ctx.producer);
	}
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_iotlb_free(v);
	vhost_vdpa_free_domain(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_clean_irq(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

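/*
 * When the parent device exposes per-virtqueue notification (doorbell)
 * areas via ->get_vq_notification(), userspace can map a doorbell
 * directly and kick the device without a syscall.  The mmap offset
 * selects the virtqueue (vm_pgoff is the queue index), and the mapping
 * must be exactly one page, shared and write-only, e.g.
 * (illustrative only):
 *
 *	doorbell = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
 *			vhost_vdpa_fd, qid * page_size);
 */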
#ifdef CONFIG_MMU
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only
	 * support the doorbell which sits on the page boundary and
	 * does not share the page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap		= vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl	= compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

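/*
 * vDPA bus glue: probe creates one character device (vhost-vdpa-<minor>)
 * per vDPA device, with the minor taken from an IDA.  remove waits, via
 * the open count and the completion, for any open file to be released
 * before dropping the last device reference.
 */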
static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int r;

	/* Currently, we only accept network devices. */
	if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
		return -ENOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");