/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "qemu/cutils.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "trace.h"
#include "qapi/error.h"

/*
 * Return one past the end of the section. Be careful with uint64_t
 * conversions!
 */
static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
{
    Int128 llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    return llend;
}

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                                                uint64_t iova_min,
                                                uint64_t iova_max)
{
    Int128 llend;

    if ((!memory_region_is_ram(section->mr) &&
         !memory_region_is_iommu(section->mr)) ||
        memory_region_is_protected(section->mr) ||
        /* vhost-vDPA doesn't allow MMIO to be mapped */
        memory_region_is_ram_device(section->mr)) {
        return true;
    }

    if (section->offset_within_address_space < iova_min) {
        error_report("RAM section out of device range (min=0x%" PRIx64
                     ", addr=0x%" HWADDR_PRIx ")",
                     iova_min, section->offset_within_address_space);
        return true;
    }

    llend = vhost_vdpa_section_end(section);
    if (int128_gt(llend, int128_make64(iova_max))) {
        error_report("RAM section out of device range (max=0x%" PRIx64
                     ", end addr=0x%" PRIx64 ")",
                     iova_max, int128_get64(llend));
        return true;
    }

    return false;
}

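/*
 * Send a VHOST_IOTLB_UPDATE message on the vhost-vdpa device fd, mapping
 * [iova, iova + size) to the process virtual address vaddr.
 */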
static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
                             msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
{
    int fd = v->device_fd;
    struct vhost_msg_v2 msg = {
        .type = v->msg_type,
        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
    };

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
{
    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
        !v->iotlb_batch_begin_sent) {
        vhost_vdpa_listener_begin_batch(v);
    }

    v->iotlb_batch_begin_sent = true;
}

static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    if (!v->iotlb_batch_begin_sent) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }

    v->iotlb_batch_begin_sent = false;
}

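/*
 * Map a RAM section into the device IOTLB. With shadow virtqueues enabled,
 * an IOVA is allocated from the IOVA tree instead of reusing the section's
 * guest physical address directly.
 */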
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));
    if (v->shadow_vqs_enabled) {
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
            .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
        };

        int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
        if (unlikely(r != IOVA_OK)) {
            error_report("Can't allocate a mapping (%d)", r);
            goto fail;
        }

        iova = mem_region.iova;
    }

    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        error_report("vhost vdpa map fail!");
        goto fail;
    }

    return;

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail. At runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;

}

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);

    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (v->shadow_vqs_enabled) {
        const DMAMap *result;
        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
        };

        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
        iova = result->iova;
        vhost_iova_tree_remove(v->iova_tree, &mem_region);
    }
    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
    if (ret) {
        error_report("vhost_vdpa dma unmap error!");
    }

    memory_region_unref(section->mr);
}
/*
 * The IOTLB API used by vhost-vdpa requires incremental updating of the
 * mapping, so we cannot use the generic vhost memory listener, which
 * depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

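/*
 * Issue an ioctl on the vhost-vdpa device fd. Returns the (non-negative)
 * ioctl result on success and -errno on failure, so callers can propagate
 * the error directly.
 */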
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    ret = ioctl(fd, request, arg);
    return ret < 0 ? -errno : ret;
}

static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    trace_vhost_vdpa_add_status(dev, status);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    s |= status;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    if (!(s & status)) {
        return -EIO;
    }

    return 0;
}

static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
{
    int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE,
                              &v->iova_range);
    if (ret != 0) {
        v->iova_range.first = 0;
        v->iova_range.last = UINT64_MAX;
    }

    trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first,
                                    v->iova_range.last);
}

static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    return v->index != 0;
}

static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
                                       uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);
    return ret;
}

static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
                               Error **errp)
{
    g_autoptr(GPtrArray) shadow_vqs = NULL;
    uint64_t dev_features, svq_features;
    int r;
    bool ok;

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    r = vhost_vdpa_get_dev_features(hdev, &dev_features);
    if (r != 0) {
        error_setg_errno(errp, -r, "Can't get vdpa device features");
        return r;
    }

    svq_features = dev_features;
    ok = vhost_svq_valid_features(svq_features, errp);
    if (unlikely(!ok)) {
        return -1;
    }

    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
    for (unsigned n = 0; n < hdev->nvqs; ++n) {
        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);

        if (unlikely(!svq)) {
            error_setg(errp, "Cannot create svq %u", n);
            return -1;
        }
        g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
    }

    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
    return 0;
}

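/*
 * Backend init: set up the vhost-vdpa state, the shadow virtqueues (when
 * enabled) and the usable IOVA range. The ACKNOWLEDGE | DRIVER status bits
 * are only written by the first vhost_dev (index 0) of the device.
 */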
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, opaque);
    int ret;

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot disable discarding of RAM");
        return ret;
    }

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;
    ret = vhost_vdpa_init_svq(dev, v, errp);
    if (ret) {
        goto err;
    }

    vhost_vdpa_get_iova_range(v);

    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;

err:
    ram_block_discard_disable(false);
    return ret;
}

static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
                                            int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;

    n = &v->notifier[queue_index];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }
}

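/*
 * mmap() the per-queue host notifier page exposed by the vhost-vdpa device
 * fd (at page offset queue_index) and hand it to virtio as a host notifier
 * memory region, so guest notifications can go straight to the device.
 */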
static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;
    int fd = v->device_fd;
    void *addr;
    char *name;

    vhost_vdpa_host_notifier_uninit(dev, queue_index);

    n = &v->notifier[queue_index];

    addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
                queue_index * page_size);
    if (addr == MAP_FAILED) {
        goto err;
    }

    name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
                           v, queue_index);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        goto err;
    }
    n->addr = addr;

    return 0;

err:
    return -1;
}

static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
{
    int i;

    for (i = dev->vq_index; i < dev->vq_index + n; i++) {
        vhost_vdpa_host_notifier_uninit(dev, i);
    }
}

static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int i;

    if (v->shadow_vqs_enabled) {
        /* FIXME SVQ is not compatible with host notifiers mr */
        return;
    }

    for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
        if (vhost_vdpa_host_notifier_init(dev, i)) {
            goto err;
        }
    }

    return;

err:
    vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
    return;
}

static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    size_t idx;

    if (!v->shadow_vqs) {
        return;
    }

    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
    }
    g_ptr_array_free(v->shadow_vqs, true);
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    memory_listener_unregister(&v->listener);
    vhost_vdpa_svq_cleanup(dev);

    dev->opaque = NULL;
    ram_block_discard_disable(false);

    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;
        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }
    if (mem->padding) {
        return -EINVAL;
    }

    return 0;
}

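/*
 * With shadow virtqueues enabled, VHOST_F_LOG_ALL is emulated by the SVQ
 * code, so it is recorded in acked_features but never forwarded to the
 * device. FEATURES_OK is set right after the features are acked.
 */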
static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    if (v->shadow_vqs_enabled) {
        if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
            /*
             * QEMU is just trying to enable or disable logging. SVQ handles
             * this separately, so no need to forward this.
             */
            v->acked_features = features;
            return 0;
        }

        v->acked_features = features;

        /* We must not ack _F_LOG if SVQ is enabled */
        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
    }

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}

static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    features &= f;

    if (vhost_vdpa_one_time_request(dev)) {
        r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
        if (r) {
            return -EFAULT;
        }
    }

    dev->backend_cap = features;

    return 0;
}

static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                                    uint32_t *device_id)
{
    int ret;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}

static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
{
    if (!v->shadow_vqs_enabled) {
        return;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        vhost_svq_stop(svq);
    }
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;
    uint8_t status = 0;

    vhost_vdpa_reset_svq(v);

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev, status);
    return ret;
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}

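/* Enable every vring of this vhost_dev with VHOST_VDPA_SET_VRING_ENABLE. */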
static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;
    trace_vhost_vdpa_set_vring_ready(dev);
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}

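/*
 * Config space accessors: a struct vhost_vdpa_config header (offset and
 * length) is allocated with the payload buffer appended, and passed to the
 * VHOST_VDPA_SET_CONFIG / VHOST_VDPA_GET_CONFIG ioctls.
 */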
static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}

static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
                                         struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
                                         struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);

    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);

}

/**
 * Set the shadow virtqueue descriptors to the device
 *
 * @dev: The vhost device model
 * @svq: The shadow virtqueue
 * @idx: The index of the virtqueue in the vhost device
 * @errp: Error
 *
 * Note that this function does not rewind the kick file descriptor if it
 * cannot set the call one.
 */
static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
                                  Error **errp)
{
    struct vhost_vring_file file = {
        .index = dev->vq_index + idx,
    };
    const EventNotifier *event_notifier = &svq->hdev_kick;
    int r;

    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device kick fd");
        return r;
    }

    event_notifier = &svq->hdev_call;
    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_call(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device call fd");
    }

    return r;
}

/**
 * Unmap a SVQ area in the device
 */
static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
                                      const DMAMap *needle)
{
    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
    hwaddr size;
    int r;

    if (unlikely(!result)) {
        error_report("Unable to find SVQ address to unmap");
        return false;
    }

    size = ROUND_UP(result->size, qemu_real_host_page_size());
    r = vhost_vdpa_dma_unmap(v, result->iova, size);
    return r == 0;
}

static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                       const VhostShadowVirtqueue *svq)
{
    DMAMap needle = {};
    struct vhost_vdpa *v = dev->opaque;
    struct vhost_vring_addr svq_addr;
    bool ok;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    needle.translated_addr = svq_addr.desc_user_addr;
    ok = vhost_vdpa_svq_unmap_ring(v, &needle);
    if (unlikely(!ok)) {
        return false;
    }

    needle.translated_addr = svq_addr.used_user_addr;
    return vhost_vdpa_svq_unmap_ring(v, &needle);
}

/**
 * Map the SVQ area in the device
 *
 * @v: Vhost-vdpa device
 * @needle: The area to search iova
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
                                    Error **errp)
{
    int r;

    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
    if (unlikely(r != IOVA_OK)) {
        error_setg(errp, "Cannot allocate iova (%d)", r);
        return false;
    }

    r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
                           (void *)(uintptr_t)needle->translated_addr,
                           needle->perm == IOMMU_RO);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Cannot map region to device");
        vhost_iova_tree_remove(v->iova_tree, needle);
    }

    return r == 0;
}

/**
 * Map the shadow virtqueue rings in the device
 *
 * @dev: The vhost device
 * @svq: The shadow virtqueue
 * @addr: Assigned IOVA addresses
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
                                     const VhostShadowVirtqueue *svq,
                                     struct vhost_vring_addr *addr,
                                     Error **errp)
{
    DMAMap device_region, driver_region;
    struct vhost_vring_addr svq_addr;
    struct vhost_vdpa *v = dev->opaque;
    size_t device_size = vhost_svq_device_area_size(svq);
    size_t driver_size = vhost_svq_driver_area_size(svq);
    size_t avail_offset;
    bool ok;

    ERRP_GUARD();
    vhost_svq_get_vring_addr(svq, &svq_addr);

    driver_region = (DMAMap) {
        .translated_addr = svq_addr.desc_user_addr,
        .size = driver_size - 1,
        .perm = IOMMU_RO,
    };
    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq driver region: ");
        return false;
    }
    addr->desc_user_addr = driver_region.iova;
    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
    addr->avail_user_addr = driver_region.iova + avail_offset;

    device_region = (DMAMap) {
        .translated_addr = svq_addr.used_user_addr,
        .size = device_size - 1,
        .perm = IOMMU_RW,
    };
    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq device region: ");
        vhost_vdpa_svq_unmap_ring(v, &driver_region);
    }
    addr->used_user_addr = device_region.iova;

    return ok;
}

static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                 VhostShadowVirtqueue *svq, unsigned idx,
                                 Error **errp)
{
    uint16_t vq_index = dev->vq_index + idx;
    struct vhost_vring_state s = {
        .index = vq_index,
    };
    int r;

    r = vhost_vdpa_set_dev_vring_base(dev, &s);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set vring base");
        return false;
    }

    r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
    return r == 0;
}

static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    Error *err = NULL;
    unsigned i;

    if (!v->shadow_vqs) {
        return true;
    }

    for (i = 0; i < v->shadow_vqs->len; ++i) {
        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        struct vhost_vring_addr addr = {
            .index = i,
        };
        int r;
        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
        if (unlikely(!ok)) {
            goto err;
        }

        vhost_svq_start(svq, dev->vdev, vq);
        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
        if (unlikely(!ok)) {
            goto err_map;
        }

        /* Override vring GPA set by vhost subsystem */
        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
        if (unlikely(r != 0)) {
            error_setg_errno(&err, -r, "Cannot set device address");
            goto err_set_addr;
        }
    }

    return true;

err_set_addr:
    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));

err_map:
    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));

err:
    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
    for (unsigned j = 0; j < i; ++j) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
        vhost_vdpa_svq_unmap_rings(dev, svq);
        vhost_svq_stop(svq);
    }

    return false;
}

static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (!v->shadow_vqs) {
        return true;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
        if (unlikely(!ok)) {
            return false;
        }
    }

    return true;
}

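/*
 * Per-vhost_dev start/stop: host notifiers and shadow virtqueues are handled
 * for every vhost_dev, while the memory listener, DRIVER_OK and device reset
 * only run once, when the last vhost_dev of the device is started or stopped
 * (dev->vq_index + dev->nvqs == dev->vq_index_end).
 */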
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;
    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
        vhost_vdpa_set_vring_ready(dev);
    } else {
        ok = vhost_vdpa_svqs_stop(dev);
        if (unlikely(!ok)) {
            return -1;
        }
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        memory_listener_register(&v->listener, &address_space_memory);
        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    struct vhost_vdpa *v = dev->opaque;
    if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring addr was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_vring_dev_addr(dev, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring base was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_dev_vring_base(dev, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (v->shadow_vqs_enabled) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
                                                      ring->index);

        /*
         * Setting base as the last used idx, so the destination will see as
         * available all the entries that the device did not use, including
         * the in-flight ones.
         *
         * TODO: This is ok for networking, but other kinds of devices might
         * have problems with these retransmissions.
         */
        ring->num = svq->last_used_idx;
        return 0;
    }

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
    return ret;
}

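/*
 * With shadow virtqueues enabled, the kick and call file descriptors coming
 * from the virtio layer are wired to the shadow virtqueue; otherwise they
 * are forwarded straight to the device.
 */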
static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;

    if (v->shadow_vqs_enabled) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
        vhost_svq_set_svq_kick_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_kick(dev, file);
    }
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        int vdpa_idx = file->index - dev->vq_index;
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

        vhost_svq_set_svq_call_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_call(dev, file);
    }
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret = vhost_vdpa_get_dev_features(dev, features);

    if (ret == 0 && v->shadow_vqs_enabled) {
        /* Add SVQ logging capabilities */
        *features |= BIT_ULL(VHOST_F_LOG_ALL);
    }

    return ret;
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
    .backend_type = VHOST_BACKEND_TYPE_VDPA,
    .vhost_backend_init = vhost_vdpa_init,
    .vhost_backend_cleanup = vhost_vdpa_cleanup,
    .vhost_set_log_base = vhost_vdpa_set_log_base,
    .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
    .vhost_set_vring_num = vhost_vdpa_set_vring_num,
    .vhost_set_vring_base = vhost_vdpa_set_vring_base,
    .vhost_get_vring_base = vhost_vdpa_get_vring_base,
    .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
    .vhost_set_vring_call = vhost_vdpa_set_vring_call,
    .vhost_get_features = vhost_vdpa_get_features,
    .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
    .vhost_set_owner = vhost_vdpa_set_owner,
    .vhost_set_vring_endian = NULL,
    .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
    .vhost_set_mem_table = vhost_vdpa_set_mem_table,
    .vhost_set_features = vhost_vdpa_set_features,
    .vhost_reset_device = vhost_vdpa_reset_device,
    .vhost_get_vq_index = vhost_vdpa_get_vq_index,
    .vhost_get_config = vhost_vdpa_get_config,
    .vhost_set_config = vhost_vdpa_set_config,
    .vhost_requires_shm_log = NULL,
    .vhost_migration_done = NULL,
    .vhost_backend_can_merge = NULL,
    .vhost_net_set_mtu = NULL,
    .vhost_set_iotlb_callback = NULL,
    .vhost_send_device_iotlb_msg = NULL,
    .vhost_dev_start = vhost_vdpa_dev_start,
    .vhost_get_device_id = vhost_vdpa_get_device_id,
    .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
    .vhost_force_iommu = vhost_vdpa_force_iommu,
};