/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "migration/blocker.h"
#include "qemu/cutils.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "trace.h"
#include "qapi/error.h"

/*
 * Return one past the end of the section. Be careful with uint64_t
 * conversions!
 */
static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
{
    Int128 llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    return llend;
}

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                                                uint64_t iova_min,
                                                uint64_t iova_max)
{
    Int128 llend;

    if ((!memory_region_is_ram(section->mr) &&
         !memory_region_is_iommu(section->mr)) ||
        memory_region_is_protected(section->mr) ||
        /* vhost-vDPA doesn't allow MMIO to be mapped */
        memory_region_is_ram_device(section->mr)) {
        return true;
    }

    if (section->offset_within_address_space < iova_min) {
        error_report("RAM section out of device range (min=0x%" PRIx64
                     ", addr=0x%" HWADDR_PRIx ")",
                     iova_min, section->offset_within_address_space);
        return true;
    }
    /*
     * While using vIOMMU, sometimes the section will be larger than iova_max,
     * but the memory that actually maps is smaller, so move the check to
     * vhost_vdpa_iommu_map_notify(). That function uses the actual size that
     * is mapped to the kernel.
     */

    if (!memory_region_is_iommu(section->mr)) {
        llend = vhost_vdpa_section_end(section);
        if (int128_gt(llend, int128_make64(iova_max))) {
            error_report("RAM section out of device range (max=0x%" PRIx64
                         ", end addr=0x%" PRIx64 ")",
                         iova_max, int128_get64(llend));
            return true;
        }
    }

    return false;
}

/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
                       hwaddr size, void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova,
                             msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm,
                             msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
                         hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
{
    int fd = v->device_fd;
    struct vhost_msg_v2 msg = {
        .type = v->msg_type,
        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
    };

    trace_vhost_vdpa_listener_begin_batch(v, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
{
    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
        !v->iotlb_batch_begin_sent) {
        vhost_vdpa_listener_begin_batch(v);
    }

    v->iotlb_batch_begin_sent = true;
}

static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    if (!v->iotlb_batch_begin_sent) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    trace_vhost_vdpa_listener_commit(v, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }

    v->iotlb_batch_begin_sent = false;
}

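/*
 * Illustrative sketch only (summarizing the helpers above, not extra code):
 * when the backend advertises VHOST_BACKEND_F_IOTLB_BATCH, a listener update
 * produces this message sequence on the vhost-vdpa device fd:
 *
 *     VHOST_IOTLB_BATCH_BEGIN
 *     VHOST_IOTLB_UPDATE / VHOST_IOTLB_INVALIDATE   (one per region change)
 *     VHOST_IOTLB_BATCH_END
 *
 * so a caller inside a listener callback typically does something like
 *
 *     vhost_vdpa_iotlb_batch_begin_once(v);
 *     vhost_vdpa_dma_map(v, asid, iova, size, vaddr, readonly);
 *     // ... more map/unmap calls; BATCH_END is written later from
 *     // vhost_vdpa_listener_commit()
 */
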
static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);

    hwaddr iova = iotlb->iova + iommu->iommu_offset;
    struct vhost_vdpa *v = iommu->dev;
    void *vaddr;
    int ret;
    Int128 llend;

    if (iotlb->target_as != &address_space_memory) {
        error_report("Wrong target AS \"%s\", only system memory is allowed",
                     iotlb->target_as->name ? iotlb->target_as->name : "none");
        return;
    }
    RCU_READ_LOCK_GUARD();
    /* check if RAM section out of device range */
    llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
    if (int128_gt(llend, int128_make64(v->iova_range.last))) {
        error_report("RAM section out of device range (max=0x%" PRIx64
                     ", end addr=0x%" PRIx64 ")",
                     v->iova_range.last, int128_get64(llend));
        return;
    }

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        bool read_only;

        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) {
            return;
        }
        ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                                 iotlb->addr_mask + 1, vaddr, read_only);
        if (ret) {
            error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ", %p) = %d (%m)",
                         v, iova, iotlb->addr_mask + 1, vaddr, ret);
        }
    } else {
        ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                                   iotlb->addr_mask + 1);
        if (ret) {
            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ") = %d (%m)",
                         v, iova, iotlb->addr_mask + 1, ret);
        }
    }
}

static void vhost_vdpa_iommu_region_add(MemoryListener *listener,
                                        MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);

    struct vdpa_iommu *iommu;
    Int128 end;
    int iommu_idx;
    IOMMUMemoryRegion *iommu_mr;
    int ret;

    iommu_mr = IOMMU_MEMORY_REGION(section->mr);

    iommu = g_malloc0(sizeof(*iommu));
    end = int128_add(int128_make64(section->offset_within_region),
                     section->size);
    end = int128_sub(end, int128_one());
    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
                                                   MEMTXATTRS_UNSPECIFIED);
    iommu->iommu_mr = iommu_mr;
    iommu_notifier_init(&iommu->n, vhost_vdpa_iommu_map_notify,
                        IOMMU_NOTIFIER_IOTLB_EVENTS,
                        section->offset_within_region,
                        int128_get64(end),
                        iommu_idx);
    iommu->iommu_offset = section->offset_within_address_space -
                          section->offset_within_region;
    iommu->dev = v;

    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
    if (ret) {
        g_free(iommu);
        return;
    }

    QLIST_INSERT_HEAD(&v->iommu_list, iommu, iommu_next);
    memory_region_iommu_replay(iommu->iommu_mr, &iommu->n);

    return;
}

static void vhost_vdpa_iommu_region_del(MemoryListener *listener,
                                        MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);

    struct vdpa_iommu *iommu;

    QLIST_FOREACH(iommu, &v->iommu_list, iommu_next)
    {
        if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
            iommu->n.start == section->offset_within_region) {
            memory_region_unregister_iommu_notifier(section->mr, &iommu->n);
            QLIST_REMOVE(iommu, iommu_next);
            g_free(iommu);
            break;
        }
    }
}

static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    DMAMap mem_region = {};
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }
    if (memory_region_is_iommu(section->mr)) {
        vhost_vdpa_iommu_region_add(listener, section);
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name,
                      section->offset_within_address_space & ~TARGET_PAGE_MASK,
                      section->offset_within_region & ~TARGET_PAGE_MASK);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));
    if (v->shadow_data) {
        int r;

        mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr;
        mem_region.size = int128_get64(llsize) - 1;
        mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly);

        r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
        if (unlikely(r != IOVA_OK)) {
            error_report("Can't allocate a mapping (%d)", r);
            goto fail;
        }

        iova = mem_region.iova;
    }

    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                             int128_get64(llsize), vaddr, section->readonly);
    if (ret) {
        error_report("vhost vdpa map fail!");
        goto fail_map;
    }

    return;

fail_map:
    if (v->shadow_data) {
        vhost_iova_tree_remove(v->iova_tree, mem_region);
    }

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail. At runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;

}

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }
    if (memory_region_is_iommu(section->mr)) {
        vhost_vdpa_iommu_region_del(listener, section);
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name,
                      section->offset_within_address_space & ~TARGET_PAGE_MASK,
                      section->offset_within_region & ~TARGET_PAGE_MASK);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);

    trace_vhost_vdpa_listener_region_del(v, iova,
        int128_get64(int128_sub(llend, int128_one())));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (v->shadow_data) {
        const DMAMap *result;
        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
                            section->offset_within_region +
                            (iova - section->offset_within_address_space);
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
        };

        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
        if (!result) {
            /* The memory listener never mapped this section */
            return;
        }
        iova = result->iova;
        vhost_iova_tree_remove(v->iova_tree, *result);
    }
    vhost_vdpa_iotlb_batch_begin_once(v);
    /*
     * The unmap ioctl doesn't accept a full 64-bit span, so split it in
     * two halves if needed.
     */
    if (int128_eq(llsize, int128_2_64())) {
        llsize = int128_rshift(llsize, 1);
        ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                                   int128_get64(llsize));

        if (ret) {
            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ") = %d (%m)",
                         v, iova, int128_get64(llsize), ret);
        }
        iova += int128_get64(llsize);
    }
    ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                               int128_get64(llsize));

    if (ret) {
        error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                     "0x%" HWADDR_PRIx ") = %d (%m)",
                     v, iova, int128_get64(llsize), ret);
    }

    memory_region_unref(section->mr);
}

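/*
 * Illustrative note (derived from the two listener callbacks above, not an
 * additional code path): with v->shadow_data the IOVA programmed into the
 * device is allocated from v->iova_tree rather than being the guest physical
 * address, so both paths key the tree by the host virtual address, e.g.
 *
 *     DMAMap needle = {
 *         .translated_addr = (hwaddr)(uintptr_t)vaddr,
 *         .size = int128_get64(llsize) - 1,
 *     };
 *     const DMAMap *map = vhost_iova_tree_find_iova(v->iova_tree, &needle);
 *
 * and the IOVA used for the device map/unmap comes from map->iova.
 */
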
/*
 * IOTLB API is used by vhost-vdpa which requires incremental updating
 * of the mapping. So we can not use generic vhost memory listener which
 * depends on the addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    ret = ioctl(fd, request, arg);
    return ret < 0 ? -errno : ret;
}

static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    trace_vhost_vdpa_add_status(dev, status);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    s |= status;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    if (!(s & status)) {
        return -EIO;
    }

    return 0;
}

int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range)
{
    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);

    return ret < 0 ? -errno : 0;
}

/*
 * This function is for requests that only need to be applied once.
 * Typically such a request occurs at the beginning of operation, before
 * the queues are set up. It should not be used for requests that perform
 * an operation until all queues are set, which would need to check
 * dev->vq_index_end instead.
 */
static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    return v->index == 0;
}

static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
                                       uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);
    return ret;
}

static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
{
    g_autoptr(GPtrArray) shadow_vqs = NULL;

    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
    for (unsigned n = 0; n < hdev->nvqs; ++n) {
        VhostShadowVirtqueue *svq;

        svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque);
        g_ptr_array_add(shadow_vqs, svq);
    }

    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
}

static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, opaque);
    int ret;

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;
    vhost_vdpa_init_svq(dev, v);

    error_propagate(&dev->migration_blocker, v->migration_blocker);
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    /*
     * If dev->shadow_vqs_enabled at initialization that means the device has
     * been started with x-svq=on, so don't block migration
     */
    if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) {
        /* We don't have dev->features yet */
        uint64_t features;
        ret = vhost_vdpa_get_dev_features(dev, &features);
        if (unlikely(ret)) {
            error_setg_errno(errp, -ret, "Could not get device features");
            return ret;
        }
        vhost_svq_valid_features(features, &dev->migration_blocker);
    }

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot disable discarding of RAM");
        return ret;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
                                            int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;

    n = &v->notifier[queue_index];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }
}

static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;
    int fd = v->device_fd;
    void *addr;
    char *name;

    vhost_vdpa_host_notifier_uninit(dev, queue_index);

    n = &v->notifier[queue_index];

    addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
                queue_index * page_size);
    if (addr == MAP_FAILED) {
        goto err;
    }

    name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
                           v, queue_index);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        goto err;
    }
    n->addr = addr;

    return 0;

err:
    return -1;
}

static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
{
    int i;

    /*
     * Pack all the changes to the memory regions in a single
     * transaction to avoid multiple updates of the address space
     * topology.
     */
    memory_region_transaction_begin();

    for (i = dev->vq_index; i < dev->vq_index + n; i++) {
        vhost_vdpa_host_notifier_uninit(dev, i);
    }

    memory_region_transaction_commit();
}

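/*
 * For illustration only (layout assumed from the mmap call above, not an
 * extra guarantee documented here): each virtqueue's doorbell page is
 * exposed by the vhost-vdpa fd at a page-aligned file offset of
 * queue_index * host_page_size, so notifier N is obtained with
 *
 *     mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, v->device_fd,
 *          N * page_size);
 *
 * and torn down again with munmap() in vhost_vdpa_host_notifier_uninit().
 */
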
static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int i;

    if (v->shadow_vqs_enabled) {
        /* FIXME SVQ is not compatible with host notifiers mr */
        return;
    }

    /*
     * Pack all the changes to the memory regions in a single
     * transaction to avoid multiple updates of the address space
     * topology.
     */
    memory_region_transaction_begin();

    for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
        if (vhost_vdpa_host_notifier_init(dev, i)) {
            vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
            break;
        }
    }

    memory_region_transaction_commit();
}

static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    size_t idx;

    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
    }
    g_ptr_array_free(v->shadow_vqs, true);
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    if (vhost_vdpa_first_dev(dev)) {
        ram_block_discard_disable(false);
    }

    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    memory_listener_unregister(&v->listener);
    vhost_vdpa_svq_cleanup(dev);

    dev->opaque = NULL;

    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;
        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }
    if (mem->padding) {
        return -EINVAL;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    if (v->shadow_vqs_enabled) {
        if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
            /*
             * QEMU is just trying to enable or disable logging. SVQ handles
             * this separately, so there is no need to forward it.
             */
            v->acked_features = features;
            return 0;
        }

        v->acked_features = features;

        /* We must not ack _F_LOG if SVQ is enabled */
        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
    }

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}

static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
        0x1ULL << VHOST_BACKEND_F_SUSPEND;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    features &= f;

    if (vhost_vdpa_first_dev(dev)) {
        r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
        if (r) {
            return -EFAULT;
        }
    }

    dev->backend_cap = features;

    return 0;
}

static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                                    uint32_t *device_id)
{
    int ret;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;
    uint8_t status = 0;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev);
    v->suspended = false;
    return ret;
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}

static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;
    trace_vhost_vdpa_set_vring_ready(dev);
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
                                      int fd)
{
    trace_vhost_vdpa_set_config_call(dev, fd);
    return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
}

static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

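/*
 * For illustration only: VHOST_VDPA_SET_CONFIG/GET_CONFIG take a
 * struct vhost_vdpa_config header (off, len) followed by a flexible buf[]
 * member, which is why the helpers above and below allocate
 * offsetof(struct vhost_vdpa_config, buf) plus the payload length, e.g.
 *
 *     cfg = g_malloc(offsetof(struct vhost_vdpa_config, buf) + len);
 *     cfg->off = 0;
 *     cfg->len = len;
 */
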
static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}

static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
                                         struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
                                         struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);

    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);

}

/**
 * Set the shadow virtqueue descriptors to the device
 *
 * @dev: The vhost device model
 * @svq: The shadow virtqueue
 * @idx: The index of the virtqueue in the vhost device
 * @errp: Error
 *
 * Note that this function does not rewind the kick file descriptor if it
 * cannot set the call one.
 */
static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
                                  Error **errp)
{
    struct vhost_vring_file file = {
        .index = dev->vq_index + idx,
    };
    const EventNotifier *event_notifier = &svq->hdev_kick;
    int r;

    r = event_notifier_init(&svq->hdev_kick, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create kick event notifier");
        goto err_init_hdev_kick;
    }

    r = event_notifier_init(&svq->hdev_call, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create call event notifier");
        goto err_init_hdev_call;
    }

    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device kick fd");
        goto err_init_set_dev_fd;
    }

    event_notifier = &svq->hdev_call;
    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_call(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device call fd");
        goto err_init_set_dev_fd;
    }

    return 0;

err_init_set_dev_fd:
    event_notifier_set_handler(&svq->hdev_call, NULL);

err_init_hdev_call:
    event_notifier_cleanup(&svq->hdev_kick);

err_init_hdev_kick:
    return r;
}

/**
 * Unmap a SVQ area in the device
 */
static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr)
{
    const DMAMap needle = {
        .translated_addr = addr,
    };
    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle);
    hwaddr size;
    int r;

    if (unlikely(!result)) {
        error_report("Unable to find SVQ address to unmap");
        return;
    }

    size = ROUND_UP(result->size, qemu_real_host_page_size());
    r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size);
    if (unlikely(r < 0)) {
        error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r);
        return;
    }

    vhost_iova_tree_remove(v->iova_tree, *result);
}

static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                       const VhostShadowVirtqueue *svq)
{
    struct vhost_vdpa *v = dev->opaque;
    struct vhost_vring_addr svq_addr;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr);
}

/**
 * Map the SVQ area in the device
 *
 * @v: Vhost-vdpa device
 * @needle: The area to search iova
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
                                    Error **errp)
{
    int r;

    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
    if (unlikely(r != IOVA_OK)) {
        error_setg(errp, "Cannot allocate iova (%d)", r);
        return false;
    }

    r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova,
                           needle->size + 1,
                           (void *)(uintptr_t)needle->translated_addr,
                           needle->perm == IOMMU_RO);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Cannot map region to device");
        vhost_iova_tree_remove(v->iova_tree, *needle);
    }

    return r == 0;
}

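/*
 * For illustration only (convention taken from vhost_vdpa_svq_map_ring()
 * above): DMAMap.size is stored as the inclusive size minus one, so a ring
 * of `bytes` bytes at host address `va` would be described as
 *
 *     DMAMap region = {
 *         .translated_addr = (hwaddr)(uintptr_t)va,
 *         .size = bytes - 1,
 *         .perm = IOMMU_RW,
 *     };
 *
 * and the device mapping then covers region.size + 1 bytes at region.iova.
 */
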
/**
 * Map the shadow virtqueue rings in the device
 *
 * @dev: The vhost device
 * @svq: The shadow virtqueue
 * @addr: Assigned IOVA addresses
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
                                     const VhostShadowVirtqueue *svq,
                                     struct vhost_vring_addr *addr,
                                     Error **errp)
{
    ERRP_GUARD();
    DMAMap device_region, driver_region;
    struct vhost_vring_addr svq_addr;
    struct vhost_vdpa *v = dev->opaque;
    size_t device_size = vhost_svq_device_area_size(svq);
    size_t driver_size = vhost_svq_driver_area_size(svq);
    size_t avail_offset;
    bool ok;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    driver_region = (DMAMap) {
        .translated_addr = svq_addr.desc_user_addr,
        .size = driver_size - 1,
        .perm = IOMMU_RO,
    };
    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq driver region: ");
        return false;
    }
    addr->desc_user_addr = driver_region.iova;
    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
    addr->avail_user_addr = driver_region.iova + avail_offset;

    device_region = (DMAMap) {
        .translated_addr = svq_addr.used_user_addr,
        .size = device_size - 1,
        .perm = IOMMU_RW,
    };
    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq device region: ");
        vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr);
    }
    addr->used_user_addr = device_region.iova;

    return ok;
}

static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                 VhostShadowVirtqueue *svq, unsigned idx,
                                 Error **errp)
{
    uint16_t vq_index = dev->vq_index + idx;
    struct vhost_vring_state s = {
        .index = vq_index,
    };
    int r;

    r = vhost_vdpa_set_dev_vring_base(dev, &s);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set vring base");
        return false;
    }

    r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
    return r == 0;
}

static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    Error *err = NULL;
    unsigned i;

    if (!v->shadow_vqs_enabled) {
        return true;
    }

    for (i = 0; i < v->shadow_vqs->len; ++i) {
        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        struct vhost_vring_addr addr = {
            .index = dev->vq_index + i,
        };
        int r;
        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
        if (unlikely(!ok)) {
            goto err;
        }

        vhost_svq_start(svq, dev->vdev, vq, v->iova_tree);
        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
        if (unlikely(!ok)) {
            goto err_map;
        }

        /* Override vring GPA set by vhost subsystem */
        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
        if (unlikely(r != 0)) {
            error_setg_errno(&err, -r, "Cannot set device address");
            goto err_set_addr;
        }
    }

    return true;

err_set_addr:
    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));

err_map:
    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));

err:
    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
    for (unsigned j = 0; j < i; ++j) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
        vhost_vdpa_svq_unmap_rings(dev, svq);
        vhost_svq_stop(svq);
    }

    return false;
}

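/*
 * For illustration only (summary of the start path above, inferred from the
 * code rather than stated elsewhere): per shadow virtqueue the order is
 *
 *     vhost_vdpa_svq_setup()          - vring base and kick/call fds
 *     vhost_svq_start()               - start the shadow vring
 *     vhost_vdpa_svq_map_rings()      - allocate IOVAs and map the rings
 *     vhost_vdpa_set_vring_dev_addr() - point the device at those IOVAs
 *
 * and vhost_vdpa_svqs_stop() below undoes the mapping and stops each SVQ on
 * the teardown path.
 */
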
static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (!v->shadow_vqs_enabled) {
        return;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);

        vhost_svq_stop(svq);
        vhost_vdpa_svq_unmap_rings(dev, svq);

        event_notifier_cleanup(&svq->hdev_kick);
        event_notifier_cleanup(&svq->hdev_call);
    }
}

static void vhost_vdpa_suspend(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int r;

    if (!vhost_vdpa_first_dev(dev)) {
        return;
    }

    if (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) {
        trace_vhost_vdpa_suspend(dev);
        r = ioctl(v->device_fd, VHOST_VDPA_SUSPEND);
        if (unlikely(r)) {
            error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno);
        } else {
            v->suspended = true;
            return;
        }
    }

    vhost_vdpa_reset_device(dev);
}

static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;
    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
        vhost_vdpa_set_vring_ready(dev);
    } else {
        vhost_vdpa_suspend(dev);
        vhost_vdpa_svqs_stop(dev);
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        if (vhost_dev_has_iommu(dev) && (v->shadow_vqs_enabled)) {
            error_report("SVQ can not work while IOMMU is enabled, please "
                         "disable IOMMU and try again");
            return -1;
        }
        memory_listener_register(&v->listener, dev->vdev->dma_as);

        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    }

    return 0;
}

static void vhost_vdpa_reset_status(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return;
    }

    vhost_vdpa_reset_device(dev);
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);
    memory_listener_unregister(&v->listener);
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    struct vhost_vdpa *v = dev->opaque;
    if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring addr was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_vring_dev_addr(dev, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring base was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_dev_vring_base(dev, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (v->shadow_vqs_enabled) {
        ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
        return 0;
    }

    if (!v->suspended) {
        /*
         * Cannot trust the value returned by the device, let vhost recover
         * the used idx from the guest.
         */
        return -1;
    }

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
    return ret;
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;

    if (v->shadow_vqs_enabled) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
        vhost_svq_set_svq_kick_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_kick(dev, file);
    }
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;
    VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

    /* Remember last call fd because we can switch to SVQ anytime. */
    vhost_svq_set_svq_call_fd(svq, file->fd);
    if (v->shadow_vqs_enabled) {
        return 0;
    }

    return vhost_vdpa_set_vring_dev_call(dev, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    int ret = vhost_vdpa_get_dev_features(dev, features);

    if (ret == 0) {
        /* Add SVQ logging capabilities */
        *features |= BIT_ULL(VHOST_F_LOG_ALL);
    }

    return ret;
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
        .vhost_set_config_call = vhost_vdpa_set_config_call,
        .vhost_reset_status = vhost_vdpa_reset_status,
};