1 /* 2 * vhost-vdpa 3 * 4 * Copyright(c) 2017-2018 Intel Corporation. 5 * Copyright(c) 2020 Red Hat, Inc. 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or later. 8 * See the COPYING file in the top-level directory. 9 * 10 */ 11 12 #include "qemu/osdep.h" 13 #include <linux/vhost.h> 14 #include <linux/vfio.h> 15 #include <sys/eventfd.h> 16 #include <sys/ioctl.h> 17 #include "hw/virtio/vhost.h" 18 #include "hw/virtio/vhost-backend.h" 19 #include "hw/virtio/virtio-net.h" 20 #include "hw/virtio/vhost-vdpa.h" 21 #include "qemu/main-loop.h" 22 #include "cpu.h" 23 #include "trace.h" 24 #include "qemu-common.h" 25 26 static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section) 27 { 28 return (!memory_region_is_ram(section->mr) && 29 !memory_region_is_iommu(section->mr)) || 30 /* 31 * Sizing an enabled 64-bit BAR can cause spurious mappings to 32 * addresses in the upper part of the 64-bit address space. These 33 * are never accessed by the CPU and beyond the address width of 34 * some IOMMU hardware. TODO: VDPA should tell us the IOMMU width. 35 */ 36 section->offset_within_address_space & (1ULL << 63); 37 } 38 39 static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, 40 void *vaddr, bool readonly) 41 { 42 struct vhost_msg_v2 msg = {}; 43 int fd = v->device_fd; 44 int ret = 0; 45 46 msg.type = v->msg_type; 47 msg.iotlb.iova = iova; 48 msg.iotlb.size = size; 49 msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr; 50 msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; 51 msg.iotlb.type = VHOST_IOTLB_UPDATE; 52 53 trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size, 54 msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); 55 56 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 57 error_report("failed to write, fd=%d, errno=%d (%s)", 58 fd, errno, strerror(errno)); 59 return -EIO ; 60 } 61 62 return ret; 63 } 64 65 static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, 66 hwaddr size) 67 { 68 struct vhost_msg_v2 msg = {}; 69 int fd = v->device_fd; 70 int ret = 0; 71 72 msg.type = v->msg_type; 73 msg.iotlb.iova = iova; 74 msg.iotlb.size = size; 75 msg.iotlb.type = VHOST_IOTLB_INVALIDATE; 76 77 trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova, 78 msg.iotlb.size, msg.iotlb.type); 79 80 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 81 error_report("failed to write, fd=%d, errno=%d (%s)", 82 fd, errno, strerror(errno)); 83 return -EIO ; 84 } 85 86 return ret; 87 } 88 89 static void vhost_vdpa_listener_begin(MemoryListener *listener) 90 { 91 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 92 struct vhost_dev *dev = v->dev; 93 struct vhost_msg_v2 msg; 94 int fd = v->device_fd; 95 96 if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { 97 return; 98 } 99 100 msg.type = v->msg_type; 101 msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN; 102 103 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 104 error_report("failed to write, fd=%d, errno=%d (%s)", 105 fd, errno, strerror(errno)); 106 } 107 } 108 109 static void vhost_vdpa_listener_commit(MemoryListener *listener) 110 { 111 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 112 struct vhost_dev *dev = v->dev; 113 struct vhost_msg_v2 msg; 114 int fd = v->device_fd; 115 116 if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { 117 return; 118 } 119 120 msg.type = v->msg_type; 121 msg.iotlb.type = VHOST_IOTLB_BATCH_END; 122 123 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 124 error_report("failed to write, fd=%d, errno=%d (%s)", 125 fd, errno, strerror(errno)); 126 } 127 } 128 129 static void vhost_vdpa_listener_region_add(MemoryListener *listener, 130 MemoryRegionSection *section) 131 { 132 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 133 hwaddr iova; 134 Int128 llend, llsize; 135 void *vaddr; 136 int ret; 137 138 if (vhost_vdpa_listener_skipped_section(section)) { 139 return; 140 } 141 142 if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != 143 (section->offset_within_region & ~TARGET_PAGE_MASK))) { 144 error_report("%s received unaligned region", __func__); 145 return; 146 } 147 148 iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); 149 llend = int128_make64(section->offset_within_address_space); 150 llend = int128_add(llend, section->size); 151 llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); 152 153 if (int128_ge(int128_make64(iova), llend)) { 154 return; 155 } 156 157 memory_region_ref(section->mr); 158 159 /* Here we assume that memory_region_is_ram(section->mr)==true */ 160 161 vaddr = memory_region_get_ram_ptr(section->mr) + 162 section->offset_within_region + 163 (iova - section->offset_within_address_space); 164 165 trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend), 166 vaddr, section->readonly); 167 168 llsize = int128_sub(llend, int128_make64(iova)); 169 170 ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), 171 vaddr, section->readonly); 172 if (ret) { 173 error_report("vhost vdpa map fail!"); 174 if (memory_region_is_ram_device(section->mr)) { 175 /* Allow unexpected mappings not to be fatal for RAM devices */ 176 error_report("map ram fail!"); 177 return ; 178 } 179 goto fail; 180 } 181 182 return; 183 184 fail: 185 if (memory_region_is_ram_device(section->mr)) { 186 error_report("failed to vdpa_dma_map. pci p2p may not work"); 187 return; 188 189 } 190 /* 191 * On the initfn path, store the first error in the container so we 192 * can gracefully fail. Runtime, there's not much we can do other 193 * than throw a hardware error. 194 */ 195 error_report("vhost-vdpa: DMA mapping failed, unable to continue"); 196 return; 197 198 } 199 200 static void vhost_vdpa_listener_region_del(MemoryListener *listener, 201 MemoryRegionSection *section) 202 { 203 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 204 hwaddr iova; 205 Int128 llend, llsize; 206 int ret; 207 208 if (vhost_vdpa_listener_skipped_section(section)) { 209 return; 210 } 211 212 if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != 213 (section->offset_within_region & ~TARGET_PAGE_MASK))) { 214 error_report("%s received unaligned region", __func__); 215 return; 216 } 217 218 iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); 219 llend = int128_make64(section->offset_within_address_space); 220 llend = int128_add(llend, section->size); 221 llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); 222 223 trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend)); 224 225 if (int128_ge(int128_make64(iova), llend)) { 226 return; 227 } 228 229 llsize = int128_sub(llend, int128_make64(iova)); 230 231 ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); 232 if (ret) { 233 error_report("vhost_vdpa dma unmap error!"); 234 } 235 236 memory_region_unref(section->mr); 237 } 238 /* 239 * IOTLB API is used by vhost-vpda which requires incremental updating 240 * of the mapping. So we can not use generic vhost memory listener which 241 * depends on the addnop(). 242 */ 243 static const MemoryListener vhost_vdpa_memory_listener = { 244 .begin = vhost_vdpa_listener_begin, 245 .commit = vhost_vdpa_listener_commit, 246 .region_add = vhost_vdpa_listener_region_add, 247 .region_del = vhost_vdpa_listener_region_del, 248 }; 249 250 static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, 251 void *arg) 252 { 253 struct vhost_vdpa *v = dev->opaque; 254 int fd = v->device_fd; 255 256 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 257 258 return ioctl(fd, request, arg); 259 } 260 261 static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) 262 { 263 uint8_t s; 264 265 trace_vhost_vdpa_add_status(dev, status); 266 if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) { 267 return; 268 } 269 270 s |= status; 271 272 vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s); 273 } 274 275 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque) 276 { 277 struct vhost_vdpa *v; 278 uint64_t features; 279 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 280 trace_vhost_vdpa_init(dev, opaque); 281 282 v = opaque; 283 v->dev = dev; 284 dev->opaque = opaque ; 285 vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features); 286 dev->backend_features = features; 287 v->listener = vhost_vdpa_memory_listener; 288 v->msg_type = VHOST_IOTLB_MSG_V2; 289 290 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 291 VIRTIO_CONFIG_S_DRIVER); 292 293 return 0; 294 } 295 296 static int vhost_vdpa_cleanup(struct vhost_dev *dev) 297 { 298 struct vhost_vdpa *v; 299 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 300 v = dev->opaque; 301 trace_vhost_vdpa_cleanup(dev, v); 302 memory_listener_unregister(&v->listener); 303 304 dev->opaque = NULL; 305 return 0; 306 } 307 308 static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) 309 { 310 trace_vhost_vdpa_memslots_limit(dev, INT_MAX); 311 return INT_MAX; 312 } 313 314 static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, 315 struct vhost_memory *mem) 316 { 317 trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding); 318 if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) && 319 trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) { 320 int i; 321 for (i = 0; i < mem->nregions; i++) { 322 trace_vhost_vdpa_dump_regions(dev, i, 323 mem->regions[i].guest_phys_addr, 324 mem->regions[i].memory_size, 325 mem->regions[i].userspace_addr, 326 mem->regions[i].flags_padding); 327 } 328 } 329 if (mem->padding) { 330 return -1; 331 } 332 333 return 0; 334 } 335 336 static int vhost_vdpa_set_features(struct vhost_dev *dev, 337 uint64_t features) 338 { 339 int ret; 340 trace_vhost_vdpa_set_features(dev, features); 341 ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features); 342 uint8_t status = 0; 343 if (ret) { 344 return ret; 345 } 346 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 347 vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status); 348 349 return !(status & VIRTIO_CONFIG_S_FEATURES_OK); 350 } 351 352 static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) 353 { 354 uint64_t features; 355 uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | 356 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH; 357 int r; 358 359 if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { 360 return 0; 361 } 362 363 features &= f; 364 r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); 365 if (r) { 366 return 0; 367 } 368 369 dev->backend_cap = features; 370 371 return 0; 372 } 373 374 int vhost_vdpa_get_device_id(struct vhost_dev *dev, 375 uint32_t *device_id) 376 { 377 int ret; 378 ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id); 379 trace_vhost_vdpa_get_device_id(dev, *device_id); 380 return ret; 381 } 382 383 static int vhost_vdpa_reset_device(struct vhost_dev *dev) 384 { 385 int ret; 386 uint8_t status = 0; 387 388 ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); 389 trace_vhost_vdpa_reset_device(dev, status); 390 return ret; 391 } 392 393 static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) 394 { 395 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 396 397 trace_vhost_vdpa_get_vq_index(dev, idx, idx - dev->vq_index); 398 return idx - dev->vq_index; 399 } 400 401 static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) 402 { 403 int i; 404 trace_vhost_vdpa_set_vring_ready(dev); 405 for (i = 0; i < dev->nvqs; ++i) { 406 struct vhost_vring_state state = { 407 .index = dev->vq_index + i, 408 .num = 1, 409 }; 410 vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); 411 } 412 return 0; 413 } 414 415 static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config, 416 uint32_t config_len) 417 { 418 int b, len; 419 char line[QEMU_HEXDUMP_LINE_LEN]; 420 421 for (b = 0; b < config_len; b += 16) { 422 len = config_len - b; 423 qemu_hexdump_line(line, b, config, len, false); 424 trace_vhost_vdpa_dump_config(dev, line); 425 } 426 } 427 428 static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data, 429 uint32_t offset, uint32_t size, 430 uint32_t flags) 431 { 432 struct vhost_vdpa_config *config; 433 int ret; 434 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); 435 436 trace_vhost_vdpa_set_config(dev, offset, size, flags); 437 config = g_malloc(size + config_size); 438 config->off = offset; 439 config->len = size; 440 memcpy(config->buf, data, size); 441 if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) && 442 trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) { 443 vhost_vdpa_dump_config(dev, data, size); 444 } 445 ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config); 446 g_free(config); 447 return ret; 448 } 449 450 static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config, 451 uint32_t config_len) 452 { 453 struct vhost_vdpa_config *v_config; 454 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); 455 int ret; 456 457 trace_vhost_vdpa_get_config(dev, config, config_len); 458 v_config = g_malloc(config_len + config_size); 459 v_config->len = config_len; 460 v_config->off = 0; 461 ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config); 462 memcpy(config, v_config->buf, config_len); 463 g_free(v_config); 464 if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) && 465 trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) { 466 vhost_vdpa_dump_config(dev, config, config_len); 467 } 468 return ret; 469 } 470 471 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) 472 { 473 struct vhost_vdpa *v = dev->opaque; 474 trace_vhost_vdpa_dev_start(dev, started); 475 if (started) { 476 uint8_t status = 0; 477 memory_listener_register(&v->listener, &address_space_memory); 478 vhost_vdpa_set_vring_ready(dev); 479 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); 480 vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status); 481 482 return !(status & VIRTIO_CONFIG_S_DRIVER_OK); 483 } else { 484 vhost_vdpa_reset_device(dev); 485 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 486 VIRTIO_CONFIG_S_DRIVER); 487 memory_listener_unregister(&v->listener); 488 489 return 0; 490 } 491 } 492 493 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, 494 struct vhost_log *log) 495 { 496 trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd, 497 log->log); 498 return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base); 499 } 500 501 static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, 502 struct vhost_vring_addr *addr) 503 { 504 trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags, 505 addr->desc_user_addr, addr->used_user_addr, 506 addr->avail_user_addr, 507 addr->log_guest_addr); 508 return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr); 509 } 510 511 static int vhost_vdpa_set_vring_num(struct vhost_dev *dev, 512 struct vhost_vring_state *ring) 513 { 514 trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num); 515 return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring); 516 } 517 518 static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, 519 struct vhost_vring_state *ring) 520 { 521 trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num); 522 return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring); 523 } 524 525 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, 526 struct vhost_vring_state *ring) 527 { 528 int ret; 529 530 ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); 531 trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num); 532 return ret; 533 } 534 535 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev, 536 struct vhost_vring_file *file) 537 { 538 trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd); 539 return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file); 540 } 541 542 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev, 543 struct vhost_vring_file *file) 544 { 545 trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd); 546 return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file); 547 } 548 549 static int vhost_vdpa_get_features(struct vhost_dev *dev, 550 uint64_t *features) 551 { 552 int ret; 553 554 ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features); 555 trace_vhost_vdpa_get_features(dev, *features); 556 return ret; 557 } 558 559 static int vhost_vdpa_set_owner(struct vhost_dev *dev) 560 { 561 trace_vhost_vdpa_set_owner(dev); 562 return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); 563 } 564 565 static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev, 566 struct vhost_vring_addr *addr, struct vhost_virtqueue *vq) 567 { 568 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 569 addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys; 570 addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys; 571 addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys; 572 trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr, 573 addr->avail_user_addr, addr->used_user_addr); 574 return 0; 575 } 576 577 static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) 578 { 579 return true; 580 } 581 582 const VhostOps vdpa_ops = { 583 .backend_type = VHOST_BACKEND_TYPE_VDPA, 584 .vhost_backend_init = vhost_vdpa_init, 585 .vhost_backend_cleanup = vhost_vdpa_cleanup, 586 .vhost_set_log_base = vhost_vdpa_set_log_base, 587 .vhost_set_vring_addr = vhost_vdpa_set_vring_addr, 588 .vhost_set_vring_num = vhost_vdpa_set_vring_num, 589 .vhost_set_vring_base = vhost_vdpa_set_vring_base, 590 .vhost_get_vring_base = vhost_vdpa_get_vring_base, 591 .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, 592 .vhost_set_vring_call = vhost_vdpa_set_vring_call, 593 .vhost_get_features = vhost_vdpa_get_features, 594 .vhost_set_backend_cap = vhost_vdpa_set_backend_cap, 595 .vhost_set_owner = vhost_vdpa_set_owner, 596 .vhost_set_vring_endian = NULL, 597 .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, 598 .vhost_set_mem_table = vhost_vdpa_set_mem_table, 599 .vhost_set_features = vhost_vdpa_set_features, 600 .vhost_reset_device = vhost_vdpa_reset_device, 601 .vhost_get_vq_index = vhost_vdpa_get_vq_index, 602 .vhost_get_config = vhost_vdpa_get_config, 603 .vhost_set_config = vhost_vdpa_set_config, 604 .vhost_requires_shm_log = NULL, 605 .vhost_migration_done = NULL, 606 .vhost_backend_can_merge = NULL, 607 .vhost_net_set_mtu = NULL, 608 .vhost_set_iotlb_callback = NULL, 609 .vhost_send_device_iotlb_msg = NULL, 610 .vhost_dev_start = vhost_vdpa_dev_start, 611 .vhost_get_device_id = vhost_vdpa_get_device_id, 612 .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, 613 .vhost_force_iommu = vhost_vdpa_force_iommu, 614 }; 615