/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/main-loop.h"
#include "cpu.h"

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space. These
            * are never accessed by the CPU and are beyond the address width
            * of some IOMMU hardware. TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_begin(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg;
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg;
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}
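
/*
 * A RAM section has become visible in the guest address space: register the
 * guest-physical range in the device IOTLB (VHOST_IOTLB_UPDATE), backed by
 * the corresponding host virtual address.
 */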
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        /* Allow unexpected mappings not to be fatal for RAM devices */
        error_report("failed to vdpa_dma_map. pci p2p may not work");
        return;
    }
    /*
     * There is not much we can do about a runtime mapping failure other
     * than report a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
}

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
    if (ret) {
        error_report("vhost-vdpa: DMA unmap failed");
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping, so we cannot use the generic vhost memory listener, which
 * depends on the addnop().
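 *
 * The begin/commit callbacks wrap the updates in VHOST_IOTLB_BATCH_BEGIN /
 * VHOST_IOTLB_BATCH_END messages when the backend advertises
 * VHOST_BACKEND_F_IOTLB_BATCH.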
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .begin = vhost_vdpa_listener_begin,
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (mem->padding) {
        return -1;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return 0;
    }

    features &= f;
    r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
    if (r) {
        return 0;
    }

    dev->backend_cap = features;

    return 0;
}

int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    uint8_t status = 0;

    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx - dev->vq_index;
}
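
/* Mark every virtqueue owned by this vhost_dev as enabled in the backend. */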
static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    return ret;
}

static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}
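
/*
 * The memory listener above programs the device IOTLB with guest physical
 * addresses, so report the rings' guest physical addresses rather than
 * QEMU userspace addresses.
 */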
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
    .backend_type = VHOST_BACKEND_TYPE_VDPA,
    .vhost_backend_init = vhost_vdpa_init,
    .vhost_backend_cleanup = vhost_vdpa_cleanup,
    .vhost_set_log_base = vhost_vdpa_set_log_base,
    .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
    .vhost_set_vring_num = vhost_vdpa_set_vring_num,
    .vhost_set_vring_base = vhost_vdpa_set_vring_base,
    .vhost_get_vring_base = vhost_vdpa_get_vring_base,
    .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
    .vhost_set_vring_call = vhost_vdpa_set_vring_call,
    .vhost_get_features = vhost_vdpa_get_features,
    .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
    .vhost_set_owner = vhost_vdpa_set_owner,
    .vhost_set_vring_endian = NULL,
    .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
    .vhost_set_mem_table = vhost_vdpa_set_mem_table,
    .vhost_set_features = vhost_vdpa_set_features,
    .vhost_reset_device = vhost_vdpa_reset_device,
    .vhost_get_vq_index = vhost_vdpa_get_vq_index,
    .vhost_get_config = vhost_vdpa_get_config,
    .vhost_set_config = vhost_vdpa_set_config,
    .vhost_requires_shm_log = NULL,
    .vhost_migration_done = NULL,
    .vhost_backend_can_merge = NULL,
    .vhost_net_set_mtu = NULL,
    .vhost_set_iotlb_callback = NULL,
    .vhost_send_device_iotlb_msg = NULL,
    .vhost_dev_start = vhost_vdpa_dev_start,
    .vhost_get_device_id = vhost_vdpa_get_device_id,
    .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
    .vhost_force_iommu = vhost_vdpa_force_iommu,
};