/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include <linux/kvm.h>
#include "sysemu/kvm.h"

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

/* Send a VHOST_IOTLB_UPDATE message to map [iova, iova + size) to vaddr. */
static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg;
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

/* Send a VHOST_IOTLB_INVALIDATE message to unmap [iova, iova + size). */
static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg;
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
error_report("vhost vdpa map fail!"); 124 if (memory_region_is_ram_device(section->mr)) { 125 /* Allow unexpected mappings not to be fatal for RAM devices */ 126 error_report("map ram fail!"); 127 return ; 128 } 129 goto fail; 130 } 131 132 return; 133 134 fail: 135 if (memory_region_is_ram_device(section->mr)) { 136 error_report("failed to vdpa_dma_map. pci p2p may not work"); 137 return; 138 139 } 140 /* 141 * On the initfn path, store the first error in the container so we 142 * can gracefully fail. Runtime, there's not much we can do other 143 * than throw a hardware error. 144 */ 145 error_report("vhost-vdpa: DMA mapping failed, unable to continue"); 146 return; 147 148 } 149 150 static void vhost_vdpa_listener_region_del(MemoryListener *listener, 151 MemoryRegionSection *section) 152 { 153 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 154 hwaddr iova; 155 Int128 llend, llsize; 156 int ret; 157 bool try_unmap = true; 158 159 if (vhost_vdpa_listener_skipped_section(section)) { 160 return; 161 } 162 163 if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != 164 (section->offset_within_region & ~TARGET_PAGE_MASK))) { 165 error_report("%s received unaligned region", __func__); 166 return; 167 } 168 169 iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); 170 llend = int128_make64(section->offset_within_address_space); 171 llend = int128_add(llend, section->size); 172 llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); 173 174 if (int128_ge(int128_make64(iova), llend)) { 175 return; 176 } 177 178 llsize = int128_sub(llend, int128_make64(iova)); 179 180 if (try_unmap) { 181 ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); 182 if (ret) { 183 error_report("vhost_vdpa dma unmap error!"); 184 } 185 } 186 187 memory_region_unref(section->mr); 188 } 189 /* 190 * IOTLB API is used by vhost-vpda which requires incremental updating 191 * of the mapping. So we can not use generic vhost memory listener which 192 * depends on the addnop(). 
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

/* Forward a vhost ioctl to the vhost-vdpa character device. */
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = opaque;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (mem->padding) {
        return -1;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    uint8_t status = 0;

    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx - dev->vq_index;
}

static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    config = g_malloc(size + config_size);
    if (config == NULL) {
        return -1;
    }
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    v_config = g_malloc(config_len + config_size);
    if (v_config == NULL) {
        return -1;
    }
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    return ret;
}

static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
    .backend_type = VHOST_BACKEND_TYPE_VDPA,
    .vhost_backend_init = vhost_vdpa_init,
    .vhost_backend_cleanup = vhost_vdpa_cleanup,
    .vhost_set_log_base = vhost_vdpa_set_log_base,
    .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
    .vhost_set_vring_num = vhost_vdpa_set_vring_num,
    .vhost_set_vring_base = vhost_vdpa_set_vring_base,
    .vhost_get_vring_base = vhost_vdpa_get_vring_base,
    .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
    .vhost_set_vring_call = vhost_vdpa_set_vring_call,
    .vhost_get_features = vhost_vdpa_get_features,
    .vhost_set_owner = vhost_vdpa_set_owner,
    .vhost_set_vring_endian = NULL,
    .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
    .vhost_set_mem_table = vhost_vdpa_set_mem_table,
    .vhost_set_features = vhost_vdpa_set_features,
    .vhost_reset_device = vhost_vdpa_reset_device,
    .vhost_get_vq_index = vhost_vdpa_get_vq_index,
    .vhost_get_config = vhost_vdpa_get_config,
    .vhost_set_config = vhost_vdpa_set_config,
    .vhost_requires_shm_log = NULL,
    .vhost_migration_done = NULL,
    .vhost_backend_can_merge = NULL,
    .vhost_net_set_mtu = NULL,
    .vhost_set_iotlb_callback = NULL,
    .vhost_send_device_iotlb_msg = NULL,
    .vhost_dev_start = vhost_vdpa_dev_start,
    .vhost_get_device_id = vhost_vdpa_get_device_id,
    .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
    .vhost_force_iommu = vhost_vdpa_force_iommu,
};