/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/main-loop.h"
#include "cpu.h"

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    /* Zero-initialize so no uninitialized stack bytes reach the kernel. */
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        goto fail;
    }

    return;
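/*
 * Mapping failures for RAM devices (e.g. a peer BAR exposed for PCI p2p)
 * are tolerated; any other failure is fatal at runtime.
 */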
fail:
    if (memory_region_is_ram_device(section->mr)) {
        /* Allow unexpected mappings not to be fatal for RAM devices */
        error_report("failed to vdpa_dma_map. pci p2p may not work");
        return;
    }
    /*
     * On the init path we could fail gracefully; at runtime there is
     * not much we can do other than report a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;
    bool try_unmap = true;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (try_unmap) {
        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
        if (ret) {
            error_report("vhost-vdpa: DMA unmap failed");
        }
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping, so we cannot use the generic vhost memory listener, which
 * depends on the addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    /* Read-modify-write the device status bits. */
    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = opaque;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    /*
     * Mappings are maintained through the IOTLB memory listener, so the
     * memory table itself is not passed to the device; only sanity-check
     * the padding here.
     */
    if (mem->padding) {
        return -1;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    uint8_t status = 0;

    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx - dev->vq_index;
}

static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    /* Enable every virtqueue that belongs to this vhost device. */
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
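    /*
     * The payload follows the vhost_vdpa_config header; off/len select the
     * window of the device config space that the kernel will update.
     */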
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    return ret;
}

static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
    .backend_type = VHOST_BACKEND_TYPE_VDPA,
    .vhost_backend_init = vhost_vdpa_init,
    .vhost_backend_cleanup = vhost_vdpa_cleanup,
    .vhost_set_log_base = vhost_vdpa_set_log_base,
    .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
    .vhost_set_vring_num = vhost_vdpa_set_vring_num,
    .vhost_set_vring_base = vhost_vdpa_set_vring_base,
    .vhost_get_vring_base = vhost_vdpa_get_vring_base,
    .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
    .vhost_set_vring_call = vhost_vdpa_set_vring_call,
    .vhost_get_features = vhost_vdpa_get_features,
    .vhost_set_owner = vhost_vdpa_set_owner,
    .vhost_set_vring_endian = NULL,
    .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
    .vhost_set_mem_table = vhost_vdpa_set_mem_table,
    .vhost_set_features = vhost_vdpa_set_features,
    .vhost_reset_device = vhost_vdpa_reset_device,
    .vhost_get_vq_index = vhost_vdpa_get_vq_index,
    .vhost_get_config = vhost_vdpa_get_config,
    .vhost_set_config = vhost_vdpa_set_config,
    .vhost_requires_shm_log = NULL,
    .vhost_migration_done = NULL,
    .vhost_backend_can_merge = NULL,
    .vhost_net_set_mtu = NULL,
    .vhost_set_iotlb_callback = NULL,
    .vhost_send_device_iotlb_msg = NULL,
    .vhost_dev_start = vhost_vdpa_dev_start,
    .vhost_get_device_id = vhost_vdpa_get_device_id,
    .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
    .vhost_force_iommu = vhost_vdpa_force_iommu,
};