/*
 * vhost-vdpa.c
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "clients.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/vhost-vdpa.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "qemu/option.h"
#include "qapi/error.h"
#include <linux/vhost.h>
#include <sys/ioctl.h>
#include <err.h>
#include "standard-headers/linux/virtio_net.h"
#include "monitor/monitor.h"
#include "migration/migration.h"
#include "migration/misc.h"
#include "hw/virtio/vhost.h"

/* TODO: add multiqueue support here */
typedef struct VhostVDPAState {
    NetClientState nc;
    struct vhost_vdpa vhost_vdpa;
    Notifier migration_state;
    VHostNetState *vhost_net;

    /* Control commands shadow buffers */
    void *cvq_cmd_out_buffer;
    virtio_net_ctrl_ack *status;

    /* The device always has SVQ enabled */
    bool always_svq;
    bool started;
} VhostVDPAState;

const int vdpa_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_NET_F_CTRL_RX,
    VIRTIO_NET_F_CTRL_RX_EXTRA,
    VIRTIO_NET_F_CTRL_VLAN,
    VIRTIO_NET_F_CTRL_MAC_ADDR,
    VIRTIO_NET_F_MQ,
    VIRTIO_NET_F_CTRL_VQ,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_STATUS,
    VHOST_INVALID_FEATURE_BIT
};

/** Supported device specific feature bits with SVQ */
static const uint64_t vdpa_svq_device_features =
    BIT_ULL(VIRTIO_NET_F_CSUM) |
    BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
    BIT_ULL(VIRTIO_NET_F_MTU) |
    BIT_ULL(VIRTIO_NET_F_MAC) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
    BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
    BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
    BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
    BIT_ULL(VIRTIO_NET_F_STATUS) |
    BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
    BIT_ULL(VIRTIO_NET_F_MQ) |
    BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
    BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
    BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
    BIT_ULL(VIRTIO_NET_F_STANDBY);

#define VHOST_VDPA_NET_CVQ_ASID 1

VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    return s->vhost_net;
}
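
/*
 * Check whether a device feature set can be handled while Shadow VirtQueue
 * (SVQ) is in use: device-specific bits must be a subset of
 * vdpa_svq_device_features, while transport bits are accepted here and left
 * for vhost_svq_valid_features() to validate.
 */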
static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
{
    uint64_t invalid_dev_features =
        features & ~vdpa_svq_device_features &
        /* Transport bits are all accepted at this point */
        ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
                         VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);

    if (invalid_dev_features) {
        error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
                   invalid_dev_features);
        return false;
    }

    return vhost_svq_valid_features(features, errp);
}

static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
{
    uint32_t device_id;
    int ret;
    struct vhost_dev *hdev;

    hdev = (struct vhost_dev *)&net->dev;
    ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
    if (device_id != VIRTIO_ID_NET) {
        return -ENOTSUP;
    }
    return ret;
}

static int vhost_vdpa_add(NetClientState *ncs, void *be,
                          int queue_pair_index, int nvqs)
{
    VhostNetOptions options;
    struct vhost_net *net = NULL;
    VhostVDPAState *s;
    int ret;

    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, ncs);
    options.net_backend = ncs;
    options.opaque = be;
    options.busyloop_timeout = 0;
    options.nvqs = nvqs;

    net = vhost_net_init(&options);
    if (!net) {
        error_report("failed to init vhost_net for queue");
        goto err_init;
    }
    s->vhost_net = net;
    ret = vhost_vdpa_net_check_device_id(net);
    if (ret) {
        goto err_check;
    }
    return 0;
err_check:
    vhost_net_cleanup(net);
    g_free(net);
err_init:
    return -1;
}

static void vhost_vdpa_cleanup(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    qemu_vfree(s->cvq_cmd_out_buffer);
    qemu_vfree(s->status);
    if (s->vhost_net) {
        vhost_net_cleanup(s->vhost_net);
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }
    if (s->vhost_vdpa.device_fd >= 0) {
        qemu_close(s->vhost_vdpa.device_fd);
        s->vhost_vdpa.device_fd = -1;
    }
}

static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    return true;
}

static bool vhost_vdpa_has_ufo(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    uint64_t features = 0;
    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
    features = vhost_net_get_features(s->vhost_net, features);
    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
}

static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
                                       Error **errp)
{
    const char *driver = object_class_get_name(oc);

    if (!g_str_has_prefix(driver, "virtio-net-")) {
        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
        return false;
    }

    return true;
}

/** Dummy receive in case qemu falls back to userland tap networking */
static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    return size;
}

/** From any vdpa net client, get the netclient of the first queue pair */
static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
{
    NICState *nic = qemu_get_nic(s->nc.peer);
    NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);

    return DO_UPCAST(VhostVDPAState, nc, nc0);
}
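
/*
 * Switch the data virtqueues in or out of shadow mode at migration time.
 * vhost-vdpa backends do not offer _F_LOG, so enabling SVQ when migration
 * starts is what lets QEMU track the guest's dirty pages; the change only
 * takes effect across a vhost_net_stop()/vhost_net_start() cycle.
 */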
static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
{
    struct vhost_vdpa *v = &s->vhost_vdpa;
    VirtIONet *n;
    VirtIODevice *vdev;
    int data_queue_pairs, cvq, r;

    /* We are only called on the first data vqs and only if x-svq is not set */
    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
        return;
    }

    vdev = v->dev->vdev;
    n = VIRTIO_NET(vdev);
    if (!n->vhost_started) {
        return;
    }

    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
          n->max_ncs - n->max_queue_pairs : 0;
    /*
     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
     * in the future and resume the device if read-only operations between
     * suspend and reset go wrong.
     */
    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);

    /* Start checks migration setup_or_active to decide whether to use SVQ */
    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
    if (unlikely(r < 0)) {
        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
    }
}

static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
{
    MigrationState *migration = data;
    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
                                     migration_state);

    if (migration_in_setup(migration)) {
        vhost_vdpa_net_log_global_enable(s, true);
    } else if (migration_has_failed(migration)) {
        vhost_vdpa_net_log_global_enable(s, false);
    }
}

static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
{
    struct vhost_vdpa *v = &s->vhost_vdpa;

    add_migration_state_change_notifier(&s->migration_state);
    if (v->shadow_vqs_enabled) {
        v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
                                           v->iova_range.last);
    }
}

static int vhost_vdpa_net_data_start(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->always_svq ||
        migration_is_setup_or_active(migrate_get_current()->state)) {
        v->shadow_vqs_enabled = true;
        v->shadow_data = true;
    } else {
        v->shadow_vqs_enabled = false;
        v->shadow_data = false;
    }

    if (v->index == 0) {
        vhost_vdpa_net_data_start_first(s);
        return 0;
    }

    if (v->shadow_vqs_enabled) {
        VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
        v->iova_tree = s0->vhost_vdpa.iova_tree;
    }

    return 0;
}

static void vhost_vdpa_net_client_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_dev *dev;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->vhost_vdpa.index == 0) {
        remove_migration_state_change_notifier(&s->migration_state);
    }

    dev = s->vhost_vdpa.dev;
    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
    }
}

static NetClientInfo net_vhost_vdpa_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .start = vhost_vdpa_net_data_start,
    .stop = vhost_vdpa_net_client_stop,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
};
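
/*
 * Ask the backend which virtqueue group @vq_index belongs to. Returns the
 * group number on success, or the negative return value of the
 * VHOST_VDPA_GET_VRING_GROUP ioctl (with errno set) on failure.
 */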
static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
{
    struct vhost_vring_state state = {
        .index = vq_index,
    };
    int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);

    if (unlikely(r < 0)) {
        error_report("Cannot get VQ %u group: %s", vq_index,
                     g_strerror(errno));
        return r;
    }

    return state.num;
}

static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
                                           unsigned vq_group,
                                           unsigned asid_num)
{
    struct vhost_vring_state asid = {
        .index = vq_group,
        .num = asid_num,
    };
    int r;

    r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
    if (unlikely(r < 0)) {
        error_report("Can't set vq group %u asid %u, errno=%d (%s)",
                     asid.index, asid.num, errno, g_strerror(errno));
    }
    return r;
}

static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
{
    VhostIOVATree *tree = v->iova_tree;
    DMAMap needle = {
        /*
         * No need to specify size or to look for more translations since
         * this contiguous chunk was allocated by us.
         */
        .translated_addr = (hwaddr)(uintptr_t)addr,
    };
    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
    int r;

    if (unlikely(!map)) {
        error_report("Cannot locate expected map");
        return;
    }

    r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1);
    if (unlikely(r != 0)) {
        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
    }

    vhost_iova_tree_remove(tree, *map);
}

static size_t vhost_vdpa_net_cvq_cmd_len(void)
{
    /*
     * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
     * The in buffer is always 1 byte, so it should fit here.
     */
    return sizeof(struct virtio_net_ctrl_hdr) +
           2 * sizeof(struct virtio_net_ctrl_mac) +
           MAC_TABLE_ENTRIES * ETH_ALEN;
}

static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
}

/** Map CVQ buffer. */
static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
                                  bool write)
{
    DMAMap map = {};
    int r;

    map.translated_addr = (hwaddr)(uintptr_t)buf;
    map.size = size - 1;
    map.perm = write ? IOMMU_RW : IOMMU_RO;
    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
    if (unlikely(r != IOVA_OK)) {
        error_report("Cannot map injected element");
        return r;
    }

    r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova,
                           vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
    if (unlikely(r < 0)) {
        goto dma_map_err;
    }

    return 0;

dma_map_err:
    vhost_iova_tree_remove(v->iova_tree, map);
    return r;
}
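
/*
 * Prepare the control virtqueue for shadowing. If the backend supports
 * address space IDs and the CVQ sits in a virtqueue group of its own, the
 * group is moved to VHOST_VDPA_NET_CVQ_ASID so the CVQ can be shadowed while
 * the data virtqueues keep using the guest's memory map; the shadow command
 * and status buffers are then mapped into the chosen address space.
 */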
static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
    VhostVDPAState *s, *s0;
    struct vhost_vdpa *v;
    uint64_t backend_features;
    int64_t cvq_group;
    int cvq_index, r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    s = DO_UPCAST(VhostVDPAState, nc, nc);
    v = &s->vhost_vdpa;

    s0 = vhost_vdpa_net_first_nc_vdpa(s);
    v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
    v->shadow_vqs_enabled = s->always_svq;
    s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;

    if (s->vhost_vdpa.shadow_data) {
        /* SVQ is already configured for all virtqueues */
        goto out;
    }

    /*
     * If we return early in these cases SVQ will not be enabled. The migration
     * will be blocked as long as vhost-vdpa backends do not offer _F_LOG.
     *
     * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev
     * yet.
     */
    r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
    if (unlikely(r < 0)) {
        error_report("Cannot get vdpa backend_features: %s(%d)",
                     g_strerror(errno), errno);
        return -1;
    }
    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
        return 0;
    }

    /*
     * Check that the CVQ (the device's last virtqueue) lives in a vq group of
     * its own, i.e. that no other virtqueue shares the group stored in
     * cvq_group.
     */
    cvq_index = v->dev->vq_index_end - 1;
    cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
    if (unlikely(cvq_group < 0)) {
        return cvq_group;
    }
    for (int i = 0; i < cvq_index; ++i) {
        int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);

        if (unlikely(group < 0)) {
            return group;
        }

        if (group == cvq_group) {
            return 0;
        }
    }

    r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
    if (unlikely(r < 0)) {
        return r;
    }

    v->shadow_vqs_enabled = true;
    s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;

out:
    if (!s->vhost_vdpa.shadow_vqs_enabled) {
        return 0;
    }

    if (s0->vhost_vdpa.iova_tree) {
        /*
         * SVQ is already configured for all virtqueues. Reuse the IOVA tree
         * for simplicity, whether CVQ shares ASID with the guest or not,
         * because:
         * - The memory listener needs access to guest's memory addresses
         *   allocated in the IOVA tree.
         * - There should be plenty of IOVA address space for both ASIDs not
         *   to worry about collisions between them. The guest's translations
         *   are still validated with virtio's virtqueue_pop, so there is no
         *   risk for the guest to access memory that it shouldn't.
         *
         * Allocating an IOVA tree per ASID is doable, but it complicates the
         * code and it is not worth it for the moment.
         */
        v->iova_tree = s0->vhost_vdpa.iova_tree;
    } else {
        v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
                                           v->iova_range.last);
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                               vhost_vdpa_net_cvq_cmd_page_len(), false);
    if (unlikely(r < 0)) {
        return r;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
                               vhost_vdpa_net_cvq_cmd_page_len(), true);
    if (unlikely(r < 0)) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
    }

    return r;
}

static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->vhost_vdpa.shadow_vqs_enabled) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
    }

    vhost_vdpa_net_client_stop(nc);
}

static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
                                      size_t in_len)
{
    /* Buffers for the device */
    const struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
        .iov_len = out_len,
    };
    const struct iovec in = {
        .iov_base = s->status,
        .iov_len = sizeof(virtio_net_ctrl_ack),
    };
    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
    int r;

    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
    if (unlikely(r != 0)) {
        if (unlikely(r == -ENOSPC)) {
            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
                          __func__);
        }
        return r;
    }

    /*
     * We can poll here since we've had BQL from the time we sent the
     * descriptor. Also, we need to take the answer before SVQ pulls by itself,
     * when BQL is released.
     */
    return vhost_svq_poll(svq);
}

static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
                                       uint8_t cmd, const void *data,
                                       size_t data_size)
{
    const struct virtio_net_ctrl_hdr ctrl = {
        .class = class,
        .cmd = cmd,
    };

    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));

    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);

    return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
                                  sizeof(virtio_net_ctrl_ack));
}

static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
{
    uint64_t features = n->parent_obj.guest_features;
    if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
                                                  n->mac, sizeof(n->mac));
        if (unlikely(dev_written < 0)) {
            return dev_written;
        }

        return *s->status != VIRTIO_NET_OK;
    }

    return 0;
}

static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
                                  const VirtIONet *n)
{
    struct virtio_net_ctrl_mq mq;
    uint64_t features = n->parent_obj.guest_features;
    ssize_t dev_written;

    if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
        return 0;
    }

    mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
                                          sizeof(mq));
    if (unlikely(dev_written < 0)) {
        return dev_written;
    }

    return *s->status != VIRTIO_NET_OK;
}
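
/*
 * Replay the guest's virtio-net control state (MAC address, number of queue
 * pairs) through the shadow CVQ right after the net client starts, so the
 * device configuration matches what the guest negotiated.
 */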
static int vhost_vdpa_net_load(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    const VirtIONet *n;
    int r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    n = VIRTIO_NET(v->dev->vdev);
    r = vhost_vdpa_net_load_mac(s, n);
    if (unlikely(r < 0)) {
        return r;
    }
    r = vhost_vdpa_net_load_mq(s, n);
    if (unlikely(r)) {
        return r;
    }

    return 0;
}

static NetClientInfo net_vhost_vdpa_cvq_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .start = vhost_vdpa_net_cvq_start,
    .load = vhost_vdpa_net_load,
    .stop = vhost_vdpa_net_cvq_stop,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
};

/**
 * Validate and copy control virtqueue commands.
 *
 * Following QEMU guidelines, we offer a copy of the buffers to the device to
 * prevent TOCTOU bugs.
 */
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
                                            VirtQueueElement *elem,
                                            void *opaque)
{
    VhostVDPAState *s = opaque;
    size_t in_len;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    /* Out buffer sent to both the vdpa device and the device model */
    struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
    };
    /* In buffer used for the device model */
    const struct iovec in = {
        .iov_base = &status,
        .iov_len = sizeof(status),
    };
    ssize_t dev_written = -EINVAL;

    out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
                             s->cvq_cmd_out_buffer,
                             vhost_vdpa_net_cvq_cmd_len());
    if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
        /*
         * Guest announce capability is emulated by qemu, so don't forward to
         * the device.
         */
        dev_written = sizeof(status);
        *s->status = VIRTIO_NET_OK;
    } else {
        dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
        if (unlikely(dev_written < 0)) {
            goto out;
        }
    }

    if (unlikely(dev_written < sizeof(status))) {
        error_report("Insufficient written data (%zu)", dev_written);
        goto out;
    }

    if (*s->status != VIRTIO_NET_OK) {
        return VIRTIO_NET_ERR;
    }

    status = VIRTIO_NET_ERR;
    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
    if (status != VIRTIO_NET_OK) {
        error_report("Bad CVQ processing in model");
    }

out:
    in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
                          sizeof(status));
    if (unlikely(in_len < sizeof(status))) {
        error_report("Bad device CVQ written length");
    }
    vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
    g_free(elem);
    return dev_written < 0 ? dev_written : 0;
}

static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
    .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
};
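
/*
 * Create a single vhost-vdpa net client. Data path clients use
 * net_vhost_vdpa_info; the control virtqueue client additionally allocates
 * page-aligned shadow buffers for the CVQ command and status, installs the
 * shadow virtqueue ops and registers a migration blocker (see the TODO
 * below).
 */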
static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                           const char *device,
                                           const char *name,
                                           int vdpa_device_fd,
                                           int queue_pair_index,
                                           int nvqs,
                                           bool is_datapath,
                                           bool svq,
                                           struct vhost_vdpa_iova_range iova_range)
{
    NetClientState *nc = NULL;
    VhostVDPAState *s;
    int ret = 0;
    assert(name);
    if (is_datapath) {
        nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
                                 name);
    } else {
        nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
                                         device, name);
    }
    qemu_set_info_str(nc, TYPE_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, nc);

    s->vhost_vdpa.device_fd = vdpa_device_fd;
    s->vhost_vdpa.index = queue_pair_index;
    s->always_svq = svq;
    s->migration_state.notify = vdpa_net_migration_state_notifier;
    s->vhost_vdpa.shadow_vqs_enabled = svq;
    s->vhost_vdpa.iova_range = iova_range;
    s->vhost_vdpa.shadow_data = svq;
    if (!is_datapath) {
        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
                                              vhost_vdpa_net_cvq_cmd_page_len());
        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
        s->status = qemu_memalign(qemu_real_host_page_size(),
                                  vhost_vdpa_net_cvq_cmd_page_len());
        memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len());

        s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
        s->vhost_vdpa.shadow_vq_ops_opaque = s;

        /*
         * TODO: We cannot migrate devices with CVQ as there is no way to set
         * the device state (MAC, MQ, etc) before starting the datapath.
         *
         * Migration blocker ownership now belongs to s->vhost_vdpa.
         */
        error_setg(&s->vhost_vdpa.migration_blocker,
                   "net vdpa cannot migrate with CVQ feature");
    }
    ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
    if (ret) {
        qemu_del_net_client(nc);
        return NULL;
    }
    return nc;
}

static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
{
    int ret = ioctl(fd, VHOST_GET_FEATURES, features);
    if (unlikely(ret < 0)) {
        error_setg_errno(errp, errno,
                         "Fail to query features from vhost-vDPA device");
    }
    return ret;
}

static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
                                          int *has_cvq, Error **errp)
{
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    g_autofree struct vhost_vdpa_config *config = NULL;
    __virtio16 *max_queue_pairs;
    int ret;

    if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
        *has_cvq = 1;
    } else {
        *has_cvq = 0;
    }

    if (features & (1 << VIRTIO_NET_F_MQ)) {
        config = g_malloc0(config_size + sizeof(*max_queue_pairs));
        config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
        config->len = sizeof(*max_queue_pairs);

        ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
        if (ret) {
            error_setg(errp, "Fail to get config from vhost-vDPA device");
            return -ret;
        }

        max_queue_pairs = (__virtio16 *)&config->buf;

        return lduw_le_p(max_queue_pairs);
    }

    return 1;
}

int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
                        NetClientState *peer, Error **errp)
{
    const NetdevVhostVDPAOptions *opts;
    uint64_t features;
    int vdpa_device_fd;
    g_autofree NetClientState **ncs = NULL;
    struct vhost_vdpa_iova_range iova_range;
    NetClientState *nc;
    int queue_pairs, r, i = 0, has_cvq = 0;

    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    opts = &netdev->u.vhost_vdpa;
    if (!opts->vhostdev && !opts->vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
        return -1;
    }

    if (opts->vhostdev && opts->vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
        return -1;
    }

    if (opts->vhostdev) {
        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
        if (vdpa_device_fd == -1) {
            return -errno;
        }
    } else {
        /* has_vhostfd */
        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
        if (vdpa_device_fd == -1) {
            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
            return -1;
        }
    }

    r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
    if (unlikely(r < 0)) {
        goto err;
    }

    queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
                                                 &has_cvq, errp);
    if (queue_pairs < 0) {
        qemu_close(vdpa_device_fd);
        return queue_pairs;
    }

    r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
    if (unlikely(r < 0)) {
        error_setg(errp, "vhost-vdpa: get iova range failed: %s",
                   strerror(-r));
        goto err;
    }

    if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
        goto err;
    }

    ncs = g_malloc0(sizeof(*ncs) * queue_pairs);

    for (i = 0; i < queue_pairs; i++) {
        ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                     vdpa_device_fd, i, 2, true, opts->x_svq,
                                     iova_range);
        if (!ncs[i]) {
            goto err;
        }
    }
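
    /*
     * The control virtqueue, when present, gets its own net client with a
     * single queue so that its commands can be handled separately from the
     * data queue pairs created above.
     */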
    if (has_cvq) {
        nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                 vdpa_device_fd, i, 1, false,
                                 opts->x_svq, iova_range);
        if (!nc) {
            goto err;
        }
    }

    return 0;

err:
    if (i) {
        for (i--; i >= 0; i--) {
            qemu_del_net_client(ncs[i]);
        }
    }

    qemu_close(vdpa_device_fd);

    return -1;
}