1 /* 2 * vhost-vdpa.c 3 * 4 * Copyright(c) 2017-2018 Intel Corporation. 5 * Copyright(c) 2020 Red Hat, Inc. 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or later. 8 * See the COPYING file in the top-level directory. 9 * 10 */ 11 12 #include "qemu/osdep.h" 13 #include "clients.h" 14 #include "hw/virtio/virtio-net.h" 15 #include "net/vhost_net.h" 16 #include "net/vhost-vdpa.h" 17 #include "hw/virtio/vhost-vdpa.h" 18 #include "qemu/config-file.h" 19 #include "qemu/error-report.h" 20 #include "qemu/log.h" 21 #include "qemu/memalign.h" 22 #include "qemu/option.h" 23 #include "qapi/error.h" 24 #include <linux/vhost.h> 25 #include <sys/ioctl.h> 26 #include <err.h> 27 #include "standard-headers/linux/virtio_net.h" 28 #include "monitor/monitor.h" 29 #include "migration/misc.h" 30 #include "hw/virtio/vhost.h" 31 32 /* Todo:need to add the multiqueue support here */ 33 typedef struct VhostVDPAState { 34 NetClientState nc; 35 struct vhost_vdpa vhost_vdpa; 36 NotifierWithReturn migration_state; 37 VHostNetState *vhost_net; 38 39 /* Control commands shadow buffers */ 40 void *cvq_cmd_out_buffer; 41 virtio_net_ctrl_ack *status; 42 43 /* The device always have SVQ enabled */ 44 bool always_svq; 45 46 /* The device can isolate CVQ in its own ASID */ 47 bool cvq_isolated; 48 49 bool started; 50 } VhostVDPAState; 51 52 /* 53 * The array is sorted alphabetically in ascending order, 54 * with the exception of VHOST_INVALID_FEATURE_BIT, 55 * which should always be the last entry. 56 */ 57 const int vdpa_feature_bits[] = { 58 VIRTIO_F_ANY_LAYOUT, 59 VIRTIO_F_IOMMU_PLATFORM, 60 VIRTIO_F_NOTIFY_ON_EMPTY, 61 VIRTIO_F_RING_PACKED, 62 VIRTIO_F_RING_RESET, 63 VIRTIO_F_VERSION_1, 64 VIRTIO_NET_F_CSUM, 65 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, 66 VIRTIO_NET_F_CTRL_MAC_ADDR, 67 VIRTIO_NET_F_CTRL_RX, 68 VIRTIO_NET_F_CTRL_RX_EXTRA, 69 VIRTIO_NET_F_CTRL_VLAN, 70 VIRTIO_NET_F_CTRL_VQ, 71 VIRTIO_NET_F_GSO, 72 VIRTIO_NET_F_GUEST_CSUM, 73 VIRTIO_NET_F_GUEST_ECN, 74 VIRTIO_NET_F_GUEST_TSO4, 75 VIRTIO_NET_F_GUEST_TSO6, 76 VIRTIO_NET_F_GUEST_UFO, 77 VIRTIO_NET_F_GUEST_USO4, 78 VIRTIO_NET_F_GUEST_USO6, 79 VIRTIO_NET_F_HASH_REPORT, 80 VIRTIO_NET_F_HOST_ECN, 81 VIRTIO_NET_F_HOST_TSO4, 82 VIRTIO_NET_F_HOST_TSO6, 83 VIRTIO_NET_F_HOST_UFO, 84 VIRTIO_NET_F_HOST_USO, 85 VIRTIO_NET_F_MQ, 86 VIRTIO_NET_F_MRG_RXBUF, 87 VIRTIO_NET_F_MTU, 88 VIRTIO_NET_F_RSS, 89 VIRTIO_NET_F_STATUS, 90 VIRTIO_RING_F_EVENT_IDX, 91 VIRTIO_RING_F_INDIRECT_DESC, 92 93 /* VHOST_INVALID_FEATURE_BIT should always be the last entry */ 94 VHOST_INVALID_FEATURE_BIT 95 }; 96 97 /** Supported device specific feature bits with SVQ */ 98 static const uint64_t vdpa_svq_device_features = 99 BIT_ULL(VIRTIO_NET_F_CSUM) | 100 BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | 101 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | 102 BIT_ULL(VIRTIO_NET_F_MTU) | 103 BIT_ULL(VIRTIO_NET_F_MAC) | 104 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | 105 BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | 106 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | 107 BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | 108 BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | 109 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | 110 BIT_ULL(VIRTIO_NET_F_HOST_ECN) | 111 BIT_ULL(VIRTIO_NET_F_HOST_UFO) | 112 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | 113 BIT_ULL(VIRTIO_NET_F_STATUS) | 114 BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | 115 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | 116 BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | 117 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | 118 BIT_ULL(VIRTIO_NET_F_MQ) | 119 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | 120 BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | 121 /* VHOST_F_LOG_ALL is exposed by SVQ */ 122 BIT_ULL(VHOST_F_LOG_ALL) | 123 BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | 124 BIT_ULL(VIRTIO_NET_F_RSS) | 125 BIT_ULL(VIRTIO_NET_F_RSC_EXT) | 126 BIT_ULL(VIRTIO_NET_F_STANDBY) | 127 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX); 128 129 #define VHOST_VDPA_NET_CVQ_ASID 1 130 131 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) 132 { 133 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 134 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 135 return s->vhost_net; 136 } 137 138 static size_t vhost_vdpa_net_cvq_cmd_len(void) 139 { 140 /* 141 * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. 142 * In buffer is always 1 byte, so it should fit here 143 */ 144 return sizeof(struct virtio_net_ctrl_hdr) + 145 2 * sizeof(struct virtio_net_ctrl_mac) + 146 MAC_TABLE_ENTRIES * ETH_ALEN; 147 } 148 149 static size_t vhost_vdpa_net_cvq_cmd_page_len(void) 150 { 151 return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); 152 } 153 154 static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) 155 { 156 uint64_t invalid_dev_features = 157 features & ~vdpa_svq_device_features & 158 /* Transport are all accepted at this point */ 159 ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, 160 VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); 161 162 if (invalid_dev_features) { 163 error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, 164 invalid_dev_features); 165 return false; 166 } 167 168 return vhost_svq_valid_features(features, errp); 169 } 170 171 static int vhost_vdpa_net_check_device_id(struct vhost_net *net) 172 { 173 uint32_t device_id; 174 int ret; 175 struct vhost_dev *hdev; 176 177 hdev = (struct vhost_dev *)&net->dev; 178 ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id); 179 if (device_id != VIRTIO_ID_NET) { 180 return -ENOTSUP; 181 } 182 return ret; 183 } 184 185 static int vhost_vdpa_add(NetClientState *ncs, void *be, 186 int queue_pair_index, int nvqs) 187 { 188 VhostNetOptions options; 189 struct vhost_net *net = NULL; 190 VhostVDPAState *s; 191 int ret; 192 193 options.backend_type = VHOST_BACKEND_TYPE_VDPA; 194 assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 195 s = DO_UPCAST(VhostVDPAState, nc, ncs); 196 options.net_backend = ncs; 197 options.opaque = be; 198 options.busyloop_timeout = 0; 199 options.nvqs = nvqs; 200 201 net = vhost_net_init(&options); 202 if (!net) { 203 error_report("failed to init vhost_net for queue"); 204 goto err_init; 205 } 206 s->vhost_net = net; 207 ret = vhost_vdpa_net_check_device_id(net); 208 if (ret) { 209 goto err_check; 210 } 211 return 0; 212 err_check: 213 vhost_net_cleanup(net); 214 g_free(net); 215 err_init: 216 return -1; 217 } 218 219 static void vhost_vdpa_cleanup(NetClientState *nc) 220 { 221 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 222 223 /* 224 * If a peer NIC is attached, do not cleanup anything. 225 * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup() 226 * when the guest is shutting down. 227 */ 228 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { 229 return; 230 } 231 munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len()); 232 munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len()); 233 if (s->vhost_net) { 234 vhost_net_cleanup(s->vhost_net); 235 g_free(s->vhost_net); 236 s->vhost_net = NULL; 237 } 238 if (s->vhost_vdpa.index != 0) { 239 return; 240 } 241 qemu_close(s->vhost_vdpa.shared->device_fd); 242 g_free(s->vhost_vdpa.shared); 243 } 244 245 /** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend */ 246 static bool vhost_vdpa_set_steering_ebpf(NetClientState *nc, int prog_fd) 247 { 248 return true; 249 } 250 251 static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc) 252 { 253 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 254 255 return true; 256 } 257 258 static bool vhost_vdpa_has_ufo(NetClientState *nc) 259 { 260 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 261 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 262 uint64_t features = 0; 263 features |= (1ULL << VIRTIO_NET_F_HOST_UFO); 264 features = vhost_net_get_features(s->vhost_net, features); 265 return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO)); 266 267 } 268 269 static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc, 270 Error **errp) 271 { 272 const char *driver = object_class_get_name(oc); 273 274 if (!g_str_has_prefix(driver, "virtio-net-")) { 275 error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*"); 276 return false; 277 } 278 279 return true; 280 } 281 282 /** Dummy receive in case qemu falls back to userland tap networking */ 283 static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf, 284 size_t size) 285 { 286 return size; 287 } 288 289 static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable) 290 { 291 struct vhost_vdpa *v = &s->vhost_vdpa; 292 VirtIONet *n; 293 VirtIODevice *vdev; 294 int data_queue_pairs, cvq, r; 295 296 /* We are only called on the first data vqs and only if x-svq is not set */ 297 if (s->vhost_vdpa.shadow_vqs_enabled == enable) { 298 return; 299 } 300 301 vdev = v->dev->vdev; 302 n = VIRTIO_NET(vdev); 303 if (!n->vhost_started) { 304 return; 305 } 306 307 data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 308 cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 309 n->max_ncs - n->max_queue_pairs : 0; 310 /* 311 * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter 312 * in the future and resume the device if read-only operations between 313 * suspend and reset goes wrong. 314 */ 315 vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq); 316 317 /* Start will check migration setup_or_active to configure or not SVQ */ 318 r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq); 319 if (unlikely(r < 0)) { 320 error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r); 321 } 322 } 323 324 static int vdpa_net_migration_state_notifier(NotifierWithReturn *notifier, 325 MigrationEvent *e, Error **errp) 326 { 327 VhostVDPAState *s = container_of(notifier, VhostVDPAState, migration_state); 328 329 if (e->type == MIG_EVENT_PRECOPY_SETUP) { 330 vhost_vdpa_net_log_global_enable(s, true); 331 } else if (e->type == MIG_EVENT_PRECOPY_FAILED) { 332 vhost_vdpa_net_log_global_enable(s, false); 333 } 334 return 0; 335 } 336 337 static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) 338 { 339 struct vhost_vdpa *v = &s->vhost_vdpa; 340 341 migration_add_notifier(&s->migration_state, 342 vdpa_net_migration_state_notifier); 343 if (v->shadow_vqs_enabled) { 344 v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, 345 v->shared->iova_range.last); 346 } 347 } 348 349 static int vhost_vdpa_net_data_start(NetClientState *nc) 350 { 351 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 352 struct vhost_vdpa *v = &s->vhost_vdpa; 353 354 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 355 356 if (s->always_svq || 357 migration_is_setup_or_active()) { 358 v->shadow_vqs_enabled = true; 359 } else { 360 v->shadow_vqs_enabled = false; 361 } 362 363 if (v->index == 0) { 364 v->shared->shadow_data = v->shadow_vqs_enabled; 365 vhost_vdpa_net_data_start_first(s); 366 return 0; 367 } 368 369 return 0; 370 } 371 372 static int vhost_vdpa_net_data_load(NetClientState *nc) 373 { 374 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 375 struct vhost_vdpa *v = &s->vhost_vdpa; 376 bool has_cvq = v->dev->vq_index_end % 2; 377 378 if (has_cvq) { 379 return 0; 380 } 381 382 for (int i = 0; i < v->dev->nvqs; ++i) { 383 vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index); 384 } 385 return 0; 386 } 387 388 static void vhost_vdpa_net_client_stop(NetClientState *nc) 389 { 390 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 391 struct vhost_dev *dev; 392 393 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 394 395 if (s->vhost_vdpa.index == 0) { 396 migration_remove_notifier(&s->migration_state); 397 } 398 399 dev = s->vhost_vdpa.dev; 400 if (dev->vq_index + dev->nvqs == dev->vq_index_end) { 401 g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, 402 vhost_iova_tree_delete); 403 } 404 } 405 406 static NetClientInfo net_vhost_vdpa_info = { 407 .type = NET_CLIENT_DRIVER_VHOST_VDPA, 408 .size = sizeof(VhostVDPAState), 409 .receive = vhost_vdpa_receive, 410 .start = vhost_vdpa_net_data_start, 411 .load = vhost_vdpa_net_data_load, 412 .stop = vhost_vdpa_net_client_stop, 413 .cleanup = vhost_vdpa_cleanup, 414 .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, 415 .has_ufo = vhost_vdpa_has_ufo, 416 .check_peer_type = vhost_vdpa_check_peer_type, 417 .set_steering_ebpf = vhost_vdpa_set_steering_ebpf, 418 }; 419 420 static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, 421 Error **errp) 422 { 423 struct vhost_vring_state state = { 424 .index = vq_index, 425 }; 426 int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); 427 428 if (unlikely(r < 0)) { 429 r = -errno; 430 error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); 431 return r; 432 } 433 434 return state.num; 435 } 436 437 static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, 438 unsigned vq_group, 439 unsigned asid_num) 440 { 441 struct vhost_vring_state asid = { 442 .index = vq_group, 443 .num = asid_num, 444 }; 445 int r; 446 447 r = ioctl(v->shared->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); 448 if (unlikely(r < 0)) { 449 error_report("Can't set vq group %u asid %u, errno=%d (%s)", 450 asid.index, asid.num, errno, g_strerror(errno)); 451 } 452 return r; 453 } 454 455 static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) 456 { 457 VhostIOVATree *tree = v->shared->iova_tree; 458 DMAMap needle = { 459 /* 460 * No need to specify size or to look for more translations since 461 * this contiguous chunk was allocated by us. 462 */ 463 .translated_addr = (hwaddr)(uintptr_t)addr, 464 }; 465 const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle); 466 int r; 467 468 if (unlikely(!map)) { 469 error_report("Cannot locate expected map"); 470 return; 471 } 472 473 r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, map->iova, 474 map->size + 1); 475 if (unlikely(r != 0)) { 476 error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); 477 } 478 479 vhost_iova_tree_remove(tree, *map); 480 } 481 482 /** Map CVQ buffer. */ 483 static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, 484 bool write) 485 { 486 DMAMap map = {}; 487 int r; 488 489 map.translated_addr = (hwaddr)(uintptr_t)buf; 490 map.size = size - 1; 491 map.perm = write ? IOMMU_RW : IOMMU_RO, 492 r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &map); 493 if (unlikely(r != IOVA_OK)) { 494 error_report("Cannot map injected element"); 495 return r; 496 } 497 498 r = vhost_vdpa_dma_map(v->shared, v->address_space_id, map.iova, 499 vhost_vdpa_net_cvq_cmd_page_len(), buf, !write); 500 if (unlikely(r < 0)) { 501 goto dma_map_err; 502 } 503 504 return 0; 505 506 dma_map_err: 507 vhost_iova_tree_remove(v->shared->iova_tree, map); 508 return r; 509 } 510 511 static int vhost_vdpa_net_cvq_start(NetClientState *nc) 512 { 513 VhostVDPAState *s; 514 struct vhost_vdpa *v; 515 int64_t cvq_group; 516 int r; 517 Error *err = NULL; 518 519 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 520 521 s = DO_UPCAST(VhostVDPAState, nc, nc); 522 v = &s->vhost_vdpa; 523 524 v->shadow_vqs_enabled = v->shared->shadow_data; 525 s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; 526 527 if (v->shared->shadow_data) { 528 /* SVQ is already configured for all virtqueues */ 529 goto out; 530 } 531 532 /* 533 * If we early return in these cases SVQ will not be enabled. The migration 534 * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. 535 */ 536 if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { 537 return 0; 538 } 539 540 if (!s->cvq_isolated) { 541 return 0; 542 } 543 544 cvq_group = vhost_vdpa_get_vring_group(v->shared->device_fd, 545 v->dev->vq_index_end - 1, 546 &err); 547 if (unlikely(cvq_group < 0)) { 548 error_report_err(err); 549 return cvq_group; 550 } 551 552 r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); 553 if (unlikely(r < 0)) { 554 return r; 555 } 556 557 v->shadow_vqs_enabled = true; 558 s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID; 559 560 out: 561 if (!s->vhost_vdpa.shadow_vqs_enabled) { 562 return 0; 563 } 564 565 /* 566 * If other vhost_vdpa already have an iova_tree, reuse it for simplicity, 567 * whether CVQ shares ASID with guest or not, because: 568 * - Memory listener need access to guest's memory addresses allocated in 569 * the IOVA tree. 570 * - There should be plenty of IOVA address space for both ASID not to 571 * worry about collisions between them. Guest's translations are still 572 * validated with virtio virtqueue_pop so there is no risk for the guest 573 * to access memory that it shouldn't. 574 * 575 * To allocate a iova tree per ASID is doable but it complicates the code 576 * and it is not worth it for the moment. 577 */ 578 if (!v->shared->iova_tree) { 579 v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, 580 v->shared->iova_range.last); 581 } 582 583 r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, 584 vhost_vdpa_net_cvq_cmd_page_len(), false); 585 if (unlikely(r < 0)) { 586 return r; 587 } 588 589 r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status, 590 vhost_vdpa_net_cvq_cmd_page_len(), true); 591 if (unlikely(r < 0)) { 592 vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); 593 } 594 595 return r; 596 } 597 598 static void vhost_vdpa_net_cvq_stop(NetClientState *nc) 599 { 600 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 601 602 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 603 604 if (s->vhost_vdpa.shadow_vqs_enabled) { 605 vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); 606 vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); 607 } 608 609 vhost_vdpa_net_client_stop(nc); 610 } 611 612 static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, 613 const struct iovec *out_sg, size_t out_num, 614 const struct iovec *in_sg, size_t in_num) 615 { 616 VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); 617 int r; 618 619 r = vhost_svq_add(svq, out_sg, out_num, in_sg, in_num, NULL); 620 if (unlikely(r != 0)) { 621 if (unlikely(r == -ENOSPC)) { 622 qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", 623 __func__); 624 } 625 } 626 627 return r; 628 } 629 630 /* 631 * Convenience wrapper to poll SVQ for multiple control commands. 632 * 633 * Caller should hold the BQL when invoking this function, and should take 634 * the answer before SVQ pulls by itself when BQL is released. 635 */ 636 static ssize_t vhost_vdpa_net_svq_poll(VhostVDPAState *s, size_t cmds_in_flight) 637 { 638 VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); 639 return vhost_svq_poll(svq, cmds_in_flight); 640 } 641 642 static void vhost_vdpa_net_load_cursor_reset(VhostVDPAState *s, 643 struct iovec *out_cursor, 644 struct iovec *in_cursor) 645 { 646 /* reset the cursor of the output buffer for the device */ 647 out_cursor->iov_base = s->cvq_cmd_out_buffer; 648 out_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len(); 649 650 /* reset the cursor of the in buffer for the device */ 651 in_cursor->iov_base = s->status; 652 in_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len(); 653 } 654 655 /* 656 * Poll SVQ for multiple pending control commands and check the device's ack. 657 * 658 * Caller should hold the BQL when invoking this function. 659 * 660 * @s: The VhostVDPAState 661 * @len: The length of the pending status shadow buffer 662 */ 663 static ssize_t vhost_vdpa_net_svq_flush(VhostVDPAState *s, size_t len) 664 { 665 /* device uses a one-byte length ack for each control command */ 666 ssize_t dev_written = vhost_vdpa_net_svq_poll(s, len); 667 if (unlikely(dev_written != len)) { 668 return -EIO; 669 } 670 671 /* check the device's ack */ 672 for (int i = 0; i < len; ++i) { 673 if (s->status[i] != VIRTIO_NET_OK) { 674 return -EIO; 675 } 676 } 677 return 0; 678 } 679 680 static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, 681 struct iovec *out_cursor, 682 struct iovec *in_cursor, uint8_t class, 683 uint8_t cmd, const struct iovec *data_sg, 684 size_t data_num) 685 { 686 const struct virtio_net_ctrl_hdr ctrl = { 687 .class = class, 688 .cmd = cmd, 689 }; 690 size_t data_size = iov_size(data_sg, data_num), cmd_size; 691 struct iovec out, in; 692 ssize_t r; 693 unsigned dummy_cursor_iov_cnt; 694 VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); 695 696 assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl)); 697 cmd_size = sizeof(ctrl) + data_size; 698 if (vhost_svq_available_slots(svq) < 2 || 699 iov_size(out_cursor, 1) < cmd_size) { 700 /* 701 * It is time to flush all pending control commands if SVQ is full 702 * or control commands shadow buffers are full. 703 * 704 * We can poll here since we've had BQL from the time 705 * we sent the descriptor. 706 */ 707 r = vhost_vdpa_net_svq_flush(s, in_cursor->iov_base - 708 (void *)s->status); 709 if (unlikely(r < 0)) { 710 return r; 711 } 712 713 vhost_vdpa_net_load_cursor_reset(s, out_cursor, in_cursor); 714 } 715 716 /* pack the CVQ command header */ 717 iov_from_buf(out_cursor, 1, 0, &ctrl, sizeof(ctrl)); 718 /* pack the CVQ command command-specific-data */ 719 iov_to_buf(data_sg, data_num, 0, 720 out_cursor->iov_base + sizeof(ctrl), data_size); 721 722 /* extract the required buffer from the cursor for output */ 723 iov_copy(&out, 1, out_cursor, 1, 0, cmd_size); 724 /* extract the required buffer from the cursor for input */ 725 iov_copy(&in, 1, in_cursor, 1, 0, sizeof(*s->status)); 726 727 r = vhost_vdpa_net_cvq_add(s, &out, 1, &in, 1); 728 if (unlikely(r < 0)) { 729 return r; 730 } 731 732 /* iterate the cursors */ 733 dummy_cursor_iov_cnt = 1; 734 iov_discard_front(&out_cursor, &dummy_cursor_iov_cnt, cmd_size); 735 dummy_cursor_iov_cnt = 1; 736 iov_discard_front(&in_cursor, &dummy_cursor_iov_cnt, sizeof(*s->status)); 737 738 return 0; 739 } 740 741 static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n, 742 struct iovec *out_cursor, 743 struct iovec *in_cursor) 744 { 745 if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) { 746 const struct iovec data = { 747 .iov_base = (void *)n->mac, 748 .iov_len = sizeof(n->mac), 749 }; 750 ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, 751 VIRTIO_NET_CTRL_MAC, 752 VIRTIO_NET_CTRL_MAC_ADDR_SET, 753 &data, 1); 754 if (unlikely(r < 0)) { 755 return r; 756 } 757 } 758 759 /* 760 * According to VirtIO standard, "The device MUST have an 761 * empty MAC filtering table on reset.". 762 * 763 * Therefore, there is no need to send this CVQ command if the 764 * driver also sets an empty MAC filter table, which aligns with 765 * the device's defaults. 766 * 767 * Note that the device's defaults can mismatch the driver's 768 * configuration only at live migration. 769 */ 770 if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX) || 771 n->mac_table.in_use == 0) { 772 return 0; 773 } 774 775 uint32_t uni_entries = n->mac_table.first_multi, 776 uni_macs_size = uni_entries * ETH_ALEN, 777 mul_entries = n->mac_table.in_use - uni_entries, 778 mul_macs_size = mul_entries * ETH_ALEN; 779 struct virtio_net_ctrl_mac uni = { 780 .entries = cpu_to_le32(uni_entries), 781 }; 782 struct virtio_net_ctrl_mac mul = { 783 .entries = cpu_to_le32(mul_entries), 784 }; 785 const struct iovec data[] = { 786 { 787 .iov_base = &uni, 788 .iov_len = sizeof(uni), 789 }, { 790 .iov_base = n->mac_table.macs, 791 .iov_len = uni_macs_size, 792 }, { 793 .iov_base = &mul, 794 .iov_len = sizeof(mul), 795 }, { 796 .iov_base = &n->mac_table.macs[uni_macs_size], 797 .iov_len = mul_macs_size, 798 }, 799 }; 800 ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, 801 VIRTIO_NET_CTRL_MAC, 802 VIRTIO_NET_CTRL_MAC_TABLE_SET, 803 data, ARRAY_SIZE(data)); 804 if (unlikely(r < 0)) { 805 return r; 806 } 807 808 return 0; 809 } 810 811 static int vhost_vdpa_net_load_rss(VhostVDPAState *s, const VirtIONet *n, 812 struct iovec *out_cursor, 813 struct iovec *in_cursor, bool do_rss) 814 { 815 struct virtio_net_rss_config cfg = {}; 816 ssize_t r; 817 g_autofree uint16_t *table = NULL; 818 819 /* 820 * According to VirtIO standard, "Initially the device has all hash 821 * types disabled and reports only VIRTIO_NET_HASH_REPORT_NONE.". 822 * 823 * Therefore, there is no need to send this CVQ command if the 824 * driver disables the all hash types, which aligns with 825 * the device's defaults. 826 * 827 * Note that the device's defaults can mismatch the driver's 828 * configuration only at live migration. 829 */ 830 if (!n->rss_data.enabled || 831 n->rss_data.hash_types == VIRTIO_NET_HASH_REPORT_NONE) { 832 return 0; 833 } 834 835 table = g_malloc_n(n->rss_data.indirections_len, 836 sizeof(n->rss_data.indirections_table[0])); 837 cfg.hash_types = cpu_to_le32(n->rss_data.hash_types); 838 839 if (do_rss) { 840 /* 841 * According to VirtIO standard, "Number of entries in indirection_table 842 * is (indirection_table_mask + 1)". 843 */ 844 cfg.indirection_table_mask = cpu_to_le16(n->rss_data.indirections_len - 845 1); 846 cfg.unclassified_queue = cpu_to_le16(n->rss_data.default_queue); 847 for (int i = 0; i < n->rss_data.indirections_len; ++i) { 848 table[i] = cpu_to_le16(n->rss_data.indirections_table[i]); 849 } 850 cfg.max_tx_vq = cpu_to_le16(n->curr_queue_pairs); 851 } else { 852 /* 853 * According to VirtIO standard, "Field reserved MUST contain zeroes. 854 * It is defined to make the structure to match the layout of 855 * virtio_net_rss_config structure, defined in 5.1.6.5.7.". 856 * 857 * Therefore, we need to zero the fields in 858 * struct virtio_net_rss_config, which corresponds to the 859 * `reserved` field in struct virtio_net_hash_config. 860 * 861 * Note that all other fields are zeroed at their definitions, 862 * except for the `indirection_table` field, where the actual data 863 * is stored in the `table` variable to ensure compatibility 864 * with RSS case. Therefore, we need to zero the `table` variable here. 865 */ 866 table[0] = 0; 867 } 868 869 /* 870 * Considering that virtio_net_handle_rss() currently does not restore 871 * the hash key length parsed from the CVQ command sent from the guest 872 * into n->rss_data and uses the maximum key length in other code, so 873 * we also employ the maximum key length here. 874 */ 875 cfg.hash_key_length = sizeof(n->rss_data.key); 876 877 const struct iovec data[] = { 878 { 879 .iov_base = &cfg, 880 .iov_len = offsetof(struct virtio_net_rss_config, 881 indirection_table), 882 }, { 883 .iov_base = table, 884 .iov_len = n->rss_data.indirections_len * 885 sizeof(n->rss_data.indirections_table[0]), 886 }, { 887 .iov_base = &cfg.max_tx_vq, 888 .iov_len = offsetof(struct virtio_net_rss_config, hash_key_data) - 889 offsetof(struct virtio_net_rss_config, max_tx_vq), 890 }, { 891 .iov_base = (void *)n->rss_data.key, 892 .iov_len = sizeof(n->rss_data.key), 893 } 894 }; 895 896 r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, 897 VIRTIO_NET_CTRL_MQ, 898 do_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG : 899 VIRTIO_NET_CTRL_MQ_HASH_CONFIG, 900 data, ARRAY_SIZE(data)); 901 if (unlikely(r < 0)) { 902 return r; 903 } 904 905 return 0; 906 } 907 908 static int vhost_vdpa_net_load_mq(VhostVDPAState *s, 909 const VirtIONet *n, 910 struct iovec *out_cursor, 911 struct iovec *in_cursor) 912 { 913 struct virtio_net_ctrl_mq mq; 914 ssize_t r; 915 916 if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) { 917 return 0; 918 } 919 920 mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs); 921 const struct iovec data = { 922 .iov_base = &mq, 923 .iov_len = sizeof(mq), 924 }; 925 r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, 926 VIRTIO_NET_CTRL_MQ, 927 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, 928 &data, 1); 929 if (unlikely(r < 0)) { 930 return r; 931 } 932 933 if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_RSS)) { 934 /* load the receive-side scaling state */ 935 r = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor, true); 936 if (unlikely(r < 0)) { 937 return r; 938 } 939 } else if (virtio_vdev_has_feature(&n->parent_obj, 940 VIRTIO_NET_F_HASH_REPORT)) { 941 /* load the hash calculation state */ 942 r = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor, false); 943 if (unlikely(r < 0)) { 944 return r; 945 } 946 } 947 948 return 0; 949 } 950 951 static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, 952 const VirtIONet *n, 953 struct iovec *out_cursor, 954 struct iovec *in_cursor) 955 { 956 uint64_t offloads; 957 ssize_t r; 958 959 if (!virtio_vdev_has_feature(&n->parent_obj, 960 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 961 return 0; 962 } 963 964 if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) { 965 /* 966 * According to VirtIO standard, "Upon feature negotiation 967 * corresponding offload gets enabled to preserve 968 * backward compatibility.". 969 * 970 * Therefore, there is no need to send this CVQ command if the 971 * driver also enables all supported offloads, which aligns with 972 * the device's defaults. 973 * 974 * Note that the device's defaults can mismatch the driver's 975 * configuration only at live migration. 976 */ 977 return 0; 978 } 979 980 offloads = cpu_to_le64(n->curr_guest_offloads); 981 const struct iovec data = { 982 .iov_base = &offloads, 983 .iov_len = sizeof(offloads), 984 }; 985 r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, 986 VIRTIO_NET_CTRL_GUEST_OFFLOADS, 987 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, 988 &data, 1); 989 if (unlikely(r < 0)) { 990 return r; 991 } 992 993 return 0; 994 } 995 996 static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s, 997 struct iovec *out_cursor, 998 struct iovec *in_cursor, 999 uint8_t cmd, 1000 uint8_t on) 1001 { 1002 const struct iovec data = { 1003 .iov_base = &on, 1004 .iov_len = sizeof(on), 1005 }; 1006 ssize_t r; 1007 1008 r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, 1009 VIRTIO_NET_CTRL_RX, cmd, &data, 1); 1010 if (unlikely(r < 0)) { 1011 return r; 1012 } 1013 1014 return 0; 1015 } 1016 1017 static int vhost_vdpa_net_load_rx(VhostVDPAState *s, 1018 const VirtIONet *n, 1019 struct iovec *out_cursor, 1020 struct iovec *in_cursor) 1021 { 1022 ssize_t r; 1023 1024 if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX)) { 1025 return 0; 1026 } 1027 1028 /* 1029 * According to virtio_net_reset(), device turns promiscuous mode 1030 * on by default. 1031 * 1032 * Additionally, according to VirtIO standard, "Since there are 1033 * no guarantees, it can use a hash filter or silently switch to 1034 * allmulti or promiscuous mode if it is given too many addresses.". 1035 * QEMU marks `n->mac_table.uni_overflow` if guest sets too many 1036 * non-multicast MAC addresses, indicating that promiscuous mode 1037 * should be enabled. 1038 * 1039 * Therefore, QEMU should only send this CVQ command if the 1040 * `n->mac_table.uni_overflow` is not marked and `n->promisc` is off, 1041 * which sets promiscuous mode on, different from the device's defaults. 1042 * 1043 * Note that the device's defaults can mismatch the driver's 1044 * configuration only at live migration. 1045 */ 1046 if (!n->mac_table.uni_overflow && !n->promisc) { 1047 r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, 1048 VIRTIO_NET_CTRL_RX_PROMISC, 0); 1049 if (unlikely(r < 0)) { 1050 return r; 1051 } 1052 } 1053 1054 /* 1055 * According to virtio_net_reset(), device turns all-multicast mode 1056 * off by default. 1057 * 1058 * According to VirtIO standard, "Since there are no guarantees, 1059 * it can use a hash filter or silently switch to allmulti or 1060 * promiscuous mode if it is given too many addresses.". QEMU marks 1061 * `n->mac_table.multi_overflow` if guest sets too many 1062 * non-multicast MAC addresses. 1063 * 1064 * Therefore, QEMU should only send this CVQ command if the 1065 * `n->mac_table.multi_overflow` is marked or `n->allmulti` is on, 1066 * which sets all-multicast mode on, different from the device's defaults. 1067 * 1068 * Note that the device's defaults can mismatch the driver's 1069 * configuration only at live migration. 1070 */ 1071 if (n->mac_table.multi_overflow || n->allmulti) { 1072 r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, 1073 VIRTIO_NET_CTRL_RX_ALLMULTI, 1); 1074 if (unlikely(r < 0)) { 1075 return r; 1076 } 1077 } 1078 1079 if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX_EXTRA)) { 1080 return 0; 1081 } 1082 1083 /* 1084 * According to virtio_net_reset(), device turns all-unicast mode 1085 * off by default. 1086 * 1087 * Therefore, QEMU should only send this CVQ command if the driver 1088 * sets all-unicast mode on, different from the device's defaults. 1089 * 1090 * Note that the device's defaults can mismatch the driver's 1091 * configuration only at live migration. 1092 */ 1093 if (n->alluni) { 1094 r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, 1095 VIRTIO_NET_CTRL_RX_ALLUNI, 1); 1096 if (r < 0) { 1097 return r; 1098 } 1099 } 1100 1101 /* 1102 * According to virtio_net_reset(), device turns non-multicast mode 1103 * off by default. 1104 * 1105 * Therefore, QEMU should only send this CVQ command if the driver 1106 * sets non-multicast mode on, different from the device's defaults. 1107 * 1108 * Note that the device's defaults can mismatch the driver's 1109 * configuration only at live migration. 1110 */ 1111 if (n->nomulti) { 1112 r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, 1113 VIRTIO_NET_CTRL_RX_NOMULTI, 1); 1114 if (r < 0) { 1115 return r; 1116 } 1117 } 1118 1119 /* 1120 * According to virtio_net_reset(), device turns non-unicast mode 1121 * off by default. 1122 * 1123 * Therefore, QEMU should only send this CVQ command if the driver 1124 * sets non-unicast mode on, different from the device's defaults. 1125 * 1126 * Note that the device's defaults can mismatch the driver's 1127 * configuration only at live migration. 1128 */ 1129 if (n->nouni) { 1130 r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, 1131 VIRTIO_NET_CTRL_RX_NOUNI, 1); 1132 if (r < 0) { 1133 return r; 1134 } 1135 } 1136 1137 /* 1138 * According to virtio_net_reset(), device turns non-broadcast mode 1139 * off by default. 1140 * 1141 * Therefore, QEMU should only send this CVQ command if the driver 1142 * sets non-broadcast mode on, different from the device's defaults. 1143 * 1144 * Note that the device's defaults can mismatch the driver's 1145 * configuration only at live migration. 1146 */ 1147 if (n->nobcast) { 1148 r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, 1149 VIRTIO_NET_CTRL_RX_NOBCAST, 1); 1150 if (r < 0) { 1151 return r; 1152 } 1153 } 1154 1155 return 0; 1156 } 1157 1158 static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s, 1159 const VirtIONet *n, 1160 struct iovec *out_cursor, 1161 struct iovec *in_cursor, 1162 uint16_t vid) 1163 { 1164 const struct iovec data = { 1165 .iov_base = &vid, 1166 .iov_len = sizeof(vid), 1167 }; 1168 ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, 1169 VIRTIO_NET_CTRL_VLAN, 1170 VIRTIO_NET_CTRL_VLAN_ADD, 1171 &data, 1); 1172 if (unlikely(r < 0)) { 1173 return r; 1174 } 1175 1176 return 0; 1177 } 1178 1179 static int vhost_vdpa_net_load_vlan(VhostVDPAState *s, 1180 const VirtIONet *n, 1181 struct iovec *out_cursor, 1182 struct iovec *in_cursor) 1183 { 1184 int r; 1185 1186 if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_VLAN)) { 1187 return 0; 1188 } 1189 1190 for (int i = 0; i < MAX_VLAN >> 5; i++) { 1191 for (int j = 0; n->vlans[i] && j <= 0x1f; j++) { 1192 if (n->vlans[i] & (1U << j)) { 1193 r = vhost_vdpa_net_load_single_vlan(s, n, out_cursor, 1194 in_cursor, (i << 5) + j); 1195 if (unlikely(r != 0)) { 1196 return r; 1197 } 1198 } 1199 } 1200 } 1201 1202 return 0; 1203 } 1204 1205 static int vhost_vdpa_net_cvq_load(NetClientState *nc) 1206 { 1207 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 1208 struct vhost_vdpa *v = &s->vhost_vdpa; 1209 const VirtIONet *n; 1210 int r; 1211 struct iovec out_cursor, in_cursor; 1212 1213 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 1214 1215 vhost_vdpa_set_vring_ready(v, v->dev->vq_index); 1216 1217 if (v->shadow_vqs_enabled) { 1218 n = VIRTIO_NET(v->dev->vdev); 1219 vhost_vdpa_net_load_cursor_reset(s, &out_cursor, &in_cursor); 1220 r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor); 1221 if (unlikely(r < 0)) { 1222 return r; 1223 } 1224 r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor); 1225 if (unlikely(r)) { 1226 return r; 1227 } 1228 r = vhost_vdpa_net_load_offloads(s, n, &out_cursor, &in_cursor); 1229 if (unlikely(r)) { 1230 return r; 1231 } 1232 r = vhost_vdpa_net_load_rx(s, n, &out_cursor, &in_cursor); 1233 if (unlikely(r)) { 1234 return r; 1235 } 1236 r = vhost_vdpa_net_load_vlan(s, n, &out_cursor, &in_cursor); 1237 if (unlikely(r)) { 1238 return r; 1239 } 1240 1241 /* 1242 * We need to poll and check all pending device's used buffers. 1243 * 1244 * We can poll here since we've had BQL from the time 1245 * we sent the descriptor. 1246 */ 1247 r = vhost_vdpa_net_svq_flush(s, in_cursor.iov_base - (void *)s->status); 1248 if (unlikely(r)) { 1249 return r; 1250 } 1251 } 1252 1253 for (int i = 0; i < v->dev->vq_index; ++i) { 1254 vhost_vdpa_set_vring_ready(v, i); 1255 } 1256 1257 return 0; 1258 } 1259 1260 static NetClientInfo net_vhost_vdpa_cvq_info = { 1261 .type = NET_CLIENT_DRIVER_VHOST_VDPA, 1262 .size = sizeof(VhostVDPAState), 1263 .receive = vhost_vdpa_receive, 1264 .start = vhost_vdpa_net_cvq_start, 1265 .load = vhost_vdpa_net_cvq_load, 1266 .stop = vhost_vdpa_net_cvq_stop, 1267 .cleanup = vhost_vdpa_cleanup, 1268 .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, 1269 .has_ufo = vhost_vdpa_has_ufo, 1270 .check_peer_type = vhost_vdpa_check_peer_type, 1271 .set_steering_ebpf = vhost_vdpa_set_steering_ebpf, 1272 }; 1273 1274 /* 1275 * Forward the excessive VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command to 1276 * vdpa device. 1277 * 1278 * Considering that QEMU cannot send the entire filter table to the 1279 * vdpa device, it should send the VIRTIO_NET_CTRL_RX_PROMISC CVQ 1280 * command to enable promiscuous mode to receive all packets, 1281 * according to VirtIO standard, "Since there are no guarantees, 1282 * it can use a hash filter or silently switch to allmulti or 1283 * promiscuous mode if it is given too many addresses.". 1284 * 1285 * Since QEMU ignores MAC addresses beyond `MAC_TABLE_ENTRIES` and 1286 * marks `n->mac_table.x_overflow` accordingly, it should have 1287 * the same effect on the device model to receive 1288 * (`MAC_TABLE_ENTRIES` + 1) or more non-multicast MAC addresses. 1289 * The same applies to multicast MAC addresses. 1290 * 1291 * Therefore, QEMU can provide the device model with a fake 1292 * VIRTIO_NET_CTRL_MAC_TABLE_SET command with (`MAC_TABLE_ENTRIES` + 1) 1293 * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1) multicast 1294 * MAC addresses. This ensures that the device model marks 1295 * `n->mac_table.uni_overflow` and `n->mac_table.multi_overflow`, 1296 * allowing all packets to be received, which aligns with the 1297 * state of the vdpa device. 1298 */ 1299 static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s, 1300 VirtQueueElement *elem, 1301 struct iovec *out, 1302 const struct iovec *in) 1303 { 1304 struct virtio_net_ctrl_mac mac_data, *mac_ptr; 1305 struct virtio_net_ctrl_hdr *hdr_ptr; 1306 uint32_t cursor; 1307 ssize_t r; 1308 uint8_t on = 1; 1309 1310 /* parse the non-multicast MAC address entries from CVQ command */ 1311 cursor = sizeof(*hdr_ptr); 1312 r = iov_to_buf(elem->out_sg, elem->out_num, cursor, 1313 &mac_data, sizeof(mac_data)); 1314 if (unlikely(r != sizeof(mac_data))) { 1315 /* 1316 * If the CVQ command is invalid, we should simulate the vdpa device 1317 * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command 1318 */ 1319 *s->status = VIRTIO_NET_ERR; 1320 return sizeof(*s->status); 1321 } 1322 cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN; 1323 1324 /* parse the multicast MAC address entries from CVQ command */ 1325 r = iov_to_buf(elem->out_sg, elem->out_num, cursor, 1326 &mac_data, sizeof(mac_data)); 1327 if (r != sizeof(mac_data)) { 1328 /* 1329 * If the CVQ command is invalid, we should simulate the vdpa device 1330 * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command 1331 */ 1332 *s->status = VIRTIO_NET_ERR; 1333 return sizeof(*s->status); 1334 } 1335 cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN; 1336 1337 /* validate the CVQ command */ 1338 if (iov_size(elem->out_sg, elem->out_num) != cursor) { 1339 /* 1340 * If the CVQ command is invalid, we should simulate the vdpa device 1341 * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command 1342 */ 1343 *s->status = VIRTIO_NET_ERR; 1344 return sizeof(*s->status); 1345 } 1346 1347 /* 1348 * According to VirtIO standard, "Since there are no guarantees, 1349 * it can use a hash filter or silently switch to allmulti or 1350 * promiscuous mode if it is given too many addresses.". 1351 * 1352 * Therefore, considering that QEMU is unable to send the entire 1353 * filter table to the vdpa device, it should send the 1354 * VIRTIO_NET_CTRL_RX_PROMISC CVQ command to enable promiscuous mode 1355 */ 1356 hdr_ptr = out->iov_base; 1357 out->iov_len = sizeof(*hdr_ptr) + sizeof(on); 1358 1359 hdr_ptr->class = VIRTIO_NET_CTRL_RX; 1360 hdr_ptr->cmd = VIRTIO_NET_CTRL_RX_PROMISC; 1361 iov_from_buf(out, 1, sizeof(*hdr_ptr), &on, sizeof(on)); 1362 r = vhost_vdpa_net_cvq_add(s, out, 1, in, 1); 1363 if (unlikely(r < 0)) { 1364 return r; 1365 } 1366 1367 /* 1368 * We can poll here since we've had BQL from the time 1369 * we sent the descriptor. 1370 */ 1371 r = vhost_vdpa_net_svq_poll(s, 1); 1372 if (unlikely(r < sizeof(*s->status))) { 1373 return r; 1374 } 1375 if (*s->status != VIRTIO_NET_OK) { 1376 return sizeof(*s->status); 1377 } 1378 1379 /* 1380 * QEMU should also send a fake VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ 1381 * command to the device model, including (`MAC_TABLE_ENTRIES` + 1) 1382 * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1) 1383 * multicast MAC addresses. 1384 * 1385 * By doing so, the device model can mark `n->mac_table.uni_overflow` 1386 * and `n->mac_table.multi_overflow`, enabling all packets to be 1387 * received, which aligns with the state of the vdpa device. 1388 */ 1389 cursor = 0; 1390 uint32_t fake_uni_entries = MAC_TABLE_ENTRIES + 1, 1391 fake_mul_entries = MAC_TABLE_ENTRIES + 1, 1392 fake_cvq_size = sizeof(struct virtio_net_ctrl_hdr) + 1393 sizeof(mac_data) + fake_uni_entries * ETH_ALEN + 1394 sizeof(mac_data) + fake_mul_entries * ETH_ALEN; 1395 1396 assert(fake_cvq_size < vhost_vdpa_net_cvq_cmd_page_len()); 1397 out->iov_len = fake_cvq_size; 1398 1399 /* pack the header for fake CVQ command */ 1400 hdr_ptr = out->iov_base + cursor; 1401 hdr_ptr->class = VIRTIO_NET_CTRL_MAC; 1402 hdr_ptr->cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; 1403 cursor += sizeof(*hdr_ptr); 1404 1405 /* 1406 * Pack the non-multicast MAC addresses part for fake CVQ command. 1407 * 1408 * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC 1409 * addresses provided in CVQ command. Therefore, only the entries 1410 * field need to be prepared in the CVQ command. 1411 */ 1412 mac_ptr = out->iov_base + cursor; 1413 mac_ptr->entries = cpu_to_le32(fake_uni_entries); 1414 cursor += sizeof(*mac_ptr) + fake_uni_entries * ETH_ALEN; 1415 1416 /* 1417 * Pack the multicast MAC addresses part for fake CVQ command. 1418 * 1419 * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC 1420 * addresses provided in CVQ command. Therefore, only the entries 1421 * field need to be prepared in the CVQ command. 1422 */ 1423 mac_ptr = out->iov_base + cursor; 1424 mac_ptr->entries = cpu_to_le32(fake_mul_entries); 1425 1426 /* 1427 * Simulating QEMU poll a vdpa device used buffer 1428 * for VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command 1429 */ 1430 return sizeof(*s->status); 1431 } 1432 1433 /** 1434 * Validate and copy control virtqueue commands. 1435 * 1436 * Following QEMU guidelines, we offer a copy of the buffers to the device to 1437 * prevent TOCTOU bugs. 1438 */ 1439 static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, 1440 VirtQueueElement *elem, 1441 void *opaque) 1442 { 1443 VhostVDPAState *s = opaque; 1444 size_t in_len; 1445 const struct virtio_net_ctrl_hdr *ctrl; 1446 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1447 /* Out buffer sent to both the vdpa device and the device model */ 1448 struct iovec out = { 1449 .iov_base = s->cvq_cmd_out_buffer, 1450 }; 1451 /* in buffer used for device model */ 1452 const struct iovec model_in = { 1453 .iov_base = &status, 1454 .iov_len = sizeof(status), 1455 }; 1456 /* in buffer used for vdpa device */ 1457 const struct iovec vdpa_in = { 1458 .iov_base = s->status, 1459 .iov_len = sizeof(*s->status), 1460 }; 1461 ssize_t dev_written = -EINVAL; 1462 1463 out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, 1464 s->cvq_cmd_out_buffer, 1465 vhost_vdpa_net_cvq_cmd_page_len()); 1466 1467 ctrl = s->cvq_cmd_out_buffer; 1468 if (ctrl->class == VIRTIO_NET_CTRL_ANNOUNCE) { 1469 /* 1470 * Guest announce capability is emulated by qemu, so don't forward to 1471 * the device. 1472 */ 1473 dev_written = sizeof(status); 1474 *s->status = VIRTIO_NET_OK; 1475 } else if (unlikely(ctrl->class == VIRTIO_NET_CTRL_MAC && 1476 ctrl->cmd == VIRTIO_NET_CTRL_MAC_TABLE_SET && 1477 iov_size(elem->out_sg, elem->out_num) > out.iov_len)) { 1478 /* 1479 * Due to the size limitation of the out buffer sent to the vdpa device, 1480 * which is determined by vhost_vdpa_net_cvq_cmd_page_len(), excessive 1481 * MAC addresses set by the driver for the filter table can cause 1482 * truncation of the CVQ command in QEMU. As a result, the vdpa device 1483 * rejects the flawed CVQ command. 1484 * 1485 * Therefore, QEMU must handle this situation instead of sending 1486 * the CVQ command directly. 1487 */ 1488 dev_written = vhost_vdpa_net_excessive_mac_filter_cvq_add(s, elem, 1489 &out, &vdpa_in); 1490 if (unlikely(dev_written < 0)) { 1491 goto out; 1492 } 1493 } else { 1494 ssize_t r; 1495 r = vhost_vdpa_net_cvq_add(s, &out, 1, &vdpa_in, 1); 1496 if (unlikely(r < 0)) { 1497 dev_written = r; 1498 goto out; 1499 } 1500 1501 /* 1502 * We can poll here since we've had BQL from the time 1503 * we sent the descriptor. 1504 */ 1505 dev_written = vhost_vdpa_net_svq_poll(s, 1); 1506 } 1507 1508 if (unlikely(dev_written < sizeof(status))) { 1509 error_report("Insufficient written data (%zu)", dev_written); 1510 goto out; 1511 } 1512 1513 if (*s->status != VIRTIO_NET_OK) { 1514 goto out; 1515 } 1516 1517 status = VIRTIO_NET_ERR; 1518 virtio_net_handle_ctrl_iov(svq->vdev, &model_in, 1, &out, 1); 1519 if (status != VIRTIO_NET_OK) { 1520 error_report("Bad CVQ processing in model"); 1521 } 1522 1523 out: 1524 in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, 1525 sizeof(status)); 1526 if (unlikely(in_len < sizeof(status))) { 1527 error_report("Bad device CVQ written length"); 1528 } 1529 vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); 1530 /* 1531 * `elem` belongs to vhost_vdpa_net_handle_ctrl_avail() only when 1532 * the function successfully forwards the CVQ command, indicated 1533 * by a non-negative value of `dev_written`. Otherwise, it still 1534 * belongs to SVQ. 1535 * This function should only free the `elem` when it owns. 1536 */ 1537 if (dev_written >= 0) { 1538 g_free(elem); 1539 } 1540 return dev_written < 0 ? dev_written : 0; 1541 } 1542 1543 static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { 1544 .avail_handler = vhost_vdpa_net_handle_ctrl_avail, 1545 }; 1546 1547 /** 1548 * Probe if CVQ is isolated 1549 * 1550 * @device_fd The vdpa device fd 1551 * @features Features offered by the device. 1552 * @cvq_index The control vq pair index 1553 * 1554 * Returns <0 in case of failure, 0 if false and 1 if true. 1555 */ 1556 static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, 1557 int cvq_index, Error **errp) 1558 { 1559 ERRP_GUARD(); 1560 uint64_t backend_features; 1561 int64_t cvq_group; 1562 uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE | 1563 VIRTIO_CONFIG_S_DRIVER; 1564 int r; 1565 1566 r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); 1567 if (unlikely(r < 0)) { 1568 error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); 1569 return r; 1570 } 1571 1572 if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { 1573 return 0; 1574 } 1575 1576 r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); 1577 if (unlikely(r)) { 1578 error_setg_errno(errp, -r, "Cannot set device status"); 1579 goto out; 1580 } 1581 1582 r = ioctl(device_fd, VHOST_SET_FEATURES, &features); 1583 if (unlikely(r)) { 1584 error_setg_errno(errp, -r, "Cannot set features"); 1585 goto out; 1586 } 1587 1588 status |= VIRTIO_CONFIG_S_FEATURES_OK; 1589 r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); 1590 if (unlikely(r)) { 1591 error_setg_errno(errp, -r, "Cannot set device status"); 1592 goto out; 1593 } 1594 1595 cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); 1596 if (unlikely(cvq_group < 0)) { 1597 if (cvq_group != -ENOTSUP) { 1598 r = cvq_group; 1599 goto out; 1600 } 1601 1602 /* 1603 * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend 1604 * support ASID even if the parent driver does not. The CVQ cannot be 1605 * isolated in this case. 1606 */ 1607 error_free(*errp); 1608 *errp = NULL; 1609 r = 0; 1610 goto out; 1611 } 1612 1613 for (int i = 0; i < cvq_index; ++i) { 1614 int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); 1615 if (unlikely(group < 0)) { 1616 r = group; 1617 goto out; 1618 } 1619 1620 if (group == (int64_t)cvq_group) { 1621 r = 0; 1622 goto out; 1623 } 1624 } 1625 1626 r = 1; 1627 1628 out: 1629 status = 0; 1630 ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); 1631 return r; 1632 } 1633 1634 static NetClientState *net_vhost_vdpa_init(NetClientState *peer, 1635 const char *device, 1636 const char *name, 1637 int vdpa_device_fd, 1638 int queue_pair_index, 1639 int nvqs, 1640 bool is_datapath, 1641 bool svq, 1642 struct vhost_vdpa_iova_range iova_range, 1643 uint64_t features, 1644 VhostVDPAShared *shared, 1645 Error **errp) 1646 { 1647 NetClientState *nc = NULL; 1648 VhostVDPAState *s; 1649 int ret = 0; 1650 assert(name); 1651 int cvq_isolated = 0; 1652 1653 if (is_datapath) { 1654 nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, 1655 name); 1656 } else { 1657 cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, 1658 queue_pair_index * 2, 1659 errp); 1660 if (unlikely(cvq_isolated < 0)) { 1661 return NULL; 1662 } 1663 1664 nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, 1665 device, name); 1666 } 1667 qemu_set_info_str(nc, TYPE_VHOST_VDPA); 1668 s = DO_UPCAST(VhostVDPAState, nc, nc); 1669 1670 s->vhost_vdpa.index = queue_pair_index; 1671 s->always_svq = svq; 1672 s->migration_state.notify = NULL; 1673 s->vhost_vdpa.shadow_vqs_enabled = svq; 1674 if (queue_pair_index == 0) { 1675 vhost_vdpa_net_valid_svq_features(features, 1676 &s->vhost_vdpa.migration_blocker); 1677 s->vhost_vdpa.shared = g_new0(VhostVDPAShared, 1); 1678 s->vhost_vdpa.shared->device_fd = vdpa_device_fd; 1679 s->vhost_vdpa.shared->iova_range = iova_range; 1680 s->vhost_vdpa.shared->shadow_data = svq; 1681 } else if (!is_datapath) { 1682 s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), 1683 PROT_READ | PROT_WRITE, 1684 MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1685 s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), 1686 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 1687 -1, 0); 1688 1689 s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; 1690 s->vhost_vdpa.shadow_vq_ops_opaque = s; 1691 s->cvq_isolated = cvq_isolated; 1692 } 1693 if (queue_pair_index != 0) { 1694 s->vhost_vdpa.shared = shared; 1695 } 1696 1697 ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); 1698 if (ret) { 1699 qemu_del_net_client(nc); 1700 return NULL; 1701 } 1702 1703 return nc; 1704 } 1705 1706 static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) 1707 { 1708 int ret = ioctl(fd, VHOST_GET_FEATURES, features); 1709 if (unlikely(ret < 0)) { 1710 error_setg_errno(errp, errno, 1711 "Fail to query features from vhost-vDPA device"); 1712 } 1713 return ret; 1714 } 1715 1716 static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features, 1717 int *has_cvq, Error **errp) 1718 { 1719 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); 1720 g_autofree struct vhost_vdpa_config *config = NULL; 1721 __virtio16 *max_queue_pairs; 1722 int ret; 1723 1724 if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) { 1725 *has_cvq = 1; 1726 } else { 1727 *has_cvq = 0; 1728 } 1729 1730 if (features & (1 << VIRTIO_NET_F_MQ)) { 1731 config = g_malloc0(config_size + sizeof(*max_queue_pairs)); 1732 config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs); 1733 config->len = sizeof(*max_queue_pairs); 1734 1735 ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config); 1736 if (ret) { 1737 error_setg(errp, "Fail to get config from vhost-vDPA device"); 1738 return -ret; 1739 } 1740 1741 max_queue_pairs = (__virtio16 *)&config->buf; 1742 1743 return lduw_le_p(max_queue_pairs); 1744 } 1745 1746 return 1; 1747 } 1748 1749 int net_init_vhost_vdpa(const Netdev *netdev, const char *name, 1750 NetClientState *peer, Error **errp) 1751 { 1752 ERRP_GUARD(); 1753 const NetdevVhostVDPAOptions *opts; 1754 uint64_t features; 1755 int vdpa_device_fd; 1756 g_autofree NetClientState **ncs = NULL; 1757 struct vhost_vdpa_iova_range iova_range; 1758 NetClientState *nc; 1759 int queue_pairs, r, i = 0, has_cvq = 0; 1760 1761 assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); 1762 opts = &netdev->u.vhost_vdpa; 1763 if (!opts->vhostdev && !opts->vhostfd) { 1764 error_setg(errp, 1765 "vhost-vdpa: neither vhostdev= nor vhostfd= was specified"); 1766 return -1; 1767 } 1768 1769 if (opts->vhostdev && opts->vhostfd) { 1770 error_setg(errp, 1771 "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive"); 1772 return -1; 1773 } 1774 1775 if (opts->vhostdev) { 1776 vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); 1777 if (vdpa_device_fd == -1) { 1778 return -errno; 1779 } 1780 } else { 1781 /* has_vhostfd */ 1782 vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp); 1783 if (vdpa_device_fd == -1) { 1784 error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: "); 1785 return -1; 1786 } 1787 } 1788 1789 r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); 1790 if (unlikely(r < 0)) { 1791 goto err; 1792 } 1793 1794 queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, 1795 &has_cvq, errp); 1796 if (queue_pairs < 0) { 1797 qemu_close(vdpa_device_fd); 1798 return queue_pairs; 1799 } 1800 1801 r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); 1802 if (unlikely(r < 0)) { 1803 error_setg(errp, "vhost-vdpa: get iova range failed: %s", 1804 strerror(-r)); 1805 goto err; 1806 } 1807 1808 if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) { 1809 goto err; 1810 } 1811 1812 ncs = g_malloc0(sizeof(*ncs) * queue_pairs); 1813 1814 for (i = 0; i < queue_pairs; i++) { 1815 VhostVDPAShared *shared = NULL; 1816 1817 if (i) { 1818 shared = DO_UPCAST(VhostVDPAState, nc, ncs[0])->vhost_vdpa.shared; 1819 } 1820 ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, 1821 vdpa_device_fd, i, 2, true, opts->x_svq, 1822 iova_range, features, shared, errp); 1823 if (!ncs[i]) 1824 goto err; 1825 } 1826 1827 if (has_cvq) { 1828 VhostVDPAState *s0 = DO_UPCAST(VhostVDPAState, nc, ncs[0]); 1829 VhostVDPAShared *shared = s0->vhost_vdpa.shared; 1830 1831 nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, 1832 vdpa_device_fd, i, 1, false, 1833 opts->x_svq, iova_range, features, shared, 1834 errp); 1835 if (!nc) 1836 goto err; 1837 } 1838 1839 return 0; 1840 1841 err: 1842 if (i) { 1843 for (i--; i >= 0; i--) { 1844 qemu_del_net_client(ncs[i]); 1845 } 1846 } 1847 1848 qemu_close(vdpa_device_fd); 1849 1850 return -1; 1851 } 1852