1 /* 2 * vhost-net support 3 * 4 * Copyright Red Hat, Inc. 2010 5 * 6 * Authors: 7 * Michael S. Tsirkin <mst@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu/osdep.h" 17 #include "net/net.h" 18 #include "net/tap.h" 19 #include "net/vhost-user.h" 20 #include "net/vhost-vdpa.h" 21 22 #include "standard-headers/linux/vhost_types.h" 23 #include "hw/virtio/virtio-net.h" 24 #include "net/vhost_net.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "qemu/main-loop.h" 28 29 #include <sys/socket.h> 30 #include <net/if.h> 31 #include <netinet/in.h> 32 33 34 #include "standard-headers/linux/virtio_ring.h" 35 #include "hw/virtio/vhost.h" 36 #include "hw/virtio/virtio-bus.h" 37 #include "linux-headers/linux/vhost.h" 38 39 40 /* Features supported by host kernel. */ 41 static const int kernel_feature_bits[] = { 42 VIRTIO_F_NOTIFY_ON_EMPTY, 43 VIRTIO_RING_F_INDIRECT_DESC, 44 VIRTIO_RING_F_EVENT_IDX, 45 VIRTIO_NET_F_MRG_RXBUF, 46 VIRTIO_F_VERSION_1, 47 VIRTIO_NET_F_MTU, 48 VIRTIO_F_IOMMU_PLATFORM, 49 VIRTIO_F_RING_PACKED, 50 VIRTIO_F_RING_RESET, 51 VIRTIO_F_IN_ORDER, 52 VIRTIO_F_NOTIFICATION_DATA, 53 VIRTIO_NET_F_RSC_EXT, 54 VIRTIO_NET_F_HASH_REPORT, 55 VHOST_INVALID_FEATURE_BIT 56 }; 57 58 /* Features supported by others. */ 59 static const int user_feature_bits[] = { 60 VIRTIO_F_NOTIFY_ON_EMPTY, 61 VIRTIO_F_NOTIFICATION_DATA, 62 VIRTIO_RING_F_INDIRECT_DESC, 63 VIRTIO_RING_F_EVENT_IDX, 64 65 VIRTIO_F_ANY_LAYOUT, 66 VIRTIO_F_VERSION_1, 67 VIRTIO_NET_F_CSUM, 68 VIRTIO_NET_F_GUEST_CSUM, 69 VIRTIO_NET_F_GSO, 70 VIRTIO_NET_F_GUEST_TSO4, 71 VIRTIO_NET_F_GUEST_TSO6, 72 VIRTIO_NET_F_GUEST_ECN, 73 VIRTIO_NET_F_GUEST_UFO, 74 VIRTIO_NET_F_HOST_TSO4, 75 VIRTIO_NET_F_HOST_TSO6, 76 VIRTIO_NET_F_HOST_ECN, 77 VIRTIO_NET_F_HOST_UFO, 78 VIRTIO_NET_F_MRG_RXBUF, 79 VIRTIO_NET_F_MTU, 80 VIRTIO_F_IOMMU_PLATFORM, 81 VIRTIO_F_RING_PACKED, 82 VIRTIO_F_RING_RESET, 83 VIRTIO_F_IN_ORDER, 84 VIRTIO_NET_F_RSS, 85 VIRTIO_NET_F_RSC_EXT, 86 VIRTIO_NET_F_HASH_REPORT, 87 VIRTIO_NET_F_GUEST_USO4, 88 VIRTIO_NET_F_GUEST_USO6, 89 VIRTIO_NET_F_HOST_USO, 90 91 /* This bit implies RARP isn't sent by QEMU out of band */ 92 VIRTIO_NET_F_GUEST_ANNOUNCE, 93 94 VIRTIO_NET_F_MQ, 95 96 VHOST_INVALID_FEATURE_BIT 97 }; 98 99 static const int *vhost_net_get_feature_bits(struct vhost_net *net) 100 { 101 const int *feature_bits = 0; 102 103 switch (net->nc->info->type) { 104 case NET_CLIENT_DRIVER_TAP: 105 feature_bits = kernel_feature_bits; 106 break; 107 case NET_CLIENT_DRIVER_VHOST_USER: 108 feature_bits = user_feature_bits; 109 break; 110 #ifdef CONFIG_VHOST_NET_VDPA 111 case NET_CLIENT_DRIVER_VHOST_VDPA: 112 feature_bits = vdpa_feature_bits; 113 break; 114 #endif 115 default: 116 error_report("Feature bits not defined for this type: %d", 117 net->nc->info->type); 118 break; 119 } 120 121 return feature_bits; 122 } 123 124 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) 125 { 126 return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), 127 features); 128 } 129 int vhost_net_get_config(struct vhost_net *net, uint8_t *config, 130 uint32_t config_len) 131 { 132 return vhost_dev_get_config(&net->dev, config, config_len, NULL); 133 } 134 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, 135 uint32_t offset, uint32_t size, uint32_t flags) 136 { 137 return vhost_dev_set_config(&net->dev, data, offset, size, flags); 138 } 139 140 void vhost_net_ack_features(struct vhost_net *net, uint64_t features) 141 { 142 net->dev.acked_features = net->dev.backend_features; 143 vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); 144 } 145 146 uint64_t vhost_net_get_max_queues(VHostNetState *net) 147 { 148 return net->dev.max_queues; 149 } 150 151 uint64_t vhost_net_get_acked_features(VHostNetState *net) 152 { 153 return net->dev.acked_features; 154 } 155 156 void vhost_net_save_acked_features(NetClientState *nc) 157 { 158 #ifdef CONFIG_VHOST_NET_USER 159 if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 160 vhost_user_save_acked_features(nc); 161 } 162 #endif 163 } 164 165 static int vhost_net_get_fd(NetClientState *backend) 166 { 167 switch (backend->info->type) { 168 case NET_CLIENT_DRIVER_TAP: 169 return tap_get_fd(backend); 170 default: 171 fprintf(stderr, "vhost-net requires tap backend\n"); 172 return -ENOSYS; 173 } 174 } 175 176 struct vhost_net *vhost_net_init(VhostNetOptions *options) 177 { 178 int r; 179 bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; 180 struct vhost_net *net = g_new0(struct vhost_net, 1); 181 uint64_t features = 0; 182 Error *local_err = NULL; 183 184 if (!options->net_backend) { 185 fprintf(stderr, "vhost-net requires net backend to be setup\n"); 186 goto fail; 187 } 188 net->nc = options->net_backend; 189 net->dev.nvqs = options->nvqs; 190 191 net->dev.max_queues = 1; 192 net->dev.vqs = net->vqs; 193 194 if (backend_kernel) { 195 r = vhost_net_get_fd(options->net_backend); 196 if (r < 0) { 197 goto fail; 198 } 199 net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend) 200 ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR); 201 net->backend = r; 202 net->dev.protocol_features = 0; 203 } else { 204 net->dev.backend_features = 0; 205 net->dev.protocol_features = 0; 206 net->backend = -1; 207 208 /* vhost-user needs vq_index to initiate a specific queue pair */ 209 net->dev.vq_index = net->nc->queue_index * net->dev.nvqs; 210 } 211 212 r = vhost_dev_init(&net->dev, options->opaque, 213 options->backend_type, options->busyloop_timeout, 214 &local_err); 215 if (r < 0) { 216 error_report_err(local_err); 217 goto fail; 218 } 219 if (backend_kernel) { 220 if (!qemu_has_vnet_hdr_len(options->net_backend, 221 sizeof(struct virtio_net_hdr_mrg_rxbuf))) { 222 net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); 223 } 224 if (~net->dev.features & net->dev.backend_features) { 225 fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 226 " for backend\n", 227 (uint64_t)(~net->dev.features & net->dev.backend_features)); 228 goto fail; 229 } 230 } 231 232 /* Set sane init value. Override when guest acks. */ 233 #ifdef CONFIG_VHOST_NET_USER 234 if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 235 features = vhost_user_get_acked_features(net->nc); 236 if (~net->dev.features & features) { 237 fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 238 " for backend\n", 239 (uint64_t)(~net->dev.features & features)); 240 goto fail; 241 } 242 } 243 #endif 244 245 vhost_net_ack_features(net, features); 246 247 return net; 248 249 fail: 250 vhost_dev_cleanup(&net->dev); 251 g_free(net); 252 return NULL; 253 } 254 255 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index, 256 int vq_index_end) 257 { 258 net->dev.vq_index = vq_index; 259 net->dev.vq_index_end = vq_index_end; 260 } 261 262 static int vhost_net_start_one(struct vhost_net *net, 263 VirtIODevice *dev) 264 { 265 struct vhost_vring_file file = { }; 266 int r; 267 268 if (net->nc->info->start) { 269 r = net->nc->info->start(net->nc); 270 if (r < 0) { 271 return r; 272 } 273 } 274 275 r = vhost_dev_enable_notifiers(&net->dev, dev); 276 if (r < 0) { 277 goto fail_notifiers; 278 } 279 280 r = vhost_dev_start(&net->dev, dev, false); 281 if (r < 0) { 282 goto fail_start; 283 } 284 285 if (net->nc->info->poll) { 286 net->nc->info->poll(net->nc, false); 287 } 288 289 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 290 qemu_set_fd_handler(net->backend, NULL, NULL, NULL); 291 file.fd = net->backend; 292 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 293 if (!virtio_queue_enabled(dev, net->dev.vq_index + 294 file.index)) { 295 /* Queue might not be ready for start */ 296 continue; 297 } 298 r = vhost_net_set_backend(&net->dev, &file); 299 if (r < 0) { 300 r = -errno; 301 goto fail; 302 } 303 } 304 } 305 306 if (net->nc->info->load) { 307 r = net->nc->info->load(net->nc); 308 if (r < 0) { 309 goto fail; 310 } 311 } 312 return 0; 313 fail: 314 file.fd = -1; 315 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 316 while (file.index-- > 0) { 317 if (!virtio_queue_enabled(dev, net->dev.vq_index + 318 file.index)) { 319 /* Queue might not be ready for start */ 320 continue; 321 } 322 int ret = vhost_net_set_backend(&net->dev, &file); 323 assert(ret >= 0); 324 } 325 } 326 if (net->nc->info->poll) { 327 net->nc->info->poll(net->nc, true); 328 } 329 vhost_dev_stop(&net->dev, dev, false); 330 fail_start: 331 vhost_dev_disable_notifiers(&net->dev, dev); 332 fail_notifiers: 333 return r; 334 } 335 336 static void vhost_net_stop_one(struct vhost_net *net, 337 VirtIODevice *dev) 338 { 339 struct vhost_vring_file file = { .fd = -1 }; 340 341 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 342 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 343 int r = vhost_net_set_backend(&net->dev, &file); 344 assert(r >= 0); 345 } 346 } 347 if (net->nc->info->poll) { 348 net->nc->info->poll(net->nc, true); 349 } 350 vhost_dev_stop(&net->dev, dev, false); 351 if (net->nc->info->stop) { 352 net->nc->info->stop(net->nc); 353 } 354 vhost_dev_disable_notifiers(&net->dev, dev); 355 } 356 357 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, 358 int data_queue_pairs, int cvq) 359 { 360 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 361 VirtioBusState *vbus = VIRTIO_BUS(qbus); 362 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 363 int total_notifiers = data_queue_pairs * 2 + cvq; 364 VirtIONet *n = VIRTIO_NET(dev); 365 int nvhosts = data_queue_pairs + cvq; 366 struct vhost_net *net; 367 int r, e, i, index_end = data_queue_pairs * 2; 368 NetClientState *peer; 369 370 if (cvq) { 371 index_end += 1; 372 } 373 374 if (!k->set_guest_notifiers) { 375 error_report("binding does not support guest notifiers"); 376 return -ENOSYS; 377 } 378 379 for (i = 0; i < nvhosts; i++) { 380 381 if (i < data_queue_pairs) { 382 peer = qemu_get_peer(ncs, i); 383 } else { /* Control Virtqueue */ 384 peer = qemu_get_peer(ncs, n->max_queue_pairs); 385 } 386 387 net = get_vhost_net(peer); 388 vhost_net_set_vq_index(net, i * 2, index_end); 389 390 /* Suppress the masking guest notifiers on vhost user 391 * because vhost user doesn't interrupt masking/unmasking 392 * properly. 393 */ 394 if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 395 dev->use_guest_notifier_mask = false; 396 } 397 } 398 399 r = k->set_guest_notifiers(qbus->parent, total_notifiers, true); 400 if (r < 0) { 401 error_report("Error binding guest notifier: %d", -r); 402 goto err; 403 } 404 405 for (i = 0; i < nvhosts; i++) { 406 if (i < data_queue_pairs) { 407 peer = qemu_get_peer(ncs, i); 408 } else { 409 peer = qemu_get_peer(ncs, n->max_queue_pairs); 410 } 411 412 if (peer->vring_enable) { 413 /* restore vring enable state */ 414 r = vhost_set_vring_enable(peer, peer->vring_enable); 415 416 if (r < 0) { 417 goto err_start; 418 } 419 } 420 421 r = vhost_net_start_one(get_vhost_net(peer), dev); 422 if (r < 0) { 423 goto err_start; 424 } 425 } 426 427 return 0; 428 429 err_start: 430 while (--i >= 0) { 431 peer = qemu_get_peer(ncs, i < data_queue_pairs ? 432 i : n->max_queue_pairs); 433 vhost_net_stop_one(get_vhost_net(peer), dev); 434 } 435 e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); 436 if (e < 0) { 437 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); 438 fflush(stderr); 439 } 440 err: 441 return r; 442 } 443 444 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, 445 int data_queue_pairs, int cvq) 446 { 447 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 448 VirtioBusState *vbus = VIRTIO_BUS(qbus); 449 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 450 VirtIONet *n = VIRTIO_NET(dev); 451 NetClientState *peer; 452 int total_notifiers = data_queue_pairs * 2 + cvq; 453 int nvhosts = data_queue_pairs + cvq; 454 int i, r; 455 456 for (i = 0; i < nvhosts; i++) { 457 if (i < data_queue_pairs) { 458 peer = qemu_get_peer(ncs, i); 459 } else { 460 peer = qemu_get_peer(ncs, n->max_queue_pairs); 461 } 462 vhost_net_stop_one(get_vhost_net(peer), dev); 463 } 464 465 r = k->set_guest_notifiers(qbus->parent, total_notifiers, false); 466 if (r < 0) { 467 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); 468 fflush(stderr); 469 } 470 assert(r >= 0); 471 } 472 473 void vhost_net_cleanup(struct vhost_net *net) 474 { 475 vhost_dev_cleanup(&net->dev); 476 } 477 478 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) 479 { 480 const VhostOps *vhost_ops = net->dev.vhost_ops; 481 482 assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 483 assert(vhost_ops->vhost_migration_done); 484 485 return vhost_ops->vhost_migration_done(&net->dev, mac_addr); 486 } 487 488 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) 489 { 490 return vhost_virtqueue_pending(&net->dev, idx); 491 } 492 493 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, 494 int idx, bool mask) 495 { 496 vhost_virtqueue_mask(&net->dev, dev, idx, mask); 497 } 498 499 bool vhost_net_config_pending(VHostNetState *net) 500 { 501 return vhost_config_pending(&net->dev); 502 } 503 504 void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) 505 { 506 vhost_config_mask(&net->dev, dev, mask); 507 } 508 VHostNetState *get_vhost_net(NetClientState *nc) 509 { 510 VHostNetState *vhost_net = 0; 511 512 if (!nc) { 513 return 0; 514 } 515 516 switch (nc->info->type) { 517 case NET_CLIENT_DRIVER_TAP: 518 vhost_net = tap_get_vhost_net(nc); 519 /* 520 * tap_get_vhost_net() can return NULL if a tap net-device backend is 521 * created with 'vhost=off' option, 'vhostforce=off' or no vhost or 522 * vhostforce or vhostfd options at all. Please see net_init_tap_one(). 523 * Hence, we omit the assertion here. 524 */ 525 break; 526 #ifdef CONFIG_VHOST_NET_USER 527 case NET_CLIENT_DRIVER_VHOST_USER: 528 vhost_net = vhost_user_get_vhost_net(nc); 529 assert(vhost_net); 530 break; 531 #endif 532 #ifdef CONFIG_VHOST_NET_VDPA 533 case NET_CLIENT_DRIVER_VHOST_VDPA: 534 vhost_net = vhost_vdpa_get_vhost_net(nc); 535 assert(vhost_net); 536 break; 537 #endif 538 default: 539 break; 540 } 541 542 return vhost_net; 543 } 544 545 int vhost_set_vring_enable(NetClientState *nc, int enable) 546 { 547 VHostNetState *net = get_vhost_net(nc); 548 const VhostOps *vhost_ops = net->dev.vhost_ops; 549 550 /* 551 * vhost-vdpa network devices need to enable dataplane virtqueues after 552 * DRIVER_OK, so they can recover device state before starting dataplane. 553 * Because of that, we don't enable virtqueues here and leave it to 554 * net/vhost-vdpa.c. 555 */ 556 if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 557 return 0; 558 } 559 560 nc->vring_enable = enable; 561 562 if (vhost_ops && vhost_ops->vhost_set_vring_enable) { 563 return vhost_ops->vhost_set_vring_enable(&net->dev, enable); 564 } 565 566 return 0; 567 } 568 569 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) 570 { 571 const VhostOps *vhost_ops = net->dev.vhost_ops; 572 573 if (!vhost_ops->vhost_net_set_mtu) { 574 return 0; 575 } 576 577 return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); 578 } 579 580 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc, 581 int vq_index) 582 { 583 VHostNetState *net = get_vhost_net(nc->peer); 584 const VhostOps *vhost_ops = net->dev.vhost_ops; 585 struct vhost_vring_file file = { .fd = -1 }; 586 int idx; 587 588 /* should only be called after backend is connected */ 589 assert(vhost_ops); 590 591 idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); 592 593 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 594 file.index = idx; 595 int r = vhost_net_set_backend(&net->dev, &file); 596 assert(r >= 0); 597 } 598 599 vhost_virtqueue_stop(&net->dev, 600 vdev, 601 net->dev.vqs + idx, 602 net->dev.vq_index + idx); 603 } 604 605 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc, 606 int vq_index) 607 { 608 VHostNetState *net = get_vhost_net(nc->peer); 609 const VhostOps *vhost_ops = net->dev.vhost_ops; 610 struct vhost_vring_file file = { }; 611 int idx, r; 612 613 if (!net->dev.started) { 614 return -EBUSY; 615 } 616 617 /* should only be called after backend is connected */ 618 assert(vhost_ops); 619 620 idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); 621 622 r = vhost_virtqueue_start(&net->dev, 623 vdev, 624 net->dev.vqs + idx, 625 net->dev.vq_index + idx); 626 if (r < 0) { 627 goto err_start; 628 } 629 630 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 631 file.index = idx; 632 file.fd = net->backend; 633 r = vhost_net_set_backend(&net->dev, &file); 634 if (r < 0) { 635 r = -errno; 636 goto err_start; 637 } 638 } 639 640 return 0; 641 642 err_start: 643 error_report("Error when restarting the queue."); 644 645 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 646 file.fd = VHOST_FILE_UNBIND; 647 file.index = idx; 648 int ret = vhost_net_set_backend(&net->dev, &file); 649 assert(ret >= 0); 650 } 651 652 vhost_dev_stop(&net->dev, vdev, false); 653 654 return r; 655 } 656