1 /* 2 * vhost-net support 3 * 4 * Copyright Red Hat, Inc. 2010 5 * 6 * Authors: 7 * Michael S. Tsirkin <mst@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu/osdep.h" 17 #include "net/net.h" 18 #include "net/tap.h" 19 #include "net/vhost-user.h" 20 #include "net/vhost-vdpa.h" 21 22 #include "standard-headers/linux/vhost_types.h" 23 #include "hw/virtio/virtio-net.h" 24 #include "net/vhost_net.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "qemu/main-loop.h" 28 29 #include <sys/socket.h> 30 #include <net/if.h> 31 #include <netinet/in.h> 32 33 34 #include "standard-headers/linux/virtio_ring.h" 35 #include "hw/virtio/vhost.h" 36 #include "hw/virtio/virtio-bus.h" 37 #include "linux-headers/linux/vhost.h" 38 39 40 /* Features supported by host kernel. */ 41 static const int kernel_feature_bits[] = { 42 VIRTIO_F_NOTIFY_ON_EMPTY, 43 VIRTIO_RING_F_INDIRECT_DESC, 44 VIRTIO_RING_F_EVENT_IDX, 45 VIRTIO_NET_F_MRG_RXBUF, 46 VIRTIO_F_VERSION_1, 47 VIRTIO_NET_F_MTU, 48 VIRTIO_F_IOMMU_PLATFORM, 49 VIRTIO_F_RING_PACKED, 50 VIRTIO_F_RING_RESET, 51 VIRTIO_F_NOTIFICATION_DATA, 52 VIRTIO_NET_F_HASH_REPORT, 53 VHOST_INVALID_FEATURE_BIT 54 }; 55 56 /* Features supported by others. */ 57 static const int user_feature_bits[] = { 58 VIRTIO_F_NOTIFY_ON_EMPTY, 59 VIRTIO_F_NOTIFICATION_DATA, 60 VIRTIO_RING_F_INDIRECT_DESC, 61 VIRTIO_RING_F_EVENT_IDX, 62 63 VIRTIO_F_ANY_LAYOUT, 64 VIRTIO_F_VERSION_1, 65 VIRTIO_NET_F_CSUM, 66 VIRTIO_NET_F_GUEST_CSUM, 67 VIRTIO_NET_F_GSO, 68 VIRTIO_NET_F_GUEST_TSO4, 69 VIRTIO_NET_F_GUEST_TSO6, 70 VIRTIO_NET_F_GUEST_ECN, 71 VIRTIO_NET_F_GUEST_UFO, 72 VIRTIO_NET_F_HOST_TSO4, 73 VIRTIO_NET_F_HOST_TSO6, 74 VIRTIO_NET_F_HOST_ECN, 75 VIRTIO_NET_F_HOST_UFO, 76 VIRTIO_NET_F_MRG_RXBUF, 77 VIRTIO_NET_F_MTU, 78 VIRTIO_F_IOMMU_PLATFORM, 79 VIRTIO_F_RING_PACKED, 80 VIRTIO_F_RING_RESET, 81 VIRTIO_NET_F_RSS, 82 VIRTIO_NET_F_HASH_REPORT, 83 VIRTIO_NET_F_GUEST_USO4, 84 VIRTIO_NET_F_GUEST_USO6, 85 VIRTIO_NET_F_HOST_USO, 86 87 /* This bit implies RARP isn't sent by QEMU out of band */ 88 VIRTIO_NET_F_GUEST_ANNOUNCE, 89 90 VIRTIO_NET_F_MQ, 91 92 VHOST_INVALID_FEATURE_BIT 93 }; 94 95 static const int *vhost_net_get_feature_bits(struct vhost_net *net) 96 { 97 const int *feature_bits = 0; 98 99 switch (net->nc->info->type) { 100 case NET_CLIENT_DRIVER_TAP: 101 feature_bits = kernel_feature_bits; 102 break; 103 case NET_CLIENT_DRIVER_VHOST_USER: 104 feature_bits = user_feature_bits; 105 break; 106 #ifdef CONFIG_VHOST_NET_VDPA 107 case NET_CLIENT_DRIVER_VHOST_VDPA: 108 feature_bits = vdpa_feature_bits; 109 break; 110 #endif 111 default: 112 error_report("Feature bits not defined for this type: %d", 113 net->nc->info->type); 114 break; 115 } 116 117 return feature_bits; 118 } 119 120 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) 121 { 122 return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), 123 features); 124 } 125 int vhost_net_get_config(struct vhost_net *net, uint8_t *config, 126 uint32_t config_len) 127 { 128 return vhost_dev_get_config(&net->dev, config, config_len, NULL); 129 } 130 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, 131 uint32_t offset, uint32_t size, uint32_t flags) 132 { 133 return vhost_dev_set_config(&net->dev, data, offset, size, flags); 134 } 135 136 void vhost_net_ack_features(struct vhost_net *net, uint64_t features) 137 { 138 net->dev.acked_features = net->dev.backend_features; 139 vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); 140 } 141 142 uint64_t vhost_net_get_max_queues(VHostNetState *net) 143 { 144 return net->dev.max_queues; 145 } 146 147 uint64_t vhost_net_get_acked_features(VHostNetState *net) 148 { 149 return net->dev.acked_features; 150 } 151 152 void vhost_net_save_acked_features(NetClientState *nc) 153 { 154 #ifdef CONFIG_VHOST_NET_USER 155 if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 156 vhost_user_save_acked_features(nc); 157 } 158 #endif 159 } 160 161 static int vhost_net_get_fd(NetClientState *backend) 162 { 163 switch (backend->info->type) { 164 case NET_CLIENT_DRIVER_TAP: 165 return tap_get_fd(backend); 166 default: 167 fprintf(stderr, "vhost-net requires tap backend\n"); 168 return -ENOSYS; 169 } 170 } 171 172 struct vhost_net *vhost_net_init(VhostNetOptions *options) 173 { 174 int r; 175 bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; 176 struct vhost_net *net = g_new0(struct vhost_net, 1); 177 uint64_t features = 0; 178 Error *local_err = NULL; 179 180 if (!options->net_backend) { 181 fprintf(stderr, "vhost-net requires net backend to be setup\n"); 182 goto fail; 183 } 184 net->nc = options->net_backend; 185 net->dev.nvqs = options->nvqs; 186 187 net->dev.max_queues = 1; 188 net->dev.vqs = net->vqs; 189 190 if (backend_kernel) { 191 r = vhost_net_get_fd(options->net_backend); 192 if (r < 0) { 193 goto fail; 194 } 195 net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend) 196 ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR); 197 net->backend = r; 198 net->dev.protocol_features = 0; 199 } else { 200 net->dev.backend_features = 0; 201 net->dev.protocol_features = 0; 202 net->backend = -1; 203 204 /* vhost-user needs vq_index to initiate a specific queue pair */ 205 net->dev.vq_index = net->nc->queue_index * net->dev.nvqs; 206 } 207 208 r = vhost_dev_init(&net->dev, options->opaque, 209 options->backend_type, options->busyloop_timeout, 210 &local_err); 211 if (r < 0) { 212 error_report_err(local_err); 213 goto fail; 214 } 215 if (backend_kernel) { 216 if (!qemu_has_vnet_hdr_len(options->net_backend, 217 sizeof(struct virtio_net_hdr_mrg_rxbuf))) { 218 net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); 219 } 220 if (~net->dev.features & net->dev.backend_features) { 221 fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 222 " for backend\n", 223 (uint64_t)(~net->dev.features & net->dev.backend_features)); 224 goto fail; 225 } 226 } 227 228 /* Set sane init value. Override when guest acks. */ 229 #ifdef CONFIG_VHOST_NET_USER 230 if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 231 features = vhost_user_get_acked_features(net->nc); 232 if (~net->dev.features & features) { 233 fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 234 " for backend\n", 235 (uint64_t)(~net->dev.features & features)); 236 goto fail; 237 } 238 } 239 #endif 240 241 vhost_net_ack_features(net, features); 242 243 return net; 244 245 fail: 246 vhost_dev_cleanup(&net->dev); 247 g_free(net); 248 return NULL; 249 } 250 251 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index, 252 int vq_index_end) 253 { 254 net->dev.vq_index = vq_index; 255 net->dev.vq_index_end = vq_index_end; 256 } 257 258 static int vhost_net_start_one(struct vhost_net *net, 259 VirtIODevice *dev) 260 { 261 struct vhost_vring_file file = { }; 262 int r; 263 264 if (net->nc->info->start) { 265 r = net->nc->info->start(net->nc); 266 if (r < 0) { 267 return r; 268 } 269 } 270 271 r = vhost_dev_enable_notifiers(&net->dev, dev); 272 if (r < 0) { 273 goto fail_notifiers; 274 } 275 276 r = vhost_dev_start(&net->dev, dev, false); 277 if (r < 0) { 278 goto fail_start; 279 } 280 281 if (net->nc->info->poll) { 282 net->nc->info->poll(net->nc, false); 283 } 284 285 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 286 qemu_set_fd_handler(net->backend, NULL, NULL, NULL); 287 file.fd = net->backend; 288 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 289 if (!virtio_queue_enabled(dev, net->dev.vq_index + 290 file.index)) { 291 /* Queue might not be ready for start */ 292 continue; 293 } 294 r = vhost_net_set_backend(&net->dev, &file); 295 if (r < 0) { 296 r = -errno; 297 goto fail; 298 } 299 } 300 } 301 302 if (net->nc->info->load) { 303 r = net->nc->info->load(net->nc); 304 if (r < 0) { 305 goto fail; 306 } 307 } 308 return 0; 309 fail: 310 file.fd = -1; 311 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 312 while (file.index-- > 0) { 313 if (!virtio_queue_enabled(dev, net->dev.vq_index + 314 file.index)) { 315 /* Queue might not be ready for start */ 316 continue; 317 } 318 int ret = vhost_net_set_backend(&net->dev, &file); 319 assert(ret >= 0); 320 } 321 } 322 if (net->nc->info->poll) { 323 net->nc->info->poll(net->nc, true); 324 } 325 vhost_dev_stop(&net->dev, dev, false); 326 fail_start: 327 vhost_dev_disable_notifiers(&net->dev, dev); 328 fail_notifiers: 329 return r; 330 } 331 332 static void vhost_net_stop_one(struct vhost_net *net, 333 VirtIODevice *dev) 334 { 335 struct vhost_vring_file file = { .fd = -1 }; 336 337 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 338 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 339 int r = vhost_net_set_backend(&net->dev, &file); 340 assert(r >= 0); 341 } 342 } 343 if (net->nc->info->poll) { 344 net->nc->info->poll(net->nc, true); 345 } 346 vhost_dev_stop(&net->dev, dev, false); 347 if (net->nc->info->stop) { 348 net->nc->info->stop(net->nc); 349 } 350 vhost_dev_disable_notifiers(&net->dev, dev); 351 } 352 353 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, 354 int data_queue_pairs, int cvq) 355 { 356 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 357 VirtioBusState *vbus = VIRTIO_BUS(qbus); 358 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 359 int total_notifiers = data_queue_pairs * 2 + cvq; 360 VirtIONet *n = VIRTIO_NET(dev); 361 int nvhosts = data_queue_pairs + cvq; 362 struct vhost_net *net; 363 int r, e, i, index_end = data_queue_pairs * 2; 364 NetClientState *peer; 365 366 if (cvq) { 367 index_end += 1; 368 } 369 370 if (!k->set_guest_notifiers) { 371 error_report("binding does not support guest notifiers"); 372 return -ENOSYS; 373 } 374 375 for (i = 0; i < nvhosts; i++) { 376 377 if (i < data_queue_pairs) { 378 peer = qemu_get_peer(ncs, i); 379 } else { /* Control Virtqueue */ 380 peer = qemu_get_peer(ncs, n->max_queue_pairs); 381 } 382 383 net = get_vhost_net(peer); 384 vhost_net_set_vq_index(net, i * 2, index_end); 385 386 /* Suppress the masking guest notifiers on vhost user 387 * because vhost user doesn't interrupt masking/unmasking 388 * properly. 389 */ 390 if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 391 dev->use_guest_notifier_mask = false; 392 } 393 } 394 395 r = k->set_guest_notifiers(qbus->parent, total_notifiers, true); 396 if (r < 0) { 397 error_report("Error binding guest notifier: %d", -r); 398 goto err; 399 } 400 401 for (i = 0; i < nvhosts; i++) { 402 if (i < data_queue_pairs) { 403 peer = qemu_get_peer(ncs, i); 404 } else { 405 peer = qemu_get_peer(ncs, n->max_queue_pairs); 406 } 407 408 if (peer->vring_enable) { 409 /* restore vring enable state */ 410 r = vhost_set_vring_enable(peer, peer->vring_enable); 411 412 if (r < 0) { 413 goto err_start; 414 } 415 } 416 417 r = vhost_net_start_one(get_vhost_net(peer), dev); 418 if (r < 0) { 419 goto err_start; 420 } 421 } 422 423 return 0; 424 425 err_start: 426 while (--i >= 0) { 427 peer = qemu_get_peer(ncs, i < data_queue_pairs ? 428 i : n->max_queue_pairs); 429 vhost_net_stop_one(get_vhost_net(peer), dev); 430 } 431 e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); 432 if (e < 0) { 433 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); 434 fflush(stderr); 435 } 436 err: 437 return r; 438 } 439 440 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, 441 int data_queue_pairs, int cvq) 442 { 443 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 444 VirtioBusState *vbus = VIRTIO_BUS(qbus); 445 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 446 VirtIONet *n = VIRTIO_NET(dev); 447 NetClientState *peer; 448 int total_notifiers = data_queue_pairs * 2 + cvq; 449 int nvhosts = data_queue_pairs + cvq; 450 int i, r; 451 452 for (i = 0; i < nvhosts; i++) { 453 if (i < data_queue_pairs) { 454 peer = qemu_get_peer(ncs, i); 455 } else { 456 peer = qemu_get_peer(ncs, n->max_queue_pairs); 457 } 458 vhost_net_stop_one(get_vhost_net(peer), dev); 459 } 460 461 r = k->set_guest_notifiers(qbus->parent, total_notifiers, false); 462 if (r < 0) { 463 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); 464 fflush(stderr); 465 } 466 assert(r >= 0); 467 } 468 469 void vhost_net_cleanup(struct vhost_net *net) 470 { 471 vhost_dev_cleanup(&net->dev); 472 } 473 474 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) 475 { 476 const VhostOps *vhost_ops = net->dev.vhost_ops; 477 478 assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 479 assert(vhost_ops->vhost_migration_done); 480 481 return vhost_ops->vhost_migration_done(&net->dev, mac_addr); 482 } 483 484 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) 485 { 486 return vhost_virtqueue_pending(&net->dev, idx); 487 } 488 489 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, 490 int idx, bool mask) 491 { 492 vhost_virtqueue_mask(&net->dev, dev, idx, mask); 493 } 494 495 bool vhost_net_config_pending(VHostNetState *net) 496 { 497 return vhost_config_pending(&net->dev); 498 } 499 500 void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) 501 { 502 vhost_config_mask(&net->dev, dev, mask); 503 } 504 VHostNetState *get_vhost_net(NetClientState *nc) 505 { 506 VHostNetState *vhost_net = 0; 507 508 if (!nc) { 509 return 0; 510 } 511 512 switch (nc->info->type) { 513 case NET_CLIENT_DRIVER_TAP: 514 vhost_net = tap_get_vhost_net(nc); 515 /* 516 * tap_get_vhost_net() can return NULL if a tap net-device backend is 517 * created with 'vhost=off' option, 'vhostforce=off' or no vhost or 518 * vhostforce or vhostfd options at all. Please see net_init_tap_one(). 519 * Hence, we omit the assertion here. 520 */ 521 break; 522 #ifdef CONFIG_VHOST_NET_USER 523 case NET_CLIENT_DRIVER_VHOST_USER: 524 vhost_net = vhost_user_get_vhost_net(nc); 525 assert(vhost_net); 526 break; 527 #endif 528 #ifdef CONFIG_VHOST_NET_VDPA 529 case NET_CLIENT_DRIVER_VHOST_VDPA: 530 vhost_net = vhost_vdpa_get_vhost_net(nc); 531 assert(vhost_net); 532 break; 533 #endif 534 default: 535 break; 536 } 537 538 return vhost_net; 539 } 540 541 int vhost_set_vring_enable(NetClientState *nc, int enable) 542 { 543 VHostNetState *net = get_vhost_net(nc); 544 const VhostOps *vhost_ops = net->dev.vhost_ops; 545 546 /* 547 * vhost-vdpa network devices need to enable dataplane virtqueues after 548 * DRIVER_OK, so they can recover device state before starting dataplane. 549 * Because of that, we don't enable virtqueues here and leave it to 550 * net/vhost-vdpa.c. 551 */ 552 if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 553 return 0; 554 } 555 556 nc->vring_enable = enable; 557 558 if (vhost_ops && vhost_ops->vhost_set_vring_enable) { 559 return vhost_ops->vhost_set_vring_enable(&net->dev, enable); 560 } 561 562 return 0; 563 } 564 565 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) 566 { 567 const VhostOps *vhost_ops = net->dev.vhost_ops; 568 569 if (!vhost_ops->vhost_net_set_mtu) { 570 return 0; 571 } 572 573 return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); 574 } 575 576 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc, 577 int vq_index) 578 { 579 VHostNetState *net = get_vhost_net(nc->peer); 580 const VhostOps *vhost_ops = net->dev.vhost_ops; 581 struct vhost_vring_file file = { .fd = -1 }; 582 int idx; 583 584 /* should only be called after backend is connected */ 585 assert(vhost_ops); 586 587 idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); 588 589 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 590 file.index = idx; 591 int r = vhost_net_set_backend(&net->dev, &file); 592 assert(r >= 0); 593 } 594 595 vhost_virtqueue_stop(&net->dev, 596 vdev, 597 net->dev.vqs + idx, 598 net->dev.vq_index + idx); 599 } 600 601 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc, 602 int vq_index) 603 { 604 VHostNetState *net = get_vhost_net(nc->peer); 605 const VhostOps *vhost_ops = net->dev.vhost_ops; 606 struct vhost_vring_file file = { }; 607 int idx, r; 608 609 if (!net->dev.started) { 610 return -EBUSY; 611 } 612 613 /* should only be called after backend is connected */ 614 assert(vhost_ops); 615 616 idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); 617 618 r = vhost_virtqueue_start(&net->dev, 619 vdev, 620 net->dev.vqs + idx, 621 net->dev.vq_index + idx); 622 if (r < 0) { 623 goto err_start; 624 } 625 626 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 627 file.index = idx; 628 file.fd = net->backend; 629 r = vhost_net_set_backend(&net->dev, &file); 630 if (r < 0) { 631 r = -errno; 632 goto err_start; 633 } 634 } 635 636 return 0; 637 638 err_start: 639 error_report("Error when restarting the queue."); 640 641 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 642 file.fd = VHOST_FILE_UNBIND; 643 file.index = idx; 644 int ret = vhost_net_set_backend(&net->dev, &file); 645 assert(ret >= 0); 646 } 647 648 vhost_dev_stop(&net->dev, vdev, false); 649 650 return r; 651 } 652