/*
 * vhost-net support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "net/net.h"
#include "net/tap.h"
#include "net/vhost-user.h"
#include "net/vhost-vdpa.h"

#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"

#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>


#include "standard-headers/linux/virtio_ring.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-bus.h"
#include "linux-headers/linux/vhost.h"

/*
 * Filter @features through the backend's per-device feature-bit list,
 * returning only the bits the vhost backend can support.
 */
uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
{
    return vhost_get_features(&net->dev, net->feature_bits,
                              features);
}

/*
 * Read up to @config_len bytes of the backend's device config space
 * into @config.  Returns the backend's status code (<0 on error).
 */
int vhost_net_get_config(struct vhost_net *net, uint8_t *config,
                         uint32_t config_len)
{
    return vhost_dev_get_config(&net->dev, config, config_len, NULL);
}

/*
 * Write @size bytes of @data at @offset into the backend's device
 * config space.  @flags are passed through to the backend.
 */
int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
                         uint32_t offset, uint32_t size, uint32_t flags)
{
    return vhost_dev_set_config(&net->dev, data, offset, size, flags);
}

/*
 * Record the feature set acked by the guest.  The acked set is reset to
 * the backend's own features first, then the guest-acked bits (filtered
 * through feature_bits) are folded in by vhost_ack_features().
 */
void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
    net->dev.acked_features = net->dev.backend_features;
    vhost_ack_features(&net->dev, net->feature_bits, features);
}

/* Maximum number of queues the vhost backend reported. */
uint64_t vhost_net_get_max_queues(VHostNetState *net)
{
    return net->dev.max_queues;
}

/* Feature bits the guest has acked so far (see vhost_net_ack_features). */
uint64_t vhost_net_get_acked_features(VHostNetState *net)
{
    return net->dev.acked_features;
}

/*
 * Persist the acked features into the net client so they survive a
 * backend reconnect.  Only meaningful for vhost-user backends; a no-op
 * for every other driver type.
 */
void vhost_net_save_acked_features(NetClientState *nc)
{
#ifdef CONFIG_VHOST_NET_USER
    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_user_save_acked_features(nc);
    }
#endif
}

/*
 * Unwire the host notifiers (ioeventfds) for the first @nvhosts vhost
 * devices backing @dev, then release the matching ioeventfd grabs.
 * Data queue pairs are peers 0..data_queue_pairs-1; any index beyond
 * that refers to the control virtqueue's peer at n->max_queue_pairs.
 */
static void vhost_net_disable_notifiers_nvhosts(VirtIODevice *dev,
            NetClientState *ncs, int data_queue_pairs, int nvhosts)
{
    VirtIONet *n = VIRTIO_NET(dev);
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    struct vhost_net *net;
    struct vhost_dev *hdev;
    int r, i, j;
    NetClientState *peer;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            /* Control virtqueue peer lives past the data queue pairs. */
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;
        for (j = 0; j < hdev->nvqs; j++) {
            r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j,
                                             false);
            if (r < 0) {
                /* Report before aborting so the failure is diagnosable. */
                error_report("vhost %d VQ %d notifier cleanup failed: %d",
                             i, j, -r);
            }
            assert(r >= 0);
        }
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    /* Second pass: close the notifier fds and drop the ioeventfd grabs. */
    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;
        for (j = 0; j < hdev->nvqs; j++) {
            virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j);
        }
        /* One release per vhost, pairing the grab in enable_notifiers. */
        virtio_device_release_ioeventfd(dev);
    }
}

/*
 * Wire up host notifiers (ioeventfds) for all data queue pairs plus the
 * optional control virtqueue, so guest kicks reach vhost without a QEMU
 * exit.  Returns 0 on success, or a negative errno with all partially
 * installed notifiers and ioeventfd grabs rolled back.
 */
static int vhost_net_enable_notifiers(VirtIODevice *dev,
            NetClientState *ncs, int data_queue_pairs, int cvq)
{
    VirtIONet *n = VIRTIO_NET(dev);
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    struct vhost_dev *hdev;
    int r, i, j, k;
    NetClientState *peer;

    /*
     * We will pass the notifiers to the kernel, make sure that QEMU
     * doesn't interfere.
     */
    for (i = 0; i < nvhosts; i++) {
        r = virtio_device_grab_ioeventfd(dev);
        if (r < 0) {
            error_report("vhost %d binding does not support host notifiers", i);
            /* Undo the i grabs that already succeeded. */
            for (k = 0; k < i; k++) {
                virtio_device_release_ioeventfd(dev);
            }
            return r;
        }
    }

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            /* Control virtqueue peer. */
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;

        for (j = 0; j < hdev->nvqs; j++) {
            r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j,
                                             true);
            if (r < 0) {
                error_report("vhost %d VQ %d notifier binding failed: %d",
                             i, j, -r);
                /* Commit before tearing down so fds are open for cleanup. */
                memory_region_transaction_commit();
                vhost_dev_disable_notifiers_nvqs(hdev, dev, j);
                goto fail_nvhosts;
            }
        }
    }

    memory_region_transaction_commit();

    return 0;
fail_nvhosts:
    /* Fully unwind the i vhosts whose notifiers were all installed. */
    vhost_net_disable_notifiers_nvhosts(dev, ncs, data_queue_pairs, i);
    /*
     * This for loop starts from i+1, not i, because the i-th ioeventfd
     * has already been released in vhost_dev_disable_notifiers_nvqs().
     */
    for (k = i + 1; k < nvhosts; k++) {
        virtio_device_release_ioeventfd(dev);
    }

    return r;
}

/*
 * Stop processing guest IO notifications in qemu.
 * Start processing them in vhost in kernel.
214 */ 215 static void vhost_net_disable_notifiers(VirtIODevice *dev, 216 NetClientState *ncs, int data_queue_pairs, int cvq) 217 { 218 vhost_net_disable_notifiers_nvhosts(dev, ncs, data_queue_pairs, 219 data_queue_pairs + cvq); 220 } 221 222 static int vhost_net_get_fd(NetClientState *backend) 223 { 224 switch (backend->info->type) { 225 case NET_CLIENT_DRIVER_TAP: 226 return tap_get_fd(backend); 227 default: 228 fprintf(stderr, "vhost-net requires tap backend\n"); 229 return -ENOSYS; 230 } 231 } 232 233 struct vhost_net *vhost_net_init(VhostNetOptions *options) 234 { 235 int r; 236 bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; 237 struct vhost_net *net = g_new0(struct vhost_net, 1); 238 uint64_t features = 0; 239 Error *local_err = NULL; 240 241 if (!options->net_backend) { 242 fprintf(stderr, "vhost-net requires net backend to be setup\n"); 243 goto fail; 244 } 245 net->nc = options->net_backend; 246 net->dev.nvqs = options->nvqs; 247 net->feature_bits = options->feature_bits; 248 249 net->dev.max_queues = 1; 250 net->dev.vqs = net->vqs; 251 252 if (backend_kernel) { 253 r = vhost_net_get_fd(options->net_backend); 254 if (r < 0) { 255 goto fail; 256 } 257 net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend) 258 ? 
0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR); 259 net->backend = r; 260 net->dev.protocol_features = 0; 261 } else { 262 net->dev.backend_features = 0; 263 net->dev.protocol_features = 0; 264 net->backend = -1; 265 266 /* vhost-user needs vq_index to initiate a specific queue pair */ 267 net->dev.vq_index = net->nc->queue_index * net->dev.nvqs; 268 } 269 270 r = vhost_dev_init(&net->dev, options->opaque, 271 options->backend_type, options->busyloop_timeout, 272 &local_err); 273 if (r < 0) { 274 error_report_err(local_err); 275 goto fail; 276 } 277 if (backend_kernel) { 278 if (!qemu_has_vnet_hdr_len(options->net_backend, 279 sizeof(struct virtio_net_hdr_mrg_rxbuf))) { 280 net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); 281 } 282 if (~net->dev.features & net->dev.backend_features) { 283 fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 284 " for backend\n", 285 (uint64_t)(~net->dev.features & net->dev.backend_features)); 286 goto fail; 287 } 288 } 289 290 /* Set sane init value. Override when guest acks. 
*/ 291 #ifdef CONFIG_VHOST_NET_USER 292 if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 293 features = vhost_user_get_acked_features(net->nc); 294 if (~net->dev.features & features) { 295 fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 296 " for backend\n", 297 (uint64_t)(~net->dev.features & features)); 298 goto fail; 299 } 300 } 301 #endif 302 303 vhost_net_ack_features(net, features); 304 305 return net; 306 307 fail: 308 vhost_dev_cleanup(&net->dev); 309 g_free(net); 310 return NULL; 311 } 312 313 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index, 314 int vq_index_end) 315 { 316 net->dev.vq_index = vq_index; 317 net->dev.vq_index_end = vq_index_end; 318 } 319 320 static int vhost_net_start_one(struct vhost_net *net, 321 VirtIODevice *dev) 322 { 323 struct vhost_vring_file file = { }; 324 int r; 325 326 if (net->nc->info->start) { 327 r = net->nc->info->start(net->nc); 328 if (r < 0) { 329 return r; 330 } 331 } 332 333 r = vhost_dev_start(&net->dev, dev, false); 334 if (r < 0) { 335 goto fail_start; 336 } 337 338 if (net->nc->info->poll) { 339 net->nc->info->poll(net->nc, false); 340 } 341 342 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 343 qemu_set_fd_handler(net->backend, NULL, NULL, NULL); 344 file.fd = net->backend; 345 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 346 if (!virtio_queue_enabled(dev, net->dev.vq_index + 347 file.index)) { 348 /* Queue might not be ready for start */ 349 continue; 350 } 351 r = vhost_net_set_backend(&net->dev, &file); 352 if (r < 0) { 353 r = -errno; 354 goto fail; 355 } 356 } 357 } 358 359 if (net->nc->info->load) { 360 r = net->nc->info->load(net->nc); 361 if (r < 0) { 362 goto fail; 363 } 364 } 365 return 0; 366 fail: 367 file.fd = -1; 368 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 369 while (file.index-- > 0) { 370 if (!virtio_queue_enabled(dev, net->dev.vq_index + 371 file.index)) { 372 /* Queue might not be ready for start */ 373 continue; 374 } 
375 int ret = vhost_net_set_backend(&net->dev, &file); 376 assert(ret >= 0); 377 } 378 } 379 if (net->nc->info->poll) { 380 net->nc->info->poll(net->nc, true); 381 } 382 vhost_dev_stop(&net->dev, dev, false); 383 fail_start: 384 return r; 385 } 386 387 static void vhost_net_stop_one(struct vhost_net *net, 388 VirtIODevice *dev) 389 { 390 struct vhost_vring_file file = { .fd = -1 }; 391 392 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 393 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 394 int r = vhost_net_set_backend(&net->dev, &file); 395 assert(r >= 0); 396 } 397 } 398 if (net->nc->info->poll) { 399 net->nc->info->poll(net->nc, true); 400 } 401 vhost_dev_stop(&net->dev, dev, false); 402 if (net->nc->info->stop) { 403 net->nc->info->stop(net->nc); 404 } 405 } 406 407 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, 408 int data_queue_pairs, int cvq) 409 { 410 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 411 VirtioBusState *vbus = VIRTIO_BUS(qbus); 412 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 413 int total_notifiers = data_queue_pairs * 2 + cvq; 414 VirtIONet *n = VIRTIO_NET(dev); 415 int nvhosts = data_queue_pairs + cvq; 416 struct vhost_net *net; 417 int r, e, i, index_end = data_queue_pairs * 2; 418 NetClientState *peer; 419 420 if (cvq) { 421 index_end += 1; 422 } 423 424 if (!k->set_guest_notifiers) { 425 error_report("binding does not support guest notifiers"); 426 return -ENOSYS; 427 } 428 429 for (i = 0; i < nvhosts; i++) { 430 431 if (i < data_queue_pairs) { 432 peer = qemu_get_peer(ncs, i); 433 } else { /* Control Virtqueue */ 434 peer = qemu_get_peer(ncs, n->max_queue_pairs); 435 } 436 437 net = get_vhost_net(peer); 438 vhost_net_set_vq_index(net, i * 2, index_end); 439 440 /* Suppress the masking guest notifiers on vhost user 441 * because vhost user doesn't interrupt masking/unmasking 442 * properly. 
443 */ 444 if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 445 dev->use_guest_notifier_mask = false; 446 } 447 } 448 449 r = vhost_net_enable_notifiers(dev, ncs, data_queue_pairs, cvq); 450 if (r < 0) { 451 error_report("Error enabling host notifiers: %d", -r); 452 goto err; 453 } 454 455 r = k->set_guest_notifiers(qbus->parent, total_notifiers, true); 456 if (r < 0) { 457 error_report("Error binding guest notifier: %d", -r); 458 goto err_host_notifiers; 459 } 460 461 for (i = 0; i < nvhosts; i++) { 462 if (i < data_queue_pairs) { 463 peer = qemu_get_peer(ncs, i); 464 } else { 465 peer = qemu_get_peer(ncs, n->max_queue_pairs); 466 } 467 468 if (peer->vring_enable) { 469 /* restore vring enable state */ 470 r = vhost_net_set_vring_enable(peer, peer->vring_enable); 471 472 if (r < 0) { 473 goto err_guest_notifiers; 474 } 475 } 476 477 r = vhost_net_start_one(get_vhost_net(peer), dev); 478 if (r < 0) { 479 goto err_guest_notifiers; 480 } 481 } 482 483 return 0; 484 485 err_guest_notifiers: 486 while (--i >= 0) { 487 peer = qemu_get_peer(ncs, i < data_queue_pairs ? 
488 i : n->max_queue_pairs); 489 vhost_net_stop_one(get_vhost_net(peer), dev); 490 } 491 e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); 492 if (e < 0) { 493 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); 494 fflush(stderr); 495 } 496 err_host_notifiers: 497 vhost_net_disable_notifiers(dev, ncs, data_queue_pairs, cvq); 498 err: 499 return r; 500 } 501 502 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, 503 int data_queue_pairs, int cvq) 504 { 505 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 506 VirtioBusState *vbus = VIRTIO_BUS(qbus); 507 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 508 VirtIONet *n = VIRTIO_NET(dev); 509 NetClientState *peer; 510 int total_notifiers = data_queue_pairs * 2 + cvq; 511 int nvhosts = data_queue_pairs + cvq; 512 int i, r; 513 514 for (i = 0; i < nvhosts; i++) { 515 if (i < data_queue_pairs) { 516 peer = qemu_get_peer(ncs, i); 517 } else { 518 peer = qemu_get_peer(ncs, n->max_queue_pairs); 519 } 520 vhost_net_stop_one(get_vhost_net(peer), dev); 521 } 522 523 r = k->set_guest_notifiers(qbus->parent, total_notifiers, false); 524 if (r < 0) { 525 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); 526 fflush(stderr); 527 } 528 assert(r >= 0); 529 530 vhost_net_disable_notifiers(dev, ncs, data_queue_pairs, cvq); 531 } 532 533 void vhost_net_cleanup(struct vhost_net *net) 534 { 535 vhost_dev_cleanup(&net->dev); 536 } 537 538 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) 539 { 540 const VhostOps *vhost_ops = net->dev.vhost_ops; 541 542 assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 543 assert(vhost_ops->vhost_migration_done); 544 545 return vhost_ops->vhost_migration_done(&net->dev, mac_addr); 546 } 547 548 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) 549 { 550 return vhost_virtqueue_pending(&net->dev, idx); 551 } 552 553 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, 554 int idx, bool 
mask) 555 { 556 vhost_virtqueue_mask(&net->dev, dev, idx, mask); 557 } 558 559 bool vhost_net_config_pending(VHostNetState *net) 560 { 561 return vhost_config_pending(&net->dev); 562 } 563 564 void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) 565 { 566 vhost_config_mask(&net->dev, dev, mask); 567 } 568 569 VHostNetState *get_vhost_net(NetClientState *nc) 570 { 571 if (!nc) { 572 return 0; 573 } 574 575 if (nc->info->get_vhost_net) { 576 return nc->info->get_vhost_net(nc); 577 } 578 579 return NULL; 580 } 581 582 int vhost_net_set_vring_enable(NetClientState *nc, int enable) 583 { 584 VHostNetState *net = get_vhost_net(nc); 585 const VhostOps *vhost_ops = net->dev.vhost_ops; 586 587 /* 588 * vhost-vdpa network devices need to enable dataplane virtqueues after 589 * DRIVER_OK, so they can recover device state before starting dataplane. 590 * Because of that, we don't enable virtqueues here and leave it to 591 * net/vhost-vdpa.c. 592 */ 593 if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 594 return 0; 595 } 596 597 nc->vring_enable = enable; 598 599 if (vhost_ops && vhost_ops->vhost_set_vring_enable) { 600 return vhost_ops->vhost_set_vring_enable(&net->dev, enable); 601 } 602 603 return 0; 604 } 605 606 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) 607 { 608 const VhostOps *vhost_ops = net->dev.vhost_ops; 609 610 if (!vhost_ops->vhost_net_set_mtu) { 611 return 0; 612 } 613 614 return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); 615 } 616 617 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc, 618 int vq_index) 619 { 620 VHostNetState *net = get_vhost_net(nc->peer); 621 const VhostOps *vhost_ops = net->dev.vhost_ops; 622 struct vhost_vring_file file = { .fd = -1 }; 623 int idx; 624 625 /* should only be called after backend is connected */ 626 assert(vhost_ops); 627 628 idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); 629 630 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 631 file.index = 
idx; 632 int r = vhost_net_set_backend(&net->dev, &file); 633 assert(r >= 0); 634 } 635 636 vhost_virtqueue_stop(&net->dev, 637 vdev, 638 net->dev.vqs + idx, 639 net->dev.vq_index + idx); 640 } 641 642 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc, 643 int vq_index) 644 { 645 VHostNetState *net = get_vhost_net(nc->peer); 646 const VhostOps *vhost_ops = net->dev.vhost_ops; 647 struct vhost_vring_file file = { }; 648 int idx, r; 649 650 if (!net->dev.started) { 651 return -EBUSY; 652 } 653 654 /* should only be called after backend is connected */ 655 assert(vhost_ops); 656 657 idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); 658 659 r = vhost_virtqueue_start(&net->dev, 660 vdev, 661 net->dev.vqs + idx, 662 net->dev.vq_index + idx); 663 if (r < 0) { 664 goto err_start; 665 } 666 667 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 668 file.index = idx; 669 file.fd = net->backend; 670 r = vhost_net_set_backend(&net->dev, &file); 671 if (r < 0) { 672 r = -errno; 673 goto err_start; 674 } 675 } 676 677 return 0; 678 679 err_start: 680 error_report("Error when restarting the queue."); 681 682 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 683 file.fd = VHOST_FILE_UNBIND; 684 file.index = idx; 685 int ret = vhost_net_set_backend(&net->dev, &file); 686 assert(ret >= 0); 687 } 688 689 vhost_dev_stop(&net->dev, vdev, false); 690 691 return r; 692 } 693