/*
 * vhost-net support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "net/net.h"
#include "net/tap.h"
#include "net/vhost-user.h"
#include "net/vhost-vdpa.h"

#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"

#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>

#include "standard-headers/linux/virtio_ring.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-bus.h"
#include "linux-headers/linux/vhost.h"

uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
{
    return vhost_get_features(&net->dev, net->feature_bits,
                              features);
}

int vhost_net_get_config(struct vhost_net *net, uint8_t *config,
                         uint32_t config_len)
{
    return vhost_dev_get_config(&net->dev, config, config_len, NULL);
}

int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
                         uint32_t offset, uint32_t size, uint32_t flags)
{
    return vhost_dev_set_config(&net->dev, data, offset, size, flags);
}

void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
    net->dev.acked_features = net->dev.backend_features;
    vhost_ack_features(&net->dev, net->feature_bits, features);
}

uint64_t vhost_net_get_max_queues(VHostNetState *net)
{
    return net->dev.max_queues;
}

uint64_t vhost_net_get_acked_features(VHostNetState *net)
{
    return net->dev.acked_features;
}

void vhost_net_save_acked_features(NetClientState *nc)
{
#ifdef CONFIG_VHOST_NET_USER
    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_user_save_acked_features(nc);
    }
#endif
}
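
/*
 * Release the host notifiers of the first @nvhosts vhost devices in
 * @ncs: unassign each notifier inside one memory region transaction,
 * then clean them up and hand the ioeventfds back to QEMU.
 */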
static void vhost_net_disable_notifiers_nvhosts(VirtIODevice *dev,
                NetClientState *ncs, int data_queue_pairs, int nvhosts)
{
    VirtIONet *n = VIRTIO_NET(dev);
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    struct vhost_net *net;
    struct vhost_dev *hdev;
    int r, i, j;
    NetClientState *peer;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;
        for (j = 0; j < hdev->nvqs; j++) {
            r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j,
                                             false);
            if (r < 0) {
                error_report("vhost %d VQ %d notifier cleanup failed: %d",
                             i, j, -r);
            }
            assert(r >= 0);
        }
    }

    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;
        for (j = 0; j < hdev->nvqs; j++) {
            virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j);
        }
        virtio_device_release_ioeventfd(dev);
    }
}

/*
 * Stop processing guest IO notifications in qemu.
 * Start processing them in vhost in kernel.
 */
static int vhost_net_enable_notifiers(VirtIODevice *dev,
                NetClientState *ncs, int data_queue_pairs, int cvq)
{
    VirtIONet *n = VIRTIO_NET(dev);
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    struct vhost_dev *hdev;
    int r, i, j, k;
    NetClientState *peer;

    /*
     * We will pass the notifiers to the kernel, make sure that QEMU
     * doesn't interfere.
     */
    for (i = 0; i < nvhosts; i++) {
        r = virtio_device_grab_ioeventfd(dev);
        if (r < 0) {
            error_report("vhost %d binding does not support host notifiers", i);
            for (k = 0; k < i; k++) {
                virtio_device_release_ioeventfd(dev);
            }
            return r;
        }
    }

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;

        for (j = 0; j < hdev->nvqs; j++) {
            r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j,
                                             true);
            if (r < 0) {
                error_report("vhost %d VQ %d notifier binding failed: %d",
                             i, j, -r);
                memory_region_transaction_commit();
                vhost_dev_disable_notifiers_nvqs(hdev, dev, j);
                goto fail_nvhosts;
            }
        }
    }

    memory_region_transaction_commit();

    return 0;

fail_nvhosts:
    vhost_net_disable_notifiers_nvhosts(dev, ncs, data_queue_pairs, i);
    /*
     * This for loop starts from i+1, not i, because the i-th ioeventfd
     * has already been released in vhost_dev_disable_notifiers_nvqs().
     */
    for (k = i + 1; k < nvhosts; k++) {
        virtio_device_release_ioeventfd(dev);
    }

    return r;
}

/*
 * Stop processing guest IO notifications in vhost.
 * Start processing them in qemu again.
 */
static void vhost_net_disable_notifiers(VirtIODevice *dev,
                NetClientState *ncs, int data_queue_pairs, int cvq)
{
    vhost_net_disable_notifiers_nvhosts(dev, ncs, data_queue_pairs,
                                        data_queue_pairs + cvq);
}

static int vhost_net_get_fd(NetClientState *backend)
{
    switch (backend->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        return tap_get_fd(backend);
    default:
        fprintf(stderr, "vhost-net requires tap backend\n");
        return -ENOSYS;
    }
}
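
/*
 * Allocate and initialize a vhost_net for @options->net_backend.
 * For the vhost-kernel backend this also acquires the TAP fd.
 * Returns NULL on failure; release the result with vhost_net_cleanup()
 * followed by g_free().
 */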
struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
    int r;
    bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
    struct vhost_net *net = g_new0(struct vhost_net, 1);
    uint64_t features = 0;
    Error *local_err = NULL;

    if (!options->net_backend) {
        fprintf(stderr, "vhost-net requires net backend to be setup\n");
        goto fail;
    }
    net->nc = options->net_backend;
    net->dev.nvqs = options->nvqs;
    net->feature_bits = options->feature_bits;

    net->dev.max_queues = 1;
    net->dev.vqs = net->vqs;

    if (backend_kernel) {
        r = vhost_net_get_fd(options->net_backend);
        if (r < 0) {
            goto fail;
        }
        net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
            ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
        net->backend = r;
        net->dev.protocol_features = 0;
    } else {
        net->dev.backend_features = 0;
        net->dev.protocol_features = 0;
        net->backend = -1;

        /* vhost-user needs vq_index to initiate a specific queue pair */
        net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
    }

    r = vhost_dev_init(&net->dev, options->opaque,
                       options->backend_type, options->busyloop_timeout,
                       &local_err);
    if (r < 0) {
        error_report_err(local_err);
        goto fail;
    }
    if (backend_kernel) {
        if (!qemu_has_vnet_hdr_len(options->net_backend,
                                   sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
            net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
        }
        if (~net->dev.features & net->dev.backend_features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & net->dev.backend_features));
            goto fail;
        }
    }

    /* Set sane init value. Override when guest acks. */
    if (options->get_acked_features) {
        features = options->get_acked_features(net->nc);
        if (~net->dev.features & features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & features));
            goto fail;
        }
    }

    vhost_net_ack_features(net, features);

    return net;

fail:
    vhost_dev_cleanup(&net->dev);
    g_free(net);
    return NULL;
}

static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
                                   int vq_index_end)
{
    net->dev.vq_index = vq_index;
    net->dev.vq_index_end = vq_index_end;
}
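
/*
 * Start one vhost device: run the backend's start hook, start the
 * vhost_dev, and (for TAP backends) attach the backend fd to every
 * enabled virtqueue.  Undoes all of it on failure.
 */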
static int vhost_net_start_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { };
    int r;

    if (net->nc->info->start) {
        r = net->nc->info->start(net->nc);
        if (r < 0) {
            return r;
        }
    }

    r = vhost_dev_start(&net->dev, dev, false);
    if (r < 0) {
        goto fail_start;
    }

    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, false);
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
        file.fd = net->backend;
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            r = vhost_net_set_backend(&net->dev, &file);
            if (r < 0) {
                r = -errno;
                goto fail;
            }
        }
    }

    if (net->nc->info->load) {
        r = net->nc->info->load(net->nc);
        if (r < 0) {
            goto fail;
        }
    }
    return 0;

fail:
    file.fd = -1;
    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        while (file.index-- > 0) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            int ret = vhost_net_set_backend(&net->dev, &file);
            assert(ret >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
fail_start:
    return r;
}

static void vhost_net_stop_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { .fd = -1 };

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            int r = vhost_net_set_backend(&net->dev, &file);
            assert(r >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
    if (net->nc->info->stop) {
        net->nc->info->stop(net->nc);
    }
}
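
/*
 * Start vhost for @data_queue_pairs queue pairs plus an optional
 * control virtqueue (@cvq).  Each data queue pair uses two guest
 * notifiers and the control virtqueue uses one more, hence
 * total_notifiers = data_queue_pairs * 2 + cvq.
 */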
int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int total_notifiers = data_queue_pairs * 2 + cvq;
    VirtIONet *n = VIRTIO_NET(dev);
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    int r, e, i, index_end = data_queue_pairs * 2;
    NetClientState *peer;

    if (cvq) {
        index_end += 1;
    }

    if (!k->set_guest_notifiers) {
        error_report("binding does not support guest notifiers");
        return -ENOSYS;
    }

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else { /* Control Virtqueue */
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        vhost_net_set_vq_index(net, i * 2, index_end);

        /*
         * Suppress guest notifier masking on vhost-user, because
         * vhost-user doesn't handle interrupt masking/unmasking
         * properly.
         */
        if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
            dev->use_guest_notifier_mask = false;
        }
    }

    r = vhost_net_enable_notifiers(dev, ncs, data_queue_pairs, cvq);
    if (r < 0) {
        error_report("Error enabling host notifiers: %d", -r);
        goto err;
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
    if (r < 0) {
        error_report("Error binding guest notifier: %d", -r);
        goto err_host_notifiers;
    }

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        if (peer->vring_enable) {
            /* restore vring enable state */
            r = vhost_net_set_vring_enable(peer, peer->vring_enable);
            if (r < 0) {
                goto err_guest_notifiers;
            }
        }

        r = vhost_net_start_one(get_vhost_net(peer), dev);
        if (r < 0) {
            goto err_guest_notifiers;
        }
    }

    return 0;

err_guest_notifiers:
    while (--i >= 0) {
        peer = qemu_get_peer(ncs, i < data_queue_pairs ?
                             i : n->max_queue_pairs);
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }
    e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (e < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
        fflush(stderr);
    }
err_host_notifiers:
    vhost_net_disable_notifiers(dev, ncs, data_queue_pairs, cvq);
err:
    return r;
}

void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *peer;
    int total_notifiers = data_queue_pairs * 2 + cvq;
    int nvhosts = data_queue_pairs + cvq;
    int i, r;

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    assert(r >= 0);

    vhost_net_disable_notifiers(dev, ncs, data_queue_pairs, cvq);
}

void vhost_net_cleanup(struct vhost_net *net)
{
    vhost_dev_cleanup(&net->dev);
}

int vhost_net_notify_migration_done(struct vhost_net *net, char *mac_addr)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
    assert(vhost_ops->vhost_migration_done);

    return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
}

bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
    return vhost_virtqueue_pending(&net->dev, idx);
}

void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
                              int idx, bool mask)
{
    vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}

bool vhost_net_config_pending(VHostNetState *net)
{
    return vhost_config_pending(&net->dev);
}

void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
{
    vhost_config_mask(&net->dev, dev, mask);
}

VHostNetState *get_vhost_net(NetClientState *nc)
{
    if (!nc) {
        return NULL;
    }

    if (nc->info->get_vhost_net) {
        return nc->info->get_vhost_net(nc);
    }

    return NULL;
}

int vhost_net_set_vring_enable(NetClientState *nc, int enable)
{
    VHostNetState *net = get_vhost_net(nc);
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    /*
     * vhost-vdpa network devices need to enable dataplane virtqueues after
     * DRIVER_OK, so they can recover device state before starting dataplane.
     * Because of that, we don't enable virtqueues here and leave it to
     * net/vhost-vdpa.c.
     */
    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        return 0;
    }

    nc->vring_enable = enable;

    if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
        return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
    }

    return 0;
}

int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    if (!vhost_ops->vhost_net_set_mtu) {
        return 0;
    }

    return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
}
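
/*
 * Stop a single virtqueue so the guest can reset it: unbind the TAP
 * backend from the ring (vhost-kernel only), then stop the vhost
 * virtqueue.  vhost_net_virtqueue_restart() brings it back.
 */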
void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
                               int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { .fd = -1 };
    int idx;

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        int r = vhost_net_set_backend(&net->dev, &file);
        assert(r >= 0);
    }

    vhost_virtqueue_stop(&net->dev,
                         vdev,
                         net->dev.vqs + idx,
                         net->dev.vq_index + idx);
}

int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
                                int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { };
    int idx, r;

    if (!net->dev.started) {
        return -EBUSY;
    }

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    r = vhost_virtqueue_start(&net->dev,
                              vdev,
                              net->dev.vqs + idx,
                              net->dev.vq_index + idx);
    if (r < 0) {
        goto err_start;
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        file.fd = net->backend;
        r = vhost_net_set_backend(&net->dev, &file);
        if (r < 0) {
            r = -errno;
            goto err_start;
        }
    }

    return 0;

err_start:
    error_report("Error when restarting the queue.");

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.fd = VHOST_FILE_UNBIND;
        file.index = idx;
        int ret = vhost_net_set_backend(&net->dev, &file);
        assert(ret >= 0);
    }

    vhost_dev_stop(&net->dev, vdev, false);

    return r;
}