/*
 * vhost-net support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "net/net.h"
#include "net/tap.h"
#include "net/vhost-vdpa.h"

#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"

#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>

#include "standard-headers/linux/virtio_ring.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-bus.h"
#include "linux-headers/linux/vhost.h"

uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
{
    return vhost_get_features(&net->dev, net->feature_bits,
                              features);
}

int vhost_net_get_config(struct vhost_net *net, uint8_t *config,
                         uint32_t config_len)
{
    return vhost_dev_get_config(&net->dev, config, config_len, NULL);
}

int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
                         uint32_t offset, uint32_t size, uint32_t flags)
{
    return vhost_dev_set_config(&net->dev, data, offset, size, flags);
}

void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
    net->dev.acked_features = net->dev.backend_features;
    vhost_ack_features(&net->dev, net->feature_bits, features);
}

uint64_t vhost_net_get_max_queues(VHostNetState *net)
{
    return net->dev.max_queues;
}

uint64_t vhost_net_get_acked_features(VHostNetState *net)
{
    return net->dev.acked_features;
}

void vhost_net_save_acked_features(NetClientState *nc)
{
    struct vhost_net *net = get_vhost_net(nc);

    if (net && net->save_acked_features) {
        net->save_acked_features(nc);
    }
}

static void vhost_net_disable_notifiers_nvhosts(VirtIODevice *dev,
                NetClientState *ncs, int data_queue_pairs, int nvhosts)
{
    VirtIONet *n = VIRTIO_NET(dev);
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    struct vhost_net *net;
    struct vhost_dev *hdev;
    int r, i, j;
    NetClientState *peer;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;
        for (j = 0; j < hdev->nvqs; j++) {
            r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j,
                                             false);
            if (r < 0) {
                error_report("vhost %d VQ %d notifier cleanup failed: %d",
                             i, j, -r);
            }
            assert(r >= 0);
        }
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;
        for (j = 0; j < hdev->nvqs; j++) {
            virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j);
        }
        virtio_device_release_ioeventfd(dev);
    }
}
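
/*
 * A worked example of the indexing above (the concrete numbers are
 * assumptions for illustration, not taken from any particular
 * configuration): with data_queue_pairs = 2 plus a control virtqueue,
 * ncs[] holds the two data peers at slots 0 and 1 and the cvq peer at
 * slot max_queue_pairs.  vhost_net_start() assigns vq_index = i * 2 to
 * vhost i, and each data vhost covers nvqs = 2 guest virtqueues, so
 * vhost 0 owns guest VQs 0-1, vhost 1 owns VQs 2-3, and the cvq vhost
 * owns VQ 4.  The notifier loops above therefore walk
 * hdev->vq_index + j for j in [0, hdev->nvqs).
 */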

/*
 * Stop processing guest IO notifications in qemu.
 * Start processing them in vhost in kernel.
 */
static int vhost_net_enable_notifiers(VirtIODevice *dev,
                NetClientState *ncs, int data_queue_pairs, int cvq)
{
    VirtIONet *n = VIRTIO_NET(dev);
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    struct vhost_dev *hdev;
    int r, i, j, k;
    NetClientState *peer;

    /*
     * We will pass the notifiers to the kernel, make sure that QEMU
     * doesn't interfere.
     */
    for (i = 0; i < nvhosts; i++) {
        r = virtio_device_grab_ioeventfd(dev);
        if (r < 0) {
            error_report("vhost %d binding does not support host notifiers", i);
            for (k = 0; k < i; k++) {
                virtio_device_release_ioeventfd(dev);
            }
            return r;
        }
    }

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        hdev = &net->dev;

        for (j = 0; j < hdev->nvqs; j++) {
            r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus),
                                             hdev->vq_index + j,
                                             true);
            if (r < 0) {
                error_report("vhost %d VQ %d notifier binding failed: %d",
                             i, j, -r);
                memory_region_transaction_commit();
                vhost_dev_disable_notifiers_nvqs(hdev, dev, j);
                goto fail_nvhosts;
            }
        }
    }

    memory_region_transaction_commit();

    return 0;
fail_nvhosts:
    vhost_net_disable_notifiers_nvhosts(dev, ncs, data_queue_pairs, i);
    /*
     * This for loop starts from i+1, not i, because the i-th ioeventfd
     * has already been released in vhost_dev_disable_notifiers_nvqs().
     */
    for (k = i + 1; k < nvhosts; k++) {
        virtio_device_release_ioeventfd(dev);
    }

    return r;
}

/*
 * Stop processing guest IO notifications in vhost.
 * Start processing them in qemu.
 */
static void vhost_net_disable_notifiers(VirtIODevice *dev,
                NetClientState *ncs, int data_queue_pairs, int cvq)
{
    vhost_net_disable_notifiers_nvhosts(dev, ncs, data_queue_pairs,
                                        data_queue_pairs + cvq);
}
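
/*
 * Failure-handling summary for vhost_net_enable_notifiers() (this
 * restates the code above, it is not additional behaviour): if binding
 * VQ j of vhost i fails, the transaction is committed first so the
 * already-bound notifiers take effect, vhost i's first j notifiers are
 * unwound via vhost_dev_disable_notifiers_nvqs(), vhosts [0, i) are
 * unwound via vhost_net_disable_notifiers_nvhosts(), and the ioeventfd
 * grabs for the remaining vhosts (i, nvhosts) are released one by one.
 */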

static int vhost_net_get_fd(NetClientState *backend)
{
    switch (backend->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        return tap_get_fd(backend);
    default:
        fprintf(stderr, "vhost-net requires tap backend\n");
        return -ENOSYS;
    }
}

struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
    int r;
    bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
    struct vhost_net *net = g_new0(struct vhost_net, 1);
    uint64_t features = 0;
    Error *local_err = NULL;

    if (!options->net_backend) {
        fprintf(stderr, "vhost-net requires net backend to be setup\n");
        goto fail;
    }
    net->nc = options->net_backend;
    net->dev.nvqs = options->nvqs;
    net->feature_bits = options->feature_bits;
    net->save_acked_features = options->save_acked_features;
    net->max_tx_queue_size = options->max_tx_queue_size;
    net->is_vhost_user = options->is_vhost_user;

    net->dev.max_queues = 1;
    net->dev.vqs = net->vqs;

    if (backend_kernel) {
        r = vhost_net_get_fd(options->net_backend);
        if (r < 0) {
            goto fail;
        }
        net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
            ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
        net->backend = r;
        net->dev.protocol_features = 0;
    } else {
        net->dev.backend_features = 0;
        net->dev.protocol_features = 0;
        net->backend = -1;

        /* vhost-user needs vq_index to initiate a specific queue pair */
        net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
    }

    r = vhost_dev_init(&net->dev, options->opaque,
                       options->backend_type, options->busyloop_timeout,
                       &local_err);
    if (r < 0) {
        error_report_err(local_err);
        goto fail;
    }
    if (backend_kernel) {
        if (!qemu_has_vnet_hdr_len(options->net_backend,
                                   sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
            net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
        }
        if (~net->dev.features & net->dev.backend_features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & net->dev.backend_features));
            goto fail;
        }
    }

    /* Set sane init value. Override when guest acks. */
    if (options->get_acked_features) {
        features = options->get_acked_features(net->nc);
        if (~net->dev.features & features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & features));
            goto fail;
        }
    }

    vhost_net_ack_features(net, features);

    return net;

fail:
    vhost_dev_cleanup(&net->dev);
    g_free(net);
    return NULL;
}

static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
                                   int vq_index_end)
{
    net->dev.vq_index = vq_index;
    net->dev.vq_index_end = vq_index_end;
}
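
/*
 * Minimal usage sketch for vhost_net_init() with the kernel backend
 * (illustrative only; 'nc' and 'vhostfd' are hypothetical names, and the
 * field values are assumptions rather than recommendations -- compare
 * the in-tree caller in net/tap.c):
 *
 *     VhostNetOptions options = {
 *         .backend_type = VHOST_BACKEND_TYPE_KERNEL,
 *         .net_backend = nc,                     // a tap NetClientState
 *         .opaque = (void *)(uintptr_t)vhostfd,  // open /dev/vhost-net fd
 *         .nvqs = 2,                             // one RX, one TX queue
 *         .busyloop_timeout = 0,
 *     };
 *     struct vhost_net *net = vhost_net_init(&options);
 *     if (!net) {
 *         // backend missing, or vhost_dev_init() failed
 *     }
 */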

static int vhost_net_start_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { };
    int r;

    if (net->nc->info->start) {
        r = net->nc->info->start(net->nc);
        if (r < 0) {
            return r;
        }
    }

    r = vhost_dev_start(&net->dev, dev, false);
    if (r < 0) {
        goto fail_start;
    }

    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, false);
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
        file.fd = net->backend;
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            r = vhost_net_set_backend(&net->dev, &file);
            if (r < 0) {
                r = -errno;
                goto fail;
            }
        }
    }

    if (net->nc->info->load) {
        r = net->nc->info->load(net->nc);
        if (r < 0) {
            goto fail;
        }
    }
    return 0;
fail:
    file.fd = -1;
    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        while (file.index-- > 0) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            int ret = vhost_net_set_backend(&net->dev, &file);
            assert(ret >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
fail_start:
    return r;
}

static void vhost_net_stop_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { .fd = -1 };

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            int r = vhost_net_set_backend(&net->dev, &file);
            assert(r >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
    if (net->nc->info->stop) {
        net->nc->info->stop(net->nc);
    }
}
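
/*
 * Usage sketch for the start/stop entry points below (the caller shown
 * here is hypothetical; in-tree, the virtio-net device model drives
 * these from its status callback).  On failure, vhost_net_start() has
 * already rolled back the host and guest notifiers, so the caller only
 * has to handle the error:
 *
 *     if (vhost_net_start(vdev, ncs, data_queue_pairs, cvq) < 0) {
 *         // notifiers have already been rolled back
 *     }
 *     ...
 *     vhost_net_stop(vdev, ncs, data_queue_pairs, cvq);
 */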
int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int total_notifiers = data_queue_pairs * 2 + cvq;
    VirtIONet *n = VIRTIO_NET(dev);
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    int r, e, i, index_end = data_queue_pairs * 2;
    NetClientState *peer;

    if (cvq) {
        index_end += 1;
    }

    if (!k->set_guest_notifiers) {
        error_report("binding does not support guest notifiers");
        return -ENOSYS;
    }

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else { /* Control Virtqueue */
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        vhost_net_set_vq_index(net, i * 2, index_end);

        /*
         * Suppress guest notifier masking on vhost-user, because
         * vhost-user doesn't handle interrupt masking/unmasking
         * properly.
         */
        if (net->is_vhost_user) {
            dev->use_guest_notifier_mask = false;
        }
    }

    r = vhost_net_enable_notifiers(dev, ncs, data_queue_pairs, cvq);
    if (r < 0) {
        error_report("Error enabling host notifiers: %d", -r);
        goto err;
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
    if (r < 0) {
        error_report("Error binding guest notifier: %d", -r);
        goto err_host_notifiers;
    }

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        if (peer->vring_enable) {
            /* restore vring enable state */
            r = vhost_net_set_vring_enable(peer, peer->vring_enable);

            if (r < 0) {
                goto err_guest_notifiers;
            }
        }

        r = vhost_net_start_one(get_vhost_net(peer), dev);
        if (r < 0) {
            goto err_guest_notifiers;
        }
    }

    return 0;

err_guest_notifiers:
    while (--i >= 0) {
        peer = qemu_get_peer(ncs, i < data_queue_pairs ?
                                  i : n->max_queue_pairs);
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }
    e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (e < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
        fflush(stderr);
    }
err_host_notifiers:
    vhost_net_disable_notifiers(dev, ncs, data_queue_pairs, cvq);
err:
    return r;
}

void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *peer;
    int total_notifiers = data_queue_pairs * 2 + cvq;
    int nvhosts = data_queue_pairs + cvq;
    int i, r;

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    assert(r >= 0);

    vhost_net_disable_notifiers(dev, ncs, data_queue_pairs, cvq);
}

void vhost_net_cleanup(struct vhost_net *net)
{
    vhost_dev_cleanup(&net->dev);
}

int vhost_net_notify_migration_done(struct vhost_net *net, char *mac_addr)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
    assert(vhost_ops->vhost_migration_done);

    return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
}

bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
    return vhost_virtqueue_pending(&net->dev, idx);
}

void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
                              int idx, bool mask)
{
    vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}

bool vhost_net_config_pending(VHostNetState *net)
{
    return vhost_config_pending(&net->dev);
}
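
/*
 * Sketch of how a transport might pair the mask/pending helpers when it
 * has to bounce interrupts through QEMU instead of delivering them via
 * irqfd (the caller shown here is hypothetical; virtio-pci is the
 * in-tree user of this pattern):
 *
 *     vhost_net_virtqueue_mask(net, vdev, idx, true);   // route to QEMU
 *     if (vhost_net_virtqueue_pending(net, idx)) {
 *         virtio_notify_irqfd(vdev, virtio_get_queue(vdev, idx));
 *     }
 *     vhost_net_virtqueue_mask(net, vdev, idx, false);  // back to vhost
 */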

void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
{
    vhost_config_mask(&net->dev, dev, mask);
}

VHostNetState *get_vhost_net(NetClientState *nc)
{
    if (!nc) {
        return NULL;
    }

    if (nc->info->get_vhost_net) {
        return nc->info->get_vhost_net(nc);
    }

    return NULL;
}

int vhost_net_set_vring_enable(NetClientState *nc, int enable)
{
    VHostNetState *net = get_vhost_net(nc);
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    /*
     * vhost-vdpa network devices need to enable dataplane virtqueues after
     * DRIVER_OK, so they can recover device state before starting dataplane.
     * Because of that, we don't enable virtqueues here and leave it to
     * net/vhost-vdpa.c.
     */
    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        return 0;
    }

    nc->vring_enable = enable;

    if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
        return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
    }

    return 0;
}

int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    if (!vhost_ops->vhost_net_set_mtu) {
        return 0;
    }

    return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
}

void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
                               int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { .fd = -1 };
    int idx;

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        int r = vhost_net_set_backend(&net->dev, &file);
        assert(r >= 0);
    }

    vhost_virtqueue_stop(&net->dev,
                         vdev,
                         net->dev.vqs + idx,
                         net->dev.vq_index + idx);
}

int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
                                int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { };
    int idx, r;

    if (!net->dev.started) {
        return -EBUSY;
    }

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    r = vhost_virtqueue_start(&net->dev,
                              vdev,
                              net->dev.vqs + idx,
                              net->dev.vq_index + idx);
    if (r < 0) {
        goto err_start;
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        file.fd = net->backend;
        r = vhost_net_set_backend(&net->dev, &file);
        if (r < 0) {
            r = -errno;
            goto err_start;
        }
    }

    return 0;

err_start:
    error_report("Error when restarting the queue.");

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.fd = VHOST_FILE_UNBIND;
        file.index = idx;
        int ret = vhost_net_set_backend(&net->dev, &file);
        assert(ret >= 0);
    }

    vhost_dev_stop(&net->dev, vdev, false);

    return r;
}
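
/*
 * Sketch of the per-queue reset/restart pairing above (hypothetical
 * caller, shown for illustration; in-tree, virtio-net's per-queue reset
 * handling drives these).  Restart fails with -EBUSY when the vhost
 * device as a whole is not started:
 *
 *     vhost_net_virtqueue_reset(vdev, nc, vq_index);
 *     // ... the guest re-enables the queue ...
 *     if (vhost_net_virtqueue_restart(vdev, nc, vq_index) < 0) {
 *         // device not started, or the restart itself failed
 *     }
 */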