1 /* 2 * vhost-net support 3 * 4 * Copyright Red Hat, Inc. 2010 5 * 6 * Authors: 7 * Michael S. Tsirkin <mst@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu/osdep.h" 17 #include "net/net.h" 18 #include "net/tap.h" 19 #include "net/vhost-user.h" 20 #include "net/vhost-vdpa.h" 21 22 #include "standard-headers/linux/vhost_types.h" 23 #include "hw/virtio/virtio-net.h" 24 #include "net/vhost_net.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "qemu/main-loop.h" 28 29 #include <sys/socket.h> 30 #include <net/if.h> 31 #include <netinet/in.h> 32 33 34 #include "standard-headers/linux/virtio_ring.h" 35 #include "hw/virtio/vhost.h" 36 #include "hw/virtio/virtio-bus.h" 37 #include "linux-headers/linux/vhost.h" 38 39 40 /* Features supported by host kernel. */ 41 static const int kernel_feature_bits[] = { 42 VIRTIO_F_NOTIFY_ON_EMPTY, 43 VIRTIO_RING_F_INDIRECT_DESC, 44 VIRTIO_RING_F_EVENT_IDX, 45 VIRTIO_NET_F_MRG_RXBUF, 46 VIRTIO_F_VERSION_1, 47 VIRTIO_NET_F_MTU, 48 VIRTIO_F_IOMMU_PLATFORM, 49 VIRTIO_F_RING_PACKED, 50 VIRTIO_F_RING_RESET, 51 VIRTIO_NET_F_HASH_REPORT, 52 VHOST_INVALID_FEATURE_BIT 53 }; 54 55 /* Features supported by others. */ 56 static const int user_feature_bits[] = { 57 VIRTIO_F_NOTIFY_ON_EMPTY, 58 VIRTIO_RING_F_INDIRECT_DESC, 59 VIRTIO_RING_F_EVENT_IDX, 60 61 VIRTIO_F_ANY_LAYOUT, 62 VIRTIO_F_VERSION_1, 63 VIRTIO_NET_F_CSUM, 64 VIRTIO_NET_F_GUEST_CSUM, 65 VIRTIO_NET_F_GSO, 66 VIRTIO_NET_F_GUEST_TSO4, 67 VIRTIO_NET_F_GUEST_TSO6, 68 VIRTIO_NET_F_GUEST_ECN, 69 VIRTIO_NET_F_GUEST_UFO, 70 VIRTIO_NET_F_HOST_TSO4, 71 VIRTIO_NET_F_HOST_TSO6, 72 VIRTIO_NET_F_HOST_ECN, 73 VIRTIO_NET_F_HOST_UFO, 74 VIRTIO_NET_F_MRG_RXBUF, 75 VIRTIO_NET_F_MTU, 76 VIRTIO_F_IOMMU_PLATFORM, 77 VIRTIO_F_RING_PACKED, 78 VIRTIO_F_RING_RESET, 79 VIRTIO_NET_F_RSS, 80 VIRTIO_NET_F_HASH_REPORT, 81 VIRTIO_NET_F_GUEST_USO4, 82 VIRTIO_NET_F_GUEST_USO6, 83 VIRTIO_NET_F_HOST_USO, 84 85 /* This bit implies RARP isn't sent by QEMU out of band */ 86 VIRTIO_NET_F_GUEST_ANNOUNCE, 87 88 VIRTIO_NET_F_MQ, 89 90 VHOST_INVALID_FEATURE_BIT 91 }; 92 93 static const int *vhost_net_get_feature_bits(struct vhost_net *net) 94 { 95 const int *feature_bits = 0; 96 97 switch (net->nc->info->type) { 98 case NET_CLIENT_DRIVER_TAP: 99 feature_bits = kernel_feature_bits; 100 break; 101 case NET_CLIENT_DRIVER_VHOST_USER: 102 feature_bits = user_feature_bits; 103 break; 104 #ifdef CONFIG_VHOST_NET_VDPA 105 case NET_CLIENT_DRIVER_VHOST_VDPA: 106 feature_bits = vdpa_feature_bits; 107 break; 108 #endif 109 default: 110 error_report("Feature bits not defined for this type: %d", 111 net->nc->info->type); 112 break; 113 } 114 115 return feature_bits; 116 } 117 118 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) 119 { 120 return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), 121 features); 122 } 123 int vhost_net_get_config(struct vhost_net *net, uint8_t *config, 124 uint32_t config_len) 125 { 126 return vhost_dev_get_config(&net->dev, config, config_len, NULL); 127 } 128 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, 129 uint32_t offset, uint32_t size, uint32_t flags) 130 { 131 return vhost_dev_set_config(&net->dev, data, offset, size, flags); 132 } 133 134 void vhost_net_ack_features(struct vhost_net *net, uint64_t features) 135 { 136 net->dev.acked_features = net->dev.backend_features; 137 vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); 138 } 139 140 uint64_t vhost_net_get_max_queues(VHostNetState *net) 141 { 142 return net->dev.max_queues; 143 } 144 145 uint64_t vhost_net_get_acked_features(VHostNetState *net) 146 { 147 return net->dev.acked_features; 148 } 149 150 void vhost_net_save_acked_features(NetClientState *nc) 151 { 152 #ifdef CONFIG_VHOST_NET_USER 153 if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 154 vhost_user_save_acked_features(nc); 155 } 156 #endif 157 } 158 159 static int vhost_net_get_fd(NetClientState *backend) 160 { 161 switch (backend->info->type) { 162 case NET_CLIENT_DRIVER_TAP: 163 return tap_get_fd(backend); 164 default: 165 fprintf(stderr, "vhost-net requires tap backend\n"); 166 return -ENOSYS; 167 } 168 } 169 170 struct vhost_net *vhost_net_init(VhostNetOptions *options) 171 { 172 int r; 173 bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; 174 struct vhost_net *net = g_new0(struct vhost_net, 1); 175 uint64_t features = 0; 176 Error *local_err = NULL; 177 178 if (!options->net_backend) { 179 fprintf(stderr, "vhost-net requires net backend to be setup\n"); 180 goto fail; 181 } 182 net->nc = options->net_backend; 183 net->dev.nvqs = options->nvqs; 184 185 net->dev.max_queues = 1; 186 net->dev.vqs = net->vqs; 187 188 if (backend_kernel) { 189 r = vhost_net_get_fd(options->net_backend); 190 if (r < 0) { 191 goto fail; 192 } 193 net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend) 194 ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR); 195 net->backend = r; 196 net->dev.protocol_features = 0; 197 } else { 198 net->dev.backend_features = 0; 199 net->dev.protocol_features = 0; 200 net->backend = -1; 201 202 /* vhost-user needs vq_index to initiate a specific queue pair */ 203 net->dev.vq_index = net->nc->queue_index * net->dev.nvqs; 204 } 205 206 r = vhost_dev_init(&net->dev, options->opaque, 207 options->backend_type, options->busyloop_timeout, 208 &local_err); 209 if (r < 0) { 210 error_report_err(local_err); 211 goto fail; 212 } 213 if (backend_kernel) { 214 if (!qemu_has_vnet_hdr_len(options->net_backend, 215 sizeof(struct virtio_net_hdr_mrg_rxbuf))) { 216 net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); 217 } 218 if (~net->dev.features & net->dev.backend_features) { 219 fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 220 " for backend\n", 221 (uint64_t)(~net->dev.features & net->dev.backend_features)); 222 goto fail; 223 } 224 } 225 226 /* Set sane init value. Override when guest acks. */ 227 #ifdef CONFIG_VHOST_NET_USER 228 if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 229 features = vhost_user_get_acked_features(net->nc); 230 if (~net->dev.features & features) { 231 fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 232 " for backend\n", 233 (uint64_t)(~net->dev.features & features)); 234 goto fail; 235 } 236 } 237 #endif 238 239 vhost_net_ack_features(net, features); 240 241 return net; 242 243 fail: 244 vhost_dev_cleanup(&net->dev); 245 g_free(net); 246 return NULL; 247 } 248 249 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index, 250 int vq_index_end) 251 { 252 net->dev.vq_index = vq_index; 253 net->dev.vq_index_end = vq_index_end; 254 } 255 256 static int vhost_net_start_one(struct vhost_net *net, 257 VirtIODevice *dev) 258 { 259 struct vhost_vring_file file = { }; 260 int r; 261 262 if (net->nc->info->start) { 263 r = net->nc->info->start(net->nc); 264 if (r < 0) { 265 return r; 266 } 267 } 268 269 r = vhost_dev_enable_notifiers(&net->dev, dev); 270 if (r < 0) { 271 goto fail_notifiers; 272 } 273 274 r = vhost_dev_start(&net->dev, dev, false); 275 if (r < 0) { 276 goto fail_start; 277 } 278 279 if (net->nc->info->poll) { 280 net->nc->info->poll(net->nc, false); 281 } 282 283 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 284 qemu_set_fd_handler(net->backend, NULL, NULL, NULL); 285 file.fd = net->backend; 286 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 287 if (!virtio_queue_enabled(dev, net->dev.vq_index + 288 file.index)) { 289 /* Queue might not be ready for start */ 290 continue; 291 } 292 r = vhost_net_set_backend(&net->dev, &file); 293 if (r < 0) { 294 r = -errno; 295 goto fail; 296 } 297 } 298 } 299 300 if (net->nc->info->load) { 301 r = net->nc->info->load(net->nc); 302 if (r < 0) { 303 goto fail; 304 } 305 } 306 return 0; 307 fail: 308 file.fd = -1; 309 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 310 while (file.index-- > 0) { 311 if (!virtio_queue_enabled(dev, net->dev.vq_index + 312 file.index)) { 313 /* Queue might not be ready for start */ 314 continue; 315 } 316 int r = vhost_net_set_backend(&net->dev, &file); 317 assert(r >= 0); 318 } 319 } 320 if (net->nc->info->poll) { 321 net->nc->info->poll(net->nc, true); 322 } 323 vhost_dev_stop(&net->dev, dev, false); 324 fail_start: 325 vhost_dev_disable_notifiers(&net->dev, dev); 326 fail_notifiers: 327 return r; 328 } 329 330 static void vhost_net_stop_one(struct vhost_net *net, 331 VirtIODevice *dev) 332 { 333 struct vhost_vring_file file = { .fd = -1 }; 334 335 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 336 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 337 int r = vhost_net_set_backend(&net->dev, &file); 338 assert(r >= 0); 339 } 340 } 341 if (net->nc->info->poll) { 342 net->nc->info->poll(net->nc, true); 343 } 344 vhost_dev_stop(&net->dev, dev, false); 345 if (net->nc->info->stop) { 346 net->nc->info->stop(net->nc); 347 } 348 vhost_dev_disable_notifiers(&net->dev, dev); 349 } 350 351 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, 352 int data_queue_pairs, int cvq) 353 { 354 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 355 VirtioBusState *vbus = VIRTIO_BUS(qbus); 356 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 357 int total_notifiers = data_queue_pairs * 2 + cvq; 358 VirtIONet *n = VIRTIO_NET(dev); 359 int nvhosts = data_queue_pairs + cvq; 360 struct vhost_net *net; 361 int r, e, i, index_end = data_queue_pairs * 2; 362 NetClientState *peer; 363 364 if (cvq) { 365 index_end += 1; 366 } 367 368 if (!k->set_guest_notifiers) { 369 error_report("binding does not support guest notifiers"); 370 return -ENOSYS; 371 } 372 373 for (i = 0; i < nvhosts; i++) { 374 375 if (i < data_queue_pairs) { 376 peer = qemu_get_peer(ncs, i); 377 } else { /* Control Virtqueue */ 378 peer = qemu_get_peer(ncs, n->max_queue_pairs); 379 } 380 381 net = get_vhost_net(peer); 382 vhost_net_set_vq_index(net, i * 2, index_end); 383 384 /* Suppress the masking guest notifiers on vhost user 385 * because vhost user doesn't interrupt masking/unmasking 386 * properly. 387 */ 388 if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 389 dev->use_guest_notifier_mask = false; 390 } 391 } 392 393 r = k->set_guest_notifiers(qbus->parent, total_notifiers, true); 394 if (r < 0) { 395 error_report("Error binding guest notifier: %d", -r); 396 goto err; 397 } 398 399 for (i = 0; i < nvhosts; i++) { 400 if (i < data_queue_pairs) { 401 peer = qemu_get_peer(ncs, i); 402 } else { 403 peer = qemu_get_peer(ncs, n->max_queue_pairs); 404 } 405 406 if (peer->vring_enable) { 407 /* restore vring enable state */ 408 r = vhost_set_vring_enable(peer, peer->vring_enable); 409 410 if (r < 0) { 411 goto err_start; 412 } 413 } 414 415 r = vhost_net_start_one(get_vhost_net(peer), dev); 416 if (r < 0) { 417 goto err_start; 418 } 419 } 420 421 return 0; 422 423 err_start: 424 while (--i >= 0) { 425 peer = qemu_get_peer(ncs, i < data_queue_pairs ? 426 i : n->max_queue_pairs); 427 vhost_net_stop_one(get_vhost_net(peer), dev); 428 } 429 e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); 430 if (e < 0) { 431 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); 432 fflush(stderr); 433 } 434 err: 435 return r; 436 } 437 438 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, 439 int data_queue_pairs, int cvq) 440 { 441 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 442 VirtioBusState *vbus = VIRTIO_BUS(qbus); 443 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 444 VirtIONet *n = VIRTIO_NET(dev); 445 NetClientState *peer; 446 int total_notifiers = data_queue_pairs * 2 + cvq; 447 int nvhosts = data_queue_pairs + cvq; 448 int i, r; 449 450 for (i = 0; i < nvhosts; i++) { 451 if (i < data_queue_pairs) { 452 peer = qemu_get_peer(ncs, i); 453 } else { 454 peer = qemu_get_peer(ncs, n->max_queue_pairs); 455 } 456 vhost_net_stop_one(get_vhost_net(peer), dev); 457 } 458 459 r = k->set_guest_notifiers(qbus->parent, total_notifiers, false); 460 if (r < 0) { 461 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); 462 fflush(stderr); 463 } 464 assert(r >= 0); 465 } 466 467 void vhost_net_cleanup(struct vhost_net *net) 468 { 469 vhost_dev_cleanup(&net->dev); 470 } 471 472 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) 473 { 474 const VhostOps *vhost_ops = net->dev.vhost_ops; 475 476 assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 477 assert(vhost_ops->vhost_migration_done); 478 479 return vhost_ops->vhost_migration_done(&net->dev, mac_addr); 480 } 481 482 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) 483 { 484 return vhost_virtqueue_pending(&net->dev, idx); 485 } 486 487 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, 488 int idx, bool mask) 489 { 490 vhost_virtqueue_mask(&net->dev, dev, idx, mask); 491 } 492 493 bool vhost_net_config_pending(VHostNetState *net) 494 { 495 return vhost_config_pending(&net->dev); 496 } 497 498 void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) 499 { 500 vhost_config_mask(&net->dev, dev, mask); 501 } 502 VHostNetState *get_vhost_net(NetClientState *nc) 503 { 504 VHostNetState *vhost_net = 0; 505 506 if (!nc) { 507 return 0; 508 } 509 510 switch (nc->info->type) { 511 case NET_CLIENT_DRIVER_TAP: 512 vhost_net = tap_get_vhost_net(nc); 513 /* 514 * tap_get_vhost_net() can return NULL if a tap net-device backend is 515 * created with 'vhost=off' option, 'vhostforce=off' or no vhost or 516 * vhostforce or vhostfd options at all. Please see net_init_tap_one(). 517 * Hence, we omit the assertion here. 518 */ 519 break; 520 #ifdef CONFIG_VHOST_NET_USER 521 case NET_CLIENT_DRIVER_VHOST_USER: 522 vhost_net = vhost_user_get_vhost_net(nc); 523 assert(vhost_net); 524 break; 525 #endif 526 #ifdef CONFIG_VHOST_NET_VDPA 527 case NET_CLIENT_DRIVER_VHOST_VDPA: 528 vhost_net = vhost_vdpa_get_vhost_net(nc); 529 assert(vhost_net); 530 break; 531 #endif 532 default: 533 break; 534 } 535 536 return vhost_net; 537 } 538 539 int vhost_set_vring_enable(NetClientState *nc, int enable) 540 { 541 VHostNetState *net = get_vhost_net(nc); 542 const VhostOps *vhost_ops = net->dev.vhost_ops; 543 544 nc->vring_enable = enable; 545 546 if (vhost_ops && vhost_ops->vhost_set_vring_enable) { 547 return vhost_ops->vhost_set_vring_enable(&net->dev, enable); 548 } 549 550 return 0; 551 } 552 553 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) 554 { 555 const VhostOps *vhost_ops = net->dev.vhost_ops; 556 557 if (!vhost_ops->vhost_net_set_mtu) { 558 return 0; 559 } 560 561 return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); 562 } 563 564 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc, 565 int vq_index) 566 { 567 VHostNetState *net = get_vhost_net(nc->peer); 568 const VhostOps *vhost_ops = net->dev.vhost_ops; 569 struct vhost_vring_file file = { .fd = -1 }; 570 int idx; 571 572 /* should only be called after backend is connected */ 573 assert(vhost_ops); 574 575 idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); 576 577 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 578 file.index = idx; 579 int r = vhost_net_set_backend(&net->dev, &file); 580 assert(r >= 0); 581 } 582 583 vhost_virtqueue_stop(&net->dev, 584 vdev, 585 net->dev.vqs + idx, 586 net->dev.vq_index + idx); 587 } 588 589 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc, 590 int vq_index) 591 { 592 VHostNetState *net = get_vhost_net(nc->peer); 593 const VhostOps *vhost_ops = net->dev.vhost_ops; 594 struct vhost_vring_file file = { }; 595 int idx, r; 596 597 if (!net->dev.started) { 598 return -EBUSY; 599 } 600 601 /* should only be called after backend is connected */ 602 assert(vhost_ops); 603 604 idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); 605 606 r = vhost_virtqueue_start(&net->dev, 607 vdev, 608 net->dev.vqs + idx, 609 net->dev.vq_index + idx); 610 if (r < 0) { 611 goto err_start; 612 } 613 614 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 615 file.index = idx; 616 file.fd = net->backend; 617 r = vhost_net_set_backend(&net->dev, &file); 618 if (r < 0) { 619 r = -errno; 620 goto err_start; 621 } 622 } 623 624 return 0; 625 626 err_start: 627 error_report("Error when restarting the queue."); 628 629 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 630 file.fd = VHOST_FILE_UNBIND; 631 file.index = idx; 632 int r = vhost_net_set_backend(&net->dev, &file); 633 assert(r >= 0); 634 } 635 636 vhost_dev_stop(&net->dev, vdev, false); 637 638 return r; 639 } 640