1 /* 2 * vhost-net support 3 * 4 * Copyright Red Hat, Inc. 2010 5 * 6 * Authors: 7 * Michael S. Tsirkin <mst@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu/osdep.h" 17 #include "net/net.h" 18 #include "net/tap.h" 19 #include "net/vhost-user.h" 20 #include "net/vhost-vdpa.h" 21 22 #include "standard-headers/linux/vhost_types.h" 23 #include "hw/virtio/virtio-net.h" 24 #include "net/vhost_net.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "qemu/main-loop.h" 28 29 #include <sys/socket.h> 30 #include <net/if.h> 31 #include <netinet/in.h> 32 33 34 #include "standard-headers/linux/virtio_ring.h" 35 #include "hw/virtio/vhost.h" 36 #include "hw/virtio/virtio-bus.h" 37 #include "linux-headers/linux/vhost.h" 38 39 40 /* Features supported by host kernel. */ 41 static const int kernel_feature_bits[] = { 42 VIRTIO_F_NOTIFY_ON_EMPTY, 43 VIRTIO_RING_F_INDIRECT_DESC, 44 VIRTIO_RING_F_EVENT_IDX, 45 VIRTIO_NET_F_MRG_RXBUF, 46 VIRTIO_F_VERSION_1, 47 VIRTIO_NET_F_MTU, 48 VIRTIO_F_IOMMU_PLATFORM, 49 VIRTIO_F_RING_PACKED, 50 VIRTIO_F_RING_RESET, 51 VIRTIO_NET_F_HASH_REPORT, 52 VHOST_INVALID_FEATURE_BIT 53 }; 54 55 /* Features supported by others. */ 56 static const int user_feature_bits[] = { 57 VIRTIO_F_NOTIFY_ON_EMPTY, 58 VIRTIO_RING_F_INDIRECT_DESC, 59 VIRTIO_RING_F_EVENT_IDX, 60 61 VIRTIO_F_ANY_LAYOUT, 62 VIRTIO_F_VERSION_1, 63 VIRTIO_NET_F_CSUM, 64 VIRTIO_NET_F_GUEST_CSUM, 65 VIRTIO_NET_F_GSO, 66 VIRTIO_NET_F_GUEST_TSO4, 67 VIRTIO_NET_F_GUEST_TSO6, 68 VIRTIO_NET_F_GUEST_ECN, 69 VIRTIO_NET_F_GUEST_UFO, 70 VIRTIO_NET_F_HOST_TSO4, 71 VIRTIO_NET_F_HOST_TSO6, 72 VIRTIO_NET_F_HOST_ECN, 73 VIRTIO_NET_F_HOST_UFO, 74 VIRTIO_NET_F_MRG_RXBUF, 75 VIRTIO_NET_F_MTU, 76 VIRTIO_F_IOMMU_PLATFORM, 77 VIRTIO_F_RING_PACKED, 78 VIRTIO_F_RING_RESET, 79 VIRTIO_NET_F_RSS, 80 VIRTIO_NET_F_HASH_REPORT, 81 82 /* This bit implies RARP isn't sent by QEMU out of band */ 83 VIRTIO_NET_F_GUEST_ANNOUNCE, 84 85 VIRTIO_NET_F_MQ, 86 87 VHOST_INVALID_FEATURE_BIT 88 }; 89 90 static const int *vhost_net_get_feature_bits(struct vhost_net *net) 91 { 92 const int *feature_bits = 0; 93 94 switch (net->nc->info->type) { 95 case NET_CLIENT_DRIVER_TAP: 96 feature_bits = kernel_feature_bits; 97 break; 98 case NET_CLIENT_DRIVER_VHOST_USER: 99 feature_bits = user_feature_bits; 100 break; 101 #ifdef CONFIG_VHOST_NET_VDPA 102 case NET_CLIENT_DRIVER_VHOST_VDPA: 103 feature_bits = vdpa_feature_bits; 104 break; 105 #endif 106 default: 107 error_report("Feature bits not defined for this type: %d", 108 net->nc->info->type); 109 break; 110 } 111 112 return feature_bits; 113 } 114 115 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) 116 { 117 return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), 118 features); 119 } 120 int vhost_net_get_config(struct vhost_net *net, uint8_t *config, 121 uint32_t config_len) 122 { 123 return vhost_dev_get_config(&net->dev, config, config_len, NULL); 124 } 125 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, 126 uint32_t offset, uint32_t size, uint32_t flags) 127 { 128 return vhost_dev_set_config(&net->dev, data, offset, size, flags); 129 } 130 131 void vhost_net_ack_features(struct vhost_net *net, uint64_t features) 132 { 133 net->dev.acked_features = net->dev.backend_features; 134 vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); 135 } 136 137 uint64_t vhost_net_get_max_queues(VHostNetState *net) 138 { 139 return net->dev.max_queues; 140 } 141 142 uint64_t vhost_net_get_acked_features(VHostNetState *net) 143 { 144 return net->dev.acked_features; 145 } 146 147 void vhost_net_save_acked_features(NetClientState *nc) 148 { 149 #ifdef CONFIG_VHOST_NET_USER 150 if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 151 vhost_user_save_acked_features(nc); 152 } 153 #endif 154 } 155 156 static int vhost_net_get_fd(NetClientState *backend) 157 { 158 switch (backend->info->type) { 159 case NET_CLIENT_DRIVER_TAP: 160 return tap_get_fd(backend); 161 default: 162 fprintf(stderr, "vhost-net requires tap backend\n"); 163 return -ENOSYS; 164 } 165 } 166 167 struct vhost_net *vhost_net_init(VhostNetOptions *options) 168 { 169 int r; 170 bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; 171 struct vhost_net *net = g_new0(struct vhost_net, 1); 172 uint64_t features = 0; 173 Error *local_err = NULL; 174 175 if (!options->net_backend) { 176 fprintf(stderr, "vhost-net requires net backend to be setup\n"); 177 goto fail; 178 } 179 net->nc = options->net_backend; 180 net->dev.nvqs = options->nvqs; 181 182 net->dev.max_queues = 1; 183 net->dev.vqs = net->vqs; 184 185 if (backend_kernel) { 186 r = vhost_net_get_fd(options->net_backend); 187 if (r < 0) { 188 goto fail; 189 } 190 net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend) 191 ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR); 192 net->backend = r; 193 net->dev.protocol_features = 0; 194 } else { 195 net->dev.backend_features = 0; 196 net->dev.protocol_features = 0; 197 net->backend = -1; 198 199 /* vhost-user needs vq_index to initiate a specific queue pair */ 200 net->dev.vq_index = net->nc->queue_index * net->dev.nvqs; 201 } 202 203 r = vhost_dev_init(&net->dev, options->opaque, 204 options->backend_type, options->busyloop_timeout, 205 &local_err); 206 if (r < 0) { 207 error_report_err(local_err); 208 goto fail; 209 } 210 if (backend_kernel) { 211 if (!qemu_has_vnet_hdr_len(options->net_backend, 212 sizeof(struct virtio_net_hdr_mrg_rxbuf))) { 213 net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); 214 } 215 if (~net->dev.features & net->dev.backend_features) { 216 fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 217 " for backend\n", 218 (uint64_t)(~net->dev.features & net->dev.backend_features)); 219 goto fail; 220 } 221 } 222 223 /* Set sane init value. Override when guest acks. */ 224 #ifdef CONFIG_VHOST_NET_USER 225 if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 226 features = vhost_user_get_acked_features(net->nc); 227 if (~net->dev.features & features) { 228 fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 229 " for backend\n", 230 (uint64_t)(~net->dev.features & features)); 231 goto fail; 232 } 233 } 234 #endif 235 236 vhost_net_ack_features(net, features); 237 238 return net; 239 240 fail: 241 vhost_dev_cleanup(&net->dev); 242 g_free(net); 243 return NULL; 244 } 245 246 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index, 247 int vq_index_end) 248 { 249 net->dev.vq_index = vq_index; 250 net->dev.vq_index_end = vq_index_end; 251 } 252 253 static int vhost_net_start_one(struct vhost_net *net, 254 VirtIODevice *dev) 255 { 256 struct vhost_vring_file file = { }; 257 int r; 258 259 if (net->nc->info->start) { 260 r = net->nc->info->start(net->nc); 261 if (r < 0) { 262 return r; 263 } 264 } 265 266 r = vhost_dev_enable_notifiers(&net->dev, dev); 267 if (r < 0) { 268 goto fail_notifiers; 269 } 270 271 r = vhost_dev_start(&net->dev, dev, false); 272 if (r < 0) { 273 goto fail_start; 274 } 275 276 if (net->nc->info->poll) { 277 net->nc->info->poll(net->nc, false); 278 } 279 280 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 281 qemu_set_fd_handler(net->backend, NULL, NULL, NULL); 282 file.fd = net->backend; 283 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 284 if (!virtio_queue_enabled(dev, net->dev.vq_index + 285 file.index)) { 286 /* Queue might not be ready for start */ 287 continue; 288 } 289 r = vhost_net_set_backend(&net->dev, &file); 290 if (r < 0) { 291 r = -errno; 292 goto fail; 293 } 294 } 295 } 296 297 if (net->nc->info->load) { 298 r = net->nc->info->load(net->nc); 299 if (r < 0) { 300 goto fail; 301 } 302 } 303 return 0; 304 fail: 305 file.fd = -1; 306 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 307 while (file.index-- > 0) { 308 if (!virtio_queue_enabled(dev, net->dev.vq_index + 309 file.index)) { 310 /* Queue might not be ready for start */ 311 continue; 312 } 313 int r = vhost_net_set_backend(&net->dev, &file); 314 assert(r >= 0); 315 } 316 } 317 if (net->nc->info->poll) { 318 net->nc->info->poll(net->nc, true); 319 } 320 vhost_dev_stop(&net->dev, dev, false); 321 fail_start: 322 vhost_dev_disable_notifiers(&net->dev, dev); 323 fail_notifiers: 324 return r; 325 } 326 327 static void vhost_net_stop_one(struct vhost_net *net, 328 VirtIODevice *dev) 329 { 330 struct vhost_vring_file file = { .fd = -1 }; 331 332 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 333 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 334 int r = vhost_net_set_backend(&net->dev, &file); 335 assert(r >= 0); 336 } 337 } 338 if (net->nc->info->poll) { 339 net->nc->info->poll(net->nc, true); 340 } 341 vhost_dev_stop(&net->dev, dev, false); 342 if (net->nc->info->stop) { 343 net->nc->info->stop(net->nc); 344 } 345 vhost_dev_disable_notifiers(&net->dev, dev); 346 } 347 348 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, 349 int data_queue_pairs, int cvq) 350 { 351 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 352 VirtioBusState *vbus = VIRTIO_BUS(qbus); 353 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 354 int total_notifiers = data_queue_pairs * 2 + cvq; 355 VirtIONet *n = VIRTIO_NET(dev); 356 int nvhosts = data_queue_pairs + cvq; 357 struct vhost_net *net; 358 int r, e, i, index_end = data_queue_pairs * 2; 359 NetClientState *peer; 360 361 if (cvq) { 362 index_end += 1; 363 } 364 365 if (!k->set_guest_notifiers) { 366 error_report("binding does not support guest notifiers"); 367 return -ENOSYS; 368 } 369 370 for (i = 0; i < nvhosts; i++) { 371 372 if (i < data_queue_pairs) { 373 peer = qemu_get_peer(ncs, i); 374 } else { /* Control Virtqueue */ 375 peer = qemu_get_peer(ncs, n->max_queue_pairs); 376 } 377 378 net = get_vhost_net(peer); 379 vhost_net_set_vq_index(net, i * 2, index_end); 380 381 /* Suppress the masking guest notifiers on vhost user 382 * because vhost user doesn't interrupt masking/unmasking 383 * properly. 384 */ 385 if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 386 dev->use_guest_notifier_mask = false; 387 } 388 } 389 390 r = k->set_guest_notifiers(qbus->parent, total_notifiers, true); 391 if (r < 0) { 392 error_report("Error binding guest notifier: %d", -r); 393 goto err; 394 } 395 396 for (i = 0; i < nvhosts; i++) { 397 if (i < data_queue_pairs) { 398 peer = qemu_get_peer(ncs, i); 399 } else { 400 peer = qemu_get_peer(ncs, n->max_queue_pairs); 401 } 402 403 if (peer->vring_enable) { 404 /* restore vring enable state */ 405 r = vhost_set_vring_enable(peer, peer->vring_enable); 406 407 if (r < 0) { 408 goto err_start; 409 } 410 } 411 412 r = vhost_net_start_one(get_vhost_net(peer), dev); 413 if (r < 0) { 414 goto err_start; 415 } 416 } 417 418 return 0; 419 420 err_start: 421 while (--i >= 0) { 422 peer = qemu_get_peer(ncs, i < data_queue_pairs ? 423 i : n->max_queue_pairs); 424 vhost_net_stop_one(get_vhost_net(peer), dev); 425 } 426 e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); 427 if (e < 0) { 428 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); 429 fflush(stderr); 430 } 431 err: 432 return r; 433 } 434 435 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, 436 int data_queue_pairs, int cvq) 437 { 438 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 439 VirtioBusState *vbus = VIRTIO_BUS(qbus); 440 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 441 VirtIONet *n = VIRTIO_NET(dev); 442 NetClientState *peer; 443 int total_notifiers = data_queue_pairs * 2 + cvq; 444 int nvhosts = data_queue_pairs + cvq; 445 int i, r; 446 447 for (i = 0; i < nvhosts; i++) { 448 if (i < data_queue_pairs) { 449 peer = qemu_get_peer(ncs, i); 450 } else { 451 peer = qemu_get_peer(ncs, n->max_queue_pairs); 452 } 453 vhost_net_stop_one(get_vhost_net(peer), dev); 454 } 455 456 r = k->set_guest_notifiers(qbus->parent, total_notifiers, false); 457 if (r < 0) { 458 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); 459 fflush(stderr); 460 } 461 assert(r >= 0); 462 } 463 464 void vhost_net_cleanup(struct vhost_net *net) 465 { 466 vhost_dev_cleanup(&net->dev); 467 } 468 469 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) 470 { 471 const VhostOps *vhost_ops = net->dev.vhost_ops; 472 473 assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 474 assert(vhost_ops->vhost_migration_done); 475 476 return vhost_ops->vhost_migration_done(&net->dev, mac_addr); 477 } 478 479 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) 480 { 481 return vhost_virtqueue_pending(&net->dev, idx); 482 } 483 484 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, 485 int idx, bool mask) 486 { 487 vhost_virtqueue_mask(&net->dev, dev, idx, mask); 488 } 489 490 bool vhost_net_config_pending(VHostNetState *net) 491 { 492 return vhost_config_pending(&net->dev); 493 } 494 495 void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) 496 { 497 vhost_config_mask(&net->dev, dev, mask); 498 } 499 VHostNetState *get_vhost_net(NetClientState *nc) 500 { 501 VHostNetState *vhost_net = 0; 502 503 if (!nc) { 504 return 0; 505 } 506 507 switch (nc->info->type) { 508 case NET_CLIENT_DRIVER_TAP: 509 vhost_net = tap_get_vhost_net(nc); 510 assert(vhost_net); 511 break; 512 #ifdef CONFIG_VHOST_NET_USER 513 case NET_CLIENT_DRIVER_VHOST_USER: 514 vhost_net = vhost_user_get_vhost_net(nc); 515 assert(vhost_net); 516 break; 517 #endif 518 #ifdef CONFIG_VHOST_NET_VDPA 519 case NET_CLIENT_DRIVER_VHOST_VDPA: 520 vhost_net = vhost_vdpa_get_vhost_net(nc); 521 assert(vhost_net); 522 break; 523 #endif 524 default: 525 break; 526 } 527 528 return vhost_net; 529 } 530 531 int vhost_set_vring_enable(NetClientState *nc, int enable) 532 { 533 VHostNetState *net = get_vhost_net(nc); 534 const VhostOps *vhost_ops = net->dev.vhost_ops; 535 536 nc->vring_enable = enable; 537 538 if (vhost_ops && vhost_ops->vhost_set_vring_enable) { 539 return vhost_ops->vhost_set_vring_enable(&net->dev, enable); 540 } 541 542 return 0; 543 } 544 545 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) 546 { 547 const VhostOps *vhost_ops = net->dev.vhost_ops; 548 549 if (!vhost_ops->vhost_net_set_mtu) { 550 return 0; 551 } 552 553 return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); 554 } 555 556 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc, 557 int vq_index) 558 { 559 VHostNetState *net = get_vhost_net(nc->peer); 560 const VhostOps *vhost_ops = net->dev.vhost_ops; 561 struct vhost_vring_file file = { .fd = -1 }; 562 int idx; 563 564 /* should only be called after backend is connected */ 565 assert(vhost_ops); 566 567 idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); 568 569 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 570 file.index = idx; 571 int r = vhost_net_set_backend(&net->dev, &file); 572 assert(r >= 0); 573 } 574 575 vhost_virtqueue_stop(&net->dev, 576 vdev, 577 net->dev.vqs + idx, 578 net->dev.vq_index + idx); 579 } 580 581 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc, 582 int vq_index) 583 { 584 VHostNetState *net = get_vhost_net(nc->peer); 585 const VhostOps *vhost_ops = net->dev.vhost_ops; 586 struct vhost_vring_file file = { }; 587 int idx, r; 588 589 if (!net->dev.started) { 590 return -EBUSY; 591 } 592 593 /* should only be called after backend is connected */ 594 assert(vhost_ops); 595 596 idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); 597 598 r = vhost_virtqueue_start(&net->dev, 599 vdev, 600 net->dev.vqs + idx, 601 net->dev.vq_index + idx); 602 if (r < 0) { 603 goto err_start; 604 } 605 606 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 607 file.index = idx; 608 file.fd = net->backend; 609 r = vhost_net_set_backend(&net->dev, &file); 610 if (r < 0) { 611 r = -errno; 612 goto err_start; 613 } 614 } 615 616 return 0; 617 618 err_start: 619 error_report("Error when restarting the queue."); 620 621 if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { 622 file.fd = VHOST_FILE_UNBIND; 623 file.index = idx; 624 int r = vhost_net_set_backend(&net->dev, &file); 625 assert(r >= 0); 626 } 627 628 vhost_dev_stop(&net->dev, vdev, false); 629 630 return r; 631 } 632