/*
 * vhost-net support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "net/net.h"
#include "net/tap.h"
#include "net/vhost-user.h"
#include "net/vhost-vdpa.h"

#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"

#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>


#include "standard-headers/linux/virtio_ring.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-bus.h"
#include "linux-headers/linux/vhost.h"


/* Features supported by host kernel. */
static const int kernel_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_NET_F_HASH_REPORT,
    VHOST_INVALID_FEATURE_BIT
};

/* Features supported by others. */
static const int user_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,

    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_HASH_REPORT,

    /* This bit implies RARP isn't sent by QEMU out of band */
    VIRTIO_NET_F_GUEST_ANNOUNCE,

    VIRTIO_NET_F_MQ,

    VHOST_INVALID_FEATURE_BIT
};

static const int *vhost_net_get_feature_bits(struct vhost_net *net)
{
    const int *feature_bits = 0;

    switch (net->nc->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        feature_bits = kernel_feature_bits;
        break;
    case NET_CLIENT_DRIVER_VHOST_USER:
        feature_bits = user_feature_bits;
        break;
#ifdef CONFIG_VHOST_NET_VDPA
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        feature_bits = vdpa_feature_bits;
        break;
#endif
    default:
        error_report("Feature bits not defined for this type: %d",
                net->nc->info->type);
        break;
    }

    return feature_bits;
}

uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
{
    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
            features);
}
int vhost_net_get_config(struct vhost_net *net, uint8_t *config,
                         uint32_t config_len)
{
    return vhost_dev_get_config(&net->dev, config, config_len, NULL);
}
int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
                         uint32_t offset, uint32_t size, uint32_t flags)
{
    return vhost_dev_set_config(&net->dev, data, offset, size, flags);
}

void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
    net->dev.acked_features = net->dev.backend_features;
    vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
}

uint64_t vhost_net_get_max_queues(VHostNetState *net)
{
    return net->dev.max_queues;
}

uint64_t vhost_net_get_acked_features(VHostNetState *net)
{
    return net->dev.acked_features;
}

static int vhost_net_get_fd(NetClientState *backend)
{
    switch (backend->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        return tap_get_fd(backend);
    default:
        fprintf(stderr, "vhost-net requires tap backend\n");
        return -ENOSYS;
    }
}

struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
    int r;
    bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
    struct vhost_net *net = g_new0(struct vhost_net, 1);
    uint64_t features = 0;
    Error *local_err = NULL;

    if (!options->net_backend) {
        fprintf(stderr, "vhost-net requires net backend to be setup\n");
        goto fail;
    }
    net->nc = options->net_backend;
    net->dev.nvqs = options->nvqs;

    net->dev.max_queues = 1;
    net->dev.vqs = net->vqs;

    if (backend_kernel) {
        r = vhost_net_get_fd(options->net_backend);
        if (r < 0) {
            goto fail;
        }
        net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
            ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
        net->backend = r;
        net->dev.protocol_features = 0;
    } else {
        net->dev.backend_features = 0;
        net->dev.protocol_features = 0;
        net->backend = -1;

        /* vhost-user needs vq_index to initiate a specific queue pair */
        net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
    }

    r = vhost_dev_init(&net->dev, options->opaque,
                       options->backend_type, options->busyloop_timeout,
                       &local_err);
    if (r < 0) {
        error_report_err(local_err);
        goto fail;
    }
    if (backend_kernel) {
        if (!qemu_has_vnet_hdr_len(options->net_backend,
                                   sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
            net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
        }
        if (~net->dev.features & net->dev.backend_features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & net->dev.backend_features));
            goto fail;
        }
    }

    /* Set sane init value. Override when guest acks. */
#ifdef CONFIG_VHOST_NET_USER
    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        features = vhost_user_get_acked_features(net->nc);
        if (~net->dev.features & features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & features));
            goto fail;
        }
    }
#endif

    vhost_net_ack_features(net, features);

    return net;

fail:
    vhost_dev_cleanup(&net->dev);
    g_free(net);
    return NULL;
}

static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
                                   int vq_index_end)
{
    net->dev.vq_index = vq_index;
    net->dev.vq_index_end = vq_index_end;
}

static int vhost_net_start_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { };
    int r;

    if (net->nc->info->start) {
        r = net->nc->info->start(net->nc);
        if (r < 0) {
            return r;
        }
    }

    r = vhost_dev_enable_notifiers(&net->dev, dev);
    if (r < 0) {
        goto fail_notifiers;
    }

    r = vhost_dev_start(&net->dev, dev, false);
    if (r < 0) {
        goto fail_start;
    }

    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, false);
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
        file.fd = net->backend;
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            r = vhost_net_set_backend(&net->dev, &file);
            if (r < 0) {
                r = -errno;
                goto fail;
            }
        }
    }

    if (net->nc->info->load) {
        r = net->nc->info->load(net->nc);
        if (r < 0) {
            goto fail;
        }
    }
    return 0;
fail:
    file.fd = -1;
    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        while (file.index-- > 0) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            int r = vhost_net_set_backend(&net->dev, &file);
            assert(r >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
fail_start:
    vhost_dev_disable_notifiers(&net->dev, dev);
fail_notifiers:
    return r;
}

static void vhost_net_stop_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { .fd = -1 };

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            int r = vhost_net_set_backend(&net->dev, &file);
            assert(r >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev, false);
    if (net->nc->info->stop) {
        net->nc->info->stop(net->nc);
    }
    vhost_dev_disable_notifiers(&net->dev, dev);
}

int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int total_notifiers = data_queue_pairs * 2 + cvq;
    VirtIONet *n = VIRTIO_NET(dev);
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    int r, e, i, index_end = data_queue_pairs * 2;
    NetClientState *peer;

    if (cvq) {
        index_end += 1;
    }

    if (!k->set_guest_notifiers) {
        error_report("binding does not support guest notifiers");
        return -ENOSYS;
    }

    for (i = 0; i < nvhosts; i++) {

        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else { /* Control Virtqueue */
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        vhost_net_set_vq_index(net, i * 2, index_end);

        /* Suppress the masking guest notifiers on vhost user
         * because vhost user doesn't interrupt masking/unmasking
         * properly.
         */
        if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
            dev->use_guest_notifier_mask = false;
        }
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
    if (r < 0) {
        error_report("Error binding guest notifier: %d", -r);
        goto err;
    }

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        if (peer->vring_enable) {
            /* restore vring enable state */
            r = vhost_set_vring_enable(peer, peer->vring_enable);

            if (r < 0) {
                goto err_start;
            }
        }

        r = vhost_net_start_one(get_vhost_net(peer), dev);
        if (r < 0) {
            goto err_start;
        }
    }

    return 0;

err_start:
    while (--i >= 0) {
        peer = qemu_get_peer(ncs, i < data_queue_pairs ?
                                  i : n->max_queue_pairs);
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }
    e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (e < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
        fflush(stderr);
    }
err:
    return r;
}

void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *peer;
    int total_notifiers = data_queue_pairs * 2 + cvq;
    int nvhosts = data_queue_pairs + cvq;
    int i, r;

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    assert(r >= 0);
}

void vhost_net_cleanup(struct vhost_net *net)
{
    vhost_dev_cleanup(&net->dev);
}

int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
    assert(vhost_ops->vhost_migration_done);

    return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
}

bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
    return vhost_virtqueue_pending(&net->dev, idx);
}

void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
                              int idx, bool mask)
{
    vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}

VHostNetState *get_vhost_net(NetClientState *nc)
{
    VHostNetState *vhost_net = 0;

    if (!nc) {
        return 0;
    }

    switch (nc->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        vhost_net = tap_get_vhost_net(nc);
        break;
#ifdef CONFIG_VHOST_NET_USER
    case NET_CLIENT_DRIVER_VHOST_USER:
        vhost_net = vhost_user_get_vhost_net(nc);
        assert(vhost_net);
        break;
#endif
#ifdef CONFIG_VHOST_NET_VDPA
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        vhost_net = vhost_vdpa_get_vhost_net(nc);
        assert(vhost_net);
        break;
#endif
    default:
        break;
    }

    return vhost_net;
}

int vhost_set_vring_enable(NetClientState *nc, int enable)
{
    VHostNetState *net = get_vhost_net(nc);
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    nc->vring_enable = enable;

    if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
        return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
    }

    return 0;
}

int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    if (!vhost_ops->vhost_net_set_mtu) {
        return 0;
    }

    return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
}

void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
                               int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { .fd = -1 };
    int idx;

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        int r = vhost_net_set_backend(&net->dev, &file);
        assert(r >= 0);
    }

    vhost_virtqueue_stop(&net->dev,
                         vdev,
                         net->dev.vqs + idx,
                         net->dev.vq_index + idx);
}

int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
                                int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { };
    int idx, r;

    if (!net->dev.started) {
        return -EBUSY;
    }

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    r = vhost_virtqueue_start(&net->dev,
                              vdev,
                              net->dev.vqs + idx,
                              net->dev.vq_index + idx);
    if (r < 0) {
        goto err_start;
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        file.fd = net->backend;
        r = vhost_net_set_backend(&net->dev, &file);
        if (r < 0) {
            r = -errno;
            goto err_start;
        }
    }

    return 0;

err_start:
    error_report("Error when restarting the queue.");

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.fd = VHOST_FILE_UNBIND;
        file.index = idx;
        int r = vhost_net_set_backend(&net->dev, &file);
        assert(r >= 0);
    }

    vhost_dev_stop(&net->dev, vdev, false);

    return r;
}