/*
 * vhost-net support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "net/net.h"
#include "net/tap.h"
#include "net/vhost-user.h"
#include "net/vhost-vdpa.h"

#include "standard-headers/linux/vhost_types.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"

#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>


#include "standard-headers/linux/virtio_ring.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-bus.h"
#include "linux-headers/linux/vhost.h"


/* Features supported by host kernel. */
static const int kernel_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_NET_F_HASH_REPORT,
    VHOST_INVALID_FEATURE_BIT
};

/* Features supported by others. */
static const int user_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,

    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_HASH_REPORT,

    /* This bit implies RARP isn't sent by QEMU out of band */
    VIRTIO_NET_F_GUEST_ANNOUNCE,

    VIRTIO_NET_F_MQ,

    VHOST_INVALID_FEATURE_BIT
};

static const int *vhost_net_get_feature_bits(struct vhost_net *net)
{
    const int *feature_bits = 0;

    switch (net->nc->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        feature_bits = kernel_feature_bits;
        break;
    case NET_CLIENT_DRIVER_VHOST_USER:
        feature_bits = user_feature_bits;
        break;
#ifdef CONFIG_VHOST_NET_VDPA
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        feature_bits = vdpa_feature_bits;
        break;
#endif
    default:
        error_report("Feature bits not defined for this type: %d",
                     net->nc->info->type);
        break;
    }

    return feature_bits;
}

uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
{
    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
                              features);
}
int vhost_net_get_config(struct vhost_net *net, uint8_t *config,
                         uint32_t config_len)
{
    return vhost_dev_get_config(&net->dev, config, config_len, NULL);
}
int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
                         uint32_t offset, uint32_t size, uint32_t flags)
{
    return vhost_dev_set_config(&net->dev, data, offset, size, flags);
}

void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
    net->dev.acked_features = net->dev.backend_features;
    vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
}

uint64_t vhost_net_get_max_queues(VHostNetState *net)
{
    return net->dev.max_queues;
}

uint64_t vhost_net_get_acked_features(VHostNetState *net)
{
    return net->dev.acked_features;
}

static int vhost_net_get_fd(NetClientState *backend)
{
    switch (backend->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        return tap_get_fd(backend);
    default:
        fprintf(stderr, "vhost-net requires tap backend\n");
        return -ENOSYS;
    }
}

struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
    int r;
    bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
    struct vhost_net *net = g_new0(struct vhost_net, 1);
    uint64_t features = 0;
    Error *local_err = NULL;

    if (!options->net_backend) {
        fprintf(stderr, "vhost-net requires net backend to be setup\n");
        goto fail;
    }
    net->nc = options->net_backend;
    net->dev.nvqs = options->nvqs;

    net->dev.max_queues = 1;
    net->dev.vqs = net->vqs;

    if (backend_kernel) {
        r = vhost_net_get_fd(options->net_backend);
        if (r < 0) {
            goto fail;
        }
        net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
            ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
        net->backend = r;
        net->dev.protocol_features = 0;
    } else {
        net->dev.backend_features = 0;
        net->dev.protocol_features = 0;
        net->backend = -1;

        /* vhost-user needs vq_index to initiate a specific queue pair */
        net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
    }

    r = vhost_dev_init(&net->dev, options->opaque,
                       options->backend_type, options->busyloop_timeout,
                       &local_err);
    if (r < 0) {
        error_report_err(local_err);
        goto fail;
    }
    if (backend_kernel) {
        if (!qemu_has_vnet_hdr_len(options->net_backend,
                                   sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
            net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
        }
        if (~net->dev.features & net->dev.backend_features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & net->dev.backend_features));
            goto fail;
        }
    }

    /* Set sane init value. Override when guest acks. */
#ifdef CONFIG_VHOST_NET_USER
    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        features = vhost_user_get_acked_features(net->nc);
        if (~net->dev.features & features) {
            fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & features));
            goto fail;
        }
    }
#endif

    vhost_net_ack_features(net, features);

    return net;

fail:
    vhost_dev_cleanup(&net->dev);
    g_free(net);
    return NULL;
}

static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
                                   int vq_index_end)
{
    net->dev.vq_index = vq_index;
    net->dev.vq_index_end = vq_index_end;
}

static int vhost_net_start_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { };
    int r;

    if (net->nc->info->start) {
        r = net->nc->info->start(net->nc);
        if (r < 0) {
            return r;
        }
    }

    r = vhost_dev_enable_notifiers(&net->dev, dev);
    if (r < 0) {
        goto fail_notifiers;
    }

    r = vhost_dev_start(&net->dev, dev);
    if (r < 0) {
        goto fail_start;
    }

    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, false);
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
        file.fd = net->backend;
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            r = vhost_net_set_backend(&net->dev, &file);
            if (r < 0) {
                r = -errno;
                goto fail;
            }
        }
    }

    if (net->nc->info->load) {
        r = net->nc->info->load(net->nc);
        if (r < 0) {
            goto fail;
        }
    }
    return 0;
fail:
    file.fd = -1;
    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        while (file.index-- > 0) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            int r = vhost_net_set_backend(&net->dev, &file);
            assert(r >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev);
fail_start:
    vhost_dev_disable_notifiers(&net->dev, dev);
fail_notifiers:
    return r;
}

static void vhost_net_stop_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { .fd = -1 };

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            int r = vhost_net_set_backend(&net->dev, &file);
            assert(r >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev);
    if (net->nc->info->stop) {
        net->nc->info->stop(net->nc);
    }
    vhost_dev_disable_notifiers(&net->dev, dev);
}

int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int total_notifiers = data_queue_pairs * 2 + cvq;
    VirtIONet *n = VIRTIO_NET(dev);
    int nvhosts = data_queue_pairs + cvq;
    struct vhost_net *net;
    int r, e, i, index_end = data_queue_pairs * 2;
    NetClientState *peer;

    if (cvq) {
        index_end += 1;
    }

    if (!k->set_guest_notifiers) {
        error_report("binding does not support guest notifiers");
        return -ENOSYS;
    }

    for (i = 0; i < nvhosts; i++) {

        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else { /* Control Virtqueue */
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        net = get_vhost_net(peer);
        vhost_net_set_vq_index(net, i * 2, index_end);

        /* Suppress the masking guest notifiers on vhost user
         * because vhost user doesn't interrupt masking/unmasking
         * properly.
         */
        if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
            dev->use_guest_notifier_mask = false;
        }
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
    if (r < 0) {
        error_report("Error binding guest notifier: %d", -r);
        goto err;
    }

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }

        if (peer->vring_enable) {
            /* restore vring enable state */
            r = vhost_set_vring_enable(peer, peer->vring_enable);

            if (r < 0) {
                goto err_start;
            }
        }

        r = vhost_net_start_one(get_vhost_net(peer), dev);
        if (r < 0) {
            goto err_start;
        }
    }

    return 0;

err_start:
    while (--i >= 0) {
        peer = qemu_get_peer(ncs, i < data_queue_pairs ?
                                  i : n->max_queue_pairs);
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }
    e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (e < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
        fflush(stderr);
    }
err:
    return r;
}

void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *peer;
    int total_notifiers = data_queue_pairs * 2 + cvq;
    int nvhosts = data_queue_pairs + cvq;
    int i, r;

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    assert(r >= 0);
}

void vhost_net_cleanup(struct vhost_net *net)
{
    vhost_dev_cleanup(&net->dev);
}

int vhost_net_notify_migration_done(struct vhost_net *net, char *mac_addr)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
    assert(vhost_ops->vhost_migration_done);

    return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
}

bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
    return vhost_virtqueue_pending(&net->dev, idx);
}

void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
                              int idx, bool mask)
{
    vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}

VHostNetState *get_vhost_net(NetClientState *nc)
{
    VHostNetState *vhost_net = 0;

    if (!nc) {
        return 0;
    }

    switch (nc->info->type) {
    case NET_CLIENT_DRIVER_TAP:
        vhost_net = tap_get_vhost_net(nc);
        break;
#ifdef CONFIG_VHOST_NET_USER
    case NET_CLIENT_DRIVER_VHOST_USER:
        vhost_net = vhost_user_get_vhost_net(nc);
        assert(vhost_net);
        break;
#endif
#ifdef CONFIG_VHOST_NET_VDPA
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        vhost_net = vhost_vdpa_get_vhost_net(nc);
        assert(vhost_net);
        break;
#endif
    default:
        break;
    }

    return vhost_net;
}

int vhost_set_vring_enable(NetClientState *nc, int enable)
{
    VHostNetState *net = get_vhost_net(nc);
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    nc->vring_enable = enable;

    if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
        return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
    }

    return 0;
}

int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
{
    const VhostOps *vhost_ops = net->dev.vhost_ops;

    if (!vhost_ops->vhost_net_set_mtu) {
        return 0;
    }

    return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
}

void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
                               int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { .fd = -1 };
    int idx;

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        int r = vhost_net_set_backend(&net->dev, &file);
        assert(r >= 0);
    }

    vhost_virtqueue_stop(&net->dev,
                         vdev,
                         net->dev.vqs + idx,
                         net->dev.vq_index + idx);
}

int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
                                int vq_index)
{
    VHostNetState *net = get_vhost_net(nc->peer);
    const VhostOps *vhost_ops = net->dev.vhost_ops;
    struct vhost_vring_file file = { };
    int idx, r;

    if (!net->dev.started) {
        return -EBUSY;
    }

    /* should only be called after backend is connected */
    assert(vhost_ops);

    idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);

    r = vhost_virtqueue_start(&net->dev,
                              vdev,
                              net->dev.vqs + idx,
                              net->dev.vq_index + idx);
    if (r < 0) {
        goto err_start;
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.index = idx;
        file.fd = net->backend;
        r = vhost_net_set_backend(&net->dev, &file);
        if (r < 0) {
            r = -errno;
            goto err_start;
        }
    }

    return 0;

err_start:
    error_report("Error when restarting the queue.");

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        file.fd = VHOST_FILE_UNBIND;
        file.index = idx;
        int r = vhost_net_set_backend(&net->dev, &file);
        assert(r >= 0);
    }

    vhost_dev_stop(&net->dev, vdev);

    return r;
}