1 /* 2 * vhost-net support 3 * 4 * Copyright Red Hat, Inc. 2010 5 * 6 * Authors: 7 * Michael S. Tsirkin <mst@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu/osdep.h" 17 #include "net/net.h" 18 #include "net/tap.h" 19 #include "net/vhost-user.h" 20 21 #include "hw/virtio/virtio-net.h" 22 #include "net/vhost_net.h" 23 #include "qemu/error-report.h" 24 25 26 #ifdef CONFIG_VHOST_NET 27 #include <linux/vhost.h> 28 #include <sys/socket.h> 29 #include <linux/kvm.h> 30 #include <netpacket/packet.h> 31 #include <net/ethernet.h> 32 #include <net/if.h> 33 #include <netinet/in.h> 34 35 36 #include "standard-headers/linux/virtio_ring.h" 37 #include "hw/virtio/vhost.h" 38 #include "hw/virtio/virtio-bus.h" 39 40 struct vhost_net { 41 struct vhost_dev dev; 42 struct vhost_virtqueue vqs[2]; 43 int backend; 44 NetClientState *nc; 45 }; 46 47 /* Features supported by host kernel. */ 48 static const int kernel_feature_bits[] = { 49 VIRTIO_F_NOTIFY_ON_EMPTY, 50 VIRTIO_RING_F_INDIRECT_DESC, 51 VIRTIO_RING_F_EVENT_IDX, 52 VIRTIO_NET_F_MRG_RXBUF, 53 VIRTIO_F_VERSION_1, 54 VHOST_INVALID_FEATURE_BIT 55 }; 56 57 /* Features supported by others. */ 58 static const int user_feature_bits[] = { 59 VIRTIO_F_NOTIFY_ON_EMPTY, 60 VIRTIO_RING_F_INDIRECT_DESC, 61 VIRTIO_RING_F_EVENT_IDX, 62 63 VIRTIO_F_ANY_LAYOUT, 64 VIRTIO_F_VERSION_1, 65 VIRTIO_NET_F_CSUM, 66 VIRTIO_NET_F_GUEST_CSUM, 67 VIRTIO_NET_F_GSO, 68 VIRTIO_NET_F_GUEST_TSO4, 69 VIRTIO_NET_F_GUEST_TSO6, 70 VIRTIO_NET_F_GUEST_ECN, 71 VIRTIO_NET_F_GUEST_UFO, 72 VIRTIO_NET_F_HOST_TSO4, 73 VIRTIO_NET_F_HOST_TSO6, 74 VIRTIO_NET_F_HOST_ECN, 75 VIRTIO_NET_F_HOST_UFO, 76 VIRTIO_NET_F_MRG_RXBUF, 77 78 /* This bit implies RARP isn't sent by QEMU out of band */ 79 VIRTIO_NET_F_GUEST_ANNOUNCE, 80 81 VIRTIO_NET_F_MQ, 82 83 VHOST_INVALID_FEATURE_BIT 84 }; 85 86 static const int *vhost_net_get_feature_bits(struct vhost_net *net) 87 { 88 const int *feature_bits = 0; 89 90 switch (net->nc->info->type) { 91 case NET_CLIENT_OPTIONS_KIND_TAP: 92 feature_bits = kernel_feature_bits; 93 break; 94 case NET_CLIENT_OPTIONS_KIND_VHOST_USER: 95 feature_bits = user_feature_bits; 96 break; 97 default: 98 error_report("Feature bits not defined for this type: %d", 99 net->nc->info->type); 100 break; 101 } 102 103 return feature_bits; 104 } 105 106 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) 107 { 108 return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), 109 features); 110 } 111 112 void vhost_net_ack_features(struct vhost_net *net, uint64_t features) 113 { 114 net->dev.acked_features = net->dev.backend_features; 115 vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); 116 } 117 118 uint64_t vhost_net_get_max_queues(VHostNetState *net) 119 { 120 return net->dev.max_queues; 121 } 122 123 uint64_t vhost_net_get_acked_features(VHostNetState *net) 124 { 125 return net->dev.acked_features; 126 } 127 128 static int vhost_net_get_fd(NetClientState *backend) 129 { 130 switch (backend->info->type) { 131 case NET_CLIENT_OPTIONS_KIND_TAP: 132 return tap_get_fd(backend); 133 default: 134 fprintf(stderr, "vhost-net requires tap backend\n"); 135 return -EBADFD; 136 } 137 } 138 139 struct vhost_net *vhost_net_init(VhostNetOptions *options) 140 { 141 int r; 142 bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; 143 struct vhost_net *net = g_malloc(sizeof *net); 144 uint64_t features = 0; 145 146 if (!options->net_backend) { 147 fprintf(stderr, "vhost-net requires net backend to be setup\n"); 148 goto fail; 149 } 150 net->nc = options->net_backend; 151 152 net->dev.max_queues = 1; 153 net->dev.nvqs = 2; 154 net->dev.vqs = net->vqs; 155 156 if (backend_kernel) { 157 r = vhost_net_get_fd(options->net_backend); 158 if (r < 0) { 159 goto fail; 160 } 161 net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend) 162 ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR); 163 net->backend = r; 164 net->dev.protocol_features = 0; 165 } else { 166 net->dev.backend_features = 0; 167 net->dev.protocol_features = 0; 168 net->backend = -1; 169 170 /* vhost-user needs vq_index to initiate a specific queue pair */ 171 net->dev.vq_index = net->nc->queue_index * net->dev.nvqs; 172 } 173 174 r = vhost_dev_init(&net->dev, options->opaque, 175 options->backend_type, options->busyloop_timeout); 176 if (r < 0) { 177 goto fail; 178 } 179 if (backend_kernel) { 180 if (!qemu_has_vnet_hdr_len(options->net_backend, 181 sizeof(struct virtio_net_hdr_mrg_rxbuf))) { 182 net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); 183 } 184 if (~net->dev.features & net->dev.backend_features) { 185 fprintf(stderr, "vhost lacks feature mask %" PRIu64 186 " for backend\n", 187 (uint64_t)(~net->dev.features & net->dev.backend_features)); 188 vhost_dev_cleanup(&net->dev); 189 goto fail; 190 } 191 } 192 193 /* Set sane init value. Override when guest acks. */ 194 if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) { 195 features = vhost_user_get_acked_features(net->nc); 196 if (~net->dev.features & features) { 197 fprintf(stderr, "vhost lacks feature mask %" PRIu64 198 " for backend\n", 199 (uint64_t)(~net->dev.features & features)); 200 vhost_dev_cleanup(&net->dev); 201 goto fail; 202 } 203 } 204 205 vhost_net_ack_features(net, features); 206 207 return net; 208 fail: 209 g_free(net); 210 return NULL; 211 } 212 213 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index) 214 { 215 net->dev.vq_index = vq_index; 216 } 217 218 static int vhost_net_start_one(struct vhost_net *net, 219 VirtIODevice *dev) 220 { 221 struct vhost_vring_file file = { }; 222 int r; 223 224 net->dev.nvqs = 2; 225 net->dev.vqs = net->vqs; 226 227 r = vhost_dev_enable_notifiers(&net->dev, dev); 228 if (r < 0) { 229 goto fail_notifiers; 230 } 231 232 r = vhost_dev_start(&net->dev, dev); 233 if (r < 0) { 234 goto fail_start; 235 } 236 237 if (net->nc->info->poll) { 238 net->nc->info->poll(net->nc, false); 239 } 240 241 if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { 242 qemu_set_fd_handler(net->backend, NULL, NULL, NULL); 243 file.fd = net->backend; 244 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 245 const VhostOps *vhost_ops = net->dev.vhost_ops; 246 r = vhost_ops->vhost_net_set_backend(&net->dev, &file); 247 if (r < 0) { 248 r = -errno; 249 goto fail; 250 } 251 } 252 } 253 return 0; 254 fail: 255 file.fd = -1; 256 if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { 257 while (file.index-- > 0) { 258 const VhostOps *vhost_ops = net->dev.vhost_ops; 259 int r = vhost_ops->vhost_net_set_backend(&net->dev, &file); 260 assert(r >= 0); 261 } 262 } 263 if (net->nc->info->poll) { 264 net->nc->info->poll(net->nc, true); 265 } 266 vhost_dev_stop(&net->dev, dev); 267 fail_start: 268 vhost_dev_disable_notifiers(&net->dev, dev); 269 fail_notifiers: 270 return r; 271 } 272 273 static void vhost_net_stop_one(struct vhost_net *net, 274 VirtIODevice *dev) 275 { 276 struct vhost_vring_file file = { .fd = -1 }; 277 278 if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { 279 for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { 280 const VhostOps *vhost_ops = net->dev.vhost_ops; 281 int r = vhost_ops->vhost_net_set_backend(&net->dev, &file); 282 assert(r >= 0); 283 } 284 } 285 if (net->nc->info->poll) { 286 net->nc->info->poll(net->nc, true); 287 } 288 vhost_dev_stop(&net->dev, dev); 289 vhost_dev_disable_notifiers(&net->dev, dev); 290 } 291 292 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, 293 int total_queues) 294 { 295 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 296 VirtioBusState *vbus = VIRTIO_BUS(qbus); 297 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 298 int r, e, i; 299 300 if (!k->set_guest_notifiers) { 301 error_report("binding does not support guest notifiers"); 302 return -ENOSYS; 303 } 304 305 for (i = 0; i < total_queues; i++) { 306 struct vhost_net *net; 307 308 net = get_vhost_net(ncs[i].peer); 309 vhost_net_set_vq_index(net, i * 2); 310 311 /* Suppress the masking guest notifiers on vhost user 312 * because vhost user doesn't interrupt masking/unmasking 313 * properly. 314 */ 315 if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) { 316 dev->use_guest_notifier_mask = false; 317 } 318 } 319 320 r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true); 321 if (r < 0) { 322 error_report("Error binding guest notifier: %d", -r); 323 goto err; 324 } 325 326 for (i = 0; i < total_queues; i++) { 327 r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev); 328 329 if (r < 0) { 330 goto err_start; 331 } 332 333 if (ncs[i].peer->vring_enable) { 334 /* restore vring enable state */ 335 r = vhost_set_vring_enable(ncs[i].peer, ncs[i].peer->vring_enable); 336 337 if (r < 0) { 338 goto err_start; 339 } 340 } 341 } 342 343 return 0; 344 345 err_start: 346 while (--i >= 0) { 347 vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); 348 } 349 e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false); 350 if (e < 0) { 351 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); 352 fflush(stderr); 353 } 354 err: 355 return r; 356 } 357 358 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, 359 int total_queues) 360 { 361 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); 362 VirtioBusState *vbus = VIRTIO_BUS(qbus); 363 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); 364 int i, r; 365 366 for (i = 0; i < total_queues; i++) { 367 vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); 368 } 369 370 r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false); 371 if (r < 0) { 372 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); 373 fflush(stderr); 374 } 375 assert(r >= 0); 376 } 377 378 void vhost_net_cleanup(struct vhost_net *net) 379 { 380 vhost_dev_cleanup(&net->dev); 381 g_free(net); 382 } 383 384 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) 385 { 386 const VhostOps *vhost_ops = net->dev.vhost_ops; 387 int r = -1; 388 389 if (vhost_ops->vhost_migration_done) { 390 r = vhost_ops->vhost_migration_done(&net->dev, mac_addr); 391 } 392 393 return r; 394 } 395 396 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) 397 { 398 return vhost_virtqueue_pending(&net->dev, idx); 399 } 400 401 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, 402 int idx, bool mask) 403 { 404 vhost_virtqueue_mask(&net->dev, dev, idx, mask); 405 } 406 407 VHostNetState *get_vhost_net(NetClientState *nc) 408 { 409 VHostNetState *vhost_net = 0; 410 411 if (!nc) { 412 return 0; 413 } 414 415 switch (nc->info->type) { 416 case NET_CLIENT_OPTIONS_KIND_TAP: 417 vhost_net = tap_get_vhost_net(nc); 418 break; 419 case NET_CLIENT_OPTIONS_KIND_VHOST_USER: 420 vhost_net = vhost_user_get_vhost_net(nc); 421 break; 422 default: 423 break; 424 } 425 426 return vhost_net; 427 } 428 429 int vhost_set_vring_enable(NetClientState *nc, int enable) 430 { 431 VHostNetState *net = get_vhost_net(nc); 432 const VhostOps *vhost_ops; 433 434 nc->vring_enable = enable; 435 436 if (!net) { 437 return 0; 438 } 439 440 vhost_ops = net->dev.vhost_ops; 441 if (vhost_ops->vhost_set_vring_enable) { 442 return vhost_ops->vhost_set_vring_enable(&net->dev, enable); 443 } 444 445 return 0; 446 } 447 448 #else 449 uint64_t vhost_net_get_max_queues(VHostNetState *net) 450 { 451 return 1; 452 } 453 454 struct vhost_net *vhost_net_init(VhostNetOptions *options) 455 { 456 error_report("vhost-net support is not compiled in"); 457 return NULL; 458 } 459 460 int vhost_net_start(VirtIODevice *dev, 461 NetClientState *ncs, 462 int total_queues) 463 { 464 return -ENOSYS; 465 } 466 void vhost_net_stop(VirtIODevice *dev, 467 NetClientState *ncs, 468 int total_queues) 469 { 470 } 471 472 void vhost_net_cleanup(struct vhost_net *net) 473 { 474 } 475 476 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) 477 { 478 return features; 479 } 480 481 void vhost_net_ack_features(struct vhost_net *net, uint64_t features) 482 { 483 } 484 485 uint64_t vhost_net_get_acked_features(VHostNetState *net) 486 { 487 return 0; 488 } 489 490 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) 491 { 492 return false; 493 } 494 495 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, 496 int idx, bool mask) 497 { 498 } 499 500 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) 501 { 502 return -1; 503 } 504 505 VHostNetState *get_vhost_net(NetClientState *nc) 506 { 507 return 0; 508 } 509 510 int vhost_set_vring_enable(NetClientState *nc, int enable) 511 { 512 return 0; 513 } 514 #endif 515