/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE    8  /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32  /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval.  This value affects performance
   a lot and should be tuned carefully: '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until the standard header includes it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = virtio_endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = virtio_endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = virtio_endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = virtio_endof(struct virtio_net_config, duplex)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */
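/*
 * Virtqueue layout: each rx/tx queue pair occupies two consecutive vq
 * indices (rx = 2 * pair, tx = 2 * pair + 1) with the control vq last,
 * which is why vq2q() simply divides the vq index by two.
 */

/*
 * Device config space accessors.  Fields are stored with virtio_stw_p()/
 * virtio_stl_p() in the endianness negotiated with the driver, and only
 * the first n->config_size bytes are exposed; config_size is derived from
 * the device's feature bits via feature_sizes[] above.
 */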
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->announce_counter--;
    n->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}
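/*
 * Hand the datapath over to the vhost backend (or take it back) whenever
 * the combination of driver status, link state and VM run state changes.
 * Pending packets are purged first so that QEMU never touches the rings
 * while vhost owns them.
 */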
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%u byte MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers.  If the backend
         * can't do it, we fall back onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}
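/*
 * Drop everything the guest has queued on a tx virtqueue and, if anything
 * was dropped, signal the guest so it can reclaim the buffers.  Used when
 * the link is down and transmitting would be pointless.
 */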
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we likely have some packets in the tx
                 * queue and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}
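/*
 * Build the RxFilterInfo returned by the QMP query-rx-filter command and
 * re-arm the NIC_RX_FILTER_CHANGED event, which rxfilter_notify() disables
 * after each emission to avoid flooding the monitor.
 */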
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /* nobcast means broadcast is filtered out, so report the negation */
    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer);
    n->announce_counter = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}
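/*
 * Decide which virtio-net header the guest will see.  Legacy devices use
 * the 10-byte virtio_net_hdr unless mergeable rx buffers were negotiated;
 * virtio-1 always uses the 12-byte virtio_net_hdr_mrg_rxbuf layout.  When
 * the backend can be told the header length, host and guest headers match
 * and packets can be passed through without reshuffling.
 */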
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
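/*
 * Feature negotiation: advertise the host feature set, then mask out
 * offloads the peer (e.g. a tap device without vnet header support)
 * cannot deliver, and finally let the vhost backend trim the set further.
 */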
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First sync all possible supported virtio-net features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}
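/*
 * Control virtqueue command handlers.  Each request starts with a
 * virtio_net_ctrl_hdr (class, cmd) followed by command-specific data in
 * the guest's out sg list; the handlers below parse that payload, and the
 * single-byte VIRTIO_NET_OK/VIRTIO_NET_ERR ack is written back by
 * virtio_net_handle_ctrl().
 */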
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
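/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks in
 * a row: unicast entries first, then multicast.  The combined table is
 * capped at MAC_TABLE_ENTRIES; whichever half overflows is recorded so
 * that receive_filter() falls back to accepting that whole class.
 */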
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_counter) {
            timer_mod(n->announce_timer,
                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                      self_announce_delay(n->announce_counter));
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
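/*
 * Dispatch loop for the control virtqueue.  The out sg list is duplicated
 * (g_memdup) because iov_discard_front() modifies the vector as headers
 * are consumed, and the original element is still needed to push the ack.
 */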
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg,
                              sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free
 * so we should provide a mechanism to disable it to avoid polluting the
 * host cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
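/*
 * Decide whether a packet is accepted: promiscuous mode short-circuits
 * everything; otherwise the VLAN bitmap is consulted first, then the
 * broadcast/multicast/unicast policy flags and the programmed MAC table.
 * Returns 1 to accept the packet and 0 to drop it.
 */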
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
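/*
 * Fill guest rx buffers with one packet.  With mergeable rx buffers a
 * packet may span several virtqueue elements; the location of the
 * num_buffers field in the first element is remembered (mhdr_sg) and
 * patched once the final element count is known.  Returns the packet size
 * when the packet was consumed (or dropped by the filter), 0 when there is
 * no room and the packet should be retried later, or -1 on error.
 */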
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}
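/*
 * Receive Segment Coalescing (RSC) support.  When the guest negotiates
 * VIRTIO_NET_F_RSC_EXT, consecutive in-order TCP segments of the same
 * flow are merged into one larger segment before being handed to the
 * guest; per-protocol chains (IPv4/IPv6) cache candidate segments and a
 * timer drains whatever could not be coalesced in time.
 */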
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
                        + sizeof(struct ip6_header)
                        + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}
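/*
 * Classify a segment that carries no new payload against a cached one:
 * an ack outside the window or a pure/duplicate ack finalizes the cached
 * segment, while a bare window update can still be folded in.
 */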
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack; count it (the whql test expects the dup ack
               count to go up to 1) */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data; the payload length in v4/v6 is
           different, so use the field value to update and record the new
           data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring the 'PUSH' bit along: the whql test guide says 'PUSH' can
           be coalesced for windows guests, while this may change the
           behavior for a linux guest (only if it uses the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
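/*
 * Flow matching for coalescing: XOR-compare the address pair and the TCP
 * port pair, so any difference yields a non-zero value and the segment is
 * tried against the next cached entry.
 */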
static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' should bypass; any other flag should be sent after
 * drain to prevent out-of-order delivery */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced; mark the flag so cksum is recalculated for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
/* Drain a connection's data; this is to avoid out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip options */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle ip fragments */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with the ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
                + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
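/*
 * IPv6 counterpart of the sanity check above.  Extension headers are not
 * parsed: any next-header value other than TCP (including hop-by-hop or
 * fragment headers) simply bypasses coalescing.
 */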
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both options and protocol are checked by this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with the ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
                + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}
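/*
 * RSC entry point from the net layer: peek at the ethertype, find (or
 * create) the matching per-protocol chain and let it decide whether to
 * coalesce; anything else goes straight to the normal receive path.
 */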
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if ((n->rsc4_enabled || n->rsc6_enabled)) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

/* TX */
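/*
 * Flush up to n->tx_burst packets from a tx virtqueue.  Headers are
 * byte-swapped or partially copied as needed before the sg list is handed
 * to qemu_sendv_packet_async(); if the backend cannot take the packet yet
 * the element is parked in async_tx and -EBUSY is returned, with
 * virtio_net_tx_complete() resuming the flush later.
 */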
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If the host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that the host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
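
/*
 * A worked example of the header split in virtio_net_flush_tx() above,
 * assuming a guest that negotiated mergeable RX buffers: guest_hdr_len
 * is then sizeof(struct virtio_net_hdr_mrg_rxbuf) (12 bytes), while a
 * tap backend only consumes sizeof(struct virtio_net_hdr) (10 bytes).
 * The first iov_copy() keeps bytes [0, host_hdr_len) of the element,
 * the second skips forward to guest_hdr_len and chains the rest of the
 * packet, so the two guest-only num_buffers bytes never reach the
 * backend.  A return of 0 from qemu_sendv_packet_async() means the
 * packet was queued and virtio_net_tx_complete() will be called later,
 * which is why the element is parked in q->async_tx.elem and -EBUSY is
 * propagated instead of pushing it back immediately.
 */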
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but the timer wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
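
/*
 * The four functions above implement the two TX mitigation schemes
 * selected with the tx= property (wired up in virtio_net_add_queue()
 * below): "timer" delays the flush by x-txtimer nanoseconds, while "bh"
 * defers it to a bottom half and bounds each run of
 * virtio_net_flush_tx() to x-txburst packets.  For illustration only;
 * the netdev id and the values are arbitrary:
 *
 *   -device virtio-net-pci,netdev=nd0,tx=timer,x-txtimer=150000
 *   -device virtio-net-pci,netdev=nd0,tx=bh,x-txburst=64
 */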
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}

static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}
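
/*
 * The index arithmetic above relies on the fixed virtqueue layout that
 * vq2q() assumes: the RX queue of pair i sits at index 2 * i, its TX
 * queue at 2 * i + 1, and the control queue always comes last.  With
 * max_queues == 2, for example:
 *
 *   vq 0: rx0,  vq 1: tx0,  vq 2: rx1,  vq 3: tx1,  vq 4: ctrl
 *
 * which is why virtio_net_change_num_queues() must delete ctrl_vq first
 * and re-add it after growing or shrinking the pairs.
 */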
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to the link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        n->announce_counter = SELF_ANNOUNCE_ROUNDS;
        timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}
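
/*
 * The helpers above are field_exists() predicates for the device
 * vmstate below: a field guarded by mac_table_fits() is only read when
 * the incoming MAC table fits into our MAC_TABLE_ENTRIES, and the
 * multiqueue fields are only put on the wire when max_queues_gt_1()
 * holds, which keeps the stream compatible with single-queue
 * configurations.
 */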
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet *parent;
    VirtIONetQueue *vqs_1;
    uint16_t curr_queues_1;
    uint8_t has_ufo;
    uint32_t has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry. We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */

static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);

        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
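
/*
 * Note that vmstate_virtio_net_device below pins version_id and
 * minimum_version_id to the same VIRTIO_NET_VM_VERSION, so there is no
 * in-place version branching on load; optional state is expressed
 * purely through the _TEST guards and the VMSTATE_WITH_TMP wrappers
 * defined above.
 */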
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if the source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * built on top of those uint32 words.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL; in that case the netclient name will be
     * of the form type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}
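
/*
 * A minimal sketch of how a transport is expected to use
 * virtio_net_set_netclient_name() before realize; the "net0" id is a
 * hypothetical example, not taken from a real caller:
 *
 *   VirtIONet *n = ...;
 *   virtio_net_set_netclient_name(n, "net0", "virtio-net-pci");
 *
 * If this is never called, realize falls back to object_get_typename()
 * and dev->id when creating the NIC.
 */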
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
    } else if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                     virtio_net_announce_timer, n);

    if (n->netclient_type) {
        /*
         * Happens when virtio_net_set_netclient_name() has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;
}
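
/*
 * The ring-size checks in realize above accept only powers of two
 * between the 256-entry minimum and VIRTQUEUE_MAX_SIZE, so (values for
 * illustration, netdev id hypothetical):
 *
 *   -device virtio-net-pci,netdev=nd0,rx_queue_size=1024,tx_queue_size=512
 *
 * is accepted, while rx_queue_size=100 is rejected with
 * "Invalid rx_queue_size".
 */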
static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }

    timer_del(n->announce_timer);
    timer_free(n->announce_timer);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, the backend must be stopped, otherwise
     * it might keep writing to memory.
     */
    assert(!n->vhost_started);

    return 0;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
};
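
/*
 * Each DEFINE_PROP_BIT64() entry below exposes one host feature bit as
 * a device property, so individual offloads can be disabled per device,
 * e.g. (illustrative only):
 *
 *   -device virtio-net-pci,netdev=nd0,guest_tso4=off,guest_csum=off
 *
 * The array is attached to the device class via dc->props in
 * virtio_net_class_init() below.
 */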
static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->vmsd = &vmstate_virtio_net_device;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)