/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval.  This value affects performance
   significantly and should be tuned carefully: '300000' (300us) is the
   recommended value to pass the WHQL test, while '50000' gains about 2x
   netperf throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until standard header include it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = virtio_endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = virtio_endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = virtio_endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = virtio_endof(struct virtio_net_config, duplex)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}
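/*
 * Virtqueues are allocated in RX/TX pairs (RX0, TX0, RX1, TX1, ..., with
 * the control queue last), so vq2q() maps a virtqueue index to its queue
 * pair: vq2q(0) == vq2q(1) == 0, vq2q(2) == vq2q(3) == 1, and so on.
 */
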
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
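/*
 * After migration the device raises VIRTIO_NET_S_ANNOUNCE via a config
 * interrupt; a guest that negotiated VIRTIO_NET_F_GUEST_ANNOUNCE sends
 * gratuitous packets and acks with VIRTIO_NET_CTRL_ANNOUNCE_ACK, which
 * clears the bit and arms the next round (see virtio_net_handle_announce()
 * below).
 */
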
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers.  If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}
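/*
 * When needs_vnet_hdr_swap is set, the backend could not be told which
 * endianness to use, so virtio_net_hdr_swap() below byte-swaps the vnet
 * header fields in the RX and TX paths instead (and vhost is not started).
 */
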
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we likely have some packets in the
                 * tx queue and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}
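/*
 * The VLAN filter is a bitmap of 4096 bits stored as 128 32-bit words:
 * VLAN id 'vid' lives in word vid >> 5 at bit vid & 0x1f.  For example,
 * vid 100 is bit 4 of n->vlans[3].  get_vlan_table() above and
 * virtio_net_handle_vlan_table() below both rely on this layout.
 */
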
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all the features virtio-net could possibly support */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
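/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks in
 * a row, each a 32-bit entry count followed by that many 6-byte MACs:
 * first the unicast list, then the multicast list.  Entries that don't fit
 * into the MAC_TABLE_ENTRIES-sized table set the corresponding overflow
 * flag, and receive_filter() then accepts that whole address class.
 */
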
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
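/*
 * Each control-queue request is framed as a virtio_net_ctrl_hdr (class +
 * command) followed by command-specific payload in the driver-readable
 * descriptors, plus one device-writable byte for the VIRTIO_NET_OK /
 * VIRTIO_NET_ERR status.  The out_sg list is copied with g_memdup() below
 * so that iov_discard_front() can consume it without modifying the
 * original element.
 */
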
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free
 * so we should provide a mechanism to disable it to avoid polluting the
 * host cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
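/*
 * The magic offsets above assume an untagged Ethernet frame: bytes 12-13
 * are the ethertype, byte 23 is the IPv4 protocol field, and bytes 34-35
 * are the UDP source port assuming a 20-byte IP header without options
 * (67 = bootps, i.e. a DHCP server reply).
 */
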
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;
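
        /*
         * Each loop iteration fills one descriptor chain.  For mergeable
         * RX buffers, the first chain's num_buffers field is remembered
         * via mhdr_sg and patched after the loop, once the final chain
         * count is known.
         */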
        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}
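/*
 * A note on the '>> 10' above: the TCP data offset sits in the top four
 * bits of th_offset_flags and counts 32-bit words, so the header length
 * in bytes is ((flags & 0xF000) >> 12) << 2, which folds to >> 10.
 */
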
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
                        + sizeof(struct ip6_header)
                        + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}

static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack; bump the dup-ack count (the WHQL test
               expects it to reach at most 1) */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}
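/*
 * Coalescing verdicts used below: RSC_BYPASS hands the packet straight to
 * the guest, RSC_CANDIDATE means it may be merged, RSC_COALESCE means it
 * was merged into a cached segment, RSC_NO_MATCH means try the next cached
 * segment, and RSC_FINAL flushes the cached segment and then delivers the
 * new packet.
 */
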
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data; the payload length field differs
           between v4 and v6, so use the field value to update and record
           the new data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Take over the 'PUSH' flag: the WHQL test guide says 'PUSH' can
           be coalesced for a Windows guest, while this may change the
           behavior for a Linux guest (only if it uses the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with the 'SYN' flag bypass coalescing; packets with any other
 * control flag are sent only after the chain is drained, to prevent
 * reordering */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced; mark the flag so the checksum is recalculated
               for ipv4 when the segment is drained */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's cached data; this avoids delivering segments
 * out of order */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
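/*
 * In the drain calls below, ip_start/ip_size select the source+destination
 * address pair inside the IP header (offset 12, 8 bytes for IPv4; offset 8,
 * 32 bytes for IPv6) and tcp_port points at the 4 bytes holding the TCP
 * source and destination ports, so a flow is matched by comparing the
 * addresses plus the port pair as one 32-bit word.
 */
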
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
                + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both options and protocol are checked by this one test */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
                + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
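/*
 * One VirtioNetRscChain exists per ethertype (IPv4 or IPv6); each keeps
 * its own list of cached segments and its own drain timer, created lazily
 * on the first matching packet.
 */
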
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if ((n->rsc4_enabled || n->rsc6_enabled)) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

/* TX */
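/*
 * Drain up to tx_burst packets from the TX virtqueue.  Returns the number
 * of packets flushed, -EINVAL when the device has been marked broken, or
 * -EBUSY when the backend cannot take more data and completion will come
 * asynchronously via virtio_net_tx_complete().
 */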
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If the host wants to see the guest header as is, we can
         * pass it on unchanged.  Otherwise, copy just the parts
         * that the host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but the timer wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
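
/*
 * Bottom-half TX path: flush one burst, and if a full burst went out,
 * assume more is coming and reschedule immediately; otherwise re-enable
 * guest notifications and do a final flush to catch anything that raced
 * in while notifications were off.
 */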
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
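
/*
 * Virtqueues are laid out as RX/TX pairs followed by the control queue,
 * so data pair i occupies virtqueue indices 2*i and 2*i+1 and the ctrl
 * queue is always last; that is why resizing below removes and re-adds
 * the ctrl queue around the data-queue loops.
 */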
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues.  Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down from the
     * link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
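
/*
 * Predicates used below to gate optional fields in the migration stream:
 * multiqueue-only state, ctrl-guest-offloads state, and the guarded
 * MAC table pair.
 */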
static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}

/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet *parent;
    VirtIONetQueue *vqs_1;
    uint16_t curr_queues_1;
    uint8_t has_ufo;
    uint32_t has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry.  We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */

static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);

        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
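
/*
 * Main device state.  Field order here is the migration stream format:
 * new fields are appended and, where optional, gated with a _TEST
 * predicate or a WITH_TMP wrapper as above.
 */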
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: if it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: this is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}
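
/*
 * Config space is sized to the last field whose feature bit is offered
 * (see feature_sizes), so config_size depends on host_features; the MAC
 * feature is added to the local copy above so the mac field is always
 * included in the size.
 */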
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL; in that case the netclient name will be
     * of the form type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
    } else if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
        && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
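
    /* Pick a default MAC address if the user did not supply one, then
     * create the backend NIC, keeping any name set earlier via
     * virtio_net_set_netclient_name(). */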
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happens when virtio_net_set_netclient_name() has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;
}
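
/*
 * Teardown mirrors realize: stop the backend via a status write, then
 * release queues, timers, and filter tables before the generic virtio
 * cleanup.
 */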
static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }

    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_END_OF_LIST(),
};
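
/*
 * The properties above map directly to -device options.  A hypothetical
 * command line (the "net0" netdev id is a placeholder):
 *   -device virtio-net-pci,netdev=net0,mq=on,rx_queue_size=1024,tx=bh
 */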
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->vmsd = &vmstate_virtio_net_device;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)