/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "trace.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Interval of the timer that purges coalesced packets.  This value affects
 * performance a lot and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* Temporary definitions until the standard headers include them */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = virtio_endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = virtio_endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = virtio_endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = virtio_endof(struct virtio_net_config, duplex)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}
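/*
 * Announcement requested from the net layer.  If the migration announce
 * timer is mid-run, let it do the announcing instead; otherwise signal
 * the guest directly, provided it negotiated VIRTIO_NET_F_GUEST_ANNOUNCE
 * and VIRTIO_NET_F_CTRL_VQ.
 */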
static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}
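/*
 * Drop everything the guest has queued for TX and, if anything was
 * dropped, notify it so that the buffers can be reclaimed.
 */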
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx was waiting, we likely have packets in the tx queue
                 * and notification disabled; drop them. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}
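/*
 * Build the reply for the QMP query-rx-filter command: current RX mode,
 * MAC filter table and VLAN table.  This also re-enables the
 * rx-filter-changed event that rxfilter_notify() disabled to avoid
 * event flooding.
 */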
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /* n->nobcast set means broadcast reception is disabled */
    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}
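/*
 * Select the guest header size: with VIRTIO_F_VERSION_1 the header is
 * always struct virtio_net_hdr_mrg_rxbuf, while legacy devices use the
 * short header unless VIRTIO_NET_F_MRG_RXBUF was negotiated.  If the
 * peer can handle that header length itself, align the host header
 * length with the guest's.
 */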
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support a TX queue size
     * larger than the default.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all the features virtio-net could possibly support */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
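/*
 * Fallback feature set for guests that predate feature negotiation:
 * per the comment below, Linux 2.6.25 assumed these bits without
 * acknowledging them.
 */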
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}
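/*
 * Control-virtqueue handler for VIRTIO_NET_CTRL_RX: each command
 * carries a single on/off byte that toggles one receive-mode flag
 * (promiscuous, all-multicast, and so on).
 */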
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;
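    /* The unicast entries are followed by a second virtio_net_ctrl_mac
     * header that describes the multicast entries. */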
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
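        /* Work on a copy of out_sg: iov_discard_front() below modifies
         * the iovec, and the original element must stay intact for
         * virtqueue_push(). */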
        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free
 * so we should provide a mechanism to disable it to avoid polluting the
 * host cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;
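    /* Fill guest RX buffers one virtqueue element at a time; with
     * mergeable buffers a single packet may span several elements. */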
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* The payload length differs between ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}
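/*
 * Hand one cached segment to the regular receive path, first stamping
 * the RSC extension fields (packet and duplicate-ACK counts) into its
 * virtio-net header if it was coalesced.
 */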
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
                        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}
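/*
 * Decide what to do with a segment carrying no new payload: an
 * out-of-window or pure ACK finalizes the flow, a duplicate ACK is
 * counted (the WHQL test caps dup_ack at 1) and finalizes, and a bare
 * window update is coalesced into the cached segment.
 */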
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack: count it, capped at 1 as required by the
             * WHQL test */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data: the payload length field differs
           between v4 and v6, so use the field value to update and record
           the new data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Keep the 'PUSH' flag: the WHQL test guide says 'PUSH' can be
           coalesced for a windows guest, while this may change the
           behavior for a linux guest (only if it uses the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' set should bypass coalescing; packets with other
 * control flags should only be sent after draining the flow, to prevent
 * out-of-order delivery */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }
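    /* Walk the cached segments of this chain looking for one that
     * belongs to the same flow as the incoming packet. */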
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's cached data; this is to avoid out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
                + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
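/*
 * Sanity checks for an IPv6 coalescing candidate: version must be 6,
 * the next header must be plain TCP (any extension header bypasses
 * RSC), the payload length must be credible, and the packet must not
 * carry an ECN mark.
 */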
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both options and protocol are checked by this: any extension
       header makes the next-header field differ from IPPROTO_TCP */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
                + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}
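/*
 * RSC entry point on the receive path: look up (or create) the chain
 * for the packet's ethertype and hand the packet to the IPv4 or IPv6
 * coalescing handler; everything else goes straight to the guest.
 */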
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (n->rsc4_enabled || n->rsc6_enabled) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
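/*
 * Asynchronous TX handshake: virtio_net_flush_tx() below hands packets
 * to qemu_sendv_packet_async() with virtio_net_tx_complete() as the
 * callback. A return of zero means the backend queued the packet; the
 * in-flight element is parked in q->async_tx.elem and notifications
 * stay disabled until the completion above pushes it back and flushes
 * the queue again.
 */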
/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));

    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
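/*
 * Two TX mitigation strategies are selectable with the "tx" property
 * (see the property list at the end of this file): "timer" batches
 * guest kicks for x-txtimer nanoseconds before flushing, while the
 * default "bh" defers the flush to a bottom half. An illustrative
 * invocation (the values are examples only):
 *
 *   -device virtio-net-pci,netdev=net0,tx=timer,x-txtimer=150000
 */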
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
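/*
 * Virtqueue index layout, as assumed by vq2q() and the pair-wise
 * add/del helpers above:
 *   index 2 * i     -> rx queue of pair i
 *   index 2 * i + 1 -> tx queue of pair i
 *   last index      -> ctrl queue
 */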
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}
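/*
 * The WITH_TMP pattern used below works in three steps: pre_save copies
 * the relevant parent fields into the temporary struct, the .fields
 * list streams only the temporary, and pre_load/post_load validate the
 * incoming values before they are applied back to the parent.
 */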
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet *parent;
    VirtIONetQueue *vqs_1;
    uint16_t curr_queues_1;
    uint8_t has_ufo;
    uint32_t has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry. We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */

static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);

        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
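/*
 * Main device section: the stream format is pinned (version_id and
 * minimum_version_id are both VIRTIO_NET_VM_VERSION) and the section is
 * pulled in through vmstate_virtio_net at the bottom of this file via
 * the common VMSTATE_VIRTIO_DEVICE machinery.
 */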
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}
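/*
 * A worked example of the sizing above, assuming the feature_sizes
 * table near the top of this file: with only F_MAC enabled the config
 * space ends right after the mac field, while enabling F_SPEED_DUPLEX
 * extends it through the duplex field (the largest .end among the
 * enabled features wins).
 */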
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * If the name is NULL, the netclient name will be of the form "type.x".
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
    } else if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
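    /*
     * Note the ctrl vq is always the last virtqueue; keeping it after
     * the rx/tx pairs matches the index layout that vq2q() and
     * virtio_net_change_num_queues() rely on.
     */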
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;
}

static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }

    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}
static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_END_OF_LIST(),
};
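/*
 * An illustrative command line exercising some of the properties above
 * (the device name assumes the PCI transport variant; values are
 * examples only, and rx/tx queue sizes must be powers of 2 within the
 * limits checked in virtio_net_device_realize()):
 *
 *   -device virtio-net-pci,netdev=net0,mq=on,rx_queue_size=1024,
 *           tx_queue_size=1024,host_mtu=9000,speed=10000,duplex=full
 */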
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->vmsd = &vmstate_virtio_net_device;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)